aboutsummaryrefslogtreecommitdiff
path: root/Minecraft.Client/PS3/PS3Extras/DirectX
diff options
context:
space:
mode:
authordaoge_cmd <3523206925@qq.com>2026-03-01 12:16:08 +0800
committerdaoge_cmd <3523206925@qq.com>2026-03-01 12:16:08 +0800
commitb691c43c44ff180d10e7d4a9afc83b98551ff586 (patch)
tree3e9849222cbc6ba49f2f1fc6e5fe7179632c7390 /Minecraft.Client/PS3/PS3Extras/DirectX
parentdef8cb415354ac390b7e89052a50605285f1aca9 (diff)
Initial commit
Diffstat (limited to 'Minecraft.Client/PS3/PS3Extras/DirectX')
-rw-r--r--Minecraft.Client/PS3/PS3Extras/DirectX/DirectXCollision.h339
-rw-r--r--Minecraft.Client/PS3/PS3Extras/DirectX/DirectXCollision.inl4801
-rw-r--r--Minecraft.Client/PS3/PS3Extras/DirectX/DirectXColors.h168
-rw-r--r--Minecraft.Client/PS3/PS3Extras/DirectX/DirectXMath.h1861
-rw-r--r--Minecraft.Client/PS3/PS3Extras/DirectX/DirectXMathConvert.inl1962
-rw-r--r--Minecraft.Client/PS3/PS3Extras/DirectX/DirectXMathMatrix.inl3414
-rw-r--r--Minecraft.Client/PS3/PS3Extras/DirectX/DirectXMathMisc.inl2501
-rw-r--r--Minecraft.Client/PS3/PS3Extras/DirectX/DirectXMathVector.inl10596
-rw-r--r--Minecraft.Client/PS3/PS3Extras/DirectX/DirectXPackedVector.h995
-rw-r--r--Minecraft.Client/PS3/PS3Extras/DirectX/DirectXPackedVector.inl3545
-rw-r--r--Minecraft.Client/PS3/PS3Extras/DirectX/no_sal2.h1022
-rw-r--r--Minecraft.Client/PS3/PS3Extras/DirectX/sal.h1998
12 files changed, 33202 insertions, 0 deletions
diff --git a/Minecraft.Client/PS3/PS3Extras/DirectX/DirectXCollision.h b/Minecraft.Client/PS3/PS3Extras/DirectX/DirectXCollision.h
new file mode 100644
index 00000000..d411432a
--- /dev/null
+++ b/Minecraft.Client/PS3/PS3Extras/DirectX/DirectXCollision.h
@@ -0,0 +1,339 @@
+//-------------------------------------------------------------------------------------
+// DirectXCollision.h -- C++ Collision Math library
+//
+// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
+// ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
+// PARTICULAR PURPOSE.
+//
+// Copyright (c) Microsoft Corporation. All rights reserved.
+//-------------------------------------------------------------------------------------
+
+#ifdef _MSC_VER
+#pragma once
+#endif
+
+#include "DirectXMath.h"
+
+namespace DirectX
+{
+
+enum ContainmentType
+{
+ DISJOINT = 0,
+ INTERSECTS = 1,
+ CONTAINS = 2,
+};
+
+enum PlaneIntersectionType
+{
+ FRONT = 0,
+ INTERSECTING = 1,
+ BACK = 2,
+};
+
+struct BoundingBox;
+struct BoundingOrientedBox;
+struct BoundingFrustum;
+
+#pragma warning(push)
+#pragma warning(disable:4324 4820)
+
+//-------------------------------------------------------------------------------------
+// Bounding sphere
+//-------------------------------------------------------------------------------------
+struct BoundingSphere
+{
+ XMFLOAT3 Center; // Center of the sphere.
+ float Radius; // Radius of the sphere.
+
+ // Creators
+ BoundingSphere() : Center(0,0,0), Radius( 1.f ) {}
+ BoundingSphere( _In_ const XMFLOAT3& center, _In_ float radius )
+ : Center(center), Radius(radius) { assert( radius >= 0.f ); };
+ BoundingSphere( _In_ const BoundingSphere& sp )
+ : Center(sp.Center), Radius(sp.Radius) {}
+
+ // Methods
+ BoundingSphere& operator=( _In_ const BoundingSphere& sp ) { Center = sp.Center; Radius = sp.Radius; return *this; }
+
+ void Transform( _Out_ BoundingSphere& Out, _In_ CXMMATRIX M ) const;
+ void Transform( _Out_ BoundingSphere& Out, _In_ float Scale, _In_ FXMVECTOR Rotation, _In_ FXMVECTOR Translation ) const;
+ // Transform the sphere
+
+ ContainmentType Contains( _In_ FXMVECTOR Point ) const;
+ ContainmentType Contains( _In_ FXMVECTOR V0, _In_ FXMVECTOR V1, _In_ FXMVECTOR V2 ) const;
+ ContainmentType Contains( _In_ const BoundingSphere& sh ) const;
+ ContainmentType Contains( _In_ const BoundingBox& box ) const;
+ ContainmentType Contains( _In_ const BoundingOrientedBox& box ) const;
+ ContainmentType Contains( _In_ const BoundingFrustum& fr ) const;
+
+ bool Intersects( _In_ const BoundingSphere& sh ) const;
+ bool Intersects( _In_ const BoundingBox& box ) const;
+ bool Intersects( _In_ const BoundingOrientedBox& box ) const;
+ bool Intersects( _In_ const BoundingFrustum& fr ) const;
+
+ bool Intersects( _In_ FXMVECTOR V0, _In_ FXMVECTOR V1, _In_ FXMVECTOR V2 ) const;
+ // Triangle-sphere test
+
+ PlaneIntersectionType Intersects( _In_ FXMVECTOR Plane ) const;
+ // Plane-sphere test
+
+ bool Intersects( _In_ FXMVECTOR Origin, _In_ FXMVECTOR Direction, _Out_ float& Dist ) const;
+ // Ray-sphere test
+
+ ContainmentType ContainedBy( _In_ FXMVECTOR Plane0, _In_ FXMVECTOR Plane1, _In_ FXMVECTOR Plane2,
+ _In_ GXMVECTOR Plane3, _In_ CXMVECTOR Plane4, _In_ CXMVECTOR Plane5 ) const;
+ // Test sphere against six planes (see BoundingFrustum::GetPlanes)
+
+ // Static methods
+ static void CreateMerged( _Out_ BoundingSphere& Out, _In_ const BoundingSphere& S1, _In_ const BoundingSphere& S2 );
+
+ static void CreateFromBoundingBox( _Out_ BoundingSphere& Out, _In_ const BoundingBox& box );
+ static void CreateFromBoundingBox( _Out_ BoundingSphere& Out, _In_ const BoundingOrientedBox& box );
+
+ static void CreateFromPoints( _Out_ BoundingSphere& Out, _In_ size_t Count,
+ _In_reads_bytes_(sizeof(XMFLOAT3)+Stride*(Count-1)) const XMFLOAT3* pPoints, _In_ size_t Stride );
+
+ static void CreateFromFrustum( _Out_ BoundingSphere& Out, _In_ const BoundingFrustum& fr );
+};
+
+//-------------------------------------------------------------------------------------
+// Axis-aligned bounding box
+//-------------------------------------------------------------------------------------
+struct BoundingBox
+{
+ static const size_t CORNER_COUNT = 8;
+
+ XMFLOAT3 Center; // Center of the box.
+ XMFLOAT3 Extents; // Distance from the center to each side.
+
+ // Creators
+ BoundingBox() : Center(0,0,0), Extents( 1.f, 1.f, 1.f ) {}
+ BoundingBox( _In_ const XMFLOAT3& center, _In_ const XMFLOAT3& extents )
+ : Center(center), Extents(extents) { assert(extents.x >= 0 && extents.y >= 0 && extents.z >= 0); }
+ BoundingBox( _In_ const BoundingBox& box ) : Center(box.Center), Extents(box.Extents) {}
+
+ // Methods
+ BoundingBox& operator=( _In_ const BoundingBox& box) { Center = box.Center; Extents = box.Extents; return *this; }
+
+ void Transform( _Out_ BoundingBox& Out, _In_ CXMMATRIX M ) const;
+ void Transform( _Out_ BoundingBox& Out, _In_ float Scale, _In_ FXMVECTOR Rotation, _In_ FXMVECTOR Translation ) const;
+
+ void GetCorners( _Out_writes_(8) XMFLOAT3* Corners ) const;
+ // Gets the 8 corners of the box
+
+ ContainmentType Contains( _In_ FXMVECTOR Point ) const;
+ ContainmentType Contains( _In_ FXMVECTOR V0, _In_ FXMVECTOR V1, _In_ FXMVECTOR V2 ) const;
+ ContainmentType Contains( _In_ const BoundingSphere& sh ) const;
+ ContainmentType Contains( _In_ const BoundingBox& box ) const;
+ ContainmentType Contains( _In_ const BoundingOrientedBox& box ) const;
+ ContainmentType Contains( _In_ const BoundingFrustum& fr ) const;
+
+ bool Intersects( _In_ const BoundingSphere& sh ) const;
+ bool Intersects( _In_ const BoundingBox& box ) const;
+ bool Intersects( _In_ const BoundingOrientedBox& box ) const;
+ bool Intersects( _In_ const BoundingFrustum& fr ) const;
+
+ bool Intersects( _In_ FXMVECTOR V0, _In_ FXMVECTOR V1, _In_ FXMVECTOR V2 ) const;
+ // Triangle-Box test
+
+ PlaneIntersectionType Intersects( _In_ FXMVECTOR Plane ) const;
+ // Plane-box test
+
+ bool Intersects( _In_ FXMVECTOR Origin, _In_ FXMVECTOR Direction, _Out_ float& Dist ) const;
+ // Ray-Box test
+
+ ContainmentType ContainedBy( _In_ FXMVECTOR Plane0, _In_ FXMVECTOR Plane1, _In_ FXMVECTOR Plane2,
+ _In_ GXMVECTOR Plane3, _In_ CXMVECTOR Plane4, _In_ CXMVECTOR Plane5 ) const;
+ // Test box against six planes (see BoundingFrustum::GetPlanes)
+
+ // Static methods
+ static void CreateMerged( _Out_ BoundingBox& Out, _In_ const BoundingBox& b1, _In_ const BoundingBox& b2 );
+
+ static void CreateFromSphere( _Out_ BoundingBox& Out, _In_ const BoundingSphere& sh );
+
+ static void CreateFromPoints( _Out_ BoundingBox& Out, _In_ FXMVECTOR pt1, _In_ FXMVECTOR pt2 );
+ static void CreateFromPoints( _Out_ BoundingBox& Out, _In_ size_t Count,
+ _In_reads_bytes_(sizeof(XMFLOAT3)+Stride*(Count-1)) const XMFLOAT3* pPoints, _In_ size_t Stride );
+};
+
+//-------------------------------------------------------------------------------------
+// Oriented bounding box
+//-------------------------------------------------------------------------------------
+struct BoundingOrientedBox
+{
+ static const size_t CORNER_COUNT = 8;
+
+ XMFLOAT3 Center; // Center of the box.
+ XMFLOAT3 Extents; // Distance from the center to each side.
+ XMFLOAT4 Orientation; // Unit quaternion representing rotation (box -> world).
+
+ // Creators
+ BoundingOrientedBox() : Center(0,0,0), Extents( 1.f, 1.f, 1.f ), Orientation(0,0,0, 1.f ) {}
+ BoundingOrientedBox( _In_ const XMFLOAT3& _Center, _In_ const XMFLOAT3& _Extents, _In_ const XMFLOAT4& _Orientation )
+ : Center(_Center), Extents(_Extents), Orientation(_Orientation)
+ {
+ assert(_Extents.x >= 0 && _Extents.y >= 0 && _Extents.z >= 0);
+ }
+ BoundingOrientedBox( _In_ const BoundingOrientedBox& box )
+ : Center(box.Center), Extents(box.Extents), Orientation(box.Orientation) {}
+
+ // Methods
+ BoundingOrientedBox& operator=( _In_ const BoundingOrientedBox& box ) { Center = box.Center; Extents = box.Extents; Orientation = box.Orientation; return *this; }
+
+ void Transform( _Out_ BoundingOrientedBox& Out, _In_ CXMMATRIX M ) const;
+ void Transform( _Out_ BoundingOrientedBox& Out, _In_ float Scale, _In_ FXMVECTOR Rotation, _In_ FXMVECTOR Translation ) const;
+
+ void GetCorners( _Out_writes_(8) XMFLOAT3* Corners ) const;
+ // Gets the 8 corners of the box
+
+ ContainmentType Contains( _In_ FXMVECTOR Point ) const;
+ ContainmentType Contains( _In_ FXMVECTOR V0, _In_ FXMVECTOR V1, _In_ FXMVECTOR V2 ) const;
+ ContainmentType Contains( _In_ const BoundingSphere& sh ) const;
+ ContainmentType Contains( _In_ const BoundingBox& box ) const;
+ ContainmentType Contains( _In_ const BoundingOrientedBox& box ) const;
+ ContainmentType Contains( _In_ const BoundingFrustum& fr ) const;
+
+ bool Intersects( _In_ const BoundingSphere& sh ) const;
+ bool Intersects( _In_ const BoundingBox& box ) const;
+ bool Intersects( _In_ const BoundingOrientedBox& box ) const;
+ bool Intersects( _In_ const BoundingFrustum& fr ) const;
+
+ bool Intersects( _In_ FXMVECTOR V0, _In_ FXMVECTOR V1, _In_ FXMVECTOR V2 ) const;
+ // Triangle-OrientedBox test
+
+ PlaneIntersectionType Intersects( _In_ FXMVECTOR Plane ) const;
+ // Plane-OrientedBox test
+
+ bool Intersects( _In_ FXMVECTOR Origin, _In_ FXMVECTOR Direction, _Out_ float& Dist ) const;
+ // Ray-OrientedBox test
+
+ ContainmentType ContainedBy( _In_ FXMVECTOR Plane0, _In_ FXMVECTOR Plane1, _In_ FXMVECTOR Plane2,
+ _In_ GXMVECTOR Plane3, _In_ CXMVECTOR Plane4, _In_ CXMVECTOR Plane5 ) const;
+ // Test OrientedBox against six planes (see BoundingFrustum::GetPlanes)
+
+ // Static methods
+ static void CreateFromBoundingBox( _Out_ BoundingOrientedBox& Out, _In_ const BoundingBox& box );
+
+ static void CreateFromPoints( _Out_ BoundingOrientedBox& Out, _In_ size_t Count,
+ _In_reads_bytes_(sizeof(XMFLOAT3)+Stride*(Count-1)) const XMFLOAT3* pPoints, _In_ size_t Stride );
+};
+
+//-------------------------------------------------------------------------------------
+// Bounding frustum
+//-------------------------------------------------------------------------------------
+struct BoundingFrustum
+{
+ static const size_t CORNER_COUNT = 8;
+
+ XMFLOAT3 Origin; // Origin of the frustum (and projection).
+ XMFLOAT4 Orientation; // Quaternion representing rotation.
+
+ float RightSlope; // Positive X slope (X/Z).
+ float LeftSlope; // Negative X slope.
+ float TopSlope; // Positive Y slope (Y/Z).
+ float BottomSlope; // Negative Y slope.
+ float Near, Far; // Z of the near plane and far plane.
+
+ // Creators
+ BoundingFrustum() : Origin(0,0,0), Orientation(0,0,0, 1.f), RightSlope( 1.f ), LeftSlope( -1.f ),
+ TopSlope( 1.f ), BottomSlope( -1.f ), Near(0), Far( 1.f ) {}
+ BoundingFrustum( _In_ const XMFLOAT3& _Origin, _In_ const XMFLOAT4& _Orientation,
+ _In_ float _RightSlope, _In_ float _LeftSlope, _In_ float _TopSlope, _In_ float _BottomSlope,
+ _In_ float _Near, _In_ float _Far )
+ : Origin(_Origin), Orientation(_Orientation),
+ RightSlope(_RightSlope), LeftSlope(_LeftSlope), TopSlope(_TopSlope), BottomSlope(_BottomSlope),
+ Near(_Near), Far(_Far) { assert( _Near <= _Far ); }
+ BoundingFrustum( _In_ const BoundingFrustum& fr )
+ : Origin(fr.Origin), Orientation(fr.Orientation), RightSlope(fr.RightSlope), LeftSlope(fr.LeftSlope),
+ TopSlope(fr.TopSlope), BottomSlope(fr.BottomSlope), Near(fr.Near), Far(fr.Far) {}
+ BoundingFrustum( _In_ CXMMATRIX Projection ) { CreateFromMatrix( *this, Projection ); }
+
+ // Methods
+ BoundingFrustum& operator=( _In_ const BoundingFrustum& fr ) { Origin=fr.Origin; Orientation=fr.Orientation;
+ RightSlope=fr.RightSlope; LeftSlope=fr.LeftSlope;
+ TopSlope=fr.TopSlope; BottomSlope=fr.BottomSlope;
+ Near=fr.Near; Far=fr.Far; return *this; }
+
+ void Transform( _Out_ BoundingFrustum& Out, _In_ CXMMATRIX M ) const;
+ void Transform( _Out_ BoundingFrustum& Out, _In_ float Scale, _In_ FXMVECTOR Rotation, _In_ FXMVECTOR Translation ) const;
+
+ void GetCorners( _Out_writes_(8) XMFLOAT3* Corners ) const;
+ // Gets the 8 corners of the frustum
+
+ ContainmentType Contains( _In_ FXMVECTOR Point ) const;
+ ContainmentType Contains( _In_ FXMVECTOR V0, _In_ FXMVECTOR V1, _In_ FXMVECTOR V2 ) const;
+ ContainmentType Contains( _In_ const BoundingSphere& sp ) const;
+ ContainmentType Contains( _In_ const BoundingBox& box ) const;
+ ContainmentType Contains( _In_ const BoundingOrientedBox& box ) const;
+ ContainmentType Contains( _In_ const BoundingFrustum& fr ) const;
+ // Frustum-Frustum test
+
+ bool Intersects( _In_ const BoundingSphere& sh ) const;
+ bool Intersects( _In_ const BoundingBox& box ) const;
+ bool Intersects( _In_ const BoundingOrientedBox& box ) const;
+ bool Intersects( _In_ const BoundingFrustum& fr ) const;
+
+ bool Intersects( _In_ FXMVECTOR V0, _In_ FXMVECTOR V1, _In_ FXMVECTOR V2 ) const;
+ // Triangle-Frustum test
+
+ PlaneIntersectionType Intersects( _In_ FXMVECTOR Plane ) const;
+ // Plane-Frustum test
+
+ bool Intersects( _In_ FXMVECTOR Origin, _In_ FXMVECTOR Direction, _Out_ float& Dist ) const;
+ // Ray-Frustum test
+
+ ContainmentType ContainedBy( _In_ FXMVECTOR Plane0, _In_ FXMVECTOR Plane1, _In_ FXMVECTOR Plane2,
+ _In_ GXMVECTOR Plane3, _In_ CXMVECTOR Plane4, _In_ CXMVECTOR Plane5 ) const;
+ // Test frustum against six planes (see BoundingFrustum::GetPlanes)
+
+ void GetPlanes( _Out_opt_ XMVECTOR* NearPlane, _Out_opt_ XMVECTOR* FarPlane, _Out_opt_ XMVECTOR* RightPlane,
+ _Out_opt_ XMVECTOR* LeftPlane, _Out_opt_ XMVECTOR* TopPlane, _Out_opt_ XMVECTOR* BottomPlane ) const;
+ // Create 6 Planes representation of Frustum
+
+ // Static methods
+ static void CreateFromMatrix( _Out_ BoundingFrustum& Out, _In_ CXMMATRIX Projection );
+};
+
+//-----------------------------------------------------------------------------
+// Triangle intersection testing routines.
+//-----------------------------------------------------------------------------
+namespace TriangleTests
+{
+ bool Intersects( _In_ FXMVECTOR Origin, _In_ FXMVECTOR Direction, _In_ FXMVECTOR V0, _In_ GXMVECTOR V1, _In_ CXMVECTOR V2, _Out_ float& Dist );
+ // Ray-Triangle
+
+ bool Intersects( _In_ FXMVECTOR A0, _In_ FXMVECTOR A1, _In_ FXMVECTOR A2, _In_ GXMVECTOR B0, _In_ CXMVECTOR B1, _In_ CXMVECTOR B2 );
+ // Triangle-Triangle
+
+ PlaneIntersectionType Intersects( _In_ FXMVECTOR V0, _In_ FXMVECTOR V1, _In_ FXMVECTOR V2, _In_ GXMVECTOR Plane );
+ // Plane-Triangle
+
+ ContainmentType ContainedBy( _In_ FXMVECTOR V0, _In_ FXMVECTOR V1, _In_ FXMVECTOR V2,
+ _In_ GXMVECTOR Plane0, _In_ CXMVECTOR Plane1, _In_ CXMVECTOR Plane2,
+ _In_ CXMVECTOR Plane3, _In_ CXMVECTOR Plane4, _In_ CXMVECTOR Plane5 );
+ // Test a triangle against six planes at once (see BoundingFrustum::GetPlanes)
+};
+
+#pragma warning(pop)
+
+/****************************************************************************
+ *
+ * Implementation
+ *
+ ****************************************************************************/
+
+#pragma warning(push)
+#pragma warning(disable : 4068 4616 6001)
+
+#pragma prefast(push)
+#pragma prefast(disable : 25000, "FXMVECTOR is 16 bytes")
+
+#include "DirectXCollision.inl"
+
+#pragma prefast(pop)
+#pragma warning(pop)
+
+}; // namespace DirectX
+
diff --git a/Minecraft.Client/PS3/PS3Extras/DirectX/DirectXCollision.inl b/Minecraft.Client/PS3/PS3Extras/DirectX/DirectXCollision.inl
new file mode 100644
index 00000000..34d44382
--- /dev/null
+++ b/Minecraft.Client/PS3/PS3Extras/DirectX/DirectXCollision.inl
@@ -0,0 +1,4801 @@
+//-------------------------------------------------------------------------------------
+// DirectXCollision.inl -- C++ Collision Math library
+//
+// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
+// ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
+// PARTICULAR PURPOSE.
+//
+// Copyright (c) Microsoft Corporation. All rights reserved.
+//-------------------------------------------------------------------------------------
+
+#ifdef _MSC_VER
+#pragma once
+#endif
+
+XMGLOBALCONST XMVECTORF32 g_BoxOffset[8] =
+{
+ { -1.0f, -1.0f, 1.0f, 0.0f },
+ { 1.0f, -1.0f, 1.0f, 0.0f },
+ { 1.0f, 1.0f, 1.0f, 0.0f },
+ { -1.0f, 1.0f, 1.0f, 0.0f },
+ { -1.0f, -1.0f, -1.0f, 0.0f },
+ { 1.0f, -1.0f, -1.0f, 0.0f },
+ { 1.0f, 1.0f, -1.0f, 0.0f },
+ { -1.0f, 1.0f, -1.0f, 0.0f },
+};
+
+XMGLOBALCONST XMVECTORF32 g_RayEpsilon = { 1e-20f, 1e-20f, 1e-20f, 1e-20f };
+XMGLOBALCONST XMVECTORF32 g_RayNegEpsilon = { -1e-20f, -1e-20f, -1e-20f, -1e-20f };
+XMGLOBALCONST XMVECTORF32 g_FltMin = { -FLT_MAX, -FLT_MAX, -FLT_MAX, -FLT_MAX };
+XMGLOBALCONST XMVECTORF32 g_FltMax = { FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX };
+
+namespace Internal
+{
+
+//-----------------------------------------------------------------------------
+// Return true if any of the elements of a 3 vector are equal to 0xffffffff.
+// Slightly more efficient than using XMVector3EqualInt.
+//-----------------------------------------------------------------------------
+inline bool XMVector3AnyTrue( _In_ FXMVECTOR V )
+{
+ // Duplicate the fourth element from the first element.
+ XMVECTOR C = XMVectorSwizzle<XM_SWIZZLE_X, XM_SWIZZLE_Y, XM_SWIZZLE_Z, XM_SWIZZLE_X>(V);
+
+ return XMComparisonAnyTrue( XMVector4EqualIntR( C, XMVectorTrueInt() ) );
+}
+
+
+//-----------------------------------------------------------------------------
+// Return true if all of the elements of a 3 vector are equal to 0xffffffff.
+// Slightly more efficient than using XMVector3EqualInt.
+//-----------------------------------------------------------------------------
+inline bool XMVector3AllTrue( _In_ FXMVECTOR V )
+{
+ // Duplicate the fourth element from the first element.
+ XMVECTOR C = XMVectorSwizzle<XM_SWIZZLE_X, XM_SWIZZLE_Y, XM_SWIZZLE_Z, XM_SWIZZLE_X>( V );
+
+ return XMComparisonAllTrue( XMVector4EqualIntR( C, XMVectorTrueInt() ) );
+}
+
+#if defined(_PREFAST) || !defined(NDEBUG)
+
+XMGLOBALCONST XMVECTORF32 g_UnitVectorEpsilon = { 1.0e-4f, 1.0e-4f, 1.0e-4f, 1.0e-4f };
+XMGLOBALCONST XMVECTORF32 g_UnitQuaternionEpsilon = { 1.0e-4f, 1.0e-4f, 1.0e-4f, 1.0e-4f };
+XMGLOBALCONST XMVECTORF32 g_UnitPlaneEpsilon = { 1.0e-4f, 1.0e-4f, 1.0e-4f, 1.0e-4f };
+
+//-----------------------------------------------------------------------------
+// Return true if the vector is a unit vector (length == 1).
+//-----------------------------------------------------------------------------
+inline bool XMVector3IsUnit( _In_ FXMVECTOR V )
+{
+ XMVECTOR Difference = XMVector3Length( V ) - XMVectorSplatOne();
+ return XMVector4Less( XMVectorAbs( Difference ), g_UnitVectorEpsilon );
+}
+
+//-----------------------------------------------------------------------------
+// Return true if the quaterion is a unit quaternion.
+//-----------------------------------------------------------------------------
+inline bool XMQuaternionIsUnit( _In_ FXMVECTOR Q )
+{
+ XMVECTOR Difference = XMVector4Length( Q ) - XMVectorSplatOne();
+ return XMVector4Less( XMVectorAbs( Difference ), g_UnitQuaternionEpsilon );
+}
+
+//-----------------------------------------------------------------------------
+// Return true if the plane is a unit plane.
+//-----------------------------------------------------------------------------
+inline bool XMPlaneIsUnit( _In_ FXMVECTOR Plane )
+{
+ XMVECTOR Difference = XMVector3Length( Plane ) - XMVectorSplatOne();
+ return XMVector4Less( XMVectorAbs( Difference ), g_UnitPlaneEpsilon );
+}
+
+#endif // __PREFAST__ || !NDEBUG
+
+//-----------------------------------------------------------------------------
+inline XMVECTOR XMPlaneTransform( _In_ FXMVECTOR Plane, _In_ FXMVECTOR Rotation, _In_ FXMVECTOR Translation )
+{
+ XMVECTOR vNormal = XMVector3Rotate( Plane, Rotation );
+ XMVECTOR vD = XMVectorSplatW( Plane ) - XMVector3Dot( vNormal, Translation );
+
+ return XMVectorInsert<0, 0, 0, 0, 1>( vNormal, vD );
+}
+
+//-----------------------------------------------------------------------------
+// Return the point on the line segement (S1, S2) nearest the point P.
+//-----------------------------------------------------------------------------
+inline XMVECTOR PointOnLineSegmentNearestPoint( _In_ FXMVECTOR S1, _In_ FXMVECTOR S2, _In_ FXMVECTOR P )
+{
+ XMVECTOR Dir = S2 - S1;
+ XMVECTOR Projection = ( XMVector3Dot( P, Dir ) - XMVector3Dot( S1, Dir ) );
+ XMVECTOR LengthSq = XMVector3Dot( Dir, Dir );
+
+ XMVECTOR t = Projection * XMVectorReciprocal( LengthSq );
+ XMVECTOR Point = S1 + t * Dir;
+
+ // t < 0
+ XMVECTOR SelectS1 = XMVectorLess( Projection, XMVectorZero() );
+ Point = XMVectorSelect( Point, S1, SelectS1 );
+
+ // t > 1
+ XMVECTOR SelectS2 = XMVectorGreater( Projection, LengthSq );
+ Point = XMVectorSelect( Point, S2, SelectS2 );
+
+ return Point;
+}
+
+//-----------------------------------------------------------------------------
+// Test if the point (P) on the plane of the triangle is inside the triangle
+// (V0, V1, V2).
+//-----------------------------------------------------------------------------
+inline XMVECTOR PointOnPlaneInsideTriangle( _In_ FXMVECTOR P, _In_ FXMVECTOR V0, _In_ FXMVECTOR V1, _In_ GXMVECTOR V2 )
+{
+ // Compute the triangle normal.
+ XMVECTOR N = XMVector3Cross( V2 - V0, V1 - V0 );
+
+ // Compute the cross products of the vector from the base of each edge to
+ // the point with each edge vector.
+ XMVECTOR C0 = XMVector3Cross( P - V0, V1 - V0 );
+ XMVECTOR C1 = XMVector3Cross( P - V1, V2 - V1 );
+ XMVECTOR C2 = XMVector3Cross( P - V2, V0 - V2 );
+
+ // If the cross product points in the same direction as the normal the the
+ // point is inside the edge (it is zero if is on the edge).
+ XMVECTOR Zero = XMVectorZero();
+ XMVECTOR Inside0 = XMVectorGreaterOrEqual( XMVector3Dot( C0, N ), Zero );
+ XMVECTOR Inside1 = XMVectorGreaterOrEqual( XMVector3Dot( C1, N ), Zero );
+ XMVECTOR Inside2 = XMVectorGreaterOrEqual( XMVector3Dot( C2, N ), Zero );
+
+ // If the point inside all of the edges it is inside.
+ return XMVectorAndInt( XMVectorAndInt( Inside0, Inside1 ), Inside2 );
+}
+
+//-----------------------------------------------------------------------------
+inline bool SolveCubic( _In_ float e, _In_ float f, _In_ float g, _Out_ float* t, _Out_ float* u, _Out_ float* v )
+{
+ float p, q, h, rc, d, theta, costh3, sinth3;
+
+ p = f - e * e / 3.0f;
+ q = g - e * f / 3.0f + e * e * e * 2.0f / 27.0f;
+ h = q * q / 4.0f + p * p * p / 27.0f;
+
+ if( h > 0.0 )
+ {
+ *t = *u = *v = 0.f;
+ return false; // only one real root
+ }
+
+ if( ( h == 0.0 ) && ( q == 0.0 ) ) // all the same root
+ {
+ *t = - e / 3;
+ *u = - e / 3;
+ *v = - e / 3;
+
+ return true;
+ }
+
+ d = sqrtf( q * q / 4.0f - h );
+ if( d < 0 )
+ rc = -powf( -d, 1.0f / 3.0f );
+ else
+ rc = powf( d, 1.0f / 3.0f );
+
+ theta = XMScalarACos( -q / ( 2.0f * d ) );
+ costh3 = XMScalarCos( theta / 3.0f );
+ sinth3 = sqrtf( 3.0f ) * XMScalarSin( theta / 3.0f );
+ *t = 2.0f * rc * costh3 - e / 3.0f;
+ *u = -rc * ( costh3 + sinth3 ) - e / 3.0f;
+ *v = -rc * ( costh3 - sinth3 ) - e / 3.0f;
+
+ return true;
+}
+
+//-----------------------------------------------------------------------------
+inline XMVECTOR CalculateEigenVector( _In_ float m11, _In_ float m12, _In_ float m13,
+ _In_ float m22, _In_ float m23, _In_ float m33, _In_ float e )
+{
+ float fTmp[3];
+ fTmp[0] = ( float )( m12 * m23 - m13 * ( m22 - e ) );
+ fTmp[1] = ( float )( m13 * m12 - m23 * ( m11 - e ) );
+ fTmp[2] = ( float )( ( m11 - e ) * ( m22 - e ) - m12 * m12 );
+
+ XMVECTOR vTmp = XMLoadFloat3( (XMFLOAT3*)fTmp );
+
+ if( XMVector3Equal( vTmp, XMVectorZero() ) ) // planar or linear
+ {
+ float f1, f2, f3;
+
+ // we only have one equation - find a valid one
+ if( ( m11 - e != 0.0 ) || ( m12 != 0.0 ) || ( m13 != 0.0 ) )
+ {
+ f1 = m11 - e; f2 = m12; f3 = m13;
+ }
+ else if( ( m12 != 0.0 ) || ( m22 - e != 0.0 ) || ( m23 != 0.0 ) )
+ {
+ f1 = m12; f2 = m22 - e; f3 = m23;
+ }
+ else if( ( m13 != 0.0 ) || ( m23 != 0.0 ) || ( m33 - e != 0.0 ) )
+ {
+ f1 = m13; f2 = m23; f3 = m33 - e;
+ }
+ else
+ {
+ // error, we'll just make something up - we have NO context
+ f1 = 1.0; f2 = 0.0; f3 = 0.0;
+ }
+
+ if( f1 == 0.0 )
+ vTmp = XMVectorSetX( vTmp, 0.0f );
+ else
+ vTmp = XMVectorSetX( vTmp, 1.0f );
+
+ if( f2 == 0.0 )
+ vTmp = XMVectorSetY( vTmp, 0.0f );
+ else
+ vTmp = XMVectorSetY( vTmp, 1.0f );
+
+ if( f3 == 0.0 )
+ {
+ vTmp = XMVectorSetZ( vTmp, 0.0f );
+ // recalculate y to make equation work
+ if( m12 != 0.0 )
+ vTmp = XMVectorSetY( vTmp, ( float )( -f1 / f2 ) );
+ }
+ else
+ {
+ vTmp = XMVectorSetZ( vTmp, ( float )( ( f2 - f1 ) / f3 ) );
+ }
+ }
+
+ if( XMVectorGetX( XMVector3LengthSq( vTmp ) ) > 1e-5f )
+ {
+ return XMVector3Normalize( vTmp );
+ }
+ else
+ {
+ // Multiply by a value large enough to make the vector non-zero.
+ vTmp *= 1e5f;
+ return XMVector3Normalize( vTmp );
+ }
+}
+
+//-----------------------------------------------------------------------------
+inline bool CalculateEigenVectors( _In_ float m11, _In_ float m12, _In_ float m13,
+ _In_ float m22, _In_ float m23, _In_ float m33,
+ _In_ float e1, _In_ float e2, _In_ float e3,
+ _Out_ XMVECTOR* pV1, _Out_ XMVECTOR* pV2, _Out_ XMVECTOR* pV3 )
+{
+ *pV1 = DirectX::Internal::CalculateEigenVector( m11, m12, m13, m22, m23, m33, e1 );
+ *pV2 = DirectX::Internal::CalculateEigenVector( m11, m12, m13, m22, m23, m33, e2 );
+ *pV3 = DirectX::Internal::CalculateEigenVector( m11, m12, m13, m22, m23, m33, e3 );
+
+ bool v1z = false;
+ bool v2z = false;
+ bool v3z = false;
+
+ XMVECTOR Zero = XMVectorZero();
+
+ if ( XMVector3Equal( *pV1, Zero ) )
+ v1z = true;
+
+ if ( XMVector3Equal( *pV2, Zero ) )
+ v2z = true;
+
+ if ( XMVector3Equal( *pV3, Zero ))
+ v3z = true;
+
+ bool e12 = ( fabsf( XMVectorGetX( XMVector3Dot( *pV1, *pV2 ) ) ) > 0.1f ); // check for non-orthogonal vectors
+ bool e13 = ( fabsf( XMVectorGetX( XMVector3Dot( *pV1, *pV3 ) ) ) > 0.1f );
+ bool e23 = ( fabsf( XMVectorGetX( XMVector3Dot( *pV2, *pV3 ) ) ) > 0.1f );
+
+ if( ( v1z && v2z && v3z ) || ( e12 && e13 && e23 ) ||
+ ( e12 && v3z ) || ( e13 && v2z ) || ( e23 && v1z ) ) // all eigenvectors are 0- any basis set
+ {
+ *pV1 = g_XMIdentityR0.v;
+ *pV2 = g_XMIdentityR1.v;
+ *pV3 = g_XMIdentityR2.v;
+ return true;
+ }
+
+ if( v1z && v2z )
+ {
+ XMVECTOR vTmp = XMVector3Cross( g_XMIdentityR1, *pV3 );
+ if( XMVectorGetX( XMVector3LengthSq( vTmp ) ) < 1e-5f )
+ {
+ vTmp = XMVector3Cross( g_XMIdentityR0, *pV3 );
+ }
+ *pV1 = XMVector3Normalize( vTmp );
+ *pV2 = XMVector3Cross( *pV3, *pV1 );
+ return true;
+ }
+
+ if( v3z && v1z )
+ {
+ XMVECTOR vTmp = XMVector3Cross( g_XMIdentityR1, *pV2 );
+ if( XMVectorGetX( XMVector3LengthSq( vTmp ) ) < 1e-5f )
+ {
+ vTmp = XMVector3Cross( g_XMIdentityR0, *pV2 );
+ }
+ *pV3 = XMVector3Normalize( vTmp );
+ *pV1 = XMVector3Cross( *pV2, *pV3 );
+ return true;
+ }
+
+ if( v2z && v3z )
+ {
+ XMVECTOR vTmp = XMVector3Cross( g_XMIdentityR1, *pV1 );
+ if( XMVectorGetX( XMVector3LengthSq( vTmp ) ) < 1e-5f )
+ {
+ vTmp = XMVector3Cross( g_XMIdentityR0, *pV1 );
+ }
+ *pV2 = XMVector3Normalize( vTmp );
+ *pV3 = XMVector3Cross( *pV1, *pV2 );
+ return true;
+ }
+
+ if( ( v1z ) || e12 )
+ {
+ *pV1 = XMVector3Cross( *pV2, *pV3 );
+ return true;
+ }
+
+ if( ( v2z ) || e23 )
+ {
+ *pV2 = XMVector3Cross( *pV3, *pV1 );
+ return true;
+ }
+
+ if( ( v3z ) || e13 )
+ {
+ *pV3 = XMVector3Cross( *pV1, *pV2 );
+ return true;
+ }
+
+ return true;
+}
+
+//-----------------------------------------------------------------------------
+inline bool CalculateEigenVectorsFromCovarianceMatrix( _In_ float Cxx, _In_ float Cyy, _In_ float Czz,
+ _In_ float Cxy, _In_ float Cxz, _In_ float Cyz,
+ _Out_ XMVECTOR* pV1, _Out_ XMVECTOR* pV2, _Out_ XMVECTOR* pV3 )
+{
+ // Calculate the eigenvalues by solving a cubic equation.
+ float e = -( Cxx + Cyy + Czz );
+ float f = Cxx * Cyy + Cyy * Czz + Czz * Cxx - Cxy * Cxy - Cxz * Cxz - Cyz * Cyz;
+ float g = Cxy * Cxy * Czz + Cxz * Cxz * Cyy + Cyz * Cyz * Cxx - Cxy * Cyz * Cxz * 2.0f - Cxx * Cyy * Czz;
+
+ float ev1, ev2, ev3;
+ if( !DirectX::Internal::SolveCubic( e, f, g, &ev1, &ev2, &ev3 ) )
+ {
+ // set them to arbitrary orthonormal basis set
+ *pV1 = g_XMIdentityR0.v;
+ *pV2 = g_XMIdentityR1.v;
+ *pV3 = g_XMIdentityR2.v;
+ return false;
+ }
+
+ return DirectX::Internal::CalculateEigenVectors( Cxx, Cxy, Cxz, Cyy, Cyz, Czz, ev1, ev2, ev3, pV1, pV2, pV3 );
+}
+
+//-----------------------------------------------------------------------------
+inline void FastIntersectTrianglePlane( FXMVECTOR V0, FXMVECTOR V1, FXMVECTOR V2, GXMVECTOR Plane,
+ XMVECTOR& Outside, XMVECTOR& Inside )
+{
+ // Plane0
+ XMVECTOR Dist0 = XMVector4Dot( V0, Plane );
+ XMVECTOR Dist1 = XMVector4Dot( V1, Plane );
+ XMVECTOR Dist2 = XMVector4Dot( V2, Plane );
+
+ XMVECTOR MinDist = XMVectorMin( Dist0, Dist1 );
+ MinDist = XMVectorMin( MinDist, Dist2 );
+
+ XMVECTOR MaxDist = XMVectorMax( Dist0, Dist1 );
+ MaxDist = XMVectorMax( MaxDist, Dist2 );
+
+ XMVECTOR Zero = XMVectorZero();
+
+ // Outside the plane?
+ Outside = XMVectorGreater( MinDist, Zero );
+
+ // Fully inside the plane?
+ Inside = XMVectorLess( MaxDist, Zero );
+}
+
+//-----------------------------------------------------------------------------
+inline void FastIntersectSpherePlane( _In_ FXMVECTOR Center, _In_ FXMVECTOR Radius, _In_ FXMVECTOR Plane,
+ _Out_ XMVECTOR& Outside, _Out_ XMVECTOR& Inside )
+{
+ XMVECTOR Dist = XMVector4Dot( Center, Plane );
+
+ // Outside the plane?
+ Outside = XMVectorGreater( Dist, Radius );
+
+ // Fully inside the plane?
+ Inside = XMVectorLess( Dist, -Radius );
+}
+
+//-----------------------------------------------------------------------------
+inline void FastIntersectAxisAlignedBoxPlane( _In_ FXMVECTOR Center, _In_ FXMVECTOR Extents, _In_ FXMVECTOR Plane,
+ _Out_ XMVECTOR& Outside, _Out_ XMVECTOR& Inside )
+{
+ // Compute the distance to the center of the box.
+ XMVECTOR Dist = XMVector4Dot( Center, Plane );
+
+ // Project the axes of the box onto the normal of the plane. Half the
+ // length of the projection (sometime called the "radius") is equal to
+ // h(u) * abs(n dot b(u))) + h(v) * abs(n dot b(v)) + h(w) * abs(n dot b(w))
+ // where h(i) are extents of the box, n is the plane normal, and b(i) are the
+ // axes of the box. In this case b(i) = [(1,0,0), (0,1,0), (0,0,1)].
+ XMVECTOR Radius = XMVector3Dot( Extents, XMVectorAbs( Plane ) );
+
+ // Outside the plane?
+ Outside = XMVectorGreater( Dist, Radius );
+
+ // Fully inside the plane?
+ Inside = XMVectorLess( Dist, -Radius );
+}
+
+//-----------------------------------------------------------------------------
+inline void FastIntersectOrientedBoxPlane( _In_ FXMVECTOR Center, _In_ FXMVECTOR Extents, _In_ FXMVECTOR Axis0, _In_ GXMVECTOR Axis1,
+ _In_ CXMVECTOR Axis2, _In_ CXMVECTOR Plane, _Out_ XMVECTOR& Outside, _Out_ XMVECTOR& Inside )
+{
+ // Compute the distance to the center of the box.
+ XMVECTOR Dist = XMVector4Dot( Center, Plane );
+
+ // Project the axes of the box onto the normal of the plane. Half the
+ // length of the projection (sometime called the "radius") is equal to
+ // h(u) * abs(n dot b(u))) + h(v) * abs(n dot b(v)) + h(w) * abs(n dot b(w))
+ // where h(i) are extents of the box, n is the plane normal, and b(i) are the
+ // axes of the box.
+ XMVECTOR Radius = XMVector3Dot( Plane, Axis0 );
+ Radius = XMVectorInsert<0, 0, 1, 0, 0>( Radius, XMVector3Dot( Plane, Axis1 ) );
+ Radius = XMVectorInsert<0, 0, 0, 1, 0>( Radius, XMVector3Dot( Plane, Axis2 ) );
+ Radius = XMVector3Dot( Extents, XMVectorAbs( Radius ) );
+
+ // Outside the plane?
+ Outside = XMVectorGreater( Dist, Radius );
+
+ // Fully inside the plane?
+ Inside = XMVectorLess( Dist, -Radius );
+}
+
+//-----------------------------------------------------------------------------
+inline void FastIntersectFrustumPlane( _In_ FXMVECTOR Point0, _In_ FXMVECTOR Point1, _In_ FXMVECTOR Point2, _In_ GXMVECTOR Point3,
+ _In_ CXMVECTOR Point4, _In_ CXMVECTOR Point5, _In_ CXMVECTOR Point6, _In_ CXMVECTOR Point7,
+ _In_ CXMVECTOR Plane, _Out_ XMVECTOR& Outside, _Out_ XMVECTOR& Inside )
+{
+ // Find the min/max projection of the frustum onto the plane normal.
+ XMVECTOR Min, Max, Dist;
+
+ Min = Max = XMVector3Dot( Plane, Point0 );
+
+ Dist = XMVector3Dot( Plane, Point1 );
+ Min = XMVectorMin( Min, Dist );
+ Max = XMVectorMax( Max, Dist );
+
+ Dist = XMVector3Dot( Plane, Point2 );
+ Min = XMVectorMin( Min, Dist );
+ Max = XMVectorMax( Max, Dist );
+
+ Dist = XMVector3Dot( Plane, Point3 );
+ Min = XMVectorMin( Min, Dist );
+ Max = XMVectorMax( Max, Dist );
+
+ Dist = XMVector3Dot( Plane, Point4 );
+ Min = XMVectorMin( Min, Dist );
+ Max = XMVectorMax( Max, Dist );
+
+ Dist = XMVector3Dot( Plane, Point5 );
+ Min = XMVectorMin( Min, Dist );
+ Max = XMVectorMax( Max, Dist );
+
+ Dist = XMVector3Dot( Plane, Point6 );
+ Min = XMVectorMin( Min, Dist );
+ Max = XMVectorMax( Max, Dist );
+
+ Dist = XMVector3Dot( Plane, Point7 );
+ Min = XMVectorMin( Min, Dist );
+ Max = XMVectorMax( Max, Dist );
+
+ XMVECTOR PlaneDist = -XMVectorSplatW( Plane );
+
+ // Outside the plane?
+ Outside = XMVectorGreater( Min, PlaneDist );
+
+ // Fully inside the plane?
+ Inside = XMVectorLess( Max, PlaneDist );
+}
+
+}; // namespace Internal
+
+
+/****************************************************************************
+ *
+ * BoundingSphere
+ *
+ ****************************************************************************/
+
+//-----------------------------------------------------------------------------
+// Transform a sphere by an angle preserving transform.
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline void BoundingSphere::Transform( BoundingSphere& Out, CXMMATRIX M ) const
+{
+ // Load the center of the sphere.
+ XMVECTOR vCenter = XMLoadFloat3( &Center );
+
+ // Transform the center of the sphere.
+ XMVECTOR C = XMVector3Transform( vCenter, M );
+
+ XMVECTOR dX = XMVector3Dot( M.r[0], M.r[0] );
+ XMVECTOR dY = XMVector3Dot( M.r[1], M.r[1] );
+ XMVECTOR dZ = XMVector3Dot( M.r[2], M.r[2] );
+
+ XMVECTOR d = XMVectorMax( dX, XMVectorMax( dY, dZ ) );
+
+ // Store the center sphere.
+ XMStoreFloat3( &Out.Center, C );
+
+ // Scale the radius of the pshere.
+ float Scale = sqrtf( XMVectorGetX(d) );
+ Out.Radius = Radius * Scale;
+}
+
+_Use_decl_annotations_
+inline void BoundingSphere::Transform( BoundingSphere& Out, float Scale, FXMVECTOR Rotation, FXMVECTOR Translation ) const
+{
+ // Load the center of the sphere.
+ XMVECTOR vCenter = XMLoadFloat3( &Center );
+
+ // Transform the center of the sphere.
+ vCenter = XMVector3Rotate( vCenter * XMVectorReplicate( Scale ), Rotation ) + Translation;
+
+ // Store the center sphere.
+ XMStoreFloat3( &Out.Center, vCenter );
+
+ // Scale the radius of the pshere.
+ Out.Radius = Radius * Scale;
+}
+
+
+//-----------------------------------------------------------------------------
+// Point in sphere test.
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline ContainmentType BoundingSphere::Contains( FXMVECTOR Point ) const
+{
+ XMVECTOR vCenter = XMLoadFloat3( &Center );
+ XMVECTOR vRadius = XMVectorReplicatePtr( &Radius );
+
+ XMVECTOR DistanceSquared = XMVector3LengthSq( Point - vCenter );
+ XMVECTOR RadiusSquared = XMVectorMultiply( vRadius, vRadius );
+
+ return XMVector3LessOrEqual( DistanceSquared, RadiusSquared ) ? CONTAINS : DISJOINT;
+}
+
+
+//-----------------------------------------------------------------------------
+// Triangle in sphere test
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline ContainmentType BoundingSphere::Contains( FXMVECTOR V0, FXMVECTOR V1, FXMVECTOR V2 ) const
+{
+ if ( !Intersects(V0,V1,V2) )
+ return DISJOINT;
+
+ XMVECTOR vCenter = XMLoadFloat3( &Center );
+ XMVECTOR vRadius = XMVectorReplicatePtr( &Radius );
+ XMVECTOR RadiusSquared = XMVectorMultiply( vRadius, vRadius );
+
+ XMVECTOR DistanceSquared = XMVector3LengthSq( V0 - vCenter );
+ XMVECTOR Inside = XMVectorLessOrEqual(DistanceSquared, RadiusSquared);
+
+ DistanceSquared = XMVector3LengthSq( V1 - vCenter );
+ Inside = XMVectorAndInt( Inside, XMVectorLessOrEqual(DistanceSquared, RadiusSquared) );
+
+ DistanceSquared = XMVector3LengthSq( V2 - vCenter );
+ Inside = XMVectorAndInt( Inside, XMVectorLessOrEqual(DistanceSquared, RadiusSquared) );
+
+ return ( XMVector3EqualInt( Inside, XMVectorTrueInt() ) ) ? CONTAINS : INTERSECTS;
+}
+
+
+//-----------------------------------------------------------------------------
+// Sphere in sphere test.
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline ContainmentType BoundingSphere::Contains( const BoundingSphere& sh ) const
+{
+ XMVECTOR Center1 = XMLoadFloat3( &Center );
+ float r1 = Radius;
+
+ XMVECTOR Center2 = XMLoadFloat3( &sh.Center );
+ float r2 = sh.Radius;
+
+ XMVECTOR V = XMVectorSubtract( Center2, Center1 );
+
+ XMVECTOR Dist = XMVector3Length( V );
+
+ float d = XMVectorGetX( Dist );
+
+ return (r1 + r2 >= d) ? ((r1 - r2 >= d) ? CONTAINS : INTERSECTS) : DISJOINT;
+}
+
+
+//-----------------------------------------------------------------------------
+// Axis-aligned box in sphere test
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline ContainmentType BoundingSphere::Contains( const BoundingBox& box ) const
+{
+ if ( !box.Intersects(*this) )
+ return DISJOINT;
+
+ XMVECTOR vCenter = XMLoadFloat3( &Center );
+ XMVECTOR vRadius = XMVectorReplicatePtr( &Radius );
+ XMVECTOR RadiusSq = vRadius * vRadius;
+
+ XMVECTOR boxCenter = XMLoadFloat3( &box.Center );
+ XMVECTOR boxExtents = XMLoadFloat3( &box.Extents );
+
+ XMVECTOR InsideAll = XMVectorTrueInt();
+
+ XMVECTOR offset = boxCenter - vCenter;
+
+ for( size_t i = 0; i < BoundingBox::CORNER_COUNT; ++i )
+ {
+ XMVECTOR C = XMVectorMultiplyAdd( boxExtents, g_BoxOffset[i], offset );
+ XMVECTOR d = XMVector3LengthSq( C );
+ InsideAll = XMVectorAndInt( InsideAll, XMVectorLessOrEqual( d, RadiusSq ) );
+ }
+
+ return ( XMVector3EqualInt( InsideAll, XMVectorTrueInt() ) ) ? CONTAINS : INTERSECTS;
+}
+
+
+//-----------------------------------------------------------------------------
+// Oriented box in sphere test
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline ContainmentType BoundingSphere::Contains( const BoundingOrientedBox& box ) const
+{
+ if ( !box.Intersects(*this) )
+ return DISJOINT;
+
+ XMVECTOR vCenter = XMLoadFloat3( &Center );
+ XMVECTOR vRadius = XMVectorReplicatePtr( &Radius );
+ XMVECTOR RadiusSq = vRadius * vRadius;
+
+ XMVECTOR boxCenter = XMLoadFloat3( &box.Center );
+ XMVECTOR boxExtents = XMLoadFloat3( &box.Extents );
+ XMVECTOR boxOrientation = XMLoadFloat4( &box.Orientation );
+
+ assert( DirectX::Internal::XMQuaternionIsUnit( boxOrientation ) );
+
+ XMVECTOR InsideAll = XMVectorTrueInt();
+
+ for( size_t i = 0; i < BoundingOrientedBox::CORNER_COUNT; ++i )
+ {
+ XMVECTOR C = XMVector3Rotate( boxExtents * g_BoxOffset[i], boxOrientation ) + boxCenter;
+ XMVECTOR d = XMVector3LengthSq( XMVectorSubtract( vCenter, C ) );
+ InsideAll = XMVectorAndInt( InsideAll, XMVectorLessOrEqual( d, RadiusSq ) );
+ }
+
+ return ( XMVector3EqualInt( InsideAll, XMVectorTrueInt() ) ) ? CONTAINS : INTERSECTS;
+
+}
+
+
//-----------------------------------------------------------------------------
// Frustum in sphere test
//-----------------------------------------------------------------------------
_Use_decl_annotations_
inline ContainmentType BoundingSphere::Contains( const BoundingFrustum& fr ) const
{
    // Quick rejection: a frustum that does not touch the sphere cannot be
    // contained by it.
    if ( !fr.Intersects(*this) )
        return DISJOINT;

    XMVECTOR vCenter = XMLoadFloat3( &Center );
    XMVECTOR vRadius = XMVectorReplicatePtr( &Radius );
    XMVECTOR RadiusSq = vRadius * vRadius;

    XMVECTOR vOrigin = XMLoadFloat3( &fr.Origin );
    XMVECTOR vOrientation = XMLoadFloat4( &fr.Orientation );

    assert( DirectX::Internal::XMQuaternionIsUnit( vOrientation ) );

    // Build the corners of the frustum in its local space from the four edge
    // slopes (at unit depth, z = 1) and the near/far plane distances.
    XMVECTOR vRightTop = XMVectorSet( fr.RightSlope, fr.TopSlope, 1.0f, 0.0f );
    XMVECTOR vRightBottom = XMVectorSet( fr.RightSlope, fr.BottomSlope, 1.0f, 0.0f );
    XMVECTOR vLeftTop = XMVectorSet( fr.LeftSlope, fr.TopSlope, 1.0f, 0.0f );
    XMVECTOR vLeftBottom = XMVectorSet( fr.LeftSlope, fr.BottomSlope, 1.0f, 0.0f );
    XMVECTOR vNear = XMVectorReplicatePtr( &fr.Near );
    XMVECTOR vFar = XMVectorReplicatePtr( &fr.Far );

    // Scale the unit-depth edge directions by the near and far distances to
    // get the eight local-space corners.
    XMVECTOR Corners[BoundingFrustum::CORNER_COUNT];
    Corners[0] = vRightTop * vNear;
    Corners[1] = vRightBottom * vNear;
    Corners[2] = vLeftTop * vNear;
    Corners[3] = vLeftBottom * vNear;
    Corners[4] = vRightTop * vFar;
    Corners[5] = vRightBottom * vFar;
    Corners[6] = vLeftTop * vFar;
    Corners[7] = vLeftBottom * vFar;

    // The frustum is contained only if all eight corners, transformed into
    // world space, lie within the sphere.
    XMVECTOR InsideAll = XMVectorTrueInt();
    for( size_t i = 0; i < BoundingFrustum::CORNER_COUNT; ++i )
    {
        XMVECTOR C = XMVector3Rotate( Corners[i], vOrientation ) + vOrigin;
        XMVECTOR d = XMVector3LengthSq( XMVectorSubtract( vCenter, C ) );
        InsideAll = XMVectorAndInt( InsideAll, XMVectorLessOrEqual( d, RadiusSq ) );
    }

    return ( XMVector3EqualInt( InsideAll, XMVectorTrueInt() ) ) ? CONTAINS : INTERSECTS;
}
+
+
+//-----------------------------------------------------------------------------
+// Sphere vs. sphere test.
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline bool BoundingSphere::Intersects( const BoundingSphere& sh ) const
+{
+ // Load A.
+ XMVECTOR vCenterA = XMLoadFloat3( &Center );
+ XMVECTOR vRadiusA = XMVectorReplicatePtr( &Radius );
+
+ // Load B.
+ XMVECTOR vCenterB = XMLoadFloat3( &sh.Center );
+ XMVECTOR vRadiusB = XMVectorReplicatePtr( &sh.Radius );
+
+ // Distance squared between centers.
+ XMVECTOR Delta = vCenterB - vCenterA;
+ XMVECTOR DistanceSquared = XMVector3LengthSq( Delta );
+
+ // Sum of the radii squared.
+ XMVECTOR RadiusSquared = XMVectorAdd( vRadiusA, vRadiusB );
+ RadiusSquared = XMVectorMultiply( RadiusSquared, RadiusSquared );
+
+ return XMVector3LessOrEqual( DistanceSquared, RadiusSquared );
+}
+
+
//-----------------------------------------------------------------------------
// Box vs. sphere test.
//-----------------------------------------------------------------------------
_Use_decl_annotations_
inline bool BoundingSphere::Intersects( const BoundingBox& box ) const
{
    // Sphere/box overlap is symmetric; defer to the box's implementation.
    return box.Intersects( *this );
}
+
_Use_decl_annotations_
inline bool BoundingSphere::Intersects( const BoundingOrientedBox& box ) const
{
    // Sphere/oriented-box overlap is symmetric; defer to the box's implementation.
    return box.Intersects( *this );
}
+
+
//-----------------------------------------------------------------------------
// Frustum vs. sphere test.
//-----------------------------------------------------------------------------
_Use_decl_annotations_
inline bool BoundingSphere::Intersects( const BoundingFrustum& fr ) const
{
    // Sphere/frustum overlap is symmetric; defer to the frustum's implementation.
    return fr.Intersects( *this );
}
+
+
//-----------------------------------------------------------------------------
// Triangle vs sphere test
//-----------------------------------------------------------------------------
_Use_decl_annotations_
inline bool BoundingSphere::Intersects( FXMVECTOR V0, FXMVECTOR V1, FXMVECTOR V2 ) const
{
    // Load the sphere.
    XMVECTOR vCenter = XMLoadFloat3( &Center );
    XMVECTOR vRadius = XMVectorReplicatePtr( &Radius );

    // Compute the plane of the triangle (has to be normalized).
    XMVECTOR N = XMVector3Normalize( XMVector3Cross( V1 - V0, V2 - V0 ) );

    // Assert that the triangle is not degenerate.
    assert( !XMVector3Equal( N, XMVectorZero() ) );

    // Signed distance from the sphere center to the triangle's plane.
    XMVECTOR Dist = XMVector3Dot( vCenter - V0, N );

    // If the center of the sphere is farther from the plane of the triangle than
    // the radius of the sphere, then there cannot be an intersection.
    XMVECTOR NoIntersection = XMVectorLess( Dist, -vRadius );
    NoIntersection = XMVectorOrInt( NoIntersection, XMVectorGreater( Dist, vRadius ) );

    // Project the center of the sphere onto the plane of the triangle.
    XMVECTOR Point = vCenter - ( N * Dist );

    // Is it inside all the edges? If so we intersect because the distance
    // to the plane is less than the radius.
    XMVECTOR Intersection = DirectX::Internal::PointOnPlaneInsideTriangle( Point, V0, V1, V2 );

    // Otherwise, test the nearest point on each edge against the radius.
    XMVECTOR RadiusSq = vRadius * vRadius;

    // Edge 0,1
    Point = DirectX::Internal::PointOnLineSegmentNearestPoint( V0, V1, vCenter );

    // If the distance to the center of the sphere to the point is less than
    // the radius of the sphere then it must intersect.
    Intersection = XMVectorOrInt( Intersection, XMVectorLessOrEqual( XMVector3LengthSq( vCenter - Point ), RadiusSq ) );

    // Edge 1,2
    Point = DirectX::Internal::PointOnLineSegmentNearestPoint( V1, V2, vCenter );

    // If the distance to the center of the sphere to the point is less than
    // the radius of the sphere then it must intersect.
    Intersection = XMVectorOrInt( Intersection, XMVectorLessOrEqual( XMVector3LengthSq( vCenter - Point ), RadiusSq ) );

    // Edge 2,0
    Point = DirectX::Internal::PointOnLineSegmentNearestPoint( V2, V0, vCenter );

    // If the distance to the center of the sphere to the point is less than
    // the radius of the sphere then it must intersect.
    Intersection = XMVectorOrInt( Intersection, XMVectorLessOrEqual( XMVector3LengthSq( vCenter - Point ), RadiusSq ) );

    // Intersect only when a feature test passed AND the plane-distance
    // rejection did not fire: XMVectorAndCInt computes
    // Intersection & ~NoIntersection.
    return XMVector4EqualInt( XMVectorAndCInt( Intersection, NoIntersection ), XMVectorTrueInt() );
}
+
+
+//-----------------------------------------------------------------------------
+// Sphere-plane intersection
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline PlaneIntersectionType BoundingSphere::Intersects( FXMVECTOR Plane ) const
+{
+ assert( DirectX::Internal::XMPlaneIsUnit( Plane ) );
+
+ // Load the sphere.
+ XMVECTOR vCenter = XMLoadFloat3( &Center );
+ XMVECTOR vRadius = XMVectorReplicatePtr( &Radius );
+
+ // Set w of the center to one so we can dot4 with a plane.
+ vCenter = XMVectorInsert<0, 0, 0, 0, 1>( vCenter, XMVectorSplatOne() );
+
+ XMVECTOR Outside, Inside;
+ DirectX::Internal::FastIntersectSpherePlane( vCenter, vRadius, Plane, Outside, Inside );
+
+ // If the sphere is outside any plane it is outside.
+ if ( XMVector4EqualInt( Outside, XMVectorTrueInt() ) )
+ return FRONT;
+
+ // If the sphere is inside all planes it is inside.
+ if ( XMVector4EqualInt( Inside, XMVectorTrueInt() ) )
+ return BACK;
+
+ // The sphere is not inside all planes or outside a plane it intersects.
+ return INTERSECTING;
+}
+
+
//-----------------------------------------------------------------------------
// Compute the intersection of a ray (Origin, Direction) with a sphere.
//-----------------------------------------------------------------------------
_Use_decl_annotations_
inline bool BoundingSphere::Intersects( FXMVECTOR Origin, FXMVECTOR Direction, float& Dist ) const
{
    // Direction must be unit length so s and t below are in distance units.
    assert( DirectX::Internal::XMVector3IsUnit( Direction ) );

    XMVECTOR vCenter = XMLoadFloat3( &Center );
    XMVECTOR vRadius = XMVectorReplicatePtr( &Radius );

    // l is the vector from the ray origin to the center of the sphere.
    XMVECTOR l = vCenter - Origin;

    // s is the projection of the l onto the ray direction.
    XMVECTOR s = XMVector3Dot( l, Direction );

    // l2 is the squared distance from the origin to the sphere center.
    XMVECTOR l2 = XMVector3Dot( l, l );

    XMVECTOR r2 = vRadius * vRadius;

    // m2 is squared distance from the center of the sphere to the projection.
    XMVECTOR m2 = l2 - s * s;

    XMVECTOR NoIntersection;

    // If the ray origin is outside the sphere and the center of the sphere is
    // behind the ray origin there is no intersection.
    NoIntersection = XMVectorAndInt( XMVectorLess( s, XMVectorZero() ), XMVectorGreater( l2, r2 ) );

    // If the squared distance from the center of the sphere to the projection
    // is greater than the radius squared the ray will miss the sphere.
    NoIntersection = XMVectorOrInt( NoIntersection, XMVectorGreater( m2, r2 ) );

    // The ray hits the sphere, compute the nearest intersection point.
    // (When m2 > r2 this square root operates on a negative value, but the
    // result is discarded below because NoIntersection is set in that case.)
    XMVECTOR q = XMVectorSqrt( r2 - m2 );
    XMVECTOR t1 = s - q;
    XMVECTOR t2 = s + q;

    // If the origin is inside the sphere the exit point (t2) is the nearest
    // hit along the ray; otherwise it is the entry point (t1).
    XMVECTOR OriginInside = XMVectorLessOrEqual( l2, r2 );
    XMVECTOR t = XMVectorSelect( t1, t2, OriginInside );

    if( XMVector4NotEqualInt( NoIntersection, XMVectorTrueInt() ) )
    {
        // Store the x-component to *pDist.
        XMStoreFloat( &Dist, t );
        return true;
    }

    Dist = 0.f;
    return false;
}
+
+
+//-----------------------------------------------------------------------------
+// Test a sphere vs 6 planes (typically forming a frustum).
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline ContainmentType BoundingSphere::ContainedBy( FXMVECTOR Plane0, FXMVECTOR Plane1, FXMVECTOR Plane2,
+ GXMVECTOR Plane3, CXMVECTOR Plane4, CXMVECTOR Plane5 ) const
+{
+ // Load the sphere.
+ XMVECTOR vCenter = XMLoadFloat3( &Center );
+ XMVECTOR vRadius = XMVectorReplicatePtr( &Radius );
+
+ // Set w of the center to one so we can dot4 with a plane.
+ vCenter = XMVectorInsert<0, 0, 0, 0, 1>( vCenter, XMVectorSplatOne() );
+
+ XMVECTOR Outside, Inside;
+
+ // Test against each plane.
+ DirectX::Internal::FastIntersectSpherePlane( vCenter, vRadius, Plane0, Outside, Inside );
+
+ XMVECTOR AnyOutside = Outside;
+ XMVECTOR AllInside = Inside;
+
+ DirectX::Internal::FastIntersectSpherePlane( vCenter, vRadius, Plane1, Outside, Inside );
+ AnyOutside = XMVectorOrInt( AnyOutside, Outside );
+ AllInside = XMVectorAndInt( AllInside, Inside );
+
+ DirectX::Internal::FastIntersectSpherePlane( vCenter, vRadius, Plane2, Outside, Inside );
+ AnyOutside = XMVectorOrInt( AnyOutside, Outside );
+ AllInside = XMVectorAndInt( AllInside, Inside );
+
+ DirectX::Internal::FastIntersectSpherePlane( vCenter, vRadius, Plane3, Outside, Inside );
+ AnyOutside = XMVectorOrInt( AnyOutside, Outside );
+ AllInside = XMVectorAndInt( AllInside, Inside );
+
+ DirectX::Internal::FastIntersectSpherePlane( vCenter, vRadius, Plane4, Outside, Inside );
+ AnyOutside = XMVectorOrInt( AnyOutside, Outside );
+ AllInside = XMVectorAndInt( AllInside, Inside );
+
+ DirectX::Internal::FastIntersectSpherePlane( vCenter, vRadius, Plane5, Outside, Inside );
+ AnyOutside = XMVectorOrInt( AnyOutside, Outside );
+ AllInside = XMVectorAndInt( AllInside, Inside );
+
+ // If the sphere is outside any plane it is outside.
+ if ( XMVector4EqualInt( AnyOutside, XMVectorTrueInt() ) )
+ return DISJOINT;
+
+ // If the sphere is inside all planes it is inside.
+ if ( XMVector4EqualInt( AllInside, XMVectorTrueInt() ) )
+ return CONTAINS;
+
+ // The sphere is not inside all planes or outside a plane, it may intersect.
+ return INTERSECTS;
+}
+
+
+//-----------------------------------------------------------------------------
+// Creates a bounding sphere that contains two other bounding spheres
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline void BoundingSphere::CreateMerged( BoundingSphere& Out, const BoundingSphere& S1, const BoundingSphere& S2 )
+{
+ XMVECTOR Center1 = XMLoadFloat3( &S1.Center );
+ float r1 = S1.Radius;
+
+ XMVECTOR Center2 = XMLoadFloat3( &S2.Center );
+ float r2 = S2.Radius;
+
+ XMVECTOR V = XMVectorSubtract( Center2, Center1 );
+
+ XMVECTOR Dist = XMVector3Length( V );
+
+ float d = XMVectorGetX(Dist);
+
+ if ( r1 + r2 >= d )
+ {
+ if ( r1 - r2 >= d )
+ {
+ Out = S1;
+ return;
+ }
+ else if ( r2 - r1 >= d )
+ {
+ Out = S2;
+ return;
+ }
+ }
+
+ XMVECTOR N = XMVectorDivide( V, Dist );
+
+ float t1 = XMMin( -r1, d-r2 );
+ float t2 = XMMax( r1, d+r2 );
+ float t_5 = (t2 - t1) * 0.5f;
+
+ XMVECTOR NCenter = XMVectorAdd( Center1, XMVectorMultiply( N, XMVectorReplicate( t_5 + t1 ) ) );
+
+ XMStoreFloat3( &Out.Center, NCenter );
+ Out.Radius = t_5;
+}
+
+
+//-----------------------------------------------------------------------------
+// Create sphere enscribing bounding box
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline void BoundingSphere::CreateFromBoundingBox( BoundingSphere& Out, const BoundingBox& box )
+{
+ Out.Center = box.Center;
+ XMVECTOR vExtents = XMLoadFloat3( &box.Extents );
+ Out.Radius = XMVectorGetX( XMVector3Length( vExtents ) );
+}
+
+_Use_decl_annotations_
+inline void BoundingSphere::CreateFromBoundingBox( BoundingSphere& Out, const BoundingOrientedBox& box )
+{
+ // Bounding box orientation is irrelevant because a sphere is rotationally invariant
+ Out.Center = box.Center;
+ XMVECTOR vExtents = XMLoadFloat3( &box.Extents );
+ Out.Radius = XMVectorGetX( XMVector3Length( vExtents ) );
+}
+
+
//-----------------------------------------------------------------------------
// Find the approximate smallest enclosing bounding sphere for a set of
// points. Exact computation of the smallest enclosing bounding sphere is
// possible but is slower and requires a more complex algorithm.
// The algorithm is based on Jack Ritter, "An Efficient Bounding Sphere",
// Graphics Gems.
// Stride is the distance in bytes between consecutive points.
//-----------------------------------------------------------------------------
_Use_decl_annotations_
inline void BoundingSphere::CreateFromPoints( BoundingSphere& Out, size_t Count, const XMFLOAT3* pPoints, size_t Stride )
{
    assert( Count > 0 );
    assert( pPoints );

    // Pass 1: find the points with minimum and maximum x, y, and z
    XMVECTOR MinX, MaxX, MinY, MaxY, MinZ, MaxZ;

    MinX = MaxX = MinY = MaxY = MinZ = MaxZ = XMLoadFloat3( pPoints );

    for( size_t i = 1; i < Count; ++i )
    {
        // Stride is in bytes, hence the uint8_t pointer arithmetic.
        XMVECTOR Point = XMLoadFloat3( reinterpret_cast<const XMFLOAT3*>( reinterpret_cast<const uint8_t*>(pPoints) + i * Stride ) );

        float px = XMVectorGetX( Point );
        float py = XMVectorGetY( Point );
        float pz = XMVectorGetZ( Point );

        if( px < XMVectorGetX( MinX ) )
            MinX = Point;

        if( px > XMVectorGetX( MaxX ) )
            MaxX = Point;

        if( py < XMVectorGetY( MinY ) )
            MinY = Point;

        if( py > XMVectorGetY( MaxY ) )
            MaxY = Point;

        if( pz < XMVectorGetZ( MinZ ) )
            MinZ = Point;

        if( pz > XMVectorGetZ( MaxZ ) )
            MaxZ = Point;
    }

    // Use the min/max pair that are farthest apart to form the initial sphere.
    XMVECTOR DeltaX = MaxX - MinX;
    XMVECTOR DistX = XMVector3Length( DeltaX );

    XMVECTOR DeltaY = MaxY - MinY;
    XMVECTOR DistY = XMVector3Length( DeltaY );

    XMVECTOR DeltaZ = MaxZ - MinZ;
    XMVECTOR DistZ = XMVector3Length( DeltaZ );

    XMVECTOR vCenter;
    XMVECTOR vRadius;

    if( XMVector3Greater( DistX, DistY ) )
    {
        if( XMVector3Greater( DistX, DistZ ) )
        {
            // Use min/max x.
            vCenter = XMVectorLerp(MaxX,MinX,0.5f);
            vRadius = DistX * 0.5f;
        }
        else
        {
            // Use min/max z.
            vCenter = XMVectorLerp(MaxZ,MinZ,0.5f);
            vRadius = DistZ * 0.5f;
        }
    }
    else // Y >= X
    {
        if( XMVector3Greater( DistY, DistZ ) )
        {
            // Use min/max y.
            vCenter = XMVectorLerp(MaxY,MinY,0.5f);
            vRadius = DistY * 0.5f;
        }
        else
        {
            // Use min/max z.
            vCenter = XMVectorLerp(MaxZ,MinZ,0.5f);
            vRadius = DistZ * 0.5f;
        }
    }

    // Pass 2 (Ritter expansion): grow the sphere to include any points not
    // yet inside it.
    for( size_t i = 0; i < Count; ++i )
    {
        XMVECTOR Point = XMLoadFloat3( reinterpret_cast<const XMFLOAT3*>( reinterpret_cast<const uint8_t*>(pPoints) + i * Stride ) );

        XMVECTOR Delta = Point - vCenter;

        XMVECTOR Dist = XMVector3Length( Delta );

        if( XMVector3Greater( Dist, vRadius ) )
        {
            // Adjust sphere to include the new point: the new diameter spans
            // from the far side of the old sphere to the point. Note vRadius
            // is updated first and the updated value is used to move vCenter.
            vRadius = ( vRadius + Dist ) * 0.5f;
            vCenter += ( XMVectorReplicate( 1.0f ) - XMVectorDivide(vRadius,Dist) ) * Delta;
        }
    }

    XMStoreFloat3( &Out.Center, vCenter );
    XMStoreFloat( &Out.Radius, vRadius );
}
+
+
+//-----------------------------------------------------------------------------
+// Create sphere containing frustum
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline void BoundingSphere::CreateFromFrustum( BoundingSphere& Out, const BoundingFrustum& fr )
+{
+ XMFLOAT3 Corners[BoundingFrustum::CORNER_COUNT];
+ fr.GetCorners( Corners );
+ CreateFromPoints( Out, BoundingFrustum::CORNER_COUNT, Corners, sizeof(XMFLOAT3) );
+}
+
+
+/****************************************************************************
+ *
+ * BoundingBox
+ *
+ ****************************************************************************/
+
+//-----------------------------------------------------------------------------
+// Transform an axis aligned box by an angle preserving transform.
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline void BoundingBox::Transform( BoundingBox& Out, CXMMATRIX M ) const
+{
+ // Load center and extents.
+ XMVECTOR vCenter = XMLoadFloat3( &Center );
+ XMVECTOR vExtents = XMLoadFloat3( &Extents );
+
+ // Compute and transform the corners and find new min/max bounds.
+ XMVECTOR Corner = XMVectorMultiplyAdd( vExtents, g_BoxOffset[0], vCenter );
+ Corner = XMVector3Transform( Corner, M );
+
+ XMVECTOR Min, Max;
+ Min = Max = Corner;
+
+ for( size_t i = 1; i < CORNER_COUNT; ++i )
+ {
+ Corner = XMVectorMultiplyAdd( vExtents, g_BoxOffset[i], vCenter );
+ Corner = XMVector3Transform( Corner, M );
+
+ Min = XMVectorMin( Min, Corner );
+ Max = XMVectorMax( Max, Corner );
+ }
+
+ // Store center and extents.
+ XMStoreFloat3( &Out.Center, ( Min + Max ) * 0.5f );
+ XMStoreFloat3( &Out.Extents, ( Max - Min ) * 0.5f );
+}
+
+_Use_decl_annotations_
+inline void BoundingBox::Transform( BoundingBox& Out, float Scale, FXMVECTOR Rotation, FXMVECTOR Translation ) const
+{
+ assert( DirectX::Internal::XMQuaternionIsUnit( Rotation ) );
+
+ // Load center and extents.
+ XMVECTOR vCenter = XMLoadFloat3( &Center );
+ XMVECTOR vExtents = XMLoadFloat3( &Extents );
+
+ XMVECTOR VectorScale = XMVectorReplicate( Scale );
+
+ // Compute and transform the corners and find new min/max bounds.
+ XMVECTOR Corner = XMVectorMultiplyAdd( vExtents, g_BoxOffset[0], vCenter );
+ Corner = XMVector3Rotate( Corner * VectorScale, Rotation ) + Translation;
+
+ XMVECTOR Min, Max;
+ Min = Max = Corner;
+
+ for( size_t i = 1; i < CORNER_COUNT; ++i )
+ {
+ Corner = XMVectorMultiplyAdd( vExtents, g_BoxOffset[i], vCenter );
+ Corner = XMVector3Rotate( Corner * VectorScale, Rotation ) + Translation;
+
+ Min = XMVectorMin( Min, Corner );
+ Max = XMVectorMax( Max, Corner );
+ }
+
+ // Store center and extents.
+ XMStoreFloat3( &Out.Center, ( Min + Max ) * 0.5f );
+ XMStoreFloat3( &Out.Extents, ( Max - Min ) * 0.5f );
+}
+
+
+//-----------------------------------------------------------------------------
+// Get the corner points of the box
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline void BoundingBox::GetCorners( XMFLOAT3* Corners ) const
+{
+ assert( Corners != nullptr );
+
+ // Load the box
+ XMVECTOR vCenter = XMLoadFloat3( &Center );
+ XMVECTOR vExtents = XMLoadFloat3( &Extents );
+
+ for( size_t i = 0; i < CORNER_COUNT; ++i )
+ {
+ XMVECTOR C = XMVectorMultiplyAdd( vExtents, g_BoxOffset[i], vCenter );
+ XMStoreFloat3( &Corners[i], C );
+ }
+}
+
+
+//-----------------------------------------------------------------------------
+// Point in axis-aligned box test
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline ContainmentType BoundingBox::Contains( FXMVECTOR Point ) const
+{
+ XMVECTOR vCenter = XMLoadFloat3( &Center );
+ XMVECTOR vExtents = XMLoadFloat3( &Extents );
+
+ return XMVector3InBounds( Point - vCenter, vExtents ) ? CONTAINS : DISJOINT;
+}
+
+
+//-----------------------------------------------------------------------------
+// Triangle in axis-aligned box test
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline ContainmentType BoundingBox::Contains( FXMVECTOR V0, FXMVECTOR V1, FXMVECTOR V2 ) const
+{
+ if ( !Intersects(V0,V1,V2) )
+ return DISJOINT;
+
+ XMVECTOR vCenter = XMLoadFloat3( &Center );
+ XMVECTOR vExtents = XMLoadFloat3( &Extents );
+
+ XMVECTOR d = XMVector3LengthSq( V0 - vCenter );
+ XMVECTOR Inside = XMVectorLessOrEqual( d, vExtents );
+
+ d = XMVector3LengthSq( V1 - vCenter );
+ Inside = XMVectorAndInt( Inside, XMVectorLessOrEqual( d, vExtents ) );
+
+ d = XMVector3LengthSq( V2 - vCenter );
+ Inside = XMVectorAndInt( Inside, XMVectorLessOrEqual( d, vExtents ) );
+
+ return ( XMVector3EqualInt( Inside, XMVectorTrueInt() ) ) ? CONTAINS : INTERSECTS;
+}
+
+
+//-----------------------------------------------------------------------------
+// Sphere in axis-aligned box test
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline ContainmentType BoundingBox::Contains( const BoundingSphere& sh ) const
+{
+ XMVECTOR SphereCenter = XMLoadFloat3( &sh.Center );
+ XMVECTOR SphereRadius = XMVectorReplicatePtr( &sh.Radius );
+
+ XMVECTOR BoxCenter = XMLoadFloat3( &Center );
+ XMVECTOR BoxExtents = XMLoadFloat3( &Extents );
+
+ XMVECTOR BoxMin = BoxCenter - BoxExtents;
+ XMVECTOR BoxMax = BoxCenter + BoxExtents;
+
+ // Find the distance to the nearest point on the box.
+ // for each i in (x, y, z)
+ // if (SphereCenter(i) < BoxMin(i)) d2 += (SphereCenter(i) - BoxMin(i)) ^ 2
+ // else if (SphereCenter(i) > BoxMax(i)) d2 += (SphereCenter(i) - BoxMax(i)) ^ 2
+
+ XMVECTOR d = XMVectorZero();
+
+ // Compute d for each dimension.
+ XMVECTOR LessThanMin = XMVectorLess( SphereCenter, BoxMin );
+ XMVECTOR GreaterThanMax = XMVectorGreater( SphereCenter, BoxMax );
+
+ XMVECTOR MinDelta = SphereCenter - BoxMin;
+ XMVECTOR MaxDelta = SphereCenter - BoxMax;
+
+ // Choose value for each dimension based on the comparison.
+ d = XMVectorSelect( d, MinDelta, LessThanMin );
+ d = XMVectorSelect( d, MaxDelta, GreaterThanMax );
+
+ // Use a dot-product to square them and sum them together.
+ XMVECTOR d2 = XMVector3Dot( d, d );
+
+ if ( XMVector3Greater( d2, XMVectorMultiply( SphereRadius, SphereRadius ) ) )
+ return DISJOINT;
+
+ XMVECTOR InsideAll = XMVectorLessOrEqual( BoxMin + SphereRadius, SphereCenter );
+ InsideAll = XMVectorAndInt( InsideAll, XMVectorLessOrEqual( SphereCenter, BoxMax - SphereRadius ) );
+ InsideAll = XMVectorAndInt( InsideAll, XMVectorGreater( BoxMax - BoxMin, SphereRadius ) );
+
+ return ( XMVector3EqualInt( InsideAll, XMVectorTrueInt() ) ) ? CONTAINS : INTERSECTS;
+}
+
+
+//-----------------------------------------------------------------------------
+// Axis-aligned box in axis-aligned box test
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline ContainmentType BoundingBox::Contains( const BoundingBox& box ) const
+{
+ XMVECTOR CenterA = XMLoadFloat3( &Center );
+ XMVECTOR ExtentsA = XMLoadFloat3( &Extents );
+
+ XMVECTOR CenterB = XMLoadFloat3( &box.Center );
+ XMVECTOR ExtentsB = XMLoadFloat3( &box.Extents );
+
+ XMVECTOR MinA = CenterA - ExtentsA;
+ XMVECTOR MaxA = CenterA + ExtentsA;
+
+ XMVECTOR MinB = CenterB - ExtentsB;
+ XMVECTOR MaxB = CenterB + ExtentsB;
+
+ // for each i in (x, y, z) if a_min(i) > b_max(i) or b_min(i) > a_max(i) then return false
+ XMVECTOR Disjoint = XMVectorOrInt( XMVectorGreater( MinA, MaxB ), XMVectorGreater( MinB, MaxA ) );
+
+ if ( DirectX::Internal::XMVector3AnyTrue( Disjoint ) )
+ return DISJOINT;
+
+ // for each i in (x, y, z) if a_min(i) <= b_min(i) and b_max(i) <= a_max(i) then A contains B
+ XMVECTOR Inside = XMVectorAndInt( XMVectorLessOrEqual( MinA, MinB ), XMVectorLessOrEqual( MaxB, MaxA ) );
+
+ return DirectX::Internal::XMVector3AllTrue( Inside ) ? CONTAINS : INTERSECTS;
+}
+
+
+//-----------------------------------------------------------------------------
+// Oriented box in axis-aligned box test
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline ContainmentType BoundingBox::Contains( const BoundingOrientedBox& box ) const
+{
+ if ( !box.Intersects( *this ) )
+ return DISJOINT;
+
+ XMVECTOR vCenter = XMLoadFloat3( &Center );
+ XMVECTOR vExtents = XMLoadFloat3( &Extents );
+
+ // Subtract off the AABB center to remove a subtract below
+ XMVECTOR oCenter = XMLoadFloat3( &box.Center ) - vCenter;
+
+ XMVECTOR oExtents = XMLoadFloat3( &box.Extents );
+ XMVECTOR oOrientation = XMLoadFloat4( &box.Orientation );
+
+ assert( DirectX::Internal::XMQuaternionIsUnit( oOrientation ) );
+
+ XMVECTOR Inside = XMVectorTrueInt();
+
+ for( size_t i=0; i < BoundingOrientedBox::CORNER_COUNT; ++i )
+ {
+ XMVECTOR C = XMVector3Rotate( oExtents * g_BoxOffset[i], oOrientation ) + oCenter;
+ XMVECTOR d = XMVector3LengthSq( C );
+ Inside = XMVectorAndInt( Inside, XMVectorLessOrEqual( d, vExtents ) );
+ }
+
+ return ( XMVector3EqualInt( Inside, XMVectorTrueInt() ) ) ? CONTAINS : INTERSECTS;
+}
+
+
+//-----------------------------------------------------------------------------
+// Frustum in axis-aligned box test
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline ContainmentType BoundingBox::Contains( const BoundingFrustum& fr ) const
+{
+ if ( !fr.Intersects( *this ) )
+ return DISJOINT;
+
+ XMFLOAT3 Corners[BoundingFrustum::CORNER_COUNT];
+ fr.GetCorners( Corners );
+
+ XMVECTOR vCenter = XMLoadFloat3( &Center );
+ XMVECTOR vExtents = XMLoadFloat3( &Extents );
+
+ XMVECTOR Inside = XMVectorTrueInt();
+
+ for( size_t i=0; i < BoundingFrustum::CORNER_COUNT; ++i )
+ {
+ XMVECTOR Point = XMLoadFloat3( &Corners[i] );
+ XMVECTOR d = XMVector3LengthSq( Point - vCenter );
+ Inside = XMVectorAndInt( Inside, XMVectorLessOrEqual( d, vExtents ) );
+ }
+
+ return ( XMVector3EqualInt( Inside, XMVectorTrueInt() ) ) ? CONTAINS : INTERSECTS;
+}
+
+
+//-----------------------------------------------------------------------------
+// Sphere vs axis-aligned box test
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline bool BoundingBox::Intersects( const BoundingSphere& sh ) const
+{
+ XMVECTOR SphereCenter = XMLoadFloat3( &sh.Center );
+ XMVECTOR SphereRadius = XMVectorReplicatePtr( &sh.Radius );
+
+ XMVECTOR BoxCenter = XMLoadFloat3( &Center );
+ XMVECTOR BoxExtents = XMLoadFloat3( &Extents );
+
+ XMVECTOR BoxMin = BoxCenter - BoxExtents;
+ XMVECTOR BoxMax = BoxCenter + BoxExtents;
+
+ // Find the distance to the nearest point on the box.
+ // for each i in (x, y, z)
+ // if (SphereCenter(i) < BoxMin(i)) d2 += (SphereCenter(i) - BoxMin(i)) ^ 2
+ // else if (SphereCenter(i) > BoxMax(i)) d2 += (SphereCenter(i) - BoxMax(i)) ^ 2
+
+ XMVECTOR d = XMVectorZero();
+
+ // Compute d for each dimension.
+ XMVECTOR LessThanMin = XMVectorLess( SphereCenter, BoxMin );
+ XMVECTOR GreaterThanMax = XMVectorGreater( SphereCenter, BoxMax );
+
+ XMVECTOR MinDelta = SphereCenter - BoxMin;
+ XMVECTOR MaxDelta = SphereCenter - BoxMax;
+
+ // Choose value for each dimension based on the comparison.
+ d = XMVectorSelect( d, MinDelta, LessThanMin );
+ d = XMVectorSelect( d, MaxDelta, GreaterThanMax );
+
+ // Use a dot-product to square them and sum them together.
+ XMVECTOR d2 = XMVector3Dot( d, d );
+
+ return XMVector3LessOrEqual( d2, XMVectorMultiply( SphereRadius, SphereRadius ) );
+}
+
+
+//-----------------------------------------------------------------------------
+// Axis-aligned box vs. axis-aligned box test
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline bool BoundingBox::Intersects( const BoundingBox& box ) const
+{
+ XMVECTOR CenterA = XMLoadFloat3( &Center );
+ XMVECTOR ExtentsA = XMLoadFloat3( &Extents );
+
+ XMVECTOR CenterB = XMLoadFloat3( &box.Center );
+ XMVECTOR ExtentsB = XMLoadFloat3( &box.Extents );
+
+ XMVECTOR MinA = CenterA - ExtentsA;
+ XMVECTOR MaxA = CenterA + ExtentsA;
+
+ XMVECTOR MinB = CenterB - ExtentsB;
+ XMVECTOR MaxB = CenterB + ExtentsB;
+
+ // for each i in (x, y, z) if a_min(i) > b_max(i) or b_min(i) > a_max(i) then return false
+ XMVECTOR Disjoint = XMVectorOrInt( XMVectorGreater( MinA, MaxB ), XMVectorGreater( MinB, MaxA ) );
+
+ return !DirectX::Internal::XMVector3AnyTrue( Disjoint );
+}
+
+
+//-----------------------------------------------------------------------------
+// Oriented box vs. axis-aligned box test
+//-----------------------------------------------------------------------------
_Use_decl_annotations_
inline bool BoundingBox::Intersects( const BoundingOrientedBox& box ) const
{
    // Intersection is symmetric: delegate to the oriented box's
    // OBB-vs-AABB test.
    return box.Intersects( *this );
}
+
+
+//-----------------------------------------------------------------------------
+// Frustum vs. axis-aligned box test
+//-----------------------------------------------------------------------------
_Use_decl_annotations_
inline bool BoundingBox::Intersects( const BoundingFrustum& fr ) const
{
    // Intersection is symmetric: delegate to the frustum's
    // frustum-vs-AABB test.
    return fr.Intersects( *this );
}
+
+
+//-----------------------------------------------------------------------------
+// Triangle vs. axis aligned box test
+//-----------------------------------------------------------------------------
// Separating-axis test (SAT) between the triangle (V0,V1,V2) and this box.
// Candidate separating axes: the box's 3 face normals (tested via the
// triangle's AABB), the triangle's plane normal, and the 9 cross products of
// box axes with triangle edges. The triangle intersects the box iff no
// candidate axis separates the two projections.
_Use_decl_annotations_
inline bool BoundingBox::Intersects( FXMVECTOR V0, FXMVECTOR V1, FXMVECTOR V2 ) const
{
    XMVECTOR Zero = XMVectorZero();

    // Load the box.
    XMVECTOR vCenter = XMLoadFloat3( &Center );
    XMVECTOR vExtents = XMLoadFloat3( &Extents );

    XMVECTOR BoxMin = vCenter - vExtents;
    XMVECTOR BoxMax = vCenter + vExtents;

    // Test the axes of the box (in effect test the AAB against the minimal AAB
    // around the triangle).
    XMVECTOR TriMin = XMVectorMin( XMVectorMin( V0, V1 ), V2 );
    XMVECTOR TriMax = XMVectorMax( XMVectorMax( V0, V1 ), V2 );

    // for each i in (x, y, z) if a_min(i) > b_max(i) or b_min(i) > a_max(i) then disjoint
    XMVECTOR Disjoint = XMVectorOrInt( XMVectorGreater( TriMin, BoxMax ), XMVectorGreater( BoxMin, TriMax ) );
    if( DirectX::Internal::XMVector3AnyTrue( Disjoint ) )
        return false;

    // Test the plane of the triangle.
    XMVECTOR Normal = XMVector3Cross( V1 - V0, V2 - V0 );
    XMVECTOR Dist = XMVector3Dot( Normal, V0 );

    // Assert that the triangle is not degenerate.
    assert( !XMVector3Equal( Normal, Zero ) );

    // Pick the box corners nearest/farthest along the normal:
    // for each i in (x, y, z) if n(i) >= 0 then v_min(i)=b_min(i), v_max(i)=b_max(i)
    // else v_min(i)=b_max(i), v_max(i)=b_min(i)
    XMVECTOR NormalSelect = XMVectorGreater( Normal, Zero );
    XMVECTOR V_Min = XMVectorSelect( BoxMax, BoxMin, NormalSelect );
    XMVECTOR V_Max = XMVectorSelect( BoxMin, BoxMax, NormalSelect );

    // if n dot v_min + d > 0 || n dot v_max + d < 0 then disjoint
    XMVECTOR MinDist = XMVector3Dot( V_Min, Normal );
    XMVECTOR MaxDist = XMVector3Dot( V_Max, Normal );

    XMVECTOR NoIntersection = XMVectorGreater( MinDist, Dist );
    NoIntersection = XMVectorOrInt( NoIntersection, XMVectorLess( MaxDist, Dist ) );

    // Move the box center to zero to simplify the following tests.
    XMVECTOR TV0 = V0 - vCenter;
    XMVECTOR TV1 = V1 - vCenter;
    XMVECTOR TV2 = V2 - vCenter;

    // Test the edge/edge axes (3*3).
    XMVECTOR e0 = TV1 - TV0;
    XMVECTOR e1 = TV2 - TV1;
    XMVECTOR e2 = TV0 - TV2;

    // Make w zero.
    e0 = XMVectorInsert<0, 0, 0, 0, 1>( e0, Zero );
    e1 = XMVectorInsert<0, 0, 0, 0, 1>( e1, Zero );
    e2 = XMVectorInsert<0, 0, 0, 0, 1>( e2, Zero );

    XMVECTOR Axis;
    XMVECTOR p0, p1, p2;
    XMVECTOR Min, Max;
    XMVECTOR Radius;

    // For each axis below: project the triangle onto the axis ([Min, Max])
    // and the box onto the axis (the symmetric interval [-Radius, Radius]).
    // One triangle projection per group is skipped because the axis is
    // perpendicular to the edge it was built from, so that vertex projects
    // equally to another (see the inline "// pN = pM" notes).

    // Axis == (1,0,0) x e0 = (0, -e0.z, e0.y)
    Axis = XMVectorPermute<XM_PERMUTE_0W, XM_PERMUTE_1Z, XM_PERMUTE_0Y, XM_PERMUTE_0X>( e0, -e0 );
    p0 = XMVector3Dot( TV0, Axis );
    // p1 = XMVector3Dot( V1, Axis ); // p1 = p0;
    p2 = XMVector3Dot( TV2, Axis );
    Min = XMVectorMin( p0, p2 );
    Max = XMVectorMax( p0, p2 );
    Radius = XMVector3Dot( vExtents, XMVectorAbs( Axis ) );
    NoIntersection = XMVectorOrInt( NoIntersection, XMVectorGreater( Min, Radius ) );
    NoIntersection = XMVectorOrInt( NoIntersection, XMVectorLess( Max, -Radius ) );

    // Axis == (1,0,0) x e1 = (0, -e1.z, e1.y)
    Axis = XMVectorPermute<XM_PERMUTE_0W, XM_PERMUTE_1Z, XM_PERMUTE_0Y, XM_PERMUTE_0X>( e1, -e1 );
    p0 = XMVector3Dot( TV0, Axis );
    p1 = XMVector3Dot( TV1, Axis );
    // p2 = XMVector3Dot( V2, Axis ); // p2 = p1;
    Min = XMVectorMin( p0, p1 );
    Max = XMVectorMax( p0, p1 );
    Radius = XMVector3Dot( vExtents, XMVectorAbs( Axis ) );
    NoIntersection = XMVectorOrInt( NoIntersection, XMVectorGreater( Min, Radius ) );
    NoIntersection = XMVectorOrInt( NoIntersection, XMVectorLess( Max, -Radius ) );

    // Axis == (1,0,0) x e2 = (0, -e2.z, e2.y)
    Axis = XMVectorPermute<XM_PERMUTE_0W, XM_PERMUTE_1Z, XM_PERMUTE_0Y, XM_PERMUTE_0X>( e2, -e2 );
    p0 = XMVector3Dot( TV0, Axis );
    p1 = XMVector3Dot( TV1, Axis );
    // p2 = XMVector3Dot( V2, Axis ); // p2 = p0;
    Min = XMVectorMin( p0, p1 );
    Max = XMVectorMax( p0, p1 );
    Radius = XMVector3Dot( vExtents, XMVectorAbs( Axis ) );
    NoIntersection = XMVectorOrInt( NoIntersection, XMVectorGreater( Min, Radius ) );
    NoIntersection = XMVectorOrInt( NoIntersection, XMVectorLess( Max, -Radius ) );

    // Axis == (0,1,0) x e0 = (e0.z, 0, -e0.x)
    Axis = XMVectorPermute<XM_PERMUTE_0Z, XM_PERMUTE_0W, XM_PERMUTE_1X, XM_PERMUTE_0Y>( e0, -e0 );
    p0 = XMVector3Dot( TV0, Axis );
    // p1 = XMVector3Dot( V1, Axis ); // p1 = p0;
    p2 = XMVector3Dot( TV2, Axis );
    Min = XMVectorMin( p0, p2 );
    Max = XMVectorMax( p0, p2 );
    Radius = XMVector3Dot( vExtents, XMVectorAbs( Axis ) );
    NoIntersection = XMVectorOrInt( NoIntersection, XMVectorGreater( Min, Radius ) );
    NoIntersection = XMVectorOrInt( NoIntersection, XMVectorLess( Max, -Radius ) );

    // Axis == (0,1,0) x e1 = (e1.z, 0, -e1.x)
    Axis = XMVectorPermute<XM_PERMUTE_0Z, XM_PERMUTE_0W, XM_PERMUTE_1X, XM_PERMUTE_0Y>( e1, -e1 );
    p0 = XMVector3Dot( TV0, Axis );
    p1 = XMVector3Dot( TV1, Axis );
    // p2 = XMVector3Dot( V2, Axis ); // p2 = p1;
    Min = XMVectorMin( p0, p1 );
    Max = XMVectorMax( p0, p1 );
    Radius = XMVector3Dot( vExtents, XMVectorAbs( Axis ) );
    NoIntersection = XMVectorOrInt( NoIntersection, XMVectorGreater( Min, Radius ) );
    NoIntersection = XMVectorOrInt( NoIntersection, XMVectorLess( Max, -Radius ) );

    // Axis == (0,1,0) x e2 = (e2.z, 0, -e2.x)
    // (comment fixed: this is the Y-axis cross product group, matching the
    // permute pattern of the two cases above)
    Axis = XMVectorPermute<XM_PERMUTE_0Z, XM_PERMUTE_0W, XM_PERMUTE_1X, XM_PERMUTE_0Y>( e2, -e2 );
    p0 = XMVector3Dot( TV0, Axis );
    p1 = XMVector3Dot( TV1, Axis );
    // p2 = XMVector3Dot( V2, Axis ); // p2 = p0;
    Min = XMVectorMin( p0, p1 );
    Max = XMVectorMax( p0, p1 );
    Radius = XMVector3Dot( vExtents, XMVectorAbs( Axis ) );
    NoIntersection = XMVectorOrInt( NoIntersection, XMVectorGreater( Min, Radius ) );
    NoIntersection = XMVectorOrInt( NoIntersection, XMVectorLess( Max, -Radius ) );

    // Axis == (0,0,1) x e0 = (-e0.y, e0.x, 0)
    Axis = XMVectorPermute<XM_PERMUTE_1Y, XM_PERMUTE_0X, XM_PERMUTE_0W, XM_PERMUTE_0Z>( e0, -e0 );
    p0 = XMVector3Dot( TV0, Axis );
    // p1 = XMVector3Dot( V1, Axis ); // p1 = p0;
    p2 = XMVector3Dot( TV2, Axis );
    Min = XMVectorMin( p0, p2 );
    Max = XMVectorMax( p0, p2 );
    Radius = XMVector3Dot( vExtents, XMVectorAbs( Axis ) );
    NoIntersection = XMVectorOrInt( NoIntersection, XMVectorGreater( Min, Radius ) );
    NoIntersection = XMVectorOrInt( NoIntersection, XMVectorLess( Max, -Radius ) );

    // Axis == (0,0,1) x e1 = (-e1.y, e1.x, 0)
    Axis = XMVectorPermute<XM_PERMUTE_1Y, XM_PERMUTE_0X, XM_PERMUTE_0W, XM_PERMUTE_0Z>( e1, -e1 );
    p0 = XMVector3Dot( TV0, Axis );
    p1 = XMVector3Dot( TV1, Axis );
    // p2 = XMVector3Dot( V2, Axis ); // p2 = p1;
    Min = XMVectorMin( p0, p1 );
    Max = XMVectorMax( p0, p1 );
    Radius = XMVector3Dot( vExtents, XMVectorAbs( Axis ) );
    NoIntersection = XMVectorOrInt( NoIntersection, XMVectorGreater( Min, Radius ) );
    NoIntersection = XMVectorOrInt( NoIntersection, XMVectorLess( Max, -Radius ) );

    // Axis == (0,0,1) x e2 = (-e2.y, e2.x, 0)
    Axis = XMVectorPermute<XM_PERMUTE_1Y, XM_PERMUTE_0X, XM_PERMUTE_0W, XM_PERMUTE_0Z>( e2, -e2 );
    p0 = XMVector3Dot( TV0, Axis );
    p1 = XMVector3Dot( TV1, Axis );
    // p2 = XMVector3Dot( V2, Axis ); // p2 = p0;
    Min = XMVectorMin( p0, p1 );
    Max = XMVectorMax( p0, p1 );
    Radius = XMVector3Dot( vExtents, XMVectorAbs( Axis ) );
    NoIntersection = XMVectorOrInt( NoIntersection, XMVectorGreater( Min, Radius ) );
    NoIntersection = XMVectorOrInt( NoIntersection, XMVectorLess( Max, -Radius ) );

    // Intersecting iff no separating axis was found.
    return XMVector4NotEqualInt( NoIntersection, XMVectorTrueInt() );
}
+
+
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline PlaneIntersectionType BoundingBox::Intersects( FXMVECTOR Plane ) const
+{
+ assert( DirectX::Internal::XMPlaneIsUnit( Plane ) );
+
+ // Load the box.
+ XMVECTOR vCenter = XMLoadFloat3( &Center );
+ XMVECTOR vExtents = XMLoadFloat3( &Extents );
+
+ // Set w of the center to one so we can dot4 with a plane.
+ vCenter = XMVectorInsert<0, 0, 0, 0, 1>( vCenter, XMVectorSplatOne() );
+
+ XMVECTOR Outside, Inside;
+ DirectX::Internal::FastIntersectAxisAlignedBoxPlane( vCenter, vExtents, Plane, Outside, Inside );
+
+ // If the box is outside any plane it is outside.
+ if ( XMVector4EqualInt( Outside, XMVectorTrueInt() ) )
+ return FRONT;
+
+ // If the box is inside all planes it is inside.
+ if ( XMVector4EqualInt( Inside, XMVectorTrueInt() ) )
+ return BACK;
+
+ // The box is not inside all planes or outside a plane it intersects.
+ return INTERSECTING;
+}
+
+
+//-----------------------------------------------------------------------------
+// Compute the intersection of a ray (Origin, Direction) with an axis aligned
+// box using the slabs method.
+//-----------------------------------------------------------------------------
_Use_decl_annotations_
inline bool BoundingBox::Intersects( FXMVECTOR Origin, FXMVECTOR Direction, float& Dist ) const
{
    // Ray/AABB intersection via the slabs method: clip the ray's parametric
    // interval against each pair of parallel box faces; the ray hits the box
    // iff the three per-axis intervals overlap.
    // On a hit, Dist receives the entry parameter t_min (note: not clamped to
    // zero, so it can be negative when the origin lies inside the box); on a
    // miss, Dist is set to 0 and false is returned.
    assert( DirectX::Internal::XMVector3IsUnit( Direction ) );

    // Load the box.
    XMVECTOR vCenter = XMLoadFloat3( &Center );
    XMVECTOR vExtents = XMLoadFloat3( &Extents );

    // Adjust ray origin to be relative to center of the box.
    XMVECTOR TOrigin = vCenter - Origin;

    // Compute the dot product againt each axis of the box.
    // Since the axii are (1,0,0), (0,1,0), (0,0,1) no computation is necessary.
    XMVECTOR AxisDotOrigin = TOrigin;
    XMVECTOR AxisDotDirection = Direction;

    // if (fabs(AxisDotDirection) <= Epsilon) the ray is nearly parallel to the slab.
    XMVECTOR IsParallel = XMVectorLessOrEqual( XMVectorAbs( AxisDotDirection ), g_RayEpsilon );

    // Test against all three axii simultaneously.
    XMVECTOR InverseAxisDotDirection = XMVectorReciprocal( AxisDotDirection );
    XMVECTOR t1 = ( AxisDotOrigin - vExtents ) * InverseAxisDotDirection;
    XMVECTOR t2 = ( AxisDotOrigin + vExtents ) * InverseAxisDotDirection;

    // Compute the max of min(t1,t2) and the min of max(t1,t2) ensuring we don't
    // use the results from any directions parallel to the slab.
    // (Parallel axes get -FLT_MAX/+FLT_MAX so they never constrain the interval.)
    XMVECTOR t_min = XMVectorSelect( XMVectorMin( t1, t2 ), g_FltMin, IsParallel );
    XMVECTOR t_max = XMVectorSelect( XMVectorMax( t1, t2 ), g_FltMax, IsParallel );

    // t_min.x = maximum( t_min.x, t_min.y, t_min.z );
    // t_max.x = minimum( t_max.x, t_max.y, t_max.z );
    t_min = XMVectorMax( t_min, XMVectorSplatY( t_min ) );  // x = max(x,y)
    t_min = XMVectorMax( t_min, XMVectorSplatZ( t_min ) );  // x = max(max(x,y),z)
    t_max = XMVectorMin( t_max, XMVectorSplatY( t_max ) );  // x = min(x,y)
    t_max = XMVectorMin( t_max, XMVectorSplatZ( t_max ) );  // x = min(min(x,y),z)

    // if ( t_min > t_max ) return false;
    XMVECTOR NoIntersection = XMVectorGreater( XMVectorSplatX( t_min ), XMVectorSplatX( t_max ) );

    // if ( t_max < 0.0f ) return false;
    NoIntersection = XMVectorOrInt( NoIntersection, XMVectorLess( XMVectorSplatX( t_max ), XMVectorZero() ) );

    // A parallel ray still misses unless its origin lies between the slab
    // faces on that axis:
    // if (IsParallel && (-Extents > AxisDotOrigin || Extents < AxisDotOrigin)) return false;
    XMVECTOR ParallelOverlap = XMVectorInBounds( AxisDotOrigin, vExtents );
    NoIntersection = XMVectorOrInt( NoIntersection, XMVectorAndCInt( IsParallel, ParallelOverlap ) );

    if( !DirectX::Internal::XMVector3AnyTrue( NoIntersection ) )
    {
        // Store the x-component to *pDist
        XMStoreFloat( &Dist, t_min );
        return true;
    }

    Dist = 0.f;
    return false;
}
+
+
+//-----------------------------------------------------------------------------
+// Test an axis alinged box vs 6 planes (typically forming a frustum).
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline ContainmentType BoundingBox::ContainedBy( FXMVECTOR Plane0, FXMVECTOR Plane1, FXMVECTOR Plane2,
+ GXMVECTOR Plane3, CXMVECTOR Plane4, CXMVECTOR Plane5 ) const
+{
+ // Load the box.
+ XMVECTOR vCenter = XMLoadFloat3( &Center );
+ XMVECTOR vExtents = XMLoadFloat3( &Extents );
+
+ // Set w of the center to one so we can dot4 with a plane.
+ vCenter = XMVectorInsert<0, 0, 0, 0, 1>( vCenter, XMVectorSplatOne() );
+
+ XMVECTOR Outside, Inside;
+
+ // Test against each plane.
+ DirectX::Internal::FastIntersectAxisAlignedBoxPlane( vCenter, vExtents, Plane0, Outside, Inside );
+
+ XMVECTOR AnyOutside = Outside;
+ XMVECTOR AllInside = Inside;
+
+ DirectX::Internal::FastIntersectAxisAlignedBoxPlane( vCenter, vExtents, Plane1, Outside, Inside );
+ AnyOutside = XMVectorOrInt( AnyOutside, Outside );
+ AllInside = XMVectorAndInt( AllInside, Inside );
+
+ DirectX::Internal::FastIntersectAxisAlignedBoxPlane( vCenter, vExtents, Plane2, Outside, Inside );
+ AnyOutside = XMVectorOrInt( AnyOutside, Outside );
+ AllInside = XMVectorAndInt( AllInside, Inside );
+
+ DirectX::Internal::FastIntersectAxisAlignedBoxPlane( vCenter, vExtents, Plane3, Outside, Inside );
+ AnyOutside = XMVectorOrInt( AnyOutside, Outside );
+ AllInside = XMVectorAndInt( AllInside, Inside );
+
+ DirectX::Internal::FastIntersectAxisAlignedBoxPlane( vCenter, vExtents, Plane4, Outside, Inside );
+ AnyOutside = XMVectorOrInt( AnyOutside, Outside );
+ AllInside = XMVectorAndInt( AllInside, Inside );
+
+ DirectX::Internal::FastIntersectAxisAlignedBoxPlane( vCenter, vExtents, Plane5, Outside, Inside );
+ AnyOutside = XMVectorOrInt( AnyOutside, Outside );
+ AllInside = XMVectorAndInt( AllInside, Inside );
+
+ // If the box is outside any plane it is outside.
+ if ( XMVector4EqualInt( AnyOutside, XMVectorTrueInt() ) )
+ return DISJOINT;
+
+ // If the box is inside all planes it is inside.
+ if ( XMVector4EqualInt( AllInside, XMVectorTrueInt() ) )
+ return CONTAINS;
+
+ // The box is not inside all planes or outside a plane, it may intersect.
+ return INTERSECTS;
+}
+
+
+//-----------------------------------------------------------------------------
+// Create axis-aligned box that contains two other bounding boxes
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline void BoundingBox::CreateMerged( BoundingBox& Out, const BoundingBox& b1, const BoundingBox& b2 )
+{
+ XMVECTOR b1Center = XMLoadFloat3( &b1.Center );
+ XMVECTOR b1Extents = XMLoadFloat3( &b1.Extents );
+
+ XMVECTOR b2Center = XMLoadFloat3( &b2.Center );
+ XMVECTOR b2Extents = XMLoadFloat3( &b2.Extents );
+
+ XMVECTOR Min = XMVectorSubtract( b1Center, b1Extents );
+ Min = XMVectorMin( Min, XMVectorSubtract( b2Center, b2Extents ) );
+
+ XMVECTOR Max = XMVectorAdd( b1Center, b1Extents );
+ Max = XMVectorMax( Max, XMVectorAdd( b2Center, b2Extents ) );
+
+ assert( XMVector3LessOrEqual( Min, Max ) );
+
+ XMStoreFloat3( &Out.Center, ( Min + Max ) * 0.5f );
+ XMStoreFloat3( &Out.Extents, ( Max - Min ) * 0.5f );
+}
+
+
+//-----------------------------------------------------------------------------
+// Create axis-aligned box that contains a bounding sphere
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline void BoundingBox::CreateFromSphere( BoundingBox& Out, const BoundingSphere& sh )
+{
+ XMVECTOR spCenter = XMLoadFloat3( &sh.Center );
+ XMVECTOR shRadius = XMVectorReplicatePtr( &sh.Radius );
+
+ XMVECTOR Min = XMVectorSubtract( spCenter, shRadius );
+ XMVECTOR Max = XMVectorAdd( spCenter, shRadius );
+
+ assert( XMVector3LessOrEqual( Min, Max ) );
+
+ XMStoreFloat3( &Out.Center, ( Min + Max ) * 0.5f );
+ XMStoreFloat3( &Out.Extents, ( Max - Min ) * 0.5f );
+}
+
+
+//-----------------------------------------------------------------------------
+// Create axis-aligned box from min/max points
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline void BoundingBox::CreateFromPoints( BoundingBox& Out, FXMVECTOR pt1, FXMVECTOR pt2 )
+{
+ XMVECTOR Min = XMVectorMin( pt1, pt2 );
+ XMVECTOR Max = XMVectorMax( pt1, pt2 );
+
+ // Store center and extents.
+ XMStoreFloat3( &Out.Center, ( Min + Max ) * 0.5f );
+ XMStoreFloat3( &Out.Extents, ( Max - Min ) * 0.5f );
+}
+
+
+//-----------------------------------------------------------------------------
+// Find the minimum axis aligned bounding box containing a set of points.
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline void BoundingBox::CreateFromPoints( BoundingBox& Out, size_t Count, const XMFLOAT3* pPoints, size_t Stride )
+{
+ assert( Count > 0 );
+ assert( pPoints );
+
+ // Find the minimum and maximum x, y, and z
+ XMVECTOR vMin, vMax;
+
+ vMin = vMax = XMLoadFloat3( pPoints );
+
+ for( size_t i = 1; i < Count; ++i )
+ {
+ XMVECTOR Point = XMLoadFloat3( reinterpret_cast<const XMFLOAT3*>( reinterpret_cast<const uint8_t*>(pPoints) + i * Stride ) );
+
+ vMin = XMVectorMin( vMin, Point );
+ vMax = XMVectorMax( vMax, Point );
+ }
+
+ // Store center and extents.
+ XMStoreFloat3( &Out.Center, ( vMin + vMax ) * 0.5f );
+ XMStoreFloat3( &Out.Extents, ( vMax - vMin ) * 0.5f );
+}
+
+
+/****************************************************************************
+ *
+ * BoundingOrientedBox
+ *
+ ****************************************************************************/
+
+//-----------------------------------------------------------------------------
+// Transform an oriented box by an angle preserving transform.
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline void BoundingOrientedBox::Transform( BoundingOrientedBox& Out, CXMMATRIX M ) const
+{
+ // Load the box.
+ XMVECTOR vCenter = XMLoadFloat3( &Center );
+ XMVECTOR vExtents = XMLoadFloat3( &Extents );
+ XMVECTOR vOrientation = XMLoadFloat4( &Orientation );
+
+ assert( DirectX::Internal::XMQuaternionIsUnit( vOrientation ) );
+
+ // Composite the box rotation and the transform rotation.
+ XMVECTOR Rotation = XMQuaternionRotationMatrix( M );
+ vOrientation = XMQuaternionMultiply( vOrientation, Rotation );
+
+ // Transform the center.
+ vCenter = XMVector3Transform( vCenter, M );
+
+ // Scale the box extents.
+ XMVECTOR dX = XMVector3Length( M.r[0] );
+ XMVECTOR dY = XMVector3Length( M.r[1] );
+ XMVECTOR dZ = XMVector3Length( M.r[2] );
+
+ XMVECTOR VectorScale = XMVectorSelect( dX, dY, g_XMSelect1000 );
+ VectorScale = XMVectorSelect( VectorScale, dZ, g_XMSelect1100 );
+ vExtents = vExtents * VectorScale;
+
+ // Store the box.
+ XMStoreFloat3( &Out.Center, vCenter );
+ XMStoreFloat3( &Out.Extents, vExtents );
+ XMStoreFloat4( &Out.Orientation, vOrientation );
+}
+
+_Use_decl_annotations_
+inline void BoundingOrientedBox::Transform( BoundingOrientedBox& Out, float Scale, FXMVECTOR Rotation, FXMVECTOR Translation ) const
+{
+ assert( DirectX::Internal::XMQuaternionIsUnit( Rotation ) );
+
+ // Load the box.
+ XMVECTOR vCenter = XMLoadFloat3( &Center );
+ XMVECTOR vExtents = XMLoadFloat3( &Extents );
+ XMVECTOR vOrientation = XMLoadFloat4( &Orientation );
+
+ assert( DirectX::Internal::XMQuaternionIsUnit( vOrientation ) );
+
+ // Composite the box rotation and the transform rotation.
+ vOrientation = XMQuaternionMultiply( vOrientation, Rotation );
+
+ // Transform the center.
+ XMVECTOR VectorScale = XMVectorReplicate( Scale );
+ vCenter = XMVector3Rotate( vCenter * VectorScale, Rotation ) + Translation;
+
+ // Scale the box extents.
+ vExtents = vExtents * VectorScale;
+
+ // Store the box.
+ XMStoreFloat3( &Out.Center, vCenter );
+ XMStoreFloat3( &Out.Extents, vExtents );
+ XMStoreFloat4( &Out.Orientation, vOrientation );
+}
+
+
+//-----------------------------------------------------------------------------
+// Get the corner points of the box
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline void BoundingOrientedBox::GetCorners( XMFLOAT3* Corners ) const
+{
+ assert( Corners != 0 );
+
+ // Load the box
+ XMVECTOR vCenter = XMLoadFloat3( &Center );
+ XMVECTOR vExtents = XMLoadFloat3( &Extents );
+ XMVECTOR vOrientation = XMLoadFloat4( &Orientation );
+
+ assert( DirectX::Internal::XMQuaternionIsUnit( vOrientation ) );
+
+ for( size_t i = 0; i < CORNER_COUNT; ++i )
+ {
+ XMVECTOR C = XMVector3Rotate( vExtents * g_BoxOffset[i], vOrientation ) + vCenter;
+ XMStoreFloat3( &Corners[i], C );
+ }
+}
+
+
+//-----------------------------------------------------------------------------
+// Point in oriented box test.
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline ContainmentType BoundingOrientedBox::Contains( FXMVECTOR Point ) const
+{
+ XMVECTOR vCenter = XMLoadFloat3( &Center );
+ XMVECTOR vExtents = XMLoadFloat3( &Extents );
+ XMVECTOR vOrientation = XMLoadFloat4( &Orientation );
+
+ // Transform the point to be local to the box.
+ XMVECTOR TPoint = XMVector3InverseRotate( Point - vCenter, vOrientation );
+
+ return XMVector3InBounds( TPoint, vExtents ) ? CONTAINS : DISJOINT;
+}
+
+
+//-----------------------------------------------------------------------------
+// Triangle in oriented bounding box
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline ContainmentType BoundingOrientedBox::Contains( FXMVECTOR V0, FXMVECTOR V1, FXMVECTOR V2 ) const
+{
+ // Load the box center & orientation.
+ XMVECTOR vCenter = XMLoadFloat3( &Center );
+ XMVECTOR vOrientation = XMLoadFloat4( &Orientation );
+
+ // Transform the triangle vertices into the space of the box.
+ XMVECTOR TV0 = XMVector3InverseRotate( V0 - vCenter, vOrientation );
+ XMVECTOR TV1 = XMVector3InverseRotate( V1 - vCenter, vOrientation );
+ XMVECTOR TV2 = XMVector3InverseRotate( V2 - vCenter, vOrientation );
+
+ BoundingBox box;
+ box.Center = XMFLOAT3( 0.0f, 0.0f, 0.0f );
+ box.Extents = Extents;
+
+ // Use the triangle vs axis aligned box intersection routine.
+ return box.Contains( TV0, TV1, TV2 );
+}
+
+
+//-----------------------------------------------------------------------------
+// Sphere in oriented bounding box
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline ContainmentType BoundingOrientedBox::Contains( const BoundingSphere& sh ) const
+{
+ XMVECTOR SphereCenter = XMLoadFloat3( &sh.Center );
+ XMVECTOR SphereRadius = XMVectorReplicatePtr( &sh.Radius );
+
+ XMVECTOR BoxCenter = XMLoadFloat3( &Center );
+ XMVECTOR BoxExtents = XMLoadFloat3( &Extents );
+ XMVECTOR BoxOrientation = XMLoadFloat4( &Orientation );
+
+ assert( DirectX::Internal::XMQuaternionIsUnit( BoxOrientation ) );
+
+ // Transform the center of the sphere to be local to the box.
+ // BoxMin = -BoxExtents
+ // BoxMax = +BoxExtents
+ SphereCenter = XMVector3InverseRotate( SphereCenter - BoxCenter, BoxOrientation );
+
+ // Find the distance to the nearest point on the box.
+ // for each i in (x, y, z)
+ // if (SphereCenter(i) < BoxMin(i)) d2 += (SphereCenter(i) - BoxMin(i)) ^ 2
+ // else if (SphereCenter(i) > BoxMax(i)) d2 += (SphereCenter(i) - BoxMax(i)) ^ 2
+
+ XMVECTOR d = XMVectorZero();
+
+ // Compute d for each dimension.
+ XMVECTOR LessThanMin = XMVectorLess( SphereCenter, -BoxExtents );
+ XMVECTOR GreaterThanMax = XMVectorGreater( SphereCenter, BoxExtents );
+
+ XMVECTOR MinDelta = SphereCenter + BoxExtents;
+ XMVECTOR MaxDelta = SphereCenter - BoxExtents;
+
+ // Choose value for each dimension based on the comparison.
+ d = XMVectorSelect( d, MinDelta, LessThanMin );
+ d = XMVectorSelect( d, MaxDelta, GreaterThanMax );
+
+ // Use a dot-product to square them and sum them together.
+ XMVECTOR d2 = XMVector3Dot( d, d );
+ XMVECTOR SphereRadiusSq = XMVectorMultiply( SphereRadius, SphereRadius );
+
+ if ( XMVector4Greater( d2, SphereRadiusSq ) )
+ return DISJOINT;
+
+ // See if we are completely inside the box
+ XMVECTOR SMin = SphereCenter - SphereRadius;
+ XMVECTOR SMax = SphereCenter + SphereRadius;
+
+ return ( XMVector3InBounds( SMin, BoxExtents ) && XMVector3InBounds( SMax, BoxExtents ) ) ? CONTAINS : INTERSECTS;
+}
+
+
+//-----------------------------------------------------------------------------
+// Axis aligned box vs. oriented box. Constructs an oriented box and uses
+// the oriented box vs. oriented box test.
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline ContainmentType BoundingOrientedBox::Contains( const BoundingBox& box ) const
+{
+ // Make the axis aligned box oriented and do an OBB vs OBB test.
+ BoundingOrientedBox obox( box.Center, box.Extents, XMFLOAT4( 0.f, 0.f, 0.f, 1.f ) );
+ return Contains( obox );
+}
+
+
+//-----------------------------------------------------------------------------
+// Oriented bounding box in oriented bounding box
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline ContainmentType BoundingOrientedBox::Contains( const BoundingOrientedBox& box ) const
+{
+ if ( !Intersects(box) )
+ return DISJOINT;
+
+ // Load the boxes
+ XMVECTOR aCenter = XMLoadFloat3( &Center );
+ XMVECTOR aExtents = XMLoadFloat3( &Extents );
+ XMVECTOR aOrientation = XMLoadFloat4( &Orientation );
+
+ assert( DirectX::Internal::XMQuaternionIsUnit( aOrientation ) );
+
+ XMVECTOR bCenter = XMLoadFloat3( &box.Center );
+ XMVECTOR bExtents = XMLoadFloat3( &box.Extents );
+ XMVECTOR bOrientation = XMLoadFloat4( &box.Orientation );
+
+ assert( DirectX::Internal::XMQuaternionIsUnit( bOrientation ) );
+
+ XMVECTOR offset = bCenter - aCenter;
+
+ for( size_t i = 0; i < CORNER_COUNT; ++i )
+ {
+ // Cb = rotate( bExtents * corneroffset[i], bOrientation ) + bcenter
+ // Ca = invrotate( Cb - aCenter, aOrientation )
+
+ XMVECTOR C = XMVector3Rotate( bExtents * g_BoxOffset[i], bOrientation ) + offset;
+ C = XMVector3InverseRotate( C , aOrientation );
+
+ if ( !XMVector3InBounds( C, aExtents ) )
+ return INTERSECTS;
+ }
+
+ return CONTAINS;
+}
+
+
+//-----------------------------------------------------------------------------
+// Frustum in oriented bounding box
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline ContainmentType BoundingOrientedBox::Contains( const BoundingFrustum& fr ) const
+{
+ if ( !fr.Intersects(*this) )
+ return DISJOINT;
+
+ XMFLOAT3 Corners[BoundingFrustum::CORNER_COUNT];
+ fr.GetCorners( Corners );
+
+ // Load the box
+ XMVECTOR vCenter = XMLoadFloat3( &Center );
+ XMVECTOR vExtents = XMLoadFloat3( &Extents );
+ XMVECTOR vOrientation = XMLoadFloat4( &Orientation );
+
+ assert( DirectX::Internal::XMQuaternionIsUnit( vOrientation ) );
+
+ for( size_t i = 0; i < BoundingFrustum::CORNER_COUNT; ++i )
+ {
+ XMVECTOR C = XMVector3InverseRotate( XMLoadFloat3( &Corners[i] ) - vCenter, vOrientation );
+
+ if ( !XMVector3InBounds( C, vExtents ) )
+ return INTERSECTS;
+ }
+
+ return CONTAINS;
+}
+
+
+//-----------------------------------------------------------------------------
+// Sphere vs. oriented box test
+//-----------------------------------------------------------------------------
_Use_decl_annotations_
inline bool BoundingOrientedBox::Intersects( const BoundingSphere& sh ) const
{
    // Sphere vs OBB: transform the sphere center into the box's local frame
    // (where the box is axis aligned and origin centered), compute the squared
    // distance from the center to the nearest point on the box, and compare
    // it against the squared radius. All comparisons are done branch-free
    // with vector selects.
    XMVECTOR SphereCenter = XMLoadFloat3( &sh.Center );
    XMVECTOR SphereRadius = XMVectorReplicatePtr( &sh.Radius );

    XMVECTOR BoxCenter = XMLoadFloat3( &Center );
    XMVECTOR BoxExtents = XMLoadFloat3( &Extents );
    XMVECTOR BoxOrientation = XMLoadFloat4( &Orientation );

    assert( DirectX::Internal::XMQuaternionIsUnit( BoxOrientation ) );

    // Transform the center of the sphere to be local to the box.
    // BoxMin = -BoxExtents
    // BoxMax = +BoxExtents
    SphereCenter = XMVector3InverseRotate( SphereCenter - BoxCenter, BoxOrientation );

    // Find the distance to the nearest point on the box.
    // for each i in (x, y, z)
    //     if (SphereCenter(i) < BoxMin(i)) d2 += (SphereCenter(i) - BoxMin(i)) ^ 2
    //     else if (SphereCenter(i) > BoxMax(i)) d2 += (SphereCenter(i) - BoxMax(i)) ^ 2

    XMVECTOR d = XMVectorZero();

    // Compute d for each dimension.
    XMVECTOR LessThanMin = XMVectorLess( SphereCenter, -BoxExtents );
    XMVECTOR GreaterThanMax = XMVectorGreater( SphereCenter, BoxExtents );

    XMVECTOR MinDelta = SphereCenter + BoxExtents;
    XMVECTOR MaxDelta = SphereCenter - BoxExtents;

    // Choose value for each dimension based on the comparison.
    // Components inside the slab keep d = 0 (nearest point is the center's
    // own coordinate in that dimension).
    d = XMVectorSelect( d, MinDelta, LessThanMin );
    d = XMVectorSelect( d, MaxDelta, GreaterThanMax );

    // Use a dot-product to square them and sum them together.
    XMVECTOR d2 = XMVector3Dot( d, d );

    // Touching counts as intersecting (<=, not <).
    return XMVector4LessOrEqual( d2, XMVectorMultiply( SphereRadius, SphereRadius ) ) ? true : false;
}
+
+
+//-----------------------------------------------------------------------------
+// Axis aligned box vs. oriented box. Constructs an oriented box and uses
+// the oriented box vs. oriented box test.
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline bool BoundingOrientedBox::Intersects( const BoundingBox& box ) const
+{
+ // Make the axis aligned box oriented and do an OBB vs OBB test.
+ BoundingOrientedBox obox( box.Center, box.Extents, XMFLOAT4( 0.f, 0.f, 0.f, 1.f ) );
+ return Intersects( obox );
+}
+
+
+//-----------------------------------------------------------------------------
+// Fast oriented box / oriented box intersection test using the separating axis
+// theorem.
+//-----------------------------------------------------------------------------
_Use_decl_annotations_
inline bool BoundingOrientedBox::Intersects( const BoundingOrientedBox& box ) const
{
    // Separating-axis test between two oriented boxes. There are 15 candidate
    // separating axes: the 3 face normals of A, the 3 face normals of B, and
    // the 9 cross products of an edge of A with an edge of B. The boxes are
    // disjoint iff the projected intervals are separated on at least one axis.
    // All work is done in A's local frame so A's axes are the unit basis.

    // Build the 3x3 rotation matrix that defines the orientation of B relative to A.
    XMVECTOR A_quat = XMLoadFloat4( &Orientation );
    XMVECTOR B_quat = XMLoadFloat4( &box.Orientation );

    assert( DirectX::Internal::XMQuaternionIsUnit( A_quat ) );
    assert( DirectX::Internal::XMQuaternionIsUnit( B_quat ) );

    XMVECTOR Q = XMQuaternionMultiply( A_quat, XMQuaternionConjugate( B_quat ) );
    XMMATRIX R = XMMatrixRotationQuaternion( Q );

    // Compute the translation of B relative to A.
    XMVECTOR A_cent = XMLoadFloat3( &Center );
    XMVECTOR B_cent = XMLoadFloat3( &box.Center );
    XMVECTOR t = XMVector3InverseRotate( B_cent - A_cent, A_quat );

    //
    // h(A) = extents of A.
    // h(B) = extents of B.
    //
    // a(u) = axes of A = (1,0,0), (0,1,0), (0,0,1)
    // b(u) = axes of B relative to A = (r00,r10,r20), (r01,r11,r21), (r02,r12,r22)
    //
    // For each possible separating axis l:
    //   d(A) = sum (for i = u,v,w) h(A)(i) * abs( a(i) dot l )
    //   d(B) = sum (for i = u,v,w) h(B)(i) * abs( b(i) dot l )
    //   if abs( t dot l ) > d(A) + d(B) then disjoint
    //

    // Load extents of A and B.
    XMVECTOR h_A = XMLoadFloat3( &Extents );
    XMVECTOR h_B = XMLoadFloat3( &box.Extents );

    // Rows. Note R[0,1,2]X.w = 0.
    XMVECTOR R0X = R.r[0];
    XMVECTOR R1X = R.r[1];
    XMVECTOR R2X = R.r[2];

    R = XMMatrixTranspose( R );

    // Columns. Note RX[0,1,2].w = 0.
    XMVECTOR RX0 = R.r[0];
    XMVECTOR RX1 = R.r[1];
    XMVECTOR RX2 = R.r[2];

    // Absolute value of rows.
    XMVECTOR AR0X = XMVectorAbs( R0X );
    XMVECTOR AR1X = XMVectorAbs( R1X );
    XMVECTOR AR2X = XMVectorAbs( R2X );

    // Absolute value of columns.
    XMVECTOR ARX0 = XMVectorAbs( RX0 );
    XMVECTOR ARX1 = XMVectorAbs( RX1 );
    XMVECTOR ARX2 = XMVectorAbs( RX2 );

    // Test each of the 15 possible seperating axii.
    // NoIntersection accumulates (via OR) the per-axis "separated" masks;
    // if any axis separates the boxes, the final mask is all-true.
    XMVECTOR d, d_A, d_B;

    // l = a(u) = (1, 0, 0)
    // t dot l = t.x
    // d(A) = h(A).x
    // d(B) = h(B) dot abs(r00, r01, r02)
    d = XMVectorSplatX( t );
    d_A = XMVectorSplatX( h_A );
    d_B = XMVector3Dot( h_B, AR0X );
    XMVECTOR NoIntersection = XMVectorGreater( XMVectorAbs(d), XMVectorAdd( d_A, d_B ) );

    // l = a(v) = (0, 1, 0)
    // t dot l = t.y
    // d(A) = h(A).y
    // d(B) = h(B) dot abs(r10, r11, r12)
    d = XMVectorSplatY( t );
    d_A = XMVectorSplatY( h_A );
    d_B = XMVector3Dot( h_B, AR1X );
    NoIntersection = XMVectorOrInt( NoIntersection,
                                    XMVectorGreater( XMVectorAbs(d), XMVectorAdd( d_A, d_B ) ) );

    // l = a(w) = (0, 0, 1)
    // t dot l = t.z
    // d(A) = h(A).z
    // d(B) = h(B) dot abs(r20, r21, r22)
    d = XMVectorSplatZ( t );
    d_A = XMVectorSplatZ( h_A );
    d_B = XMVector3Dot( h_B, AR2X );
    NoIntersection = XMVectorOrInt( NoIntersection,
                                    XMVectorGreater( XMVectorAbs(d), XMVectorAdd( d_A, d_B ) ) );

    // l = b(u) = (r00, r10, r20)
    // d(A) = h(A) dot abs(r00, r10, r20)
    // d(B) = h(B).x
    d = XMVector3Dot( t, RX0 );
    d_A = XMVector3Dot( h_A, ARX0 );
    d_B = XMVectorSplatX( h_B );
    NoIntersection = XMVectorOrInt( NoIntersection,
                                    XMVectorGreater( XMVectorAbs(d), XMVectorAdd( d_A, d_B ) ) );

    // l = b(v) = (r01, r11, r21)
    // d(A) = h(A) dot abs(r01, r11, r21)
    // d(B) = h(B).y
    d = XMVector3Dot( t, RX1 );
    d_A = XMVector3Dot( h_A, ARX1 );
    d_B = XMVectorSplatY( h_B );
    NoIntersection = XMVectorOrInt( NoIntersection,
                                    XMVectorGreater( XMVectorAbs(d), XMVectorAdd( d_A, d_B ) ) );

    // l = b(w) = (r02, r12, r22)
    // d(A) = h(A) dot abs(r02, r12, r22)
    // d(B) = h(B).z
    d = XMVector3Dot( t, RX2 );
    d_A = XMVector3Dot( h_A, ARX2 );
    d_B = XMVectorSplatZ( h_B );
    NoIntersection = XMVectorOrInt( NoIntersection,
                                    XMVectorGreater( XMVectorAbs(d), XMVectorAdd( d_A, d_B ) ) );

    // The remaining nine axes are edge-edge cross products. The permute and
    // swizzle templates below build each cross-product axis (and its
    // component-absolute form) without leaving the SIMD registers; the
    // comments on each case spell out the axis in scalar terms.

    // l = a(u) x b(u) = (0, -r20, r10)
    // d(A) = h(A) dot abs(0, r20, r10)
    // d(B) = h(B) dot abs(0, r02, r01)
    d = XMVector3Dot( t, XMVectorPermute<XM_PERMUTE_0W, XM_PERMUTE_1Z, XM_PERMUTE_0Y, XM_PERMUTE_0X>( RX0, -RX0 ) );
    d_A = XMVector3Dot( h_A, XMVectorSwizzle<XM_SWIZZLE_W, XM_SWIZZLE_Z, XM_SWIZZLE_Y, XM_SWIZZLE_X>( ARX0 ) );
    d_B = XMVector3Dot( h_B, XMVectorSwizzle<XM_SWIZZLE_W, XM_SWIZZLE_Z, XM_SWIZZLE_Y, XM_SWIZZLE_X>( AR0X ) );
    NoIntersection = XMVectorOrInt( NoIntersection,
                                    XMVectorGreater( XMVectorAbs(d), XMVectorAdd( d_A, d_B ) ) );

    // l = a(u) x b(v) = (0, -r21, r11)
    // d(A) = h(A) dot abs(0, r21, r11)
    // d(B) = h(B) dot abs(r02, 0, r00)
    d = XMVector3Dot( t, XMVectorPermute<XM_PERMUTE_0W, XM_PERMUTE_1Z, XM_PERMUTE_0Y, XM_PERMUTE_0X>( RX1, -RX1 ) );
    d_A = XMVector3Dot( h_A, XMVectorSwizzle<XM_SWIZZLE_W, XM_SWIZZLE_Z, XM_SWIZZLE_Y, XM_SWIZZLE_X>( ARX1 ) );
    d_B = XMVector3Dot( h_B, XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_W, XM_SWIZZLE_X, XM_SWIZZLE_Y>( AR0X ) );
    NoIntersection = XMVectorOrInt( NoIntersection,
                                    XMVectorGreater( XMVectorAbs(d), XMVectorAdd( d_A, d_B ) ) );

    // l = a(u) x b(w) = (0, -r22, r12)
    // d(A) = h(A) dot abs(0, r22, r12)
    // d(B) = h(B) dot abs(r01, r00, 0)
    d = XMVector3Dot( t, XMVectorPermute<XM_PERMUTE_0W, XM_PERMUTE_1Z, XM_PERMUTE_0Y, XM_PERMUTE_0X>( RX2, -RX2 ) );
    d_A = XMVector3Dot( h_A, XMVectorSwizzle<XM_SWIZZLE_W, XM_SWIZZLE_Z, XM_SWIZZLE_Y, XM_SWIZZLE_X>( ARX2 ) );
    d_B = XMVector3Dot( h_B, XMVectorSwizzle<XM_SWIZZLE_Y, XM_SWIZZLE_X, XM_SWIZZLE_W, XM_SWIZZLE_Z>( AR0X ) );
    NoIntersection = XMVectorOrInt( NoIntersection,
                                    XMVectorGreater( XMVectorAbs(d), XMVectorAdd( d_A, d_B ) ) );

    // l = a(v) x b(u) = (r20, 0, -r00)
    // d(A) = h(A) dot abs(r20, 0, r00)
    // d(B) = h(B) dot abs(0, r12, r11)
    d = XMVector3Dot( t, XMVectorPermute<XM_PERMUTE_0Z, XM_PERMUTE_0W, XM_PERMUTE_1X, XM_PERMUTE_0Y>( RX0, -RX0 ) );
    d_A = XMVector3Dot( h_A, XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_W, XM_SWIZZLE_X, XM_SWIZZLE_Y>( ARX0 ) );
    d_B = XMVector3Dot( h_B, XMVectorSwizzle<XM_SWIZZLE_W, XM_SWIZZLE_Z, XM_SWIZZLE_Y, XM_SWIZZLE_X>( AR1X ) );
    NoIntersection = XMVectorOrInt( NoIntersection,
                                    XMVectorGreater( XMVectorAbs(d), XMVectorAdd( d_A, d_B ) ) );

    // l = a(v) x b(v) = (r21, 0, -r01)
    // d(A) = h(A) dot abs(r21, 0, r01)
    // d(B) = h(B) dot abs(r12, 0, r10)
    d = XMVector3Dot( t, XMVectorPermute<XM_PERMUTE_0Z, XM_PERMUTE_0W, XM_PERMUTE_1X, XM_PERMUTE_0Y>( RX1, -RX1 ) );
    d_A = XMVector3Dot( h_A, XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_W, XM_SWIZZLE_X, XM_SWIZZLE_Y>( ARX1 ) );
    d_B = XMVector3Dot( h_B, XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_W, XM_SWIZZLE_X, XM_SWIZZLE_Y>( AR1X ) );
    NoIntersection = XMVectorOrInt( NoIntersection,
                                    XMVectorGreater( XMVectorAbs(d), XMVectorAdd( d_A, d_B ) ) );

    // l = a(v) x b(w) = (r22, 0, -r02)
    // d(A) = h(A) dot abs(r22, 0, r02)
    // d(B) = h(B) dot abs(r11, r10, 0)
    d = XMVector3Dot( t, XMVectorPermute<XM_PERMUTE_0Z, XM_PERMUTE_0W, XM_PERMUTE_1X, XM_PERMUTE_0Y>( RX2, -RX2 ) );
    d_A = XMVector3Dot( h_A, XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_W, XM_SWIZZLE_X, XM_SWIZZLE_Y>( ARX2 ) );
    d_B = XMVector3Dot( h_B, XMVectorSwizzle<XM_SWIZZLE_Y, XM_SWIZZLE_X, XM_SWIZZLE_W, XM_SWIZZLE_Z>( AR1X ) );
    NoIntersection = XMVectorOrInt( NoIntersection,
                                    XMVectorGreater( XMVectorAbs(d), XMVectorAdd( d_A, d_B ) ) );

    // l = a(w) x b(u) = (-r10, r00, 0)
    // d(A) = h(A) dot abs(r10, r00, 0)
    // d(B) = h(B) dot abs(0, r22, r21)
    d = XMVector3Dot( t, XMVectorPermute<XM_PERMUTE_1Y, XM_PERMUTE_0X, XM_PERMUTE_0W, XM_PERMUTE_0Z>( RX0, -RX0 ) );
    d_A = XMVector3Dot( h_A, XMVectorSwizzle<XM_SWIZZLE_Y, XM_SWIZZLE_X, XM_SWIZZLE_W, XM_SWIZZLE_Z>( ARX0 ) );
    d_B = XMVector3Dot( h_B, XMVectorSwizzle<XM_SWIZZLE_W, XM_SWIZZLE_Z, XM_SWIZZLE_Y, XM_SWIZZLE_X>( AR2X ) );
    NoIntersection = XMVectorOrInt( NoIntersection,
                                    XMVectorGreater( XMVectorAbs(d), XMVectorAdd( d_A, d_B ) ) );

    // l = a(w) x b(v) = (-r11, r01, 0)
    // d(A) = h(A) dot abs(r11, r01, 0)
    // d(B) = h(B) dot abs(r22, 0, r20)
    d = XMVector3Dot( t, XMVectorPermute<XM_PERMUTE_1Y, XM_PERMUTE_0X, XM_PERMUTE_0W, XM_PERMUTE_0Z>( RX1, -RX1 ) );
    d_A = XMVector3Dot( h_A, XMVectorSwizzle<XM_SWIZZLE_Y, XM_SWIZZLE_X, XM_SWIZZLE_W, XM_SWIZZLE_Z>( ARX1 ) );
    d_B = XMVector3Dot( h_B, XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_W, XM_SWIZZLE_X, XM_SWIZZLE_Y>( AR2X ) );
    NoIntersection = XMVectorOrInt( NoIntersection,
                                    XMVectorGreater( XMVectorAbs(d), XMVectorAdd( d_A, d_B ) ) );

    // l = a(w) x b(w) = (-r12, r02, 0)
    // d(A) = h(A) dot abs(r12, r02, 0)
    // d(B) = h(B) dot abs(r21, r20, 0)
    d = XMVector3Dot( t, XMVectorPermute<XM_PERMUTE_1Y, XM_PERMUTE_0X, XM_PERMUTE_0W, XM_PERMUTE_0Z>( RX2, -RX2 ) );
    d_A = XMVector3Dot( h_A, XMVectorSwizzle<XM_SWIZZLE_Y, XM_SWIZZLE_X, XM_SWIZZLE_W, XM_SWIZZLE_Z>( ARX2 ) );
    d_B = XMVector3Dot( h_B, XMVectorSwizzle<XM_SWIZZLE_Y, XM_SWIZZLE_X, XM_SWIZZLE_W, XM_SWIZZLE_Z>( AR2X ) );
    NoIntersection = XMVectorOrInt( NoIntersection,
                                    XMVectorGreater( XMVectorAbs(d), XMVectorAdd( d_A, d_B ) ) );

    // No seperating axis found, boxes must intersect.
    return XMVector4NotEqualInt( NoIntersection, XMVectorTrueInt() ) ? true : false;
}
+
+
+//-----------------------------------------------------------------------------
+// Frustum vs. oriented box test
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline bool BoundingOrientedBox::Intersects( const BoundingFrustum& fr ) const
+{
+ return fr.Intersects( *this );
+}
+
+
+//-----------------------------------------------------------------------------
+// Triangle vs. oriented box test.
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline bool BoundingOrientedBox::Intersects( FXMVECTOR V0, FXMVECTOR V1, FXMVECTOR V2 ) const
+{
+ // Load the box center & orientation.
+ XMVECTOR vCenter = XMLoadFloat3( &Center );
+ XMVECTOR vOrientation = XMLoadFloat4( &Orientation );
+
+ // Transform the triangle vertices into the space of the box.
+ XMVECTOR TV0 = XMVector3InverseRotate( V0 - vCenter, vOrientation );
+ XMVECTOR TV1 = XMVector3InverseRotate( V1 - vCenter, vOrientation );
+ XMVECTOR TV2 = XMVector3InverseRotate( V2 - vCenter, vOrientation );
+
+ BoundingBox box;
+ box.Center = XMFLOAT3( 0.0f, 0.0f, 0.0f );
+ box.Extents = Extents;
+
+ // Use the triangle vs axis aligned box intersection routine.
+ return box.Intersects( TV0, TV1, TV2 );
+}
+
+
+//-----------------------------------------------------------------------------
_Use_decl_annotations_
inline PlaneIntersectionType BoundingOrientedBox::Intersects( FXMVECTOR Plane ) const
{
    // Classify this oriented box against a single (unit-normalized) plane:
    // returns FRONT if the box is entirely on the positive side, BACK if
    // entirely on the negative side, INTERSECTING otherwise.
    assert( DirectX::Internal::XMPlaneIsUnit( Plane ) );

    // Load the box.
    XMVECTOR vCenter = XMLoadFloat3( &Center );
    XMVECTOR vExtents = XMLoadFloat3( &Extents );
    XMVECTOR BoxOrientation = XMLoadFloat4( &Orientation );

    assert( DirectX::Internal::XMQuaternionIsUnit( BoxOrientation ) );

    // Set w of the center to one so we can dot4 with a plane.
    vCenter = XMVectorInsert<0, 0, 0, 0, 1>( vCenter, XMVectorSplatOne() );

    // Build the 3x3 rotation matrix that defines the box axes.
    XMMATRIX R = XMMatrixRotationQuaternion( BoxOrientation );

    // The helper projects the box onto the plane normal and fills Outside /
    // Inside with all-true / all-false comparison masks.
    XMVECTOR Outside, Inside;
    DirectX::Internal::FastIntersectOrientedBoxPlane( vCenter, vExtents, R.r[0], R.r[1], R.r[2], Plane, Outside, Inside );

    // If the box is outside any plane it is outside.
    if ( XMVector4EqualInt( Outside, XMVectorTrueInt() ) )
        return FRONT;

    // If the box is inside all planes it is inside.
    if ( XMVector4EqualInt( Inside, XMVectorTrueInt() ) )
        return BACK;

    // The box is not inside all planes or outside a plane it intersects.
    return INTERSECTING;
}
+
+
+//-----------------------------------------------------------------------------
+// Compute the intersection of a ray (Origin, Direction) with an oriented box
+// using the slabs method.
+//-----------------------------------------------------------------------------
_Use_decl_annotations_
inline bool BoundingOrientedBox::Intersects( FXMVECTOR Origin, FXMVECTOR Direction, float& Dist ) const
{
    // Ray vs OBB via the slabs method, performed in the box's local frame.
    // On a hit, Dist receives the parametric distance along the (unit)
    // Direction to the first intersection; on a miss, Dist is set to 0 and
    // false is returned.
    assert( DirectX::Internal::XMVector3IsUnit( Direction ) );

    // Lane masks used to merge the three per-axis dot products into one vector.
    static const XMVECTORI32 SelectY =
    {
        XM_SELECT_0, XM_SELECT_1, XM_SELECT_0, XM_SELECT_0
    };
    static const XMVECTORI32 SelectZ =
    {
        XM_SELECT_0, XM_SELECT_0, XM_SELECT_1, XM_SELECT_0
    };

    // Load the box.
    XMVECTOR vCenter = XMLoadFloat3( &Center );
    XMVECTOR vExtents = XMLoadFloat3( &Extents );
    XMVECTOR vOrientation = XMLoadFloat4( &Orientation );

    assert( DirectX::Internal::XMQuaternionIsUnit( vOrientation ) );

    // Get the boxes normalized side directions.
    XMMATRIX R = XMMatrixRotationQuaternion( vOrientation );

    // Adjust ray origin to be relative to center of the box.
    XMVECTOR TOrigin = vCenter - Origin;

    // Compute the dot product againt each axis of the box.
    XMVECTOR AxisDotOrigin = XMVector3Dot( R.r[0], TOrigin );
    AxisDotOrigin = XMVectorSelect( AxisDotOrigin, XMVector3Dot( R.r[1], TOrigin ), SelectY );
    AxisDotOrigin = XMVectorSelect( AxisDotOrigin, XMVector3Dot( R.r[2], TOrigin ), SelectZ );

    XMVECTOR AxisDotDirection = XMVector3Dot( R.r[0], Direction );
    AxisDotDirection = XMVectorSelect( AxisDotDirection, XMVector3Dot( R.r[1], Direction ), SelectY );
    AxisDotDirection = XMVectorSelect( AxisDotDirection, XMVector3Dot( R.r[2], Direction ), SelectZ );

    // if (fabs(AxisDotDirection) <= Epsilon) the ray is nearly parallel to the slab.
    XMVECTOR IsParallel = XMVectorLessOrEqual( XMVectorAbs( AxisDotDirection ), g_RayEpsilon );

    // Test against all three axes simultaneously.
    XMVECTOR InverseAxisDotDirection = XMVectorReciprocal( AxisDotDirection );
    XMVECTOR t1 = ( AxisDotOrigin - vExtents ) * InverseAxisDotDirection;
    XMVECTOR t2 = ( AxisDotOrigin + vExtents ) * InverseAxisDotDirection;

    // Compute the max of min(t1,t2) and the min of max(t1,t2) ensuring we don't
    // use the results from any directions parallel to the slab.
    XMVECTOR t_min = XMVectorSelect( XMVectorMin( t1, t2 ), g_FltMin, IsParallel );
    XMVECTOR t_max = XMVectorSelect( XMVectorMax( t1, t2 ), g_FltMax, IsParallel );

    // t_min.x = maximum( t_min.x, t_min.y, t_min.z );
    // t_max.x = minimum( t_max.x, t_max.y, t_max.z );
    t_min = XMVectorMax( t_min, XMVectorSplatY( t_min ) );  // x = max(x,y)
    t_min = XMVectorMax( t_min, XMVectorSplatZ( t_min ) );  // x = max(max(x,y),z)
    t_max = XMVectorMin( t_max, XMVectorSplatY( t_max ) );  // x = min(x,y)
    t_max = XMVectorMin( t_max, XMVectorSplatZ( t_max ) );  // x = min(min(x,y),z)

    // if ( t_min > t_max ) return false;
    XMVECTOR NoIntersection = XMVectorGreater( XMVectorSplatX( t_min ), XMVectorSplatX( t_max ) );

    // if ( t_max < 0.0f ) return false;
    NoIntersection = XMVectorOrInt( NoIntersection, XMVectorLess( XMVectorSplatX( t_max ), XMVectorZero() ) );

    // if (IsParallel && (-Extents > AxisDotOrigin || Extents < AxisDotOrigin)) return false;
    // (a ray parallel to a slab misses unless its origin lies inside the slab)
    XMVECTOR ParallelOverlap = XMVectorInBounds( AxisDotOrigin, vExtents );
    NoIntersection = XMVectorOrInt( NoIntersection, XMVectorAndCInt( IsParallel, ParallelOverlap ) );

    if( !DirectX::Internal::XMVector3AnyTrue( NoIntersection ) )
    {
        // Store the x-component to *pDist
        XMStoreFloat( &Dist, t_min );
        return true;
    }

    Dist = 0.f;
    return false;
}
+
+
+//-----------------------------------------------------------------------------
+// Test an oriented box vs 6 planes (typically forming a frustum).
+//-----------------------------------------------------------------------------
_Use_decl_annotations_
inline ContainmentType BoundingOrientedBox::ContainedBy( FXMVECTOR Plane0, FXMVECTOR Plane1, FXMVECTOR Plane2,
                                                         GXMVECTOR Plane3, CXMVECTOR Plane4, CXMVECTOR Plane5 ) const
{
    // Classify this oriented box against six planes (typically the faces of
    // a frustum): DISJOINT if fully outside any plane, CONTAINS if fully
    // inside all six, otherwise INTERSECTS (conservative - may over-report).

    // Load the box.
    XMVECTOR vCenter = XMLoadFloat3( &Center );
    XMVECTOR vExtents = XMLoadFloat3( &Extents );
    XMVECTOR BoxOrientation = XMLoadFloat4( &Orientation );

    assert( DirectX::Internal::XMQuaternionIsUnit( BoxOrientation ) );

    // Set w of the center to one so we can dot4 with a plane.
    vCenter = XMVectorInsert<0, 0, 0, 0, 1>( vCenter, XMVectorSplatOne() );

    // Build the 3x3 rotation matrix that defines the box axes.
    XMMATRIX R = XMMatrixRotationQuaternion( BoxOrientation );

    XMVECTOR Outside, Inside;

    // Test against each plane, OR-ing the "outside" masks and AND-ing the
    // "inside" masks across all six planes.
    DirectX::Internal::FastIntersectOrientedBoxPlane( vCenter, vExtents, R.r[0], R.r[1], R.r[2], Plane0, Outside, Inside );

    XMVECTOR AnyOutside = Outside;
    XMVECTOR AllInside = Inside;

    DirectX::Internal::FastIntersectOrientedBoxPlane( vCenter, vExtents, R.r[0], R.r[1], R.r[2], Plane1, Outside, Inside );
    AnyOutside = XMVectorOrInt( AnyOutside, Outside );
    AllInside = XMVectorAndInt( AllInside, Inside );

    DirectX::Internal::FastIntersectOrientedBoxPlane( vCenter, vExtents, R.r[0], R.r[1], R.r[2], Plane2, Outside, Inside );
    AnyOutside = XMVectorOrInt( AnyOutside, Outside );
    AllInside = XMVectorAndInt( AllInside, Inside );

    DirectX::Internal::FastIntersectOrientedBoxPlane( vCenter, vExtents, R.r[0], R.r[1], R.r[2], Plane3, Outside, Inside );
    AnyOutside = XMVectorOrInt( AnyOutside, Outside );
    AllInside = XMVectorAndInt( AllInside, Inside );

    DirectX::Internal::FastIntersectOrientedBoxPlane( vCenter, vExtents, R.r[0], R.r[1], R.r[2], Plane4, Outside, Inside );
    AnyOutside = XMVectorOrInt( AnyOutside, Outside );
    AllInside = XMVectorAndInt( AllInside, Inside );

    DirectX::Internal::FastIntersectOrientedBoxPlane( vCenter, vExtents, R.r[0], R.r[1], R.r[2], Plane5, Outside, Inside );
    AnyOutside = XMVectorOrInt( AnyOutside, Outside );
    AllInside = XMVectorAndInt( AllInside, Inside );

    // If the box is outside any plane it is outside.
    if ( XMVector4EqualInt( AnyOutside, XMVectorTrueInt() ) )
        return DISJOINT;

    // If the box is inside all planes it is inside.
    if ( XMVector4EqualInt( AllInside, XMVectorTrueInt() ) )
        return CONTAINS;

    // The box is not inside all planes or outside a plane, it may intersect.
    return INTERSECTS;
}
+
+
+//-----------------------------------------------------------------------------
+// Create oriented bounding box from axis-aligned bounding box
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline void BoundingOrientedBox::CreateFromBoundingBox( BoundingOrientedBox& Out, const BoundingBox& box )
+{
+ Out.Center = box.Center;
+ Out.Extents = box.Extents;
+ Out.Orientation = XMFLOAT4( 0.f, 0.f, 0.f, 1.f );
+}
+
+
+//-----------------------------------------------------------------------------
+// Find the approximate minimum oriented bounding box containing a set of
+// points. Exact computation of minimum oriented bounding box is possible but
+// is slower and requires a more complex algorithm.
+// The algorithm works by computing the inertia tensor of the points and then
+// using the eigenvectors of the intertia tensor as the axes of the box.
+// Computing the intertia tensor of the convex hull of the points will usually
+// result in better bounding box but the computation is more complex.
+// Exact computation of the minimum oriented bounding box is possible but the
+// best know algorithm is O(N^3) and is significanly more complex to implement.
+//-----------------------------------------------------------------------------
_Use_decl_annotations_
inline void BoundingOrientedBox::CreateFromPoints( BoundingOrientedBox& Out, size_t Count, const XMFLOAT3* pPoints, size_t Stride )
{
    // Builds an approximate minimum OBB for a point set: the eigenvectors of
    // the points' covariance (inertia) matrix become the box axes, then the
    // points are projected onto those axes to find the extents.
    // pPoints is read with the given byte Stride between consecutive points.
    assert( Count > 0 );
    assert( pPoints != 0 );

    XMVECTOR CenterOfMass = XMVectorZero();

    // Compute the center of mass and inertia tensor of the points.
    for( size_t i = 0; i < Count; ++i )
    {
        XMVECTOR Point = XMLoadFloat3( reinterpret_cast<const XMFLOAT3*>( reinterpret_cast<const uint8_t*>(pPoints) + i * Stride ) );

        CenterOfMass += Point;
    }

    CenterOfMass *= XMVectorReciprocal( XMVectorReplicate( float( Count ) ) );

    // Compute the inertia tensor of the points around the center of mass.
    // Using the center of mass is not strictly necessary, but will hopefully
    // improve the stability of finding the eigenvectors.
    // XX_YY_ZZ accumulates the diagonal terms, XY_XZ_YZ the off-diagonal
    // terms of the (symmetric) covariance matrix.
    XMVECTOR XX_YY_ZZ = XMVectorZero();
    XMVECTOR XY_XZ_YZ = XMVectorZero();

    for( size_t i = 0; i < Count; ++i )
    {
        XMVECTOR Point = XMLoadFloat3( reinterpret_cast<const XMFLOAT3*>( reinterpret_cast<const uint8_t*>(pPoints) + i * Stride ) ) - CenterOfMass;

        XX_YY_ZZ += Point * Point;

        XMVECTOR XXY = XMVectorSwizzle<XM_SWIZZLE_X, XM_SWIZZLE_X, XM_SWIZZLE_Y, XM_SWIZZLE_W>( Point );
        XMVECTOR YZZ = XMVectorSwizzle<XM_SWIZZLE_Y, XM_SWIZZLE_Z, XM_SWIZZLE_Z, XM_SWIZZLE_W>( Point );

        XY_XZ_YZ += XXY * YZZ;
    }

    XMVECTOR v1, v2, v3;

    // Compute the eigenvectors of the inertia tensor.
    DirectX::Internal::CalculateEigenVectorsFromCovarianceMatrix( XMVectorGetX( XX_YY_ZZ ), XMVectorGetY( XX_YY_ZZ ),
                                                                  XMVectorGetZ( XX_YY_ZZ ),
                                                                  XMVectorGetX( XY_XZ_YZ ), XMVectorGetY( XY_XZ_YZ ),
                                                                  XMVectorGetZ( XY_XZ_YZ ),
                                                                  &v1, &v2, &v3 );

    // Put them in a matrix.
    XMMATRIX R;

    R.r[0] = XMVectorSetW( v1, 0.f );
    R.r[1] = XMVectorSetW( v2, 0.f );
    R.r[2] = XMVectorSetW( v3, 0.f );
    R.r[3] = g_XMIdentityR3.v;

    // Multiply by -1 to convert the matrix into a right handed coordinate
    // system (Det ~= 1) in case the eigenvectors form a left handed
    // coordinate system (Det ~= -1) because XMQuaternionRotationMatrix only
    // works on right handed matrices.
    XMVECTOR Det = XMMatrixDeterminant( R );

    if( XMVector4Less( Det, XMVectorZero() ) )
    {
        R.r[0] *= g_XMNegativeOne.v;
        R.r[1] *= g_XMNegativeOne.v;
        R.r[2] *= g_XMNegativeOne.v;
    }

    // Get the rotation quaternion from the matrix.
    XMVECTOR vOrientation = XMQuaternionRotationMatrix( R );

    // Make sure it is normal (in case the vectors are slightly non-orthogonal).
    vOrientation = XMQuaternionNormalize( vOrientation );

    // Rebuild the rotation matrix from the quaternion.
    R = XMMatrixRotationQuaternion( vOrientation );

    // Build the rotation into the rotated space.
    XMMATRIX InverseR = XMMatrixTranspose( R );

    // Find the minimum OBB using the eigenvectors as the axes.
    XMVECTOR vMin, vMax;

    vMin = vMax = XMVector3TransformNormal( XMLoadFloat3( pPoints ), InverseR );

    for( size_t i = 1; i < Count; ++i )
    {
        XMVECTOR Point = XMVector3TransformNormal( XMLoadFloat3( reinterpret_cast<const XMFLOAT3*>( reinterpret_cast<const uint8_t*>(pPoints) + i * Stride ) ),
                                                   InverseR );

        vMin = XMVectorMin( vMin, Point );
        vMax = XMVectorMax( vMax, Point );
    }

    // Rotate the center into world space.
    XMVECTOR vCenter = ( vMin + vMax ) * 0.5f;
    vCenter = XMVector3TransformNormal( vCenter, R );

    // Store center, extents, and orientation.
    XMStoreFloat3( &Out.Center, vCenter );
    XMStoreFloat3( &Out.Extents, ( vMax - vMin ) * 0.5f );
    XMStoreFloat4( &Out.Orientation, vOrientation );
}
+
+
+/****************************************************************************
+ *
+ * BoundingFrustum
+ *
+ ****************************************************************************/
+
+//-----------------------------------------------------------------------------
+// Transform a frustum by an angle preserving transform.
+//-----------------------------------------------------------------------------
_Use_decl_annotations_
inline void BoundingFrustum::Transform( BoundingFrustum& Out, CXMMATRIX M ) const
{
    // Transform the frustum by an angle-preserving matrix (rotation, uniform
    // scale, translation). The rotation part is extracted as a quaternion;
    // the uniform scale is recovered from the longest row of M.

    // Load the frustum.
    XMVECTOR vOrigin = XMLoadFloat3( &Origin );
    XMVECTOR vOrientation = XMLoadFloat4( &Orientation );

    assert( DirectX::Internal::XMQuaternionIsUnit( vOrientation ) );

    // Composite the frustum rotation and the transform rotation.
    XMVECTOR Rotation = XMQuaternionRotationMatrix( M );
    vOrientation = XMQuaternionMultiply( vOrientation, Rotation );

    // Transform the center.
    vOrigin = XMVector3Transform( vOrigin, M );

    // Store the frustum.
    XMStoreFloat3( &Out.Origin, vOrigin );
    XMStoreFloat4( &Out.Orientation, vOrientation );

    // Scale the near and far distances (the slopes remain the same).
    // The scale factor is the largest row length of the upper 3x3 of M.
    XMVECTOR dX = XMVector3Dot( M.r[0], M.r[0] );
    XMVECTOR dY = XMVector3Dot( M.r[1], M.r[1] );
    XMVECTOR dZ = XMVector3Dot( M.r[2], M.r[2] );

    XMVECTOR d = XMVectorMax( dX, XMVectorMax( dY, dZ ) );
    float Scale = sqrtf( XMVectorGetX(d) );

    Out.Near = Near * Scale;
    Out.Far = Far * Scale;

    // Copy the slopes.
    Out.RightSlope = RightSlope;
    Out.LeftSlope = LeftSlope;
    Out.TopSlope = TopSlope;
    Out.BottomSlope = BottomSlope;
}
+
+_Use_decl_annotations_
+inline void BoundingFrustum::Transform( BoundingFrustum& Out, float Scale, FXMVECTOR Rotation, FXMVECTOR Translation ) const
+{
+ assert( DirectX::Internal::XMQuaternionIsUnit( Rotation ) );
+
+ // Load the frustum.
+ XMVECTOR vOrigin = XMLoadFloat3( &Origin );
+ XMVECTOR vOrientation = XMLoadFloat4( &Orientation );
+
+ assert( DirectX::Internal::XMQuaternionIsUnit( vOrientation ) );
+
+ // Composite the frustum rotation and the transform rotation.
+ vOrientation = XMQuaternionMultiply( vOrientation, Rotation );
+
+ // Transform the origin.
+ vOrigin = XMVector3Rotate( vOrigin * XMVectorReplicate( Scale ), Rotation ) + Translation;
+
+ // Store the frustum.
+ XMStoreFloat3( &Out.Origin, vOrigin );
+ XMStoreFloat4( &Out.Orientation, vOrientation );
+
+ // Scale the near and far distances (the slopes remain the same).
+ Out.Near = Near * Scale;
+ Out.Far = Far * Scale;
+
+ // Copy the slopes.
+ Out.RightSlope = RightSlope;
+ Out.LeftSlope = LeftSlope;
+ Out.TopSlope = TopSlope;
+ Out.BottomSlope = BottomSlope;
+}
+
+
+//-----------------------------------------------------------------------------
+// Get the corner points of the frustum
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline void BoundingFrustum::GetCorners( XMFLOAT3* Corners ) const
+{
+ assert( Corners != 0 );
+
+ // Load origin and orientation of the frustum.
+ XMVECTOR vOrigin = XMLoadFloat3( &Origin );
+ XMVECTOR vOrientation = XMLoadFloat4( &Orientation );
+
+ assert( DirectX::Internal::XMQuaternionIsUnit( vOrientation ) );
+
+ // Build the corners of the frustum.
+ XMVECTOR vRightTop = XMVectorSet( RightSlope, TopSlope, 1.0f, 0.0f );
+ XMVECTOR vRightBottom = XMVectorSet( RightSlope, BottomSlope, 1.0f, 0.0f );
+ XMVECTOR vLeftTop = XMVectorSet( LeftSlope, TopSlope, 1.0f, 0.0f );
+ XMVECTOR vLeftBottom = XMVectorSet( LeftSlope, BottomSlope, 1.0f, 0.0f );
+ XMVECTOR vNear = XMVectorReplicatePtr( &Near );
+ XMVECTOR vFar = XMVectorReplicatePtr( &Far );
+
+ // Returns 8 corners position of bounding frustum.
+ // Near Far
+ // 0----1 4----5
+ // | | | |
+ // | | | |
+ // 3----2 7----6
+
+ XMVECTOR vCorners[CORNER_COUNT];
+ vCorners[0] = vLeftTop * vNear;
+ vCorners[1] = vRightTop * vNear;
+ vCorners[2] = vRightBottom * vNear;
+ vCorners[3] = vLeftBottom * vNear;
+ vCorners[4] = vLeftTop * vFar;
+ vCorners[5] = vRightTop * vFar;
+ vCorners[6] = vRightBottom * vFar;
+ vCorners[7] = vLeftBottom * vFar;
+
+ for( size_t i=0; i < CORNER_COUNT; ++i )
+ {
+ XMVECTOR C = XMVector3Rotate( vCorners[i], vOrientation ) + vOrigin;
+ XMStoreFloat3( &Corners[i], C );
+ }
+}
+
+
+//-----------------------------------------------------------------------------
+// Point in frustum test.
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline ContainmentType BoundingFrustum::Contains( FXMVECTOR Point ) const
+{
+ // Build frustum planes.
+ XMVECTOR Planes[6];
+ Planes[0] = XMVectorSet( 0.0f, 0.0f, -1.0f, Near );
+ Planes[1] = XMVectorSet( 0.0f, 0.0f, 1.0f, -Far );
+ Planes[2] = XMVectorSet( 1.0f, 0.0f, -RightSlope, 0.0f );
+ Planes[3] = XMVectorSet( -1.0f, 0.0f, LeftSlope, 0.0f );
+ Planes[4] = XMVectorSet( 0.0f, 1.0f, -TopSlope, 0.0f );
+ Planes[5] = XMVectorSet( 0.0f, -1.0f, BottomSlope, 0.0f );
+
+ // Load origin and orientation.
+ XMVECTOR vOrigin = XMLoadFloat3( &Origin );
+ XMVECTOR vOrientation = XMLoadFloat4( &Orientation );
+
+ assert( DirectX::Internal::XMQuaternionIsUnit( vOrientation ) );
+
+ // Transform point into local space of frustum.
+ XMVECTOR TPoint = XMVector3InverseRotate( Point - vOrigin, vOrientation );
+
+ // Set w to one.
+ TPoint = XMVectorInsert<0, 0, 0, 0, 1>( TPoint, XMVectorSplatOne() );
+
+ XMVECTOR Zero = XMVectorZero();
+ XMVECTOR Outside = Zero;
+
+ // Test point against each plane of the frustum.
+ for( size_t i = 0; i < 6; ++i )
+ {
+ XMVECTOR Dot = XMVector4Dot( TPoint, Planes[i] );
+ Outside = XMVectorOrInt( Outside, XMVectorGreater( Dot, Zero ) );
+ }
+
+ return XMVector4NotEqualInt( Outside, XMVectorTrueInt() ) ? CONTAINS : DISJOINT;
+}
+
+
+//-----------------------------------------------------------------------------
+// Triangle vs frustum test.
+//-----------------------------------------------------------------------------
_Use_decl_annotations_
inline ContainmentType BoundingFrustum::Contains( FXMVECTOR V0, FXMVECTOR V1, FXMVECTOR V2 ) const
{
    // Triangle-vs-frustum containment: builds the six world-space frustum
    // planes and delegates the classification to the triangle/planes helper.

    // Load origin and orientation of the frustum.
    XMVECTOR vOrigin = XMLoadFloat3( &Origin );
    XMVECTOR vOrientation = XMLoadFloat4( &Orientation );

    // Create 6 planes (do it inline to encourage use of registers)
    // Each plane is built in frustum-local space, moved to world space with
    // the frustum's rotation/translation, then normalized.
    XMVECTOR NearPlane = XMVectorSet( 0.0f, 0.0f, -1.0f, Near );
    NearPlane = DirectX::Internal::XMPlaneTransform( NearPlane, vOrientation, vOrigin );
    NearPlane = XMPlaneNormalize( NearPlane );

    XMVECTOR FarPlane = XMVectorSet( 0.0f, 0.0f, 1.0f, -Far );
    FarPlane = DirectX::Internal::XMPlaneTransform( FarPlane, vOrientation, vOrigin );
    FarPlane = XMPlaneNormalize( FarPlane );

    XMVECTOR RightPlane = XMVectorSet( 1.0f, 0.0f, -RightSlope, 0.0f );
    RightPlane = DirectX::Internal::XMPlaneTransform( RightPlane, vOrientation, vOrigin );
    RightPlane = XMPlaneNormalize( RightPlane );

    XMVECTOR LeftPlane = XMVectorSet( -1.0f, 0.0f, LeftSlope, 0.0f );
    LeftPlane = DirectX::Internal::XMPlaneTransform( LeftPlane, vOrientation, vOrigin );
    LeftPlane = XMPlaneNormalize( LeftPlane );

    XMVECTOR TopPlane = XMVectorSet( 0.0f, 1.0f, -TopSlope, 0.0f );
    TopPlane = DirectX::Internal::XMPlaneTransform( TopPlane, vOrientation, vOrigin );
    TopPlane = XMPlaneNormalize( TopPlane );

    XMVECTOR BottomPlane = XMVectorSet( 0.0f, -1.0f, BottomSlope, 0.0f );
    BottomPlane = DirectX::Internal::XMPlaneTransform( BottomPlane, vOrientation, vOrigin );
    BottomPlane = XMPlaneNormalize( BottomPlane );

    return TriangleTests::ContainedBy( V0, V1, V2, NearPlane, FarPlane, RightPlane, LeftPlane, TopPlane, BottomPlane );
}
+
+
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline ContainmentType BoundingFrustum::Contains( const BoundingSphere& sh ) const
+{
+    // Load the frustum pose.
+    XMVECTOR vOrigin = XMLoadFloat3( &Origin );
+    XMVECTOR vOrientation = XMLoadFloat4( &Orientation );
+
+    // Build the six world-space frustum planes: start from the local-space
+    // planes, transform each by the frustum pose, then normalize.
+    XMVECTOR WorldPlanes[6];
+    WorldPlanes[0] = XMVectorSet( 0.0f, 0.0f, -1.0f, Near );
+    WorldPlanes[1] = XMVectorSet( 0.0f, 0.0f, 1.0f, -Far );
+    WorldPlanes[2] = XMVectorSet( 1.0f, 0.0f, -RightSlope, 0.0f );
+    WorldPlanes[3] = XMVectorSet( -1.0f, 0.0f, LeftSlope, 0.0f );
+    WorldPlanes[4] = XMVectorSet( 0.0f, 1.0f, -TopSlope, 0.0f );
+    WorldPlanes[5] = XMVectorSet( 0.0f, -1.0f, BottomSlope, 0.0f );
+
+    for( size_t i = 0; i < 6; ++i )
+    {
+        XMVECTOR Transformed = DirectX::Internal::XMPlaneTransform( WorldPlanes[i], vOrientation, vOrigin );
+        WorldPlanes[i] = XMPlaneNormalize( Transformed );
+    }
+
+    // Let the sphere classify itself against the plane set.
+    return sh.ContainedBy( WorldPlanes[0], WorldPlanes[1], WorldPlanes[2],
+                           WorldPlanes[3], WorldPlanes[4], WorldPlanes[5] );
+}
+
+
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline ContainmentType BoundingFrustum::Contains( const BoundingBox& box ) const
+{
+    // Load the frustum pose.
+    XMVECTOR vOrigin = XMLoadFloat3( &Origin );
+    XMVECTOR vOrientation = XMLoadFloat4( &Orientation );
+
+    // Build the six world-space frustum planes: start from the local-space
+    // planes, transform each by the frustum pose, then normalize.
+    XMVECTOR WorldPlanes[6];
+    WorldPlanes[0] = XMVectorSet( 0.0f, 0.0f, -1.0f, Near );
+    WorldPlanes[1] = XMVectorSet( 0.0f, 0.0f, 1.0f, -Far );
+    WorldPlanes[2] = XMVectorSet( 1.0f, 0.0f, -RightSlope, 0.0f );
+    WorldPlanes[3] = XMVectorSet( -1.0f, 0.0f, LeftSlope, 0.0f );
+    WorldPlanes[4] = XMVectorSet( 0.0f, 1.0f, -TopSlope, 0.0f );
+    WorldPlanes[5] = XMVectorSet( 0.0f, -1.0f, BottomSlope, 0.0f );
+
+    for( size_t i = 0; i < 6; ++i )
+    {
+        XMVECTOR Transformed = DirectX::Internal::XMPlaneTransform( WorldPlanes[i], vOrientation, vOrigin );
+        WorldPlanes[i] = XMPlaneNormalize( Transformed );
+    }
+
+    // Let the axis-aligned box classify itself against the plane set.
+    return box.ContainedBy( WorldPlanes[0], WorldPlanes[1], WorldPlanes[2],
+                            WorldPlanes[3], WorldPlanes[4], WorldPlanes[5] );
+}
+
+
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline ContainmentType BoundingFrustum::Contains( const BoundingOrientedBox& box ) const
+{
+    // Load the frustum pose.
+    XMVECTOR vOrigin = XMLoadFloat3( &Origin );
+    XMVECTOR vOrientation = XMLoadFloat4( &Orientation );
+
+    // Build the six world-space frustum planes: start from the local-space
+    // planes, transform each by the frustum pose, then normalize.
+    XMVECTOR WorldPlanes[6];
+    WorldPlanes[0] = XMVectorSet( 0.0f, 0.0f, -1.0f, Near );
+    WorldPlanes[1] = XMVectorSet( 0.0f, 0.0f, 1.0f, -Far );
+    WorldPlanes[2] = XMVectorSet( 1.0f, 0.0f, -RightSlope, 0.0f );
+    WorldPlanes[3] = XMVectorSet( -1.0f, 0.0f, LeftSlope, 0.0f );
+    WorldPlanes[4] = XMVectorSet( 0.0f, 1.0f, -TopSlope, 0.0f );
+    WorldPlanes[5] = XMVectorSet( 0.0f, -1.0f, BottomSlope, 0.0f );
+
+    for( size_t i = 0; i < 6; ++i )
+    {
+        XMVECTOR Transformed = DirectX::Internal::XMPlaneTransform( WorldPlanes[i], vOrientation, vOrigin );
+        WorldPlanes[i] = XMPlaneNormalize( Transformed );
+    }
+
+    // Let the oriented box classify itself against the plane set.
+    return box.ContainedBy( WorldPlanes[0], WorldPlanes[1], WorldPlanes[2],
+                            WorldPlanes[3], WorldPlanes[4], WorldPlanes[5] );
+}
+
+
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline ContainmentType BoundingFrustum::Contains( const BoundingFrustum& fr ) const
+{
+    // Load this frustum's pose.
+    XMVECTOR vOrigin = XMLoadFloat3( &Origin );
+    XMVECTOR vOrientation = XMLoadFloat4( &Orientation );
+
+    // Build the six world-space frustum planes: start from the local-space
+    // planes, transform each by the frustum pose, then normalize.
+    XMVECTOR WorldPlanes[6];
+    WorldPlanes[0] = XMVectorSet( 0.0f, 0.0f, -1.0f, Near );
+    WorldPlanes[1] = XMVectorSet( 0.0f, 0.0f, 1.0f, -Far );
+    WorldPlanes[2] = XMVectorSet( 1.0f, 0.0f, -RightSlope, 0.0f );
+    WorldPlanes[3] = XMVectorSet( -1.0f, 0.0f, LeftSlope, 0.0f );
+    WorldPlanes[4] = XMVectorSet( 0.0f, 1.0f, -TopSlope, 0.0f );
+    WorldPlanes[5] = XMVectorSet( 0.0f, -1.0f, BottomSlope, 0.0f );
+
+    for( size_t i = 0; i < 6; ++i )
+    {
+        XMVECTOR Transformed = DirectX::Internal::XMPlaneTransform( WorldPlanes[i], vOrientation, vOrigin );
+        WorldPlanes[i] = XMPlaneNormalize( Transformed );
+    }
+
+    // Let the other frustum classify itself against this plane set.
+    return fr.ContainedBy( WorldPlanes[0], WorldPlanes[1], WorldPlanes[2],
+                           WorldPlanes[3], WorldPlanes[4], WorldPlanes[5] );
+}
+
+
+//-----------------------------------------------------------------------------
+// Exact sphere vs frustum test. The algorithm first checks the sphere against
+// the planes of the frustum, then if the plane checks were indeterminate finds
+// the nearest feature (plane, line, point) on the frustum to the center of the
+// sphere and compares the distance to the nearest feature to the radius of the
+// sphere
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline bool BoundingFrustum::Intersects( const BoundingSphere& sh ) const
+{
+    XMVECTOR Zero = XMVectorZero();
+
+    // Build the frustum planes (in the frustum's local space).
+    XMVECTOR Planes[6];
+    Planes[0] = XMVectorSet( 0.0f, 0.0f, -1.0f, Near );
+    Planes[1] = XMVectorSet( 0.0f, 0.0f, 1.0f, -Far );
+    Planes[2] = XMVectorSet( 1.0f, 0.0f, -RightSlope, 0.0f );
+    Planes[3] = XMVectorSet( -1.0f, 0.0f, LeftSlope, 0.0f );
+    Planes[4] = XMVectorSet( 0.0f, 1.0f, -TopSlope, 0.0f );
+    Planes[5] = XMVectorSet( 0.0f, -1.0f, BottomSlope, 0.0f );
+
+    // Normalize the planes so we can compare to the sphere radius.
+    // (The near/far plane normals (0,0,+/-1) are already unit length.)
+    Planes[2] = XMVector3Normalize( Planes[2] );
+    Planes[3] = XMVector3Normalize( Planes[3] );
+    Planes[4] = XMVector3Normalize( Planes[4] );
+    Planes[5] = XMVector3Normalize( Planes[5] );
+
+    // Load origin and orientation of the frustum.
+    XMVECTOR vOrigin = XMLoadFloat3( &Origin );
+    XMVECTOR vOrientation = XMLoadFloat4( &Orientation );
+
+    assert( DirectX::Internal::XMQuaternionIsUnit( vOrientation ) );
+
+    // Load the sphere.
+    XMVECTOR vCenter = XMLoadFloat3( &sh.Center );
+    XMVECTOR vRadius = XMVectorReplicatePtr( &sh.Radius );
+
+    // Transform the center of the sphere into the local space of frustum.
+    vCenter = XMVector3InverseRotate( vCenter - vOrigin, vOrientation );
+
+    // Set w of the center to one so we can dot4 with the plane.
+    vCenter = XMVectorInsert<0, 0, 0, 0, 1>( vCenter, XMVectorSplatOne() );
+
+    // Check against each plane of the frustum.
+    XMVECTOR Outside = XMVectorFalseInt();
+    XMVECTOR InsideAll = XMVectorTrueInt();
+    XMVECTOR CenterInsideAll = XMVectorTrueInt();
+
+    // Signed distance of the center to each plane; cached for the
+    // nearest-feature search below.
+    XMVECTOR Dist[6];
+
+    for( size_t i = 0; i < 6; ++i )
+    {
+        Dist[i] = XMVector4Dot( vCenter, Planes[i] );
+
+        // Outside the plane?
+        Outside = XMVectorOrInt( Outside, XMVectorGreater( Dist[i], vRadius ) );
+
+        // Fully inside the plane?
+        InsideAll = XMVectorAndInt( InsideAll, XMVectorLessOrEqual( Dist[i], -vRadius ) );
+
+        // Check if the center is inside the plane.
+        CenterInsideAll = XMVectorAndInt( CenterInsideAll, XMVectorLessOrEqual( Dist[i], Zero ) );
+    }
+
+    // If the sphere is outside any of the planes it is outside.
+    if ( XMVector4EqualInt( Outside, XMVectorTrueInt() ) )
+        return false;
+
+    // If the sphere is inside all planes it is fully inside.
+    if ( XMVector4EqualInt( InsideAll, XMVectorTrueInt() ) )
+        return true;
+
+    // If the center of the sphere is inside all planes and the sphere intersects
+    // one or more planes then it must intersect.
+    if ( XMVector4EqualInt( CenterInsideAll, XMVectorTrueInt() ) )
+        return true;
+
+    // The sphere may be outside the frustum or intersecting the frustum.
+    // Find the nearest feature (face, edge, or corner) on the frustum
+    // to the sphere.
+
+    // The faces adjacent to each face are:
+    static const size_t adjacent_faces[6][4] =
+    {
+        { 2, 3, 4, 5 },    // 0 (near)
+        { 2, 3, 4, 5 },    // 1 (far)
+        { 0, 1, 4, 5 },    // 2 (right)
+        { 0, 1, 4, 5 },    // 3 (left)
+        { 0, 1, 2, 3 },    // 4 (top)
+        { 0, 1, 2, 3 }     // 5 (bottom)
+    };
+
+    XMVECTOR Intersects = XMVectorFalseInt();
+
+    // Check to see if the nearest feature is one of the planes.
+    for( size_t i = 0; i < 6; ++i )
+    {
+        // Find the nearest point on the plane to the center of the sphere.
+        XMVECTOR Point = vCenter - (Planes[i] * Dist[i]);
+
+        // Set w of the point to one.
+        Point = XMVectorInsert<0, 0, 0, 0, 1>( Point, XMVectorSplatOne() );
+
+        // If the point is inside the face (inside the adjacent planes) then
+        // this plane is the nearest feature.
+        XMVECTOR InsideFace = XMVectorTrueInt();
+
+        for ( size_t j = 0; j < 4; j++ )
+        {
+            size_t plane_index = adjacent_faces[i][j];
+
+            InsideFace = XMVectorAndInt( InsideFace,
+                            XMVectorLessOrEqual( XMVector4Dot( Point, Planes[plane_index] ), Zero ) );
+        }
+
+        // Since we have already checked distance from the plane we know that the
+        // sphere must intersect if this plane is the nearest feature.
+        Intersects = XMVectorOrInt( Intersects,
+                                    XMVectorAndInt( XMVectorGreater( Dist[i], Zero ), InsideFace ) );
+    }
+
+    if ( XMVector4EqualInt( Intersects, XMVectorTrueInt() ) )
+        return true;
+
+    // Build the corners of the frustum.
+    XMVECTOR vRightTop = XMVectorSet( RightSlope, TopSlope, 1.0f, 0.0f );
+    XMVECTOR vRightBottom = XMVectorSet( RightSlope, BottomSlope, 1.0f, 0.0f );
+    XMVECTOR vLeftTop = XMVectorSet( LeftSlope, TopSlope, 1.0f, 0.0f );
+    XMVECTOR vLeftBottom = XMVectorSet( LeftSlope, BottomSlope, 1.0f, 0.0f );
+    XMVECTOR vNear = XMVectorReplicatePtr( &Near );
+    XMVECTOR vFar = XMVectorReplicatePtr( &Far );
+
+    XMVECTOR Corners[CORNER_COUNT];
+    Corners[0] = vRightTop * vNear;
+    Corners[1] = vRightBottom * vNear;
+    Corners[2] = vLeftTop * vNear;
+    Corners[3] = vLeftBottom * vNear;
+    Corners[4] = vRightTop * vFar;
+    Corners[5] = vRightBottom * vFar;
+    Corners[6] = vLeftTop * vFar;
+    Corners[7] = vLeftBottom * vFar;
+
+    // The Edges are (pairs of corner indices):
+    static const size_t edges[12][2] =
+    {
+        { 0, 1 }, { 2, 3 }, { 0, 2 }, { 1, 3 }, // Near plane
+        { 4, 5 }, { 6, 7 }, { 4, 6 }, { 5, 7 }, // Far plane
+        { 0, 4 }, { 1, 5 }, { 2, 6 }, { 3, 7 }, // Near to far
+    };
+
+    XMVECTOR RadiusSq = vRadius * vRadius;
+
+    // Check to see if the nearest feature is one of the edges (or corners).
+    for( size_t i = 0; i < 12; ++i )
+    {
+        size_t ei0 = edges[i][0];
+        size_t ei1 = edges[i][1];
+
+        // Find the nearest point on the edge to the center of the sphere.
+        // The corners of the frustum are included as the endpoints of the edges.
+        XMVECTOR Point = DirectX::Internal::PointOnLineSegmentNearestPoint( Corners[ei0], Corners[ei1], vCenter );
+
+        XMVECTOR Delta = vCenter - Point;
+
+        // Compare squared distances to avoid a square root.
+        XMVECTOR DistSq = XMVector3Dot( Delta, Delta );
+
+        // If the distance to the center of the sphere to the point is less than
+        // the radius of the sphere then it must intersect.
+        Intersects = XMVectorOrInt( Intersects, XMVectorLessOrEqual( DistSq, RadiusSq ) );
+    }
+
+    if ( XMVector4EqualInt( Intersects, XMVectorTrueInt() ) )
+        return true;
+
+    // The sphere must be outside the frustum.
+    return false;
+}
+
+
+//-----------------------------------------------------------------------------
+// Exact axis aligned box vs frustum test. Constructs an oriented box and uses
+// the oriented box vs frustum test.
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline bool BoundingFrustum::Intersects( const BoundingBox& box ) const
+{
+    // Promote the axis-aligned box to an oriented box with the identity
+    // quaternion and defer to the exact OBB-vs-frustum test.
+    BoundingOrientedBox orientedBox( box.Center, box.Extents, XMFLOAT4( 0.f, 0.f, 0.f, 1.f ) );
+    return Intersects( orientedBox );
+}
+
+
+//-----------------------------------------------------------------------------
+// Exact oriented box vs frustum test.
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline bool BoundingFrustum::Intersects( const BoundingOrientedBox& box ) const
+{
+    // Select masks used to gather the per-axis dot products into one vector
+    // (lane x from the first operand, lane y/z from the second).
+    static const XMVECTORI32 SelectY =
+    {
+        XM_SELECT_0, XM_SELECT_1, XM_SELECT_0, XM_SELECT_0
+    };
+    static const XMVECTORI32 SelectZ =
+    {
+        XM_SELECT_0, XM_SELECT_0, XM_SELECT_1, XM_SELECT_0
+    };
+
+    XMVECTOR Zero = XMVectorZero();
+
+    // Build the frustum planes (in the frustum's local space).
+    XMVECTOR Planes[6];
+    Planes[0] = XMVectorSet( 0.0f, 0.0f, -1.0f, Near );
+    Planes[1] = XMVectorSet( 0.0f, 0.0f, 1.0f, -Far );
+    Planes[2] = XMVectorSet( 1.0f, 0.0f, -RightSlope, 0.0f );
+    Planes[3] = XMVectorSet( -1.0f, 0.0f, LeftSlope, 0.0f );
+    Planes[4] = XMVectorSet( 0.0f, 1.0f, -TopSlope, 0.0f );
+    Planes[5] = XMVectorSet( 0.0f, -1.0f, BottomSlope, 0.0f );
+
+    // Load origin and orientation of the frustum.
+    XMVECTOR vOrigin = XMLoadFloat3( &Origin );
+    XMVECTOR FrustumOrientation = XMLoadFloat4( &Orientation );
+
+    assert( DirectX::Internal::XMQuaternionIsUnit( FrustumOrientation ) );
+
+    // Load the box.
+    XMVECTOR Center = XMLoadFloat3( &box.Center );
+    XMVECTOR Extents = XMLoadFloat3( &box.Extents );
+    XMVECTOR BoxOrientation = XMLoadFloat4( &box.Orientation );
+
+    assert( DirectX::Internal::XMQuaternionIsUnit( BoxOrientation ) );
+
+    // Transform the oriented box into the space of the frustum in order to
+    // minimize the number of transforms we have to do.
+    Center = XMVector3InverseRotate( Center - vOrigin, FrustumOrientation );
+    BoxOrientation = XMQuaternionMultiply( BoxOrientation, XMQuaternionConjugate( FrustumOrientation ) );
+
+    // Set w of the center to one so we can dot4 with the plane.
+    Center = XMVectorInsert<0, 0, 0, 0, 1>( Center, XMVectorSplatOne() );
+
+    // Build the 3x3 rotation matrix that defines the box axes.
+    XMMATRIX R = XMMatrixRotationQuaternion( BoxOrientation );
+
+    // Check against each plane of the frustum.
+    XMVECTOR Outside = XMVectorFalseInt();
+    XMVECTOR InsideAll = XMVectorTrueInt();
+    XMVECTOR CenterInsideAll = XMVectorTrueInt();
+
+    for( size_t i = 0; i < 6; ++i )
+    {
+        // Compute the distance to the center of the box.
+        XMVECTOR Dist = XMVector4Dot( Center, Planes[i] );
+
+        // Project the axes of the box onto the normal of the plane.  Half the
+        // length of the projection (sometimes called the "radius") is equal to
+        // h(u) * abs(n dot b(u))) + h(v) * abs(n dot b(v)) + h(w) * abs(n dot b(w))
+        // where h(i) are extents of the box, n is the plane normal, and b(i) are the
+        // axes of the box.
+        XMVECTOR Radius = XMVector3Dot( Planes[i], R.r[0] );
+        Radius = XMVectorSelect( Radius, XMVector3Dot( Planes[i], R.r[1] ), SelectY );
+        Radius = XMVectorSelect( Radius, XMVector3Dot( Planes[i], R.r[2] ), SelectZ );
+        Radius = XMVector3Dot( Extents, XMVectorAbs( Radius ) );
+
+        // Outside the plane?
+        Outside = XMVectorOrInt( Outside, XMVectorGreater( Dist, Radius ) );
+
+        // Fully inside the plane?
+        InsideAll = XMVectorAndInt( InsideAll, XMVectorLessOrEqual( Dist, -Radius ) );
+
+        // Check if the center is inside the plane.
+        CenterInsideAll = XMVectorAndInt( CenterInsideAll, XMVectorLessOrEqual( Dist, Zero ) );
+    }
+
+    // If the box is outside any of the planes it is outside.
+    if ( XMVector4EqualInt( Outside, XMVectorTrueInt() ) )
+        return false;
+
+    // If the box is inside all planes it is fully inside.
+    if ( XMVector4EqualInt( InsideAll, XMVectorTrueInt() ) )
+        return true;
+
+    // If the center of the box is inside all planes and the box intersects
+    // one or more planes then it must intersect.
+    if ( XMVector4EqualInt( CenterInsideAll, XMVectorTrueInt() ) )
+        return true;
+
+    // Plane tests were inconclusive: fall back to separating-axis tests.
+    // Build the corners of the frustum.
+    XMVECTOR vRightTop = XMVectorSet( RightSlope, TopSlope, 1.0f, 0.0f );
+    XMVECTOR vRightBottom = XMVectorSet( RightSlope, BottomSlope, 1.0f, 0.0f );
+    XMVECTOR vLeftTop = XMVectorSet( LeftSlope, TopSlope, 1.0f, 0.0f );
+    XMVECTOR vLeftBottom = XMVectorSet( LeftSlope, BottomSlope, 1.0f, 0.0f );
+    XMVECTOR vNear = XMVectorReplicatePtr( &Near );
+    XMVECTOR vFar = XMVectorReplicatePtr( &Far );
+
+    XMVECTOR Corners[CORNER_COUNT];
+    Corners[0] = vRightTop * vNear;
+    Corners[1] = vRightBottom * vNear;
+    Corners[2] = vLeftTop * vNear;
+    Corners[3] = vLeftBottom * vNear;
+    Corners[4] = vRightTop * vFar;
+    Corners[5] = vRightBottom * vFar;
+    Corners[6] = vLeftTop * vFar;
+    Corners[7] = vLeftBottom * vFar;
+
+    // Test against box axes (3)
+    {
+        // Find the min/max values of the projection of the frustum onto each axis.
+        XMVECTOR FrustumMin, FrustumMax;
+
+        FrustumMin = XMVector3Dot( Corners[0], R.r[0] );
+        FrustumMin = XMVectorSelect( FrustumMin, XMVector3Dot( Corners[0], R.r[1] ), SelectY );
+        FrustumMin = XMVectorSelect( FrustumMin, XMVector3Dot( Corners[0], R.r[2] ), SelectZ );
+        FrustumMax = FrustumMin;
+
+        // NOTE(review): this loop walks the frustum's Corners[] but is bounded
+        // by BoundingOrientedBox::CORNER_COUNT; presumably both constants are
+        // 8, so the count is the same -- confirm against DirectXCollision.h.
+        for( size_t i = 1; i < BoundingOrientedBox::CORNER_COUNT; ++i )
+        {
+            XMVECTOR Temp = XMVector3Dot( Corners[i], R.r[0] );
+            Temp = XMVectorSelect( Temp, XMVector3Dot( Corners[i], R.r[1] ), SelectY );
+            Temp = XMVectorSelect( Temp, XMVector3Dot( Corners[i], R.r[2] ), SelectZ );
+
+            FrustumMin = XMVectorMin( FrustumMin, Temp );
+            FrustumMax = XMVectorMax( FrustumMax, Temp );
+        }
+
+        // Project the center of the box onto the axes.
+        XMVECTOR BoxDist = XMVector3Dot( Center, R.r[0] );
+        BoxDist = XMVectorSelect( BoxDist, XMVector3Dot( Center, R.r[1] ), SelectY );
+        BoxDist = XMVectorSelect( BoxDist, XMVector3Dot( Center, R.r[2] ), SelectZ );
+
+        // The projection of the box onto the axis is just its Center and Extents.
+        // if (min > box_max || max < box_min) reject;
+        XMVECTOR Result = XMVectorOrInt( XMVectorGreater( FrustumMin, BoxDist + Extents ),
+                                         XMVectorLess( FrustumMax, BoxDist - Extents ) );
+
+        if( DirectX::Internal::XMVector3AnyTrue( Result ) )
+            return false;
+    }
+
+    // Test against edge/edge axes (3*6).
+    XMVECTOR FrustumEdgeAxis[6];
+
+    FrustumEdgeAxis[0] = vRightTop;
+    FrustumEdgeAxis[1] = vRightBottom;
+    FrustumEdgeAxis[2] = vLeftTop;
+    FrustumEdgeAxis[3] = vLeftBottom;
+    FrustumEdgeAxis[4] = vRightTop - vLeftTop;
+    FrustumEdgeAxis[5] = vLeftBottom - vLeftTop;
+
+    for( size_t i = 0; i < 3; ++i )
+    {
+        for( size_t j = 0; j < 6; j++ )
+        {
+            // Compute the axis we are going to test.
+            XMVECTOR Axis = XMVector3Cross( R.r[i], FrustumEdgeAxis[j] );
+
+            // Find the min/max values of the projection of the frustum onto the axis.
+            XMVECTOR FrustumMin, FrustumMax;
+
+            FrustumMin = FrustumMax = XMVector3Dot( Axis, Corners[0] );
+
+            for( size_t k = 1; k < CORNER_COUNT; k++ )
+            {
+                XMVECTOR Temp = XMVector3Dot( Axis, Corners[k] );
+                FrustumMin = XMVectorMin( FrustumMin, Temp );
+                FrustumMax = XMVectorMax( FrustumMax, Temp );
+            }
+
+            // Project the center of the box onto the axis.
+            XMVECTOR Dist = XMVector3Dot( Center, Axis );
+
+            // Project the axes of the box onto the axis to find the "radius" of the box.
+            XMVECTOR Radius = XMVector3Dot( Axis, R.r[0] );
+            Radius = XMVectorSelect( Radius, XMVector3Dot( Axis, R.r[1] ), SelectY );
+            Radius = XMVectorSelect( Radius, XMVector3Dot( Axis, R.r[2] ), SelectZ );
+            Radius = XMVector3Dot( Extents, XMVectorAbs( Radius ) );
+
+            // if (center > max + radius || center < min - radius) reject;
+            Outside = XMVectorOrInt( Outside, XMVectorGreater( Dist, FrustumMax + Radius ) );
+            Outside = XMVectorOrInt( Outside, XMVectorLess( Dist, FrustumMin - Radius ) );
+        }
+    }
+
+    if ( XMVector4EqualInt( Outside, XMVectorTrueInt() ) )
+        return false;
+
+    // If we did not find a separating plane then the box must intersect the frustum.
+    return true;
+}
+
+
+//-----------------------------------------------------------------------------
+// Exact frustum vs frustum test.
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline bool BoundingFrustum::Intersects( const BoundingFrustum& fr ) const
+{
+    // Naming note: throughout this function "B" refers to *this* frustum and
+    // "A" refers to the argument 'fr'; A is transformed into B's local space.
+    // Load origin and orientation of frustum B.
+    XMVECTOR OriginB = XMLoadFloat3( &Origin );
+    XMVECTOR OrientationB = XMLoadFloat4( &Orientation );
+
+    assert( DirectX::Internal::XMQuaternionIsUnit( OrientationB ) );
+
+    // Build the planes of frustum B.
+    XMVECTOR AxisB[6];
+    AxisB[0] = XMVectorSet( 0.0f, 0.0f, -1.0f, 0.0f );
+    AxisB[1] = XMVectorSet( 0.0f, 0.0f, 1.0f, 0.0f );
+    AxisB[2] = XMVectorSet( 1.0f, 0.0f, -RightSlope, 0.0f );
+    AxisB[3] = XMVectorSet( -1.0f, 0.0f, LeftSlope, 0.0f );
+    AxisB[4] = XMVectorSet( 0.0f, 1.0f, -TopSlope, 0.0f );
+    AxisB[5] = XMVectorSet( 0.0f, -1.0f, BottomSlope, 0.0f );
+
+    XMVECTOR PlaneDistB[6];
+    PlaneDistB[0] = -XMVectorReplicatePtr( &Near );
+    PlaneDistB[1] = XMVectorReplicatePtr( &Far );
+    PlaneDistB[2] = XMVectorZero();
+    PlaneDistB[3] = XMVectorZero();
+    PlaneDistB[4] = XMVectorZero();
+    PlaneDistB[5] = XMVectorZero();
+
+    // Load origin and orientation of frustum A.
+    XMVECTOR OriginA = XMLoadFloat3( &fr.Origin );
+    XMVECTOR OrientationA = XMLoadFloat4( &fr.Orientation );
+
+    assert( DirectX::Internal::XMQuaternionIsUnit( OrientationA ) );
+
+    // Transform frustum A into the space of the frustum B in order to
+    // minimize the number of transforms we have to do.
+    OriginA = XMVector3InverseRotate( OriginA - OriginB, OrientationB );
+    OrientationA = XMQuaternionMultiply( OrientationA, XMQuaternionConjugate( OrientationB ) );
+
+    // Build the corners of frustum A (in the local space of B).
+    XMVECTOR RightTopA = XMVectorSet( fr.RightSlope, fr.TopSlope, 1.0f, 0.0f );
+    XMVECTOR RightBottomA = XMVectorSet( fr.RightSlope, fr.BottomSlope, 1.0f, 0.0f );
+    XMVECTOR LeftTopA = XMVectorSet(fr.LeftSlope,fr.TopSlope, 1.0f, 0.0f );
+    XMVECTOR LeftBottomA = XMVectorSet( fr.LeftSlope, fr.BottomSlope, 1.0f, 0.0f );
+    XMVECTOR NearA = XMVectorReplicatePtr( &fr.Near );
+    XMVECTOR FarA = XMVectorReplicatePtr( &fr.Far );
+
+    RightTopA = XMVector3Rotate( RightTopA, OrientationA );
+    RightBottomA = XMVector3Rotate( RightBottomA, OrientationA );
+    LeftTopA = XMVector3Rotate( LeftTopA, OrientationA );
+    LeftBottomA = XMVector3Rotate( LeftBottomA, OrientationA );
+
+    XMVECTOR CornersA[CORNER_COUNT];
+    CornersA[0] = OriginA + RightTopA * NearA;
+    CornersA[1] = OriginA + RightBottomA * NearA;
+    CornersA[2] = OriginA + LeftTopA * NearA;
+    CornersA[3] = OriginA + LeftBottomA * NearA;
+    CornersA[4] = OriginA + RightTopA * FarA;
+    CornersA[5] = OriginA + RightBottomA * FarA;
+    CornersA[6] = OriginA + LeftTopA * FarA;
+    CornersA[7] = OriginA + LeftBottomA * FarA;
+
+    // Check frustum A against each plane of frustum B.
+    XMVECTOR Outside = XMVectorFalseInt();
+    XMVECTOR InsideAll = XMVectorTrueInt();
+
+    for( size_t i = 0; i < 6; ++i )
+    {
+        // Find the min/max projection of the frustum onto the plane normal.
+        XMVECTOR Min, Max;
+
+        Min = Max = XMVector3Dot( AxisB[i], CornersA[0] );
+
+        for( size_t j = 1; j < CORNER_COUNT; j++ )
+        {
+            XMVECTOR Temp = XMVector3Dot( AxisB[i], CornersA[j] );
+            Min = XMVectorMin( Min, Temp );
+            Max = XMVectorMax( Max, Temp );
+        }
+
+        // Outside the plane?
+        Outside = XMVectorOrInt( Outside, XMVectorGreater( Min, PlaneDistB[i] ) );
+
+        // Fully inside the plane?
+        InsideAll = XMVectorAndInt( InsideAll, XMVectorLessOrEqual( Max, PlaneDistB[i] ) );
+    }
+
+    // If the frustum A is outside any of the planes of frustum B it is outside.
+    if ( XMVector4EqualInt( Outside, XMVectorTrueInt() ) )
+        return false;
+
+    // If frustum A is inside all planes of frustum B it is fully inside.
+    if ( XMVector4EqualInt( InsideAll, XMVectorTrueInt() ) )
+        return true;
+
+    // Build the corners of frustum B.
+    XMVECTOR RightTopB = XMVectorSet( RightSlope, TopSlope, 1.0f, 0.0f );
+    XMVECTOR RightBottomB = XMVectorSet( RightSlope, BottomSlope, 1.0f, 0.0f );
+    XMVECTOR LeftTopB = XMVectorSet( LeftSlope, TopSlope, 1.0f, 0.0f );
+    XMVECTOR LeftBottomB = XMVectorSet( LeftSlope, BottomSlope, 1.0f, 0.0f );
+    XMVECTOR NearB = XMVectorReplicatePtr( &Near );
+    XMVECTOR FarB = XMVectorReplicatePtr( &Far );
+
+    XMVECTOR CornersB[BoundingFrustum::CORNER_COUNT];
+    CornersB[0] = RightTopB * NearB;
+    CornersB[1] = RightBottomB * NearB;
+    CornersB[2] = LeftTopB * NearB;
+    CornersB[3] = LeftBottomB * NearB;
+    CornersB[4] = RightTopB * FarB;
+    CornersB[5] = RightBottomB * FarB;
+    CornersB[6] = LeftTopB * FarB;
+    CornersB[7] = LeftBottomB * FarB;
+
+    // Build the planes of frustum A (in the local space of B).
+    XMVECTOR AxisA[6];
+    XMVECTOR PlaneDistA[6];
+
+    AxisA[0] = XMVectorSet( 0.0f, 0.0f, -1.0f, 0.0f );
+    AxisA[1] = XMVectorSet( 0.0f, 0.0f, 1.0f, 0.0f );
+    AxisA[2] = XMVectorSet( 1.0f, 0.0f, -fr.RightSlope, 0.0f );
+    AxisA[3] = XMVectorSet( -1.0f, 0.0f, fr.LeftSlope, 0.0f );
+    AxisA[4] = XMVectorSet( 0.0f, 1.0f, -fr.TopSlope, 0.0f );
+    AxisA[5] = XMVectorSet( 0.0f, -1.0f, fr.BottomSlope, 0.0f );
+
+    AxisA[0] = XMVector3Rotate( AxisA[0], OrientationA );
+    // The far-plane normal is the exact negation of the rotated near-plane
+    // normal, so reuse it instead of rotating again.
+    AxisA[1] = -AxisA[0];
+    AxisA[2] = XMVector3Rotate( AxisA[2], OrientationA );
+    AxisA[3] = XMVector3Rotate( AxisA[3], OrientationA );
+    AxisA[4] = XMVector3Rotate( AxisA[4], OrientationA );
+    AxisA[5] = XMVector3Rotate( AxisA[5], OrientationA );
+
+    PlaneDistA[0] = XMVector3Dot( AxisA[0], CornersA[0] ); // Re-use corner on near plane.
+    PlaneDistA[1] = XMVector3Dot( AxisA[1], CornersA[4] ); // Re-use corner on far plane.
+    PlaneDistA[2] = XMVector3Dot( AxisA[2], OriginA );
+    PlaneDistA[3] = XMVector3Dot( AxisA[3], OriginA );
+    PlaneDistA[4] = XMVector3Dot( AxisA[4], OriginA );
+    PlaneDistA[5] = XMVector3Dot( AxisA[5], OriginA );
+
+    // Check each plane normal of frustum A for a separating axis.
+    for( size_t i = 0; i < 6; ++i )
+    {
+        // Find the minimum projection of the frustum onto the plane normal.
+        XMVECTOR Min;
+
+        Min = XMVector3Dot( AxisA[i], CornersB[0] );
+
+        for( size_t j = 1; j < CORNER_COUNT; j++ )
+        {
+            XMVECTOR Temp = XMVector3Dot( AxisA[i], CornersB[j] );
+            Min = XMVectorMin( Min, Temp );
+        }
+
+        // Outside the plane?
+        Outside = XMVectorOrInt( Outside, XMVectorGreater( Min, PlaneDistA[i] ) );
+    }
+
+    // If the frustum B is outside any of the planes of frustum A it is outside.
+    if ( XMVector4EqualInt( Outside, XMVectorTrueInt() ) )
+        return false;
+
+    // Check edge/edge axes (6 * 6).
+    XMVECTOR FrustumEdgeAxisA[6];
+    FrustumEdgeAxisA[0] = RightTopA;
+    FrustumEdgeAxisA[1] = RightBottomA;
+    FrustumEdgeAxisA[2] = LeftTopA;
+    FrustumEdgeAxisA[3] = LeftBottomA;
+    FrustumEdgeAxisA[4] = RightTopA - LeftTopA;
+    FrustumEdgeAxisA[5] = LeftBottomA - LeftTopA;
+
+    XMVECTOR FrustumEdgeAxisB[6];
+    FrustumEdgeAxisB[0] = RightTopB;
+    FrustumEdgeAxisB[1] = RightBottomB;
+    FrustumEdgeAxisB[2] = LeftTopB;
+    FrustumEdgeAxisB[3] = LeftBottomB;
+    FrustumEdgeAxisB[4] = RightTopB - LeftTopB;
+    FrustumEdgeAxisB[5] = LeftBottomB - LeftTopB;
+
+    for( size_t i = 0; i < 6; ++i )
+    {
+        for( size_t j = 0; j < 6; j++ )
+        {
+            // Compute the axis we are going to test.
+            XMVECTOR Axis = XMVector3Cross( FrustumEdgeAxisA[i], FrustumEdgeAxisB[j] );
+
+            // Find the min/max values of the projection of both frustums onto the axis.
+            XMVECTOR MinA, MaxA;
+            XMVECTOR MinB, MaxB;
+
+            MinA = MaxA = XMVector3Dot( Axis, CornersA[0] );
+            MinB = MaxB = XMVector3Dot( Axis, CornersB[0] );
+
+            for( size_t k = 1; k < CORNER_COUNT; k++ )
+            {
+                XMVECTOR TempA = XMVector3Dot( Axis, CornersA[k] );
+                MinA = XMVectorMin( MinA, TempA );
+                MaxA = XMVectorMax( MaxA, TempA );
+
+                XMVECTOR TempB = XMVector3Dot( Axis, CornersB[k] );
+                MinB = XMVectorMin( MinB, TempB );
+                MaxB = XMVectorMax( MaxB, TempB );
+            }
+
+            // if (MinA > MaxB || MinB > MaxA) reject
+            Outside = XMVectorOrInt( Outside, XMVectorGreater( MinA, MaxB ) );
+            Outside = XMVectorOrInt( Outside, XMVectorGreater( MinB, MaxA ) );
+        }
+    }
+
+    // If there is a separating plane, then the frustums do not intersect.
+    if ( XMVector4EqualInt( Outside, XMVectorTrueInt() ) )
+        return false;
+
+    // If we did not find a separating plane then the frustums intersect.
+    return true;
+}
+
+
+//-----------------------------------------------------------------------------
+// Triangle vs frustum test.
+//-----------------------------------------------------------------------------
_Use_decl_annotations_
inline bool BoundingFrustum::Intersects( FXMVECTOR V0, FXMVECTOR V1, FXMVECTOR V2 ) const
{
    // Separating-axis test between the triangle (V0,V1,V2) and this frustum.
    // The triangle is moved into the frustum's local space first; the axes
    // tested are the 6 frustum planes, the triangle's own plane, and the
    // cross products of the 3 triangle edges with 6 frustum edge directions.

    // Build the frustum planes (NOTE: D is negated from the usual).
    XMVECTOR Planes[6];
    Planes[0] = XMVectorSet( 0.0f, 0.0f, -1.0f, -Near );
    Planes[1] = XMVectorSet( 0.0f, 0.0f, 1.0f, Far );
    Planes[2] = XMVectorSet( 1.0f, 0.0f, -RightSlope, 0.0f );
    Planes[3] = XMVectorSet( -1.0f, 0.0f, LeftSlope, 0.0f );
    Planes[4] = XMVectorSet( 0.0f, 1.0f, -TopSlope, 0.0f );
    Planes[5] = XMVectorSet( 0.0f, -1.0f, BottomSlope, 0.0f );

    // Load origin and orientation of the frustum.
    XMVECTOR vOrigin = XMLoadFloat3( &Origin );
    XMVECTOR vOrientation = XMLoadFloat4( &Orientation );

    assert( DirectX::Internal::XMQuaternionIsUnit( vOrientation ) );

    // Transform triangle into the local space of frustum.
    XMVECTOR TV0 = XMVector3InverseRotate( V0 - vOrigin, vOrientation );
    XMVECTOR TV1 = XMVector3InverseRotate( V1 - vOrigin, vOrientation );
    XMVECTOR TV2 = XMVector3InverseRotate( V2 - vOrigin, vOrientation );

    // Test each vertex of the triangle against the frustum planes.
    XMVECTOR Outside = XMVectorFalseInt();
    XMVECTOR InsideAll = XMVectorTrueInt();

    for( size_t i = 0; i < 6; ++i )
    {
        // Signed projections of the three vertices onto plane i's normal;
        // the plane's D term (in w) is compared separately via PlaneDist.
        XMVECTOR Dist0 = XMVector3Dot( TV0, Planes[i] );
        XMVECTOR Dist1 = XMVector3Dot( TV1, Planes[i] );
        XMVECTOR Dist2 = XMVector3Dot( TV2, Planes[i] );

        XMVECTOR MinDist = XMVectorMin( Dist0, Dist1 );
        MinDist = XMVectorMin( MinDist, Dist2 );
        XMVECTOR MaxDist = XMVectorMax( Dist0, Dist1 );
        MaxDist = XMVectorMax( MaxDist, Dist2 );

        XMVECTOR PlaneDist = XMVectorSplatW( Planes[i] );

        // Outside the plane?
        Outside = XMVectorOrInt( Outside, XMVectorGreater( MinDist, PlaneDist ) );

        // Fully inside the plane?
        InsideAll = XMVectorAndInt( InsideAll, XMVectorLessOrEqual( MaxDist, PlaneDist ) );
    }

    // If the triangle is outside any of the planes it is outside.
    if ( XMVector4EqualInt( Outside, XMVectorTrueInt() ) )
        return false;

    // If the triangle is inside all planes it is fully inside.
    if ( XMVector4EqualInt( InsideAll, XMVectorTrueInt() ) )
        return true;

    // Build the corners of the frustum (local space: slope directions scaled
    // by the near and far distances).
    XMVECTOR vRightTop = XMVectorSet( RightSlope, TopSlope, 1.0f, 0.0f );
    XMVECTOR vRightBottom = XMVectorSet( RightSlope, BottomSlope, 1.0f, 0.0f );
    XMVECTOR vLeftTop = XMVectorSet( LeftSlope, TopSlope, 1.0f, 0.0f );
    XMVECTOR vLeftBottom = XMVectorSet( LeftSlope, BottomSlope, 1.0f, 0.0f );
    XMVECTOR vNear = XMVectorReplicatePtr( &Near );
    XMVECTOR vFar = XMVectorReplicatePtr( &Far );

    XMVECTOR Corners[CORNER_COUNT];
    Corners[0] = vRightTop * vNear;
    Corners[1] = vRightBottom * vNear;
    Corners[2] = vLeftTop * vNear;
    Corners[3] = vLeftBottom * vNear;
    Corners[4] = vRightTop * vFar;
    Corners[5] = vRightBottom * vFar;
    Corners[6] = vLeftTop * vFar;
    Corners[7] = vLeftBottom * vFar;

    // Test the plane of the triangle as a separating axis.
    XMVECTOR Normal = XMVector3Cross( V1 - V0, V2 - V0 );
    XMVECTOR Dist = XMVector3Dot( Normal, V0 );

    XMVECTOR MinDist, MaxDist;
    MinDist = MaxDist = XMVector3Dot( Corners[0], Normal );
    for( size_t i = 1; i < CORNER_COUNT; ++i )
    {
        XMVECTOR Temp = XMVector3Dot( Corners[i], Normal );
        MinDist = XMVectorMin( MinDist, Temp );
        MaxDist = XMVectorMax( MaxDist, Temp );
    }

    // Frustum projection interval entirely on one side of the triangle plane?
    Outside = XMVectorOrInt( XMVectorGreater( MinDist, Dist ), XMVectorLess( MaxDist, Dist ) );
    if ( XMVector4EqualInt( Outside, XMVectorTrueInt() ) )
        return false;

    // Check the edge/edge axes (3*6).
    XMVECTOR TriangleEdgeAxis[3];
    TriangleEdgeAxis[0] = V1 - V0;
    TriangleEdgeAxis[1] = V2 - V1;
    TriangleEdgeAxis[2] = V0 - V2;

    // The first four entries are the corner ray directions; the last two are
    // the near/far-face edge directions (horizontal and vertical).
    XMVECTOR FrustumEdgeAxis[6];
    FrustumEdgeAxis[0] = vRightTop;
    FrustumEdgeAxis[1] = vRightBottom;
    FrustumEdgeAxis[2] = vLeftTop;
    FrustumEdgeAxis[3] = vLeftBottom;
    FrustumEdgeAxis[4] = vRightTop - vLeftTop;
    FrustumEdgeAxis[5] = vLeftBottom - vLeftTop;

    for( size_t i = 0; i < 3; ++i )
    {
        for( size_t j = 0; j < 6; j++ )
        {
            // Compute the axis we are going to test.
            XMVECTOR Axis = XMVector3Cross( TriangleEdgeAxis[i], FrustumEdgeAxis[j] );

            // Find the min/max of the projection of the triangle onto the axis.
            XMVECTOR MinA, MaxA;

            XMVECTOR Dist0 = XMVector3Dot( V0, Axis );
            XMVECTOR Dist1 = XMVector3Dot( V1, Axis );
            XMVECTOR Dist2 = XMVector3Dot( V2, Axis );

            MinA = XMVectorMin( Dist0, Dist1 );
            MinA = XMVectorMin( MinA, Dist2 );
            MaxA = XMVectorMax( Dist0, Dist1 );
            MaxA = XMVectorMax( MaxA, Dist2 );

            // Find the min/max of the projection of the frustum onto the axis.
            XMVECTOR MinB, MaxB;

            MinB = MaxB = XMVector3Dot( Axis, Corners[0] );

            for( size_t k = 1; k < CORNER_COUNT; k++ )
            {
                XMVECTOR Temp = XMVector3Dot( Axis, Corners[k] );
                MinB = XMVectorMin( MinB, Temp );
                MaxB = XMVectorMax( MaxB, Temp );
            }

            // if (MinA > MaxB || MinB > MaxA) reject;
            Outside = XMVectorOrInt( Outside, XMVectorGreater( MinA, MaxB ) );
            Outside = XMVectorOrInt( Outside, XMVectorGreater( MinB, MaxA ) );
        }
    }

    if ( XMVector4EqualInt( Outside, XMVectorTrueInt() ) )
        return false;

    // If we did not find a separating plane then the triangle must intersect the frustum.
    return true;
}
+
+
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline PlaneIntersectionType BoundingFrustum::Intersects( FXMVECTOR Plane ) const
+{
+ assert( DirectX::Internal::XMPlaneIsUnit( Plane ) );
+
+ // Load origin and orientation of the frustum.
+ XMVECTOR vOrigin = XMLoadFloat3( &Origin );
+ XMVECTOR vOrientation = XMLoadFloat4( &Orientation );
+
+ assert( DirectX::Internal::XMQuaternionIsUnit( vOrientation ) );
+
+ // Set w of the origin to one so we can dot4 with a plane.
+ vOrigin = XMVectorInsert<0, 0, 0, 0, 1>( vOrigin, XMVectorSplatOne() );
+
+ // Build the corners of the frustum (in world space).
+ XMVECTOR RightTop = XMVectorSet( RightSlope, TopSlope, 1.0f, 0.0f );
+ XMVECTOR RightBottom = XMVectorSet( RightSlope, BottomSlope, 1.0f, 0.0f );
+ XMVECTOR LeftTop = XMVectorSet( LeftSlope, TopSlope, 1.0f, 0.0f );
+ XMVECTOR LeftBottom = XMVectorSet( LeftSlope, BottomSlope, 1.0f, 0.0f );
+ XMVECTOR vNear = XMVectorReplicatePtr( &Near );
+ XMVECTOR vFar = XMVectorReplicatePtr( &Far );
+
+ RightTop = XMVector3Rotate( RightTop, vOrientation );
+ RightBottom = XMVector3Rotate( RightBottom, vOrientation );
+ LeftTop = XMVector3Rotate( LeftTop, vOrientation );
+ LeftBottom = XMVector3Rotate( LeftBottom, vOrientation );
+
+ XMVECTOR Corners0 = vOrigin + RightTop * vNear;
+ XMVECTOR Corners1 = vOrigin + RightBottom * vNear;
+ XMVECTOR Corners2 = vOrigin + LeftTop * vNear;
+ XMVECTOR Corners3 = vOrigin + LeftBottom * vNear;
+ XMVECTOR Corners4 = vOrigin + RightTop * vFar;
+ XMVECTOR Corners5 = vOrigin + RightBottom * vFar;
+ XMVECTOR Corners6 = vOrigin + LeftTop * vFar;
+ XMVECTOR Corners7 = vOrigin + LeftBottom * vFar;
+
+ XMVECTOR Outside, Inside;
+ DirectX::Internal::FastIntersectFrustumPlane( Corners0, Corners1, Corners2, Corners3,
+ Corners4, Corners5, Corners6, Corners7,
+ Plane, Outside, Inside );
+
+ // If the frustum is outside any plane it is outside.
+ if ( XMVector4EqualInt( Outside, XMVectorTrueInt() ) )
+ return FRONT;
+
+ // If the frustum is inside all planes it is inside.
+ if ( XMVector4EqualInt( Inside, XMVectorTrueInt() ) )
+ return BACK;
+
+ // The frustum is not inside all planes or outside a plane it intersects.
+ return INTERSECTING;
+}
+
+
+//-----------------------------------------------------------------------------
+// Ray vs. frustum test
+//-----------------------------------------------------------------------------
_Use_decl_annotations_
inline bool BoundingFrustum::Intersects( FXMVECTOR rayOrigin, FXMVECTOR Direction, float& Dist ) const
{
    // Ray vs. frustum: clip the ray against the six frustum half-spaces.
    // On a hit, Dist receives the distance along Direction to the entry
    // point; on a miss (or a hit entirely behind the origin) Dist is 0 and
    // the function returns false.

    // If ray starts inside the frustum, return a distance of 0 for the hit
    if ( Contains(rayOrigin) == CONTAINS )
    {
        Dist = 0.0f;
        return true;
    }

    // Build the frustum planes.
    XMVECTOR Planes[6];
    Planes[0] = XMVectorSet( 0.0f, 0.0f, -1.0f, Near );
    Planes[1] = XMVectorSet( 0.0f, 0.0f, 1.0f, -Far );
    Planes[2] = XMVectorSet( 1.0f, 0.0f, -RightSlope, 0.0f );
    Planes[3] = XMVectorSet( -1.0f, 0.0f, LeftSlope, 0.0f );
    Planes[4] = XMVectorSet( 0.0f, 1.0f, -TopSlope, 0.0f );
    Planes[5] = XMVectorSet( 0.0f, -1.0f, BottomSlope, 0.0f );

    // Load origin and orientation of the frustum.
    XMVECTOR frOrigin = XMLoadFloat3( &Origin );
    XMVECTOR frOrientation = XMLoadFloat4( &Orientation );

    // This algorithm based on "Fast Ray-Convex Polyhedron Intersection," in James Arvo, ed., Graphics Gems II pp. 247-250
    float tnear = -FLT_MAX;   // Farthest entry distance found so far (front faces).
    float tfar = FLT_MAX;     // Nearest exit distance found so far (back faces).

    for( size_t i=0; i < 6; ++i )
    {
        // Move the plane into world space and renormalize it.
        XMVECTOR Plane = DirectX::Internal::XMPlaneTransform( Planes[i], frOrientation, frOrigin );
        Plane = XMPlaneNormalize( Plane );

        XMVECTOR AxisDotOrigin = XMPlaneDotCoord( Plane, rayOrigin );
        XMVECTOR AxisDotDirection = XMVector3Dot( Plane, Direction );

        if ( XMVector3LessOrEqual( XMVectorAbs( AxisDotDirection ), g_RayEpsilon ) )
        {
            // Ray is parallel to plane - check which side of the plane's
            // half-space the ray origin falls on.
            if ( XMVector3Greater( AxisDotOrigin, g_XMZero ) )
            {
                // Ray origin is outside half-space.
                Dist = 0.f;
                return false;
            }
        }
        else
        {
            // Ray not parallel - get distance to plane.
            float vd = XMVectorGetX( AxisDotDirection );
            float vn = XMVectorGetX( AxisDotOrigin );
            float t = -vn / vd;
            if (vd < 0.0f)
            {
                // Front face - T is a near point.
                if (t > tfar)
                {
                    // Entry after exit: the ray misses the polyhedron.
                    Dist = 0.f;
                    return false;
                }
                if (t > tnear)
                {
                    // Hit near face.
                    tnear = t;
                }
            }
            else
            {
                // back face - T is far point.
                if (t < tnear)
                {
                    // Exit before entry: the ray misses the polyhedron.
                    Dist = 0.f;
                    return false;
                }
                if (t < tfar)
                {
                    // Hit far face.
                    tfar = t;
                }
            }
        }
    }

    // Survived all tests.
    // Note: if ray originates on polyhedron, may want to change 0.0f to some
    // epsilon to avoid intersecting the originating face.
    float distance = ( tnear >= 0.0f ) ? tnear : tfar;
    if (distance >= 0.0f)
    {
        Dist = distance;
        return true;
    }

    Dist = 0.f;
    return false;
}
+
+
+//-----------------------------------------------------------------------------
+// Test a frustum vs 6 planes (typically forming another frustum).
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline ContainmentType BoundingFrustum::ContainedBy( FXMVECTOR Plane0, FXMVECTOR Plane1, FXMVECTOR Plane2,
+ GXMVECTOR Plane3, CXMVECTOR Plane4, CXMVECTOR Plane5 ) const
+{
+ // Load origin and orientation of the frustum.
+ XMVECTOR vOrigin = XMLoadFloat3( &Origin );
+ XMVECTOR vOrientation = XMLoadFloat4( &Orientation );
+
+ assert( DirectX::Internal::XMQuaternionIsUnit( vOrientation ) );
+
+ // Set w of the origin to one so we can dot4 with a plane.
+ vOrigin = XMVectorInsert<0, 0, 0, 0, 1>( vOrigin, XMVectorSplatOne() );
+
+ // Build the corners of the frustum (in world space).
+ XMVECTOR RightTop = XMVectorSet( RightSlope, TopSlope, 1.0f, 0.0f );
+ XMVECTOR RightBottom = XMVectorSet( RightSlope, BottomSlope, 1.0f, 0.0f );
+ XMVECTOR LeftTop = XMVectorSet( LeftSlope, TopSlope, 1.0f, 0.0f );
+ XMVECTOR LeftBottom = XMVectorSet( LeftSlope, BottomSlope, 1.0f, 0.0f );
+ XMVECTOR vNear = XMVectorReplicatePtr( &Near );
+ XMVECTOR vFar = XMVectorReplicatePtr( &Far );
+
+ RightTop = XMVector3Rotate( RightTop, vOrientation );
+ RightBottom = XMVector3Rotate( RightBottom, vOrientation );
+ LeftTop = XMVector3Rotate( LeftTop, vOrientation );
+ LeftBottom = XMVector3Rotate( LeftBottom, vOrientation );
+
+ XMVECTOR Corners0 = vOrigin + RightTop * vNear;
+ XMVECTOR Corners1 = vOrigin + RightBottom * vNear;
+ XMVECTOR Corners2 = vOrigin + LeftTop * vNear;
+ XMVECTOR Corners3 = vOrigin + LeftBottom * vNear;
+ XMVECTOR Corners4 = vOrigin + RightTop * vFar;
+ XMVECTOR Corners5 = vOrigin + RightBottom * vFar;
+ XMVECTOR Corners6 = vOrigin + LeftTop * vFar;
+ XMVECTOR Corners7 = vOrigin + LeftBottom * vFar;
+
+ XMVECTOR Outside, Inside;
+
+ // Test against each plane.
+ DirectX::Internal::FastIntersectFrustumPlane( Corners0, Corners1, Corners2, Corners3,
+ Corners4, Corners5, Corners6, Corners7,
+ Plane0, Outside, Inside );
+
+ XMVECTOR AnyOutside = Outside;
+ XMVECTOR AllInside = Inside;
+
+ DirectX::Internal::FastIntersectFrustumPlane( Corners0, Corners1, Corners2, Corners3,
+ Corners4, Corners5, Corners6, Corners7,
+ Plane1, Outside, Inside );
+
+ AnyOutside = XMVectorOrInt( AnyOutside, Outside );
+ AllInside = XMVectorAndInt( AllInside, Inside );
+
+ DirectX::Internal::FastIntersectFrustumPlane( Corners0, Corners1, Corners2, Corners3,
+ Corners4, Corners5, Corners6, Corners7,
+ Plane2, Outside, Inside );
+
+ AnyOutside = XMVectorOrInt( AnyOutside, Outside );
+ AllInside = XMVectorAndInt( AllInside, Inside );
+
+ DirectX::Internal::FastIntersectFrustumPlane( Corners0, Corners1, Corners2, Corners3,
+ Corners4, Corners5, Corners6, Corners7,
+ Plane3, Outside, Inside );
+
+ AnyOutside = XMVectorOrInt( AnyOutside, Outside );
+ AllInside = XMVectorAndInt( AllInside, Inside );
+
+ DirectX::Internal::FastIntersectFrustumPlane( Corners0, Corners1, Corners2, Corners3,
+ Corners4, Corners5, Corners6, Corners7,
+ Plane4, Outside, Inside );
+
+ AnyOutside = XMVectorOrInt( AnyOutside, Outside );
+ AllInside = XMVectorAndInt( AllInside, Inside );
+
+ DirectX::Internal::FastIntersectFrustumPlane( Corners0, Corners1, Corners2, Corners3,
+ Corners4, Corners5, Corners6, Corners7,
+ Plane5, Outside, Inside );
+
+ AnyOutside = XMVectorOrInt( AnyOutside, Outside );
+ AllInside = XMVectorAndInt( AllInside, Inside );
+
+ // If the frustum is outside any plane it is outside.
+ if ( XMVector4EqualInt( AnyOutside, XMVectorTrueInt() ) )
+ return DISJOINT;
+
+ // If the frustum is inside all planes it is inside.
+ if ( XMVector4EqualInt( AllInside, XMVectorTrueInt() ) )
+ return CONTAINS;
+
+ // The frustum is not inside all planes or outside a plane, it may intersect.
+ return INTERSECTS;
+}
+
+
+//-----------------------------------------------------------------------------
+// Build the 6 frustum planes from a frustum.
+//
+// The intended use for these routines is for fast culling to a view frustum.
+// When the volume being tested against a view frustum is small relative to the
+// view frustum it is usually either inside all six planes of the frustum
+// (CONTAINS) or outside one of the planes of the frustum (DISJOINT). If neither
+// of these cases is true then it may or may not be intersecting the frustum
+// (INTERSECTS)
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline void BoundingFrustum::GetPlanes( XMVECTOR* NearPlane, XMVECTOR* FarPlane, XMVECTOR* RightPlane,
+ XMVECTOR* LeftPlane, XMVECTOR* TopPlane, XMVECTOR* BottomPlane ) const
+{
+ // Load origin and orientation of the frustum.
+ XMVECTOR vOrigin = XMLoadFloat3( &Origin );
+ XMVECTOR vOrientation = XMLoadFloat4( &Orientation );
+
+ if (NearPlane)
+ {
+ XMVECTOR vNearPlane = XMVectorSet( 0.0f, 0.0f, -1.0f, Near );
+ vNearPlane = DirectX::Internal::XMPlaneTransform( vNearPlane, vOrientation, vOrigin );
+ *NearPlane = XMPlaneNormalize( vNearPlane );
+ }
+
+ if (FarPlane)
+ {
+ XMVECTOR vFarPlane = XMVectorSet( 0.0f, 0.0f, 1.0f, -Far );
+ vFarPlane = DirectX::Internal::XMPlaneTransform( vFarPlane, vOrientation, vOrigin );
+ *FarPlane = XMPlaneNormalize( vFarPlane );
+ }
+
+ if (RightPlane)
+ {
+ XMVECTOR vRightPlane = XMVectorSet( 1.0f, 0.0f, -RightSlope, 0.0f );
+ vRightPlane = DirectX::Internal::XMPlaneTransform( vRightPlane, vOrientation, vOrigin );
+ *RightPlane = XMPlaneNormalize( vRightPlane );
+ }
+
+ if (LeftPlane)
+ {
+ XMVECTOR vLeftPlane = XMVectorSet( -1.0f, 0.0f, LeftSlope, 0.0f );
+ vLeftPlane = DirectX::Internal::XMPlaneTransform( vLeftPlane, vOrientation, vOrigin );
+ *LeftPlane = XMPlaneNormalize( vLeftPlane );
+ }
+
+ if (TopPlane)
+ {
+ XMVECTOR vTopPlane = XMVectorSet( 0.0f, 1.0f, -TopSlope, 0.0f );
+ vTopPlane = DirectX::Internal::XMPlaneTransform( vTopPlane, vOrientation, vOrigin );
+ *TopPlane = XMPlaneNormalize( vTopPlane );
+ }
+
+ if (BottomPlane)
+ {
+ XMVECTOR vBottomPlane = XMVectorSet( 0.0f, -1.0f, BottomSlope, 0.0f );
+ vBottomPlane = DirectX::Internal::XMPlaneTransform( vBottomPlane, vOrientation, vOrigin );
+ *BottomPlane = XMPlaneNormalize( vBottomPlane );
+ }
+}
+
+
+//-----------------------------------------------------------------------------
+// Build a frustum from a perspective projection matrix. The matrix may only
+// contain a projection; any rotation, translation or scale will cause the
+// constructed frustum to be incorrect.
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline void BoundingFrustum::CreateFromMatrix( BoundingFrustum& Out, CXMMATRIX Projection )
+{
+ // Corners of the projection frustum in homogenous space.
+ static XMVECTORF32 HomogenousPoints[6] =
+ {
+ { 1.0f, 0.0f, 1.0f, 1.0f }, // right (at far plane)
+ { -1.0f, 0.0f, 1.0f, 1.0f }, // left
+ { 0.0f, 1.0f, 1.0f, 1.0f }, // top
+ { 0.0f, -1.0f, 1.0f, 1.0f }, // bottom
+
+ { 0.0f, 0.0f, 0.0f, 1.0f }, // near
+ { 0.0f, 0.0f, 1.0f, 1.0f } // far
+ };
+
+ XMVECTOR Determinant;
+ XMMATRIX matInverse = XMMatrixInverse( &Determinant, Projection );
+
+ // Compute the frustum corners in world space.
+ XMVECTOR Points[6];
+
+ for( size_t i = 0; i < 6; ++i )
+ {
+ // Transform point.
+ Points[i] = XMVector4Transform( HomogenousPoints[i], matInverse );
+ }
+
+ Out.Origin = XMFLOAT3( 0.0f, 0.0f, 0.0f );
+ Out.Orientation = XMFLOAT4( 0.0f, 0.0f, 0.0f, 1.0f );
+
+ // Compute the slopes.
+ Points[0] = Points[0] * XMVectorReciprocal( XMVectorSplatZ( Points[0] ) );
+ Points[1] = Points[1] * XMVectorReciprocal( XMVectorSplatZ( Points[1] ) );
+ Points[2] = Points[2] * XMVectorReciprocal( XMVectorSplatZ( Points[2] ) );
+ Points[3] = Points[3] * XMVectorReciprocal( XMVectorSplatZ( Points[3] ) );
+
+ Out.RightSlope = XMVectorGetX( Points[0] );
+ Out.LeftSlope = XMVectorGetX( Points[1] );
+ Out.TopSlope = XMVectorGetY( Points[2] );
+ Out.BottomSlope = XMVectorGetY( Points[3] );
+
+ // Compute near and far.
+ Points[4] = Points[4] * XMVectorReciprocal( XMVectorSplatW( Points[4] ) );
+ Points[5] = Points[5] * XMVectorReciprocal( XMVectorSplatW( Points[5] ) );
+
+ Out.Near = XMVectorGetZ( Points[4] );
+ Out.Far = XMVectorGetZ( Points[5] );
+}
+
+
+/****************************************************************************
+ *
+ * TriangleTests
+ *
+ ****************************************************************************/
+
+namespace TriangleTests
+{
+
+//-----------------------------------------------------------------------------
+// Compute the intersection of a ray (Origin, Direction) with a triangle
+// (V0, V1, V2). Return true if there is an intersection and also set *pDist
+// to the distance along the ray to the intersection.
+//
+// The algorithm is based on Moller, Tomas and Trumbore, "Fast, Minimum Storage
+// Ray-Triangle Intersection", Journal of Graphics Tools, vol. 2, no. 1,
+// pp 21-28, 1997.
+//-----------------------------------------------------------------------------
_Use_decl_annotations_
inline bool Intersects( FXMVECTOR Origin, FXMVECTOR Direction, FXMVECTOR V0, GXMVECTOR V1, CXMVECTOR V2, float& Dist )
{
    // Moller-Trumbore ray/triangle intersection.  On a hit, Dist receives the
    // distance along the (unit) ray direction to the intersection point; on a
    // miss, Dist is set to 0.  Both front- and back-facing hits are reported.
    assert( DirectX::Internal::XMVector3IsUnit( Direction ) );

    XMVECTOR Zero = XMVectorZero();

    // Triangle edge vectors sharing vertex V0.
    XMVECTOR e1 = V1 - V0;
    XMVECTOR e2 = V2 - V0;

    // p = Direction ^ e2;
    XMVECTOR p = XMVector3Cross( Direction, e2 );

    // det = e1 * p;
    XMVECTOR det = XMVector3Dot( e1, p );

    XMVECTOR u, v, t;

    if( XMVector3GreaterOrEqual( det, g_RayEpsilon ) )
    {
        // Determinant is positive (front side of the triangle).
        XMVECTOR s = Origin - V0;

        // u = s * p;
        u = XMVector3Dot( s, p );

        // Reject if the (un-normalized) barycentric u lies outside [0, det].
        XMVECTOR NoIntersection = XMVectorLess( u, Zero );
        NoIntersection = XMVectorOrInt( NoIntersection, XMVectorGreater( u, det ) );

        // q = s ^ e1;
        XMVECTOR q = XMVector3Cross( s, e1 );

        // v = Direction * q;
        v = XMVector3Dot( Direction, q );

        // Reject if v < 0 or u + v > det (point falls outside the triangle).
        NoIntersection = XMVectorOrInt( NoIntersection, XMVectorLess( v, Zero ) );
        NoIntersection = XMVectorOrInt( NoIntersection, XMVectorGreater( u + v, det ) );

        // t = e2 * q;
        t = XMVector3Dot( e2, q );

        // Reject intersections behind the ray origin.
        NoIntersection = XMVectorOrInt( NoIntersection, XMVectorLess( t, Zero ) );

        if( XMVector4EqualInt( NoIntersection, XMVectorTrueInt() ) )
        {
            Dist = 0.f;
            return false;
        }
    }
    else if( XMVector3LessOrEqual( det, g_RayNegEpsilon ) )
    {
        // Determinant is negative (back side of the triangle); the same
        // rejection tests as above with all comparisons reversed in sign.
        XMVECTOR s = Origin - V0;

        // u = s * p;
        u = XMVector3Dot( s, p );

        XMVECTOR NoIntersection = XMVectorGreater( u, Zero );
        NoIntersection = XMVectorOrInt( NoIntersection, XMVectorLess( u, det ) );

        // q = s ^ e1;
        XMVECTOR q = XMVector3Cross( s, e1 );

        // v = Direction * q;
        v = XMVector3Dot( Direction, q );

        NoIntersection = XMVectorOrInt( NoIntersection, XMVectorGreater( v, Zero ) );
        NoIntersection = XMVectorOrInt( NoIntersection, XMVectorLess( u + v, det ) );

        // t = e2 * q;
        t = XMVector3Dot( e2, q );

        NoIntersection = XMVectorOrInt( NoIntersection, XMVectorGreater( t, Zero ) );

        if ( XMVector4EqualInt( NoIntersection, XMVectorTrueInt() ) )
        {
            Dist = 0.f;
            return false;
        }
    }
    else
    {
        // Parallel ray (|det| below epsilon): treated as no intersection.
        Dist = 0.f;
        return false;
    }

    t = XMVectorDivide ( t, det );

    // (u / det) and (v / det) are the barycentric coordinates of the intersection.

    // Store the x-component to *pDist
    XMStoreFloat( &Dist, t );

    return true;
}
+
+
+//-----------------------------------------------------------------------------
+// Test if two triangles intersect.
+//
+// The final test of algorithm is based on Shen, Heng, and Tang, "A Fast
+// Triangle-Triangle Overlap Test Using Signed Distances", Journal of Graphics
+// Tools, vol. 8, no. 1, pp 17-23, 2003 and Guigue and Devillers, "Fast and
+// Robust Triangle-Triangle Overlap Test Using Orientation Predicates", Journal
+// of Graphics Tools, vol. 8, no. 1, pp 25-32, 2003.
+//
+// The final test could be considered an edge-edge separating plane test with
+// the 9 possible cases narrowed down to the only two pairs of edges that can
+// actually result in a separation.
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline bool Intersects( FXMVECTOR A0, FXMVECTOR A1, FXMVECTOR A2, GXMVECTOR B0, CXMVECTOR B1, CXMVECTOR B2 )
+{
+ static const XMVECTORI32 SelectY =
+ {
+ XM_SELECT_0, XM_SELECT_1, XM_SELECT_0, XM_SELECT_0
+ };
+ static const XMVECTORI32 SelectZ =
+ {
+ XM_SELECT_0, XM_SELECT_0, XM_SELECT_1, XM_SELECT_0
+ };
+ static const XMVECTORI32 Select0111 =
+ {
+ XM_SELECT_0, XM_SELECT_1, XM_SELECT_1, XM_SELECT_1
+ };
+ static const XMVECTORI32 Select1011 =
+ {
+ XM_SELECT_1, XM_SELECT_0, XM_SELECT_1, XM_SELECT_1
+ };
+ static const XMVECTORI32 Select1101 =
+ {
+ XM_SELECT_1, XM_SELECT_1, XM_SELECT_0, XM_SELECT_1
+ };
+
+ XMVECTOR Zero = XMVectorZero();
+
+ // Compute the normal of triangle A.
+ XMVECTOR N1 = XMVector3Cross( A1 - A0, A2 - A0 );
+
+ // Assert that the triangle is not degenerate.
+ assert( !XMVector3Equal( N1, Zero ) );
+
+ // Test points of B against the plane of A.
+ XMVECTOR BDist = XMVector3Dot( N1, B0 - A0 );
+ BDist = XMVectorSelect( BDist, XMVector3Dot( N1, B1 - A0 ), SelectY );
+ BDist = XMVectorSelect( BDist, XMVector3Dot( N1, B2 - A0 ), SelectZ );
+
+ // Ensure robustness with co-planar triangles by zeroing small distances.
+ uint32_t BDistIsZeroCR;
+ XMVECTOR BDistIsZero = XMVectorGreaterR( &BDistIsZeroCR, g_RayEpsilon, XMVectorAbs( BDist ) );
+ BDist = XMVectorSelect( BDist, Zero, BDistIsZero );
+
+ uint32_t BDistIsLessCR;
+ XMVECTOR BDistIsLess = XMVectorGreaterR( &BDistIsLessCR, Zero, BDist );
+
+ uint32_t BDistIsGreaterCR;
+ XMVECTOR BDistIsGreater = XMVectorGreaterR( &BDistIsGreaterCR, BDist, Zero );
+
+ // If all the points are on the same side we don't intersect.
+ if( XMComparisonAllTrue( BDistIsLessCR ) || XMComparisonAllTrue( BDistIsGreaterCR ) )
+ return false;
+
+ // Compute the normal of triangle B.
+ XMVECTOR N2 = XMVector3Cross( B1 - B0, B2 - B0 );
+
+ // Assert that the triangle is not degenerate.
+ assert( !XMVector3Equal( N2, Zero ) );
+
+ // Test points of A against the plane of B.
+ XMVECTOR ADist = XMVector3Dot( N2, A0 - B0 );
+ ADist = XMVectorSelect( ADist, XMVector3Dot( N2, A1 - B0 ), SelectY );
+ ADist = XMVectorSelect( ADist, XMVector3Dot( N2, A2 - B0 ), SelectZ );
+
+ // Ensure robustness with co-planar triangles by zeroing small distances.
+ uint32_t ADistIsZeroCR;
+ XMVECTOR ADistIsZero = XMVectorGreaterR( &ADistIsZeroCR, g_RayEpsilon, XMVectorAbs( BDist ) );
+ ADist = XMVectorSelect( ADist, Zero, ADistIsZero );
+
+ uint32_t ADistIsLessCR;
+ XMVECTOR ADistIsLess = XMVectorGreaterR( &ADistIsLessCR, Zero, ADist );
+
+ uint32_t ADistIsGreaterCR;
+ XMVECTOR ADistIsGreater = XMVectorGreaterR( &ADistIsGreaterCR, ADist, Zero );
+
+ // If all the points are on the same side we don't intersect.
+ if( XMComparisonAllTrue( ADistIsLessCR ) || XMComparisonAllTrue( ADistIsGreaterCR ) )
+ return false;
+
+ // Special case for co-planar triangles.
+ if( XMComparisonAllTrue( ADistIsZeroCR ) || XMComparisonAllTrue( BDistIsZeroCR ) )
+ {
+ XMVECTOR Axis, Dist, MinDist;
+
+ // Compute an axis perpindicular to the edge (points out).
+ Axis = XMVector3Cross( N1, A1 - A0 );
+ Dist = XMVector3Dot( Axis, A0 );
+
+ // Test points of B against the axis.
+ MinDist = XMVector3Dot( B0, Axis );
+ MinDist = XMVectorMin( MinDist, XMVector3Dot( B1, Axis ) );
+ MinDist = XMVectorMin( MinDist, XMVector3Dot( B2, Axis ) );
+ if( XMVector4GreaterOrEqual( MinDist, Dist ) )
+ return false;
+
+ // Edge (A1, A2)
+ Axis = XMVector3Cross( N1, A2 - A1 );
+ Dist = XMVector3Dot( Axis, A1 );
+
+ MinDist = XMVector3Dot( B0, Axis );
+ MinDist = XMVectorMin( MinDist, XMVector3Dot( B1, Axis ) );
+ MinDist = XMVectorMin( MinDist, XMVector3Dot( B2, Axis ) );
+ if( XMVector4GreaterOrEqual( MinDist, Dist ) )
+ return false;
+
+ // Edge (A2, A0)
+ Axis = XMVector3Cross( N1, A0 - A2 );
+ Dist = XMVector3Dot( Axis, A2 );
+
+ MinDist = XMVector3Dot( B0, Axis );
+ MinDist = XMVectorMin( MinDist, XMVector3Dot( B1, Axis ) );
+ MinDist = XMVectorMin( MinDist, XMVector3Dot( B2, Axis ) );
+ if( XMVector4GreaterOrEqual( MinDist, Dist ) )
+ return false;
+
+ // Edge (B0, B1)
+ Axis = XMVector3Cross( N2, B1 - B0 );
+ Dist = XMVector3Dot( Axis, B0 );
+
+ MinDist = XMVector3Dot( A0, Axis );
+ MinDist = XMVectorMin( MinDist, XMVector3Dot( A1, Axis ) );
+ MinDist = XMVectorMin( MinDist, XMVector3Dot( A2, Axis ) );
+ if( XMVector4GreaterOrEqual( MinDist, Dist ) )
+ return false;
+
+ // Edge (B1, B2)
+ Axis = XMVector3Cross( N2, B2 - B1 );
+ Dist = XMVector3Dot( Axis, B1 );
+
+ MinDist = XMVector3Dot( A0, Axis );
+ MinDist = XMVectorMin( MinDist, XMVector3Dot( A1, Axis ) );
+ MinDist = XMVectorMin( MinDist, XMVector3Dot( A2, Axis ) );
+ if( XMVector4GreaterOrEqual( MinDist, Dist ) )
+ return false;
+
+ // Edge (B2,B0)
+ Axis = XMVector3Cross( N2, B0 - B2 );
+ Dist = XMVector3Dot( Axis, B2 );
+
+ MinDist = XMVector3Dot( A0, Axis );
+ MinDist = XMVectorMin( MinDist, XMVector3Dot( A1, Axis ) );
+ MinDist = XMVectorMin( MinDist, XMVector3Dot( A2, Axis ) );
+ if( XMVector4GreaterOrEqual( MinDist, Dist ) )
+ return false;
+
+ return true;
+ }
+
+ //
+ // Find the single vertex of A and B (ie the vertex on the opposite side
+ // of the plane from the other two) and reorder the edges so we can compute
+ // the signed edge/edge distances.
+ //
+ // if ( (V0 >= 0 && V1 < 0 && V2 < 0) ||
+ // (V0 > 0 && V1 <= 0 && V2 <= 0) ||
+ // (V0 <= 0 && V1 > 0 && V2 > 0) ||
+ // (V0 < 0 && V1 >= 0 && V2 >= 0) ) then V0 is singular;
+ //
+ // If our singular vertex is not on the positive side of the plane we reverse
+ // the triangle winding so that the overlap comparisons will compare the
+ // correct edges with the correct signs.
+ //
+ XMVECTOR ADistIsLessEqual = XMVectorOrInt( ADistIsLess, ADistIsZero );
+ XMVECTOR ADistIsGreaterEqual = XMVectorOrInt( ADistIsGreater, ADistIsZero );
+
+ XMVECTOR AA0, AA1, AA2;
+ bool bPositiveA;
+
+ if( DirectX::Internal::XMVector3AllTrue( XMVectorSelect( ADistIsGreaterEqual, ADistIsLess, Select0111 ) ) ||
+ DirectX::Internal::XMVector3AllTrue( XMVectorSelect( ADistIsGreater, ADistIsLessEqual, Select0111 ) ) )
+ {
+ // A0 is singular, crossing from positive to negative.
+ AA0 = A0; AA1 = A1; AA2 = A2;
+ bPositiveA = true;
+ }
+ else if( DirectX::Internal::XMVector3AllTrue( XMVectorSelect( ADistIsLessEqual, ADistIsGreater, Select0111 ) ) ||
+ DirectX::Internal::XMVector3AllTrue( XMVectorSelect( ADistIsLess, ADistIsGreaterEqual, Select0111 ) ) )
+ {
+ // A0 is singular, crossing from negative to positive.
+ AA0 = A0; AA1 = A2; AA2 = A1;
+ bPositiveA = false;
+ }
+ else if( DirectX::Internal::XMVector3AllTrue( XMVectorSelect( ADistIsGreaterEqual, ADistIsLess, Select1011 ) ) ||
+ DirectX::Internal::XMVector3AllTrue( XMVectorSelect( ADistIsGreater, ADistIsLessEqual, Select1011 ) ) )
+ {
+ // A1 is singular, crossing from positive to negative.
+ AA0 = A1; AA1 = A2; AA2 = A0;
+ bPositiveA = true;
+ }
+ else if( DirectX::Internal::XMVector3AllTrue( XMVectorSelect( ADistIsLessEqual, ADistIsGreater, Select1011 ) ) ||
+ DirectX::Internal::XMVector3AllTrue( XMVectorSelect( ADistIsLess, ADistIsGreaterEqual, Select1011 ) ) )
+ {
+ // A1 is singular, crossing from negative to positive.
+ AA0 = A1; AA1 = A0; AA2 = A2;
+ bPositiveA = false;
+ }
+ else if( DirectX::Internal::XMVector3AllTrue( XMVectorSelect( ADistIsGreaterEqual, ADistIsLess, Select1101 ) ) ||
+ DirectX::Internal::XMVector3AllTrue( XMVectorSelect( ADistIsGreater, ADistIsLessEqual, Select1101 ) ) )
+ {
+ // A2 is singular, crossing from positive to negative.
+ AA0 = A2; AA1 = A0; AA2 = A1;
+ bPositiveA = true;
+ }
+ else if( DirectX::Internal::XMVector3AllTrue( XMVectorSelect( ADistIsLessEqual, ADistIsGreater, Select1101 ) ) ||
+ DirectX::Internal::XMVector3AllTrue( XMVectorSelect( ADistIsLess, ADistIsGreaterEqual, Select1101 ) ) )
+ {
+ // A2 is singular, crossing from negative to positive.
+ AA0 = A2; AA1 = A1; AA2 = A0;
+ bPositiveA = false;
+ }
+ else
+ {
+ assert( false );
+ return false;
+ }
+
+ XMVECTOR BDistIsLessEqual = XMVectorOrInt( BDistIsLess, BDistIsZero );
+ XMVECTOR BDistIsGreaterEqual = XMVectorOrInt( BDistIsGreater, BDistIsZero );
+
+ XMVECTOR BB0, BB1, BB2;
+ bool bPositiveB;
+
+ if( DirectX::Internal::XMVector3AllTrue( XMVectorSelect( BDistIsGreaterEqual, BDistIsLess, Select0111 ) ) ||
+ DirectX::Internal::XMVector3AllTrue( XMVectorSelect( BDistIsGreater, BDistIsLessEqual, Select0111 ) ) )
+ {
+ // B0 is singular, crossing from positive to negative.
+ BB0 = B0; BB1 = B1; BB2 = B2;
+ bPositiveB = true;
+ }
+ else if( DirectX::Internal::XMVector3AllTrue( XMVectorSelect( BDistIsLessEqual, BDistIsGreater, Select0111 ) ) ||
+ DirectX::Internal::XMVector3AllTrue( XMVectorSelect( BDistIsLess, BDistIsGreaterEqual, Select0111 ) ) )
+ {
+ // B0 is singular, crossing from negative to positive.
+ BB0 = B0; BB1 = B2; BB2 = B1;
+ bPositiveB = false;
+ }
+ else if( DirectX::Internal::XMVector3AllTrue( XMVectorSelect( BDistIsGreaterEqual, BDistIsLess, Select1011 ) ) ||
+ DirectX::Internal::XMVector3AllTrue( XMVectorSelect( BDistIsGreater, BDistIsLessEqual, Select1011 ) ) )
+ {
+ // B1 is singular, crossing from positive to negative.
+ BB0 = B1; BB1 = B2; BB2 = B0;
+ bPositiveB = true;
+ }
+ else if( DirectX::Internal::XMVector3AllTrue( XMVectorSelect( BDistIsLessEqual, BDistIsGreater, Select1011 ) ) ||
+ DirectX::Internal::XMVector3AllTrue( XMVectorSelect( BDistIsLess, BDistIsGreaterEqual, Select1011 ) ) )
+ {
+ // B1 is singular, crossing from negative to positive.
+ BB0 = B1; BB1 = B0; BB2 = B2;
+ bPositiveB = false;
+ }
+ else if( DirectX::Internal::XMVector3AllTrue( XMVectorSelect( BDistIsGreaterEqual, BDistIsLess, Select1101 ) ) ||
+ DirectX::Internal::XMVector3AllTrue( XMVectorSelect( BDistIsGreater, BDistIsLessEqual, Select1101 ) ) )
+ {
+ // B2 is singular, crossing from positive to negative.
+ BB0 = B2; BB1 = B0; BB2 = B1;
+ bPositiveB = true;
+ }
+ else if( DirectX::Internal::XMVector3AllTrue( XMVectorSelect( BDistIsLessEqual, BDistIsGreater, Select1101 ) ) ||
+ DirectX::Internal::XMVector3AllTrue( XMVectorSelect( BDistIsLess, BDistIsGreaterEqual, Select1101 ) ) )
+ {
+ // B2 is singular, crossing from negative to positive.
+ BB0 = B2; BB1 = B1; BB2 = B0;
+ bPositiveB = false;
+ }
+ else
+ {
+ assert( false );
+ return false;
+ }
+
+ XMVECTOR Delta0, Delta1;
+
+ // Reverse the direction of the test depending on whether the singular vertices are
+ // the same sign or different signs.
+ if( bPositiveA ^ bPositiveB )
+ {
+ Delta0 = ( BB0 - AA0 );
+ Delta1 = ( AA0 - BB0 );
+ }
+ else
+ {
+ Delta0 = ( AA0 - BB0 );
+ Delta1 = ( BB0 - AA0 );
+ }
+
+ // Check if the triangles overlap on the line of intersection between the
+ // planes of the two triangles by finding the signed line distances.
+ XMVECTOR Dist0 = XMVector3Dot( Delta0, XMVector3Cross( ( BB2 - BB0 ), ( AA2 - AA0 ) ) );
+ if( XMVector4Greater( Dist0, Zero ) )
+ return false;
+
+ XMVECTOR Dist1 = XMVector3Dot( Delta1, XMVector3Cross( ( BB1 - BB0 ), ( AA1 - AA0 ) ) );
+ if( XMVector4Greater( Dist1, Zero ) )
+ return false;
+
+ return true;
+}
+
+
+//-----------------------------------------------------------------------------
+// Triangle-plane test
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline PlaneIntersectionType Intersects( FXMVECTOR V0, FXMVECTOR V1, FXMVECTOR V2, GXMVECTOR Plane )
+{
+ XMVECTOR One = XMVectorSplatOne();
+
+ assert( DirectX::Internal::XMPlaneIsUnit( Plane ) );
+
+ // Set w of the points to one so we can dot4 with a plane.
+ XMVECTOR TV0 = XMVectorInsert<0, 0, 0, 0, 1>(V0, One);
+ XMVECTOR TV1 = XMVectorInsert<0, 0, 0, 0, 1>(V1, One);
+ XMVECTOR TV2 = XMVectorInsert<0, 0, 0, 0, 1>(V2, One);
+
+ XMVECTOR Outside, Inside;
+ DirectX::Internal::FastIntersectTrianglePlane( TV0, TV1, TV2, Plane, Outside, Inside );
+
+    // If the triangle is entirely on the positive side of the plane, it is in front of it.
+ if ( XMVector4EqualInt( Outside, XMVectorTrueInt() ) )
+ return FRONT;
+
+    // If the triangle is entirely on the negative side of the plane, it is behind it.
+ if ( XMVector4EqualInt( Inside, XMVectorTrueInt() ) )
+ return BACK;
+
+    // Otherwise the triangle straddles the plane and intersects it.
+ return INTERSECTING;
+}
+
+
+//-----------------------------------------------------------------------------
+// Test a triangle vs 6 planes (typically forming a frustum).
+//-----------------------------------------------------------------------------
+_Use_decl_annotations_
+inline ContainmentType ContainedBy( FXMVECTOR V0, FXMVECTOR V1, FXMVECTOR V2,
+ GXMVECTOR Plane0, CXMVECTOR Plane1, CXMVECTOR Plane2,
+ CXMVECTOR Plane3, CXMVECTOR Plane4, CXMVECTOR Plane5 )
+{
+ XMVECTOR One = XMVectorSplatOne();
+
+ // Set w of the points to one so we can dot4 with a plane.
+ XMVECTOR TV0 = XMVectorInsert<0, 0, 0, 0, 1>(V0, One);
+ XMVECTOR TV1 = XMVectorInsert<0, 0, 0, 0, 1>(V1, One);
+ XMVECTOR TV2 = XMVectorInsert<0, 0, 0, 0, 1>(V2, One);
+
+ XMVECTOR Outside, Inside;
+
+ // Test against each plane.
+ DirectX::Internal::FastIntersectTrianglePlane( TV0, TV1, TV2, Plane0, Outside, Inside );
+
+ XMVECTOR AnyOutside = Outside;
+ XMVECTOR AllInside = Inside;
+
+ DirectX::Internal::FastIntersectTrianglePlane( TV0, TV1, TV2, Plane1, Outside, Inside );
+ AnyOutside = XMVectorOrInt( AnyOutside, Outside );
+ AllInside = XMVectorAndInt( AllInside, Inside );
+
+ DirectX::Internal::FastIntersectTrianglePlane( TV0, TV1, TV2, Plane2, Outside, Inside );
+ AnyOutside = XMVectorOrInt( AnyOutside, Outside );
+ AllInside = XMVectorAndInt( AllInside, Inside );
+
+ DirectX::Internal::FastIntersectTrianglePlane( TV0, TV1, TV2, Plane3, Outside, Inside );
+ AnyOutside = XMVectorOrInt( AnyOutside, Outside );
+ AllInside = XMVectorAndInt( AllInside, Inside );
+
+ DirectX::Internal::FastIntersectTrianglePlane( TV0, TV1, TV2, Plane4, Outside, Inside );
+ AnyOutside = XMVectorOrInt( AnyOutside, Outside );
+ AllInside = XMVectorAndInt( AllInside, Inside );
+
+ DirectX::Internal::FastIntersectTrianglePlane( TV0, TV1, TV2, Plane5, Outside, Inside );
+ AnyOutside = XMVectorOrInt( AnyOutside, Outside );
+ AllInside = XMVectorAndInt( AllInside, Inside );
+
+ // If the triangle is outside any plane it is outside.
+ if ( XMVector4EqualInt( AnyOutside, XMVectorTrueInt() ) )
+ return DISJOINT;
+
+ // If the triangle is inside all planes it is inside.
+ if ( XMVector4EqualInt( AllInside, XMVectorTrueInt() ) )
+ return CONTAINS;
+
+ // The triangle is not inside all planes or outside a plane, it may intersect.
+ return INTERSECTS;
+}
+
+}; // namespace TriangleTests
+
diff --git a/Minecraft.Client/PS3/PS3Extras/DirectX/DirectXColors.h b/Minecraft.Client/PS3/PS3Extras/DirectX/DirectXColors.h
new file mode 100644
index 00000000..b728302c
--- /dev/null
+++ b/Minecraft.Client/PS3/PS3Extras/DirectX/DirectXColors.h
@@ -0,0 +1,168 @@
+//-------------------------------------------------------------------------------------
+// DirectXColors.h -- C++ Color Math library
+//
+// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
+// ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
+// PARTICULAR PURPOSE.
+//
+// Copyright (c) Microsoft Corporation. All rights reserved.
+//-------------------------------------------------------------------------------------
+
+#ifdef _MSC_VER
+#pragma once
+#endif
+
+#include "DirectXMath.h"
+
+namespace DirectX
+{
+
+namespace Colors
+{
+ // Standard colors (Red/Green/Blue/Alpha)
+ XMGLOBALCONST XMVECTORF32 AliceBlue = {0.941176534f, 0.972549081f, 1.000000000f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 AntiqueWhite = {0.980392218f, 0.921568692f, 0.843137324f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 Aqua = {0.000000000f, 1.000000000f, 1.000000000f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 Aquamarine = {0.498039246f, 1.000000000f, 0.831372619f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 Azure = {0.941176534f, 1.000000000f, 1.000000000f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 Beige = {0.960784376f, 0.960784376f, 0.862745166f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 Bisque = {1.000000000f, 0.894117713f, 0.768627524f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 Black = {0.000000000f, 0.000000000f, 0.000000000f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 BlanchedAlmond = {1.000000000f, 0.921568692f, 0.803921640f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 Blue = {0.000000000f, 0.000000000f, 1.000000000f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 BlueViolet = {0.541176498f, 0.168627456f, 0.886274576f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 Brown = {0.647058845f, 0.164705887f, 0.164705887f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 BurlyWood = {0.870588303f, 0.721568644f, 0.529411793f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 CadetBlue = {0.372549027f, 0.619607866f, 0.627451003f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 Chartreuse = {0.498039246f, 1.000000000f, 0.000000000f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 Chocolate = {0.823529482f, 0.411764741f, 0.117647067f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 Coral = {1.000000000f, 0.498039246f, 0.313725501f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 CornflowerBlue = {0.392156899f, 0.584313750f, 0.929411829f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 Cornsilk = {1.000000000f, 0.972549081f, 0.862745166f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 Crimson = {0.862745166f, 0.078431375f, 0.235294133f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 Cyan = {0.000000000f, 1.000000000f, 1.000000000f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 DarkBlue = {0.000000000f, 0.000000000f, 0.545098066f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 DarkCyan = {0.000000000f, 0.545098066f, 0.545098066f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 DarkGoldenrod = {0.721568644f, 0.525490224f, 0.043137256f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 DarkGray = {0.662745118f, 0.662745118f, 0.662745118f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 DarkGreen = {0.000000000f, 0.392156899f, 0.000000000f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 DarkKhaki = {0.741176486f, 0.717647076f, 0.419607878f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 DarkMagenta = {0.545098066f, 0.000000000f, 0.545098066f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 DarkOliveGreen = {0.333333343f, 0.419607878f, 0.184313729f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 DarkOrange = {1.000000000f, 0.549019635f, 0.000000000f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 DarkOrchid = {0.600000024f, 0.196078449f, 0.800000072f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 DarkRed = {0.545098066f, 0.000000000f, 0.000000000f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 DarkSalmon = {0.913725555f, 0.588235319f, 0.478431404f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 DarkSeaGreen = {0.560784340f, 0.737254918f, 0.545098066f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 DarkSlateBlue = {0.282352954f, 0.239215702f, 0.545098066f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 DarkSlateGray = {0.184313729f, 0.309803933f, 0.309803933f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 DarkTurquoise = {0.000000000f, 0.807843208f, 0.819607913f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 DarkViolet = {0.580392182f, 0.000000000f, 0.827451050f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 DeepPink = {1.000000000f, 0.078431375f, 0.576470613f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 DeepSkyBlue = {0.000000000f, 0.749019623f, 1.000000000f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 DimGray = {0.411764741f, 0.411764741f, 0.411764741f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 DodgerBlue = {0.117647067f, 0.564705908f, 1.000000000f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 Firebrick = {0.698039234f, 0.133333340f, 0.133333340f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 FloralWhite = {1.000000000f, 0.980392218f, 0.941176534f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 ForestGreen = {0.133333340f, 0.545098066f, 0.133333340f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 Fuchsia = {1.000000000f, 0.000000000f, 1.000000000f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 Gainsboro = {0.862745166f, 0.862745166f, 0.862745166f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 GhostWhite = {0.972549081f, 0.972549081f, 1.000000000f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 Gold = {1.000000000f, 0.843137324f, 0.000000000f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 Goldenrod = {0.854902029f, 0.647058845f, 0.125490203f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 Gray = {0.501960814f, 0.501960814f, 0.501960814f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 Green = {0.000000000f, 0.501960814f, 0.000000000f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 GreenYellow = {0.678431392f, 1.000000000f, 0.184313729f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 Honeydew = {0.941176534f, 1.000000000f, 0.941176534f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 HotPink = {1.000000000f, 0.411764741f, 0.705882370f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 IndianRed = {0.803921640f, 0.360784322f, 0.360784322f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 Indigo = {0.294117659f, 0.000000000f, 0.509803951f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 Ivory = {1.000000000f, 1.000000000f, 0.941176534f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 Khaki = {0.941176534f, 0.901960850f, 0.549019635f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 Lavender = {0.901960850f, 0.901960850f, 0.980392218f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 LavenderBlush = {1.000000000f, 0.941176534f, 0.960784376f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 LawnGreen = {0.486274540f, 0.988235354f, 0.000000000f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 LemonChiffon = {1.000000000f, 0.980392218f, 0.803921640f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 LightBlue = {0.678431392f, 0.847058892f, 0.901960850f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 LightCoral = {0.941176534f, 0.501960814f, 0.501960814f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 LightCyan = {0.878431439f, 1.000000000f, 1.000000000f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 LightGoldenrodYellow = {0.980392218f, 0.980392218f, 0.823529482f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 LightGreen = {0.564705908f, 0.933333397f, 0.564705908f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 LightGray = {0.827451050f, 0.827451050f, 0.827451050f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 LightPink = {1.000000000f, 0.713725507f, 0.756862819f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 LightSalmon = {1.000000000f, 0.627451003f, 0.478431404f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 LightSeaGreen = {0.125490203f, 0.698039234f, 0.666666687f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 LightSkyBlue = {0.529411793f, 0.807843208f, 0.980392218f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 LightSlateGray = {0.466666698f, 0.533333361f, 0.600000024f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 LightSteelBlue = {0.690196097f, 0.768627524f, 0.870588303f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 LightYellow = {1.000000000f, 1.000000000f, 0.878431439f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 Lime = {0.000000000f, 1.000000000f, 0.000000000f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 LimeGreen = {0.196078449f, 0.803921640f, 0.196078449f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 Linen = {0.980392218f, 0.941176534f, 0.901960850f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 Magenta = {1.000000000f, 0.000000000f, 1.000000000f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 Maroon = {0.501960814f, 0.000000000f, 0.000000000f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 MediumAquamarine = {0.400000036f, 0.803921640f, 0.666666687f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 MediumBlue = {0.000000000f, 0.000000000f, 0.803921640f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 MediumOrchid = {0.729411781f, 0.333333343f, 0.827451050f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 MediumPurple = {0.576470613f, 0.439215720f, 0.858823597f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 MediumSeaGreen = {0.235294133f, 0.701960802f, 0.443137288f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 MediumSlateBlue = {0.482352972f, 0.407843173f, 0.933333397f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 MediumSpringGreen = {0.000000000f, 0.980392218f, 0.603921592f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 MediumTurquoise = {0.282352954f, 0.819607913f, 0.800000072f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 MediumVioletRed = {0.780392230f, 0.082352944f, 0.521568656f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 MidnightBlue = {0.098039225f, 0.098039225f, 0.439215720f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 MintCream = {0.960784376f, 1.000000000f, 0.980392218f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 MistyRose = {1.000000000f, 0.894117713f, 0.882353008f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 Moccasin = {1.000000000f, 0.894117713f, 0.709803939f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 NavajoWhite = {1.000000000f, 0.870588303f, 0.678431392f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 Navy = {0.000000000f, 0.000000000f, 0.501960814f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 OldLace = {0.992156923f, 0.960784376f, 0.901960850f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 Olive = {0.501960814f, 0.501960814f, 0.000000000f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 OliveDrab = {0.419607878f, 0.556862772f, 0.137254909f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 Orange = {1.000000000f, 0.647058845f, 0.000000000f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 OrangeRed = {1.000000000f, 0.270588249f, 0.000000000f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 Orchid = {0.854902029f, 0.439215720f, 0.839215755f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 PaleGoldenrod = {0.933333397f, 0.909803987f, 0.666666687f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 PaleGreen = {0.596078455f, 0.984313786f, 0.596078455f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 PaleTurquoise = {0.686274529f, 0.933333397f, 0.933333397f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 PaleVioletRed = {0.858823597f, 0.439215720f, 0.576470613f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 PapayaWhip = {1.000000000f, 0.937254965f, 0.835294187f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 PeachPuff = {1.000000000f, 0.854902029f, 0.725490212f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 Peru = {0.803921640f, 0.521568656f, 0.247058839f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 Pink = {1.000000000f, 0.752941251f, 0.796078503f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 Plum = {0.866666734f, 0.627451003f, 0.866666734f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 PowderBlue = {0.690196097f, 0.878431439f, 0.901960850f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 Purple = {0.501960814f, 0.000000000f, 0.501960814f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 Red = {1.000000000f, 0.000000000f, 0.000000000f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 RosyBrown = {0.737254918f, 0.560784340f, 0.560784340f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 RoyalBlue = {0.254901975f, 0.411764741f, 0.882353008f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 SaddleBrown = {0.545098066f, 0.270588249f, 0.074509807f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 Salmon = {0.980392218f, 0.501960814f, 0.447058856f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 SandyBrown = {0.956862807f, 0.643137276f, 0.376470625f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 SeaGreen = {0.180392161f, 0.545098066f, 0.341176480f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 SeaShell = {1.000000000f, 0.960784376f, 0.933333397f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 Sienna = {0.627451003f, 0.321568638f, 0.176470593f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 Silver = {0.752941251f, 0.752941251f, 0.752941251f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 SkyBlue = {0.529411793f, 0.807843208f, 0.921568692f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 SlateBlue = {0.415686309f, 0.352941185f, 0.803921640f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 SlateGray = {0.439215720f, 0.501960814f, 0.564705908f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 Snow = {1.000000000f, 0.980392218f, 0.980392218f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 SpringGreen = {0.000000000f, 1.000000000f, 0.498039246f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 SteelBlue = {0.274509817f, 0.509803951f, 0.705882370f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 Tan = {0.823529482f, 0.705882370f, 0.549019635f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 Teal = {0.000000000f, 0.501960814f, 0.501960814f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 Thistle = {0.847058892f, 0.749019623f, 0.847058892f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 Tomato = {1.000000000f, 0.388235331f, 0.278431386f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 Transparent = {0.000000000f, 0.000000000f, 0.000000000f, 0.000000000f};
+ XMGLOBALCONST XMVECTORF32 Turquoise = {0.250980407f, 0.878431439f, 0.815686345f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 Violet = {0.933333397f, 0.509803951f, 0.933333397f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 Wheat = {0.960784376f, 0.870588303f, 0.701960802f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 White = {1.000000000f, 1.000000000f, 1.000000000f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 WhiteSmoke = {0.960784376f, 0.960784376f, 0.960784376f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 Yellow = {1.000000000f, 1.000000000f, 0.000000000f, 1.000000000f};
+ XMGLOBALCONST XMVECTORF32 YellowGreen = {0.603921592f, 0.803921640f, 0.196078449f, 1.000000000f};
+
+}; // namespace Colors
+
+}; // namespace DirectX
diff --git a/Minecraft.Client/PS3/PS3Extras/DirectX/DirectXMath.h b/Minecraft.Client/PS3/PS3Extras/DirectX/DirectXMath.h
new file mode 100644
index 00000000..c79ef233
--- /dev/null
+++ b/Minecraft.Client/PS3/PS3Extras/DirectX/DirectXMath.h
@@ -0,0 +1,1861 @@
+//-------------------------------------------------------------------------------------
+// DirectXMath.h -- SIMD C++ Math library
+//
+// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
+// ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
+// PARTICULAR PURPOSE.
+//
+// Copyright (c) Microsoft Corporation. All rights reserved.
+//-------------------------------------------------------------------------------------
+
+
+// MGH -------------------
+#define _XM_BIGENDIAN_
+#define _XM_NO_INTRINSICS_
+// -----------------------
+
+#ifdef _MSC_VER
+#pragma once
+#endif
+
+#ifndef __cplusplus
+#error DirectX Math requires C++
+#endif
+
+#define DIRECTX_MATH_VERSION 303
+
+#if !defined(_XM_BIGENDIAN_) && !defined(_XM_LITTLEENDIAN_)
+#if defined(_M_AMD64) || defined(_M_IX86) || defined(_M_ARM)
+#define _XM_LITTLEENDIAN_
+#elif defined(_M_PPCBE)
+#define _XM_BIGENDIAN_
+#else
+#error DirectX Math does not support this target
+#endif
+#endif // !_XM_BIGENDIAN_ && !_XM_LITTLEENDIAN_
+
+
+
+#if !defined(_XM_ARM_NEON_INTRINSICS_) && !defined(_XM_SSE_INTRINSICS_) && !defined(_XM_VMX128_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
+#if defined(_M_IX86) || defined(_M_AMD64)
+#define _XM_SSE_INTRINSICS_
+#elif defined(_M_PPCBE)
+#define _XM_VMX128_INTRINSICS_
+#elif defined(_M_ARM)
+#define _XM_ARM_NEON_INTRINSICS_
+#elif !defined(_XM_NO_INTRINSICS_)
+#error DirectX Math does not support this target
+#endif
+#endif // !_XM_ARM_NEON_INTRINSICS_ && !_XM_SSE_INTRINSICS_ && !_XM_VMX128_INTRINSICS_ && !_XM_NO_INTRINSICS_
+
+#pragma warning(push)
+#pragma warning(disable:4514 4820 4985)
+#include <cmath>
+#include <float.h>
+// MGH - #include <malloc.h>
+#pragma warning(pop)
+
+
+#if defined(_XM_SSE_INTRINSICS_)
+#ifndef _XM_NO_INTRINSICS_
+#include <xmmintrin.h>
+#include <emmintrin.h>
+#endif
+#elif defined(_XM_VMX128_INTRINSICS_)
+#error This version of DirectX Math does not support Xbox 360
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+#ifndef _XM_NO_INTRINSICS_
+#include <arm_neon.h>
+#endif
+#endif
+
+
+
+#include <DirectX/no_sal2.h>
+#include <assert.h>
+
+
+#pragma warning(push)
+#pragma warning(disable : 4005 4668)
+#include <stdint.h>
+#pragma warning(pop)
+
+
+namespace DirectX
+{
+
+/****************************************************************************
+ *
+ * Constant definitions
+ *
+ ****************************************************************************/
+
+#if defined(__XNAMATH_H__) && defined(XM_PI)
+#undef XM_PI
+#undef XM_2PI
+#undef XM_1DIVPI
+#undef XM_1DIV2PI
+#undef XM_PIDIV2
+#undef XM_PIDIV4
+#undef XM_SELECT_0
+#undef XM_SELECT_1
+#undef XM_PERMUTE_0X
+#undef XM_PERMUTE_0Y
+#undef XM_PERMUTE_0Z
+#undef XM_PERMUTE_0W
+#undef XM_PERMUTE_1X
+#undef XM_PERMUTE_1Y
+#undef XM_PERMUTE_1Z
+#undef XM_PERMUTE_1W
+#undef XM_CRMASK_CR6
+#undef XM_CRMASK_CR6TRUE
+#undef XM_CRMASK_CR6FALSE
+#undef XM_CRMASK_CR6BOUNDS
+#undef XM_CACHE_LINE_SIZE
+#endif
+
+const float XM_PI = 3.141592654f;
+const float XM_2PI = 6.283185307f;
+const float XM_1DIVPI = 0.318309886f;
+const float XM_1DIV2PI = 0.159154943f;
+const float XM_PIDIV2 = 1.570796327f;
+const float XM_PIDIV4 = 0.785398163f;
+
+const uint32_t XM_SELECT_0 = 0x00000000;
+const uint32_t XM_SELECT_1 = 0xFFFFFFFF;
+
+const uint32_t XM_PERMUTE_0X = 0;
+const uint32_t XM_PERMUTE_0Y = 1;
+const uint32_t XM_PERMUTE_0Z = 2;
+const uint32_t XM_PERMUTE_0W = 3;
+const uint32_t XM_PERMUTE_1X = 4;
+const uint32_t XM_PERMUTE_1Y = 5;
+const uint32_t XM_PERMUTE_1Z = 6;
+const uint32_t XM_PERMUTE_1W = 7;
+
+const uint32_t XM_SWIZZLE_X = 0;
+const uint32_t XM_SWIZZLE_Y = 1;
+const uint32_t XM_SWIZZLE_Z = 2;
+const uint32_t XM_SWIZZLE_W = 3;
+
+const uint32_t XM_CRMASK_CR6 = 0x000000F0;
+const uint32_t XM_CRMASK_CR6TRUE = 0x00000080;
+const uint32_t XM_CRMASK_CR6FALSE = 0x00000020;
+const uint32_t XM_CRMASK_CR6BOUNDS = XM_CRMASK_CR6FALSE;
+
+
+
+/****************************************************************************
+ *
+ * Macros
+ *
+ ****************************************************************************/
+
+#if defined(__XNAMATH_H__) && defined(XMComparisonAllTrue)
+#undef XMComparisonAllTrue
+#undef XMComparisonAnyTrue
+#undef XMComparisonAllFalse
+#undef XMComparisonAnyFalse
+#undef XMComparisonMixed
+#undef XMComparisonAllInBounds
+#undef XMComparisonAnyOutOfBounds
+#endif
+
+// Unit conversion
+
+inline float XMConvertToRadians(float fDegrees) { return fDegrees * (XM_PI / 180.0f); }
+inline float XMConvertToDegrees(float fRadians) { return fRadians * (180.0f / XM_PI); }
+
+// Condition register evaluation following a recording (R) comparison
+
+inline bool XMComparisonAllTrue(uint32_t CR) { return (((CR) & XM_CRMASK_CR6TRUE) == XM_CRMASK_CR6TRUE); }
+inline bool XMComparisonAnyTrue(uint32_t CR) { return (((CR) & XM_CRMASK_CR6FALSE) != XM_CRMASK_CR6FALSE); }
+inline bool XMComparisonAllFalse(uint32_t CR) { return (((CR) & XM_CRMASK_CR6FALSE) == XM_CRMASK_CR6FALSE); }
+inline bool XMComparisonAnyFalse(uint32_t CR) { return (((CR) & XM_CRMASK_CR6TRUE) != XM_CRMASK_CR6TRUE); }
+inline bool XMComparisonMixed(uint32_t CR) { return (((CR) & XM_CRMASK_CR6) == 0); }
+inline bool XMComparisonAllInBounds(uint32_t CR) { return (((CR) & XM_CRMASK_CR6BOUNDS) == XM_CRMASK_CR6BOUNDS); }
+inline bool XMComparisonAnyOutOfBounds(uint32_t CR) { return (((CR) & XM_CRMASK_CR6BOUNDS) != XM_CRMASK_CR6BOUNDS); }
+
+
+/****************************************************************************
+ *
+ * Data types
+ *
+ ****************************************************************************/
+
+#pragma warning(push)
+#pragma warning(disable:4068 4201 4365 4324 4820)
+
+#pragma prefast(push)
+#pragma prefast(disable : 25000, "FXMVECTOR is 16 bytes")
+
+#ifdef _XM_BIGENDIAN_
+#pragma bitfield_order(push)
+#pragma bitfield_order(lsb_to_msb)
+#endif
+
+//------------------------------------------------------------------------------
+#if defined(_XM_NO_INTRINSICS_) && !defined(_M_PPCBE)
+// The __vector4 structure is an intrinsic on Xbox but must be separately defined
+// for x86/x64
+struct __vector4
+{
+ union
+ {
+ float vector4_f32[4];
+ uint32_t vector4_u32[4];
+ // MGH - added to match 360 version
+//----------------------
+struct
+ {
+ float x;
+ float y;
+ float z;
+ float w;
+ };
+ float v[4];
+ uint32_t u[4];
+//----------------------
+
+ };
+};
+#endif // _XM_NO_INTRINSICS_
+
+//------------------------------------------------------------------------------
+#if (defined (_M_IX86) || defined(_M_AMD64) || defined(_M_ARM)) && defined(_XM_NO_INTRINSICS_)
+typedef uint32_t __vector4i[4];
+#else
+typedef __declspec(align(16)) uint32_t __vector4i[4];
+#endif
+
+//------------------------------------------------------------------------------
+// Vector intrinsic: Four 32 bit floating point components aligned on a 16 byte
+// boundary and mapped to hardware vector registers
+#if defined(_XM_SSE_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
+typedef __m128 XMVECTOR;
+#elif defined(_XM_ARM_NEON_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
+typedef __n128 XMVECTOR;
+#else
+typedef __vector4 XMVECTOR;
+#endif
+
+// Fix-up for (1st-3rd) XMVECTOR parameters that are pass-in-register for x86, ARM, and Xbox 360; by reference otherwise
+#if ( defined(_M_IX86) || defined(_M_ARM) || defined(_XM_VMX128_INTRINSICS_) ) && !defined(_XM_NO_INTRINSICS_)
+typedef const XMVECTOR FXMVECTOR;
+#else
+typedef const XMVECTOR& FXMVECTOR;
+#endif
+
+// Fix-up for (4th) XMVECTOR parameter to pass in-register for ARM and Xbox 360; by reference otherwise
+#if ( defined(_M_ARM) || defined(_XM_VMX128_INTRINSICS_) ) && !defined(_XM_NO_INTRINSICS_)
+typedef const XMVECTOR GXMVECTOR;
+#else
+typedef const XMVECTOR& GXMVECTOR;
+#endif
+
+// Fix-up for (5th+) XMVECTOR parameters to pass in-register for Xbox 360 and by reference otherwise
+#if defined(_XM_VMX128_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
+typedef const XMVECTOR CXMVECTOR;
+#else
+typedef const XMVECTOR& CXMVECTOR;
+#endif
+
+//------------------------------------------------------------------------------
+// Conversion types for constants
+__declspec(align(16)) struct XMVECTORF32
+{
+ union
+ {
+ float f[4];
+ XMVECTOR v;
+ };
+
+ inline operator XMVECTOR() const { return v; }
+ inline operator const float*() const { return f; }
+#if !defined(_XM_NO_INTRINSICS_) && defined(_XM_SSE_INTRINSICS_)
+ inline operator __m128i() const { return _mm_castps_si128(v); }
+ inline operator __m128d() const { return _mm_castps_pd(v); }
+#endif
+};
+
+__declspec(align(16)) struct XMVECTORI32
+{
+ union
+ {
+ int32_t i[4];
+ XMVECTOR v;
+ };
+
+ inline operator XMVECTOR() const { return v; }
+#if !defined(_XM_NO_INTRINSICS_) && defined(_XM_SSE_INTRINSICS_)
+ inline operator __m128i() const { return _mm_castps_si128(v); }
+ inline operator __m128d() const { return _mm_castps_pd(v); }
+#endif
+};
+
+__declspec(align(16)) struct XMVECTORU8
+{
+ union
+ {
+ uint8_t u[16];
+ XMVECTOR v;
+ };
+
+ inline operator XMVECTOR() const { return v; }
+#if !defined(_XM_NO_INTRINSICS_) && defined(_XM_SSE_INTRINSICS_)
+ inline operator __m128i() const { return _mm_castps_si128(v); }
+ inline operator __m128d() const { return _mm_castps_pd(v); }
+#endif
+};
+
+__declspec(align(16)) struct XMVECTORU32
+{
+ union
+ {
+ uint32_t u[4];
+ XMVECTOR v;
+ };
+
+ inline operator XMVECTOR() const { return v; }
+#if !defined(_XM_NO_INTRINSICS_) && defined(_XM_SSE_INTRINSICS_)
+ inline operator __m128i() const { return _mm_castps_si128(v); }
+ inline operator __m128d() const { return _mm_castps_pd(v); }
+#endif
+};
+
+//------------------------------------------------------------------------------
+// Vector operators
+XMVECTOR operator+ (FXMVECTOR V);
+XMVECTOR operator- (FXMVECTOR V);
+
+XMVECTOR& operator+= (XMVECTOR& V1, FXMVECTOR V2);
+XMVECTOR& operator-= (XMVECTOR& V1, FXMVECTOR V2);
+XMVECTOR& operator*= (XMVECTOR& V1, FXMVECTOR V2);
+XMVECTOR& operator/= (XMVECTOR& V1, FXMVECTOR V2);
+XMVECTOR& operator*= (XMVECTOR& V, float S);
+XMVECTOR& operator/= (XMVECTOR& V, float S);
+
+XMVECTOR operator+ (FXMVECTOR V1, FXMVECTOR V2);
+XMVECTOR operator- (FXMVECTOR V1, FXMVECTOR V2);
+XMVECTOR operator* (FXMVECTOR V1, FXMVECTOR V2);
+XMVECTOR operator/ (FXMVECTOR V1, FXMVECTOR V2);
+XMVECTOR operator* (FXMVECTOR V, float S);
+XMVECTOR operator* (float S, FXMVECTOR V);
+XMVECTOR operator/ (FXMVECTOR V, float S);
+
+//------------------------------------------------------------------------------
+// Matrix type: Sixteen 32 bit floating point components aligned on a
+// 16 byte boundary and mapped to four hardware vector registers
+
+struct XMMATRIX;
+
+// Fix-up for XMMATRIX parameters to pass in-register on Xbox 360, by reference otherwise
+#if defined(_XM_VMX128_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
+typedef const XMMATRIX CXMMATRIX;
+#else
+typedef const XMMATRIX& CXMMATRIX;
+#endif
+
+#if (defined(_M_IX86) || defined(_M_AMD64) || defined(_M_ARM)) && defined(_XM_NO_INTRINSICS_)
+struct XMMATRIX
+#else
+__declspec(align(16)) struct XMMATRIX
+#endif
+{
+#ifdef _XM_NO_INTRINSICS_
+ union
+ {
+ XMVECTOR r[4];
+ struct
+ {
+ float _11, _12, _13, _14;
+ float _21, _22, _23, _24;
+ float _31, _32, _33, _34;
+ float _41, _42, _43, _44;
+ };
+ float m[4][4];
+ };
+#else
+ XMVECTOR r[4];
+#endif
+
+ XMMATRIX() {}
+ XMMATRIX(FXMVECTOR R0, FXMVECTOR R1, FXMVECTOR R2, GXMVECTOR R3) { r[0] = R0; r[1] = R1; r[2] = R2; r[3] = R3; }
+ XMMATRIX(float m00, float m01, float m02, float m03,
+ float m10, float m11, float m12, float m13,
+ float m20, float m21, float m22, float m23,
+ float m30, float m31, float m32, float m33);
+ explicit XMMATRIX(_In_reads_(16) const float *pArray);
+
+#ifdef _XM_NO_INTRINSICS_
+ float operator() (size_t Row, size_t Column) const { return m[Row][Column]; }
+ float& operator() (size_t Row, size_t Column) { return m[Row][Column]; }
+#endif
+
+ XMMATRIX& operator= (const XMMATRIX& M) { r[0] = M.r[0]; r[1] = M.r[1]; r[2] = M.r[2]; r[3] = M.r[3]; return *this; }
+
+ XMMATRIX operator+ () const { return *this; }
+ XMMATRIX operator- () const;
+
+ XMMATRIX& operator+= (CXMMATRIX M);
+ XMMATRIX& operator-= (CXMMATRIX M);
+ XMMATRIX& operator*= (CXMMATRIX M);
+ XMMATRIX& operator*= (float S);
+ XMMATRIX& operator/= (float S);
+
+ XMMATRIX operator+ (CXMMATRIX M) const;
+ XMMATRIX operator- (CXMMATRIX M) const;
+ XMMATRIX operator* (CXMMATRIX M) const;
+ XMMATRIX operator* (float S) const;
+ XMMATRIX operator/ (float S) const;
+
+ friend XMMATRIX operator* (float S, CXMMATRIX M);
+};
+
+//------------------------------------------------------------------------------
+// 2D Vector; 32 bit floating point components
+struct XMFLOAT2
+{
+    float x;
+    float y;
+
+    // Default constructor intentionally leaves members uninitialized.
+    XMFLOAT2() {}
+    XMFLOAT2(float _x, float _y) : x(_x), y(_y) {}
+    // pArray must reference at least 2 floats: x then y.
+    explicit XMFLOAT2(_In_reads_(2) const float *pArray) : x(pArray[0]), y(pArray[1]) {}
+
+    XMFLOAT2& operator= (const XMFLOAT2& Float2) { x = Float2.x; y = Float2.y; return *this; }
+};
+
+// 2D Vector; 32 bit floating point components aligned on a 16 byte boundary
+// (alignment enables the faster aligned load/store paths, e.g. XMLoadFloat2A).
+__declspec(align(16)) struct XMFLOAT2A : public XMFLOAT2
+{
+    XMFLOAT2A() : XMFLOAT2() {}
+    XMFLOAT2A(float _x, float _y) : XMFLOAT2(_x, _y) {}
+    explicit XMFLOAT2A(_In_reads_(2) const float *pArray) : XMFLOAT2(pArray) {}
+
+    XMFLOAT2A& operator= (const XMFLOAT2A& Float2) { x = Float2.x; y = Float2.y; return *this; }
+};
+
+//------------------------------------------------------------------------------
+// 2D Vector; 32 bit signed integer components
+struct XMINT2
+{
+    int32_t x;
+    int32_t y;
+
+    // Default constructor intentionally leaves members uninitialized.
+    XMINT2() {}
+    XMINT2(int32_t _x, int32_t _y) : x(_x), y(_y) {}
+    // pArray must reference at least 2 int32_t values: x then y.
+    explicit XMINT2(_In_reads_(2) const int32_t *pArray) : x(pArray[0]), y(pArray[1]) {}
+
+    XMINT2& operator= (const XMINT2& Int2) { x = Int2.x; y = Int2.y; return *this; }
+};
+
+// 2D Vector; 32 bit unsigned integer components
+struct XMUINT2
+{
+    uint32_t x;
+    uint32_t y;
+
+    // Default constructor intentionally leaves members uninitialized.
+    XMUINT2() {}
+    XMUINT2(uint32_t _x, uint32_t _y) : x(_x), y(_y) {}
+    // pArray must reference at least 2 uint32_t values: x then y.
+    explicit XMUINT2(_In_reads_(2) const uint32_t *pArray) : x(pArray[0]), y(pArray[1]) {}
+
+    XMUINT2& operator= (const XMUINT2& UInt2) { x = UInt2.x; y = UInt2.y; return *this; }
+};
+
+//------------------------------------------------------------------------------
+// 3D Vector; 32 bit floating point components
+struct XMFLOAT3
+{
+    float x;
+    float y;
+    float z;
+
+    // Default constructor intentionally leaves members uninitialized.
+    XMFLOAT3() {}
+    XMFLOAT3(float _x, float _y, float _z) : x(_x), y(_y), z(_z) {}
+    // pArray must reference at least 3 floats: x, y, z.
+    explicit XMFLOAT3(_In_reads_(3) const float *pArray) : x(pArray[0]), y(pArray[1]), z(pArray[2]) {}
+
+    XMFLOAT3& operator= (const XMFLOAT3& Float3) { x = Float3.x; y = Float3.y; z = Float3.z; return *this; }
+};
+
+// 3D Vector; 32 bit floating point components aligned on a 16 byte boundary
+// (alignment enables the faster aligned load/store paths, e.g. XMLoadFloat3A).
+__declspec(align(16)) struct XMFLOAT3A : public XMFLOAT3
+{
+    XMFLOAT3A() : XMFLOAT3() {}
+    XMFLOAT3A(float _x, float _y, float _z) : XMFLOAT3(_x, _y, _z) {}
+    explicit XMFLOAT3A(_In_reads_(3) const float *pArray) : XMFLOAT3(pArray) {}
+
+    XMFLOAT3A& operator= (const XMFLOAT3A& Float3) { x = Float3.x; y = Float3.y; z = Float3.z; return *this; }
+};
+
+//------------------------------------------------------------------------------
+// 3D Vector; 32 bit signed integer components
+struct XMINT3
+{
+    int32_t x;
+    int32_t y;
+    int32_t z;
+
+    // Default constructor intentionally leaves members uninitialized.
+    XMINT3() {}
+    XMINT3(int32_t _x, int32_t _y, int32_t _z) : x(_x), y(_y), z(_z) {}
+    // pArray must reference at least 3 int32_t values: x, y, z.
+    explicit XMINT3(_In_reads_(3) const int32_t *pArray) : x(pArray[0]), y(pArray[1]), z(pArray[2]) {}
+
+    XMINT3& operator= (const XMINT3& i3) { x = i3.x; y = i3.y; z = i3.z; return *this; }
+};
+
+// 3D Vector; 32 bit unsigned integer components
+struct XMUINT3
+{
+    uint32_t x;
+    uint32_t y;
+    uint32_t z;
+
+    // Default constructor intentionally leaves members uninitialized.
+    XMUINT3() {}
+    XMUINT3(uint32_t _x, uint32_t _y, uint32_t _z) : x(_x), y(_y), z(_z) {}
+    // pArray must reference at least 3 uint32_t values: x, y, z.
+    explicit XMUINT3(_In_reads_(3) const uint32_t *pArray) : x(pArray[0]), y(pArray[1]), z(pArray[2]) {}
+
+    XMUINT3& operator= (const XMUINT3& u3) { x = u3.x; y = u3.y; z = u3.z; return *this; }
+};
+
+//------------------------------------------------------------------------------
+// 4D Vector; 32 bit floating point components
+struct XMFLOAT4
+{
+    float x;
+    float y;
+    float z;
+    float w;
+
+    // Default constructor intentionally leaves members uninitialized.
+    XMFLOAT4() {}
+    XMFLOAT4(float _x, float _y, float _z, float _w) : x(_x), y(_y), z(_z), w(_w) {}
+    // pArray must reference at least 4 floats: x, y, z, w.
+    explicit XMFLOAT4(_In_reads_(4) const float *pArray) : x(pArray[0]), y(pArray[1]), z(pArray[2]), w(pArray[3]) {}
+
+    XMFLOAT4& operator= (const XMFLOAT4& Float4) { x = Float4.x; y = Float4.y; z = Float4.z; w = Float4.w; return *this; }
+};
+
+// 4D Vector; 32 bit floating point components aligned on a 16 byte boundary
+// (alignment enables the faster aligned load/store paths, e.g. XMLoadFloat4A).
+__declspec(align(16)) struct XMFLOAT4A : public XMFLOAT4
+{
+    XMFLOAT4A() : XMFLOAT4() {}
+    XMFLOAT4A(float _x, float _y, float _z, float _w) : XMFLOAT4(_x, _y, _z, _w) {}
+    explicit XMFLOAT4A(_In_reads_(4) const float *pArray) : XMFLOAT4(pArray) {}
+
+    XMFLOAT4A& operator= (const XMFLOAT4A& Float4) { x = Float4.x; y = Float4.y; z = Float4.z; w = Float4.w; return *this; }
+};
+
+//------------------------------------------------------------------------------
+// 4D Vector; 32 bit signed integer components
+struct XMINT4
+{
+    int32_t x;
+    int32_t y;
+    int32_t z;
+    int32_t w;
+
+    // Default constructor intentionally leaves members uninitialized.
+    XMINT4() {}
+    XMINT4(int32_t _x, int32_t _y, int32_t _z, int32_t _w) : x(_x), y(_y), z(_z), w(_w) {}
+    // pArray must reference at least 4 int32_t values: x, y, z, w.
+    explicit XMINT4(_In_reads_(4) const int32_t *pArray) : x(pArray[0]), y(pArray[1]), z(pArray[2]), w(pArray[3]) {}
+
+    XMINT4& operator= (const XMINT4& Int4) { x = Int4.x; y = Int4.y; z = Int4.z; w = Int4.w; return *this; }
+};
+
+// 4D Vector; 32 bit unsigned integer components
+struct XMUINT4
+{
+    uint32_t x;
+    uint32_t y;
+    uint32_t z;
+    uint32_t w;
+
+    // Default constructor intentionally leaves members uninitialized.
+    XMUINT4() {}
+    XMUINT4(uint32_t _x, uint32_t _y, uint32_t _z, uint32_t _w) : x(_x), y(_y), z(_z), w(_w) {}
+    // pArray must reference at least 4 uint32_t values: x, y, z, w.
+    explicit XMUINT4(_In_reads_(4) const uint32_t *pArray) : x(pArray[0]), y(pArray[1]), z(pArray[2]), w(pArray[3]) {}
+
+    XMUINT4& operator= (const XMUINT4& UInt4) { x = UInt4.x; y = UInt4.y; z = UInt4.z; w = UInt4.w; return *this; }
+};
+
+//------------------------------------------------------------------------------
+// 3x3 Matrix: 32 bit floating point components
+struct XMFLOAT3X3
+{
+    // Named elements (_RC, 1-based row/column) and the m[][] array are
+    // overlapping views of the same storage.
+    union
+    {
+        struct
+        {
+            float _11, _12, _13;
+            float _21, _22, _23;
+            float _31, _32, _33;
+        };
+        float m[3][3];
+    };
+
+    // Default constructor intentionally leaves the matrix uninitialized.
+    XMFLOAT3X3() {}
+    // Construct element-wise; mRC is the element at row R, column C (0-based args).
+    XMFLOAT3X3(float m00, float m01, float m02,
+               float m10, float m11, float m12,
+               float m20, float m21, float m22);
+    // pArray must reference 9 floats in row-major order.
+    explicit XMFLOAT3X3(_In_reads_(9) const float *pArray);
+
+    // Element access by (Row, Column), 0-based.
+    float operator() (size_t Row, size_t Column) const { return m[Row][Column]; }
+    float& operator() (size_t Row, size_t Column) { return m[Row][Column]; }
+
+    XMFLOAT3X3& operator= (const XMFLOAT3X3& Float3x3);
+};
+
+//------------------------------------------------------------------------------
+// 4x3 Matrix: 32 bit floating point components
+struct XMFLOAT4X3
+{
+    // Named elements (_RC, 1-based row/column) and the m[][] array are
+    // overlapping views of the same storage.
+    union
+    {
+        struct
+        {
+            float _11, _12, _13;
+            float _21, _22, _23;
+            float _31, _32, _33;
+            float _41, _42, _43;
+        };
+        float m[4][3];
+    };
+
+    // Default constructor intentionally leaves the matrix uninitialized.
+    XMFLOAT4X3() {}
+    // Construct element-wise; mRC is the element at row R, column C (0-based args).
+    XMFLOAT4X3(float m00, float m01, float m02,
+               float m10, float m11, float m12,
+               float m20, float m21, float m22,
+               float m30, float m31, float m32);
+    // pArray must reference 12 floats in row-major order.
+    explicit XMFLOAT4X3(_In_reads_(12) const float *pArray);
+
+    // Element access by (Row, Column), 0-based.
+    float operator() (size_t Row, size_t Column) const { return m[Row][Column]; }
+    float& operator() (size_t Row, size_t Column) { return m[Row][Column]; }
+
+    XMFLOAT4X3& operator= (const XMFLOAT4X3& Float4x3);
+
+};
+
+// 4x3 Matrix: 32 bit floating point components aligned on a 16 byte boundary
+// (alignment enables the faster aligned load/store paths, e.g. XMLoadFloat4x3A).
+__declspec(align(16)) struct XMFLOAT4X3A : public XMFLOAT4X3
+{
+    XMFLOAT4X3A() : XMFLOAT4X3() {}
+    XMFLOAT4X3A(float m00, float m01, float m02,
+                float m10, float m11, float m12,
+                float m20, float m21, float m22,
+                float m30, float m31, float m32) :
+        XMFLOAT4X3(m00,m01,m02,m10,m11,m12,m20,m21,m22,m30,m31,m32) {}
+    explicit XMFLOAT4X3A(_In_reads_(12) const float *pArray) : XMFLOAT4X3(pArray) {}
+
+    // Element access by (Row, Column), 0-based.
+    float operator() (size_t Row, size_t Column) const { return m[Row][Column]; }
+    float& operator() (size_t Row, size_t Column) { return m[Row][Column]; }
+
+    XMFLOAT4X3A& operator= (const XMFLOAT4X3A& Float4x3);
+};
+
+//------------------------------------------------------------------------------
+// 4x4 Matrix: 32 bit floating point components
+struct XMFLOAT4X4
+{
+    // Named elements (_RC, 1-based row/column) and the m[][] array are
+    // overlapping views of the same storage.
+    union
+    {
+        struct
+        {
+            float _11, _12, _13, _14;
+            float _21, _22, _23, _24;
+            float _31, _32, _33, _34;
+            float _41, _42, _43, _44;
+        };
+        float m[4][4];
+    };
+
+    // Default constructor intentionally leaves the matrix uninitialized.
+    XMFLOAT4X4() {}
+    // Construct element-wise; mRC is the element at row R, column C (0-based args).
+    XMFLOAT4X4(float m00, float m01, float m02, float m03,
+               float m10, float m11, float m12, float m13,
+               float m20, float m21, float m22, float m23,
+               float m30, float m31, float m32, float m33);
+    // pArray must reference 16 floats in row-major order.
+    explicit XMFLOAT4X4(_In_reads_(16) const float *pArray);
+
+    // Element access by (Row, Column), 0-based.
+    float operator() (size_t Row, size_t Column) const { return m[Row][Column]; }
+    float& operator() (size_t Row, size_t Column) { return m[Row][Column]; }
+
+    XMFLOAT4X4& operator= (const XMFLOAT4X4& Float4x4);
+};
+
+// 4x4 Matrix: 32 bit floating point components aligned on a 16 byte boundary
+// (alignment enables the faster aligned load/store paths, e.g. XMLoadFloat4x4A).
+__declspec(align(16)) struct XMFLOAT4X4A : public XMFLOAT4X4
+{
+    XMFLOAT4X4A() : XMFLOAT4X4() {}
+    XMFLOAT4X4A(float m00, float m01, float m02, float m03,
+                float m10, float m11, float m12, float m13,
+                float m20, float m21, float m22, float m23,
+                float m30, float m31, float m32, float m33)
+        : XMFLOAT4X4(m00,m01,m02,m03,m10,m11,m12,m13,m20,m21,m22,m23,m30,m31,m32,m33) {}
+    explicit XMFLOAT4X4A(_In_reads_(16) const float *pArray) : XMFLOAT4X4(pArray) {}
+
+    // Element access by (Row, Column), 0-based.
+    float operator() (size_t Row, size_t Column) const { return m[Row][Column]; }
+    float& operator() (size_t Row, size_t Column) { return m[Row][Column]; }
+
+    XMFLOAT4X4A& operator= (const XMFLOAT4X4A& Float4x4);
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+
+#ifdef _XM_BIGENDIAN_
+#pragma bitfield_order(pop)
+#endif
+
+#pragma prefast(pop)
+#pragma warning(pop)
+
+/****************************************************************************
+ *
+ * Data conversion operations
+ *
+ ****************************************************************************/
+
+// On VMX128 these conversions are provided as intrinsic macros elsewhere;
+// declare the functions only on other targets.
+#if !defined(_XM_NO_INTRINSICS_) && defined(_XM_VMX128_INTRINSICS_)
+#else
+// Int<->float conversions with an implicit scale by 2^exponent.
+XMVECTOR XMConvertVectorIntToFloat(FXMVECTOR VInt, uint32_t DivExponent);
+XMVECTOR XMConvertVectorFloatToInt(FXMVECTOR VFloat, uint32_t MulExponent);
+XMVECTOR XMConvertVectorUIntToFloat(FXMVECTOR VUInt, uint32_t DivExponent);
+XMVECTOR XMConvertVectorFloatToUInt(FXMVECTOR VFloat, uint32_t MulExponent);
+#endif
+
+#if !defined(_XM_NO_INTRINSICS_) && defined(_XM_VMX128_INTRINSICS_)
+#else
+
+// XNAMath compatibility: its macro versions would shadow these functions.
+#if defined(__XNAMATH_H__) && defined(XMVectorSetBinaryConstant)
+#undef XMVectorSetBinaryConstant
+#undef XMVectorSplatConstant
+#undef XMVectorSplatConstantInt
+#endif
+
+XMVECTOR XMVectorSetBinaryConstant(uint32_t C0, uint32_t C1, uint32_t C2, uint32_t C3);
+XMVECTOR XMVectorSplatConstant(int32_t IntConstant, uint32_t DivExponent);
+XMVECTOR XMVectorSplatConstantInt(int32_t IntConstant);
+#endif
+
+/****************************************************************************
+ *
+ * Load operations
+ *
+ ****************************************************************************/
+
+// Load memory into an XMVECTOR. The "A" variants require 16-byte-aligned
+// sources; unused lanes of partial loads are undefined per the library's
+// documented contract. SInt/UInt variants convert integers to floats.
+
+XMVECTOR XMLoadInt(_In_ const uint32_t* pSource);
+XMVECTOR XMLoadFloat(_In_ const float* pSource);
+
+// Fixed inconsistent parameter capitalization (was "PSource"); declaration-only
+// change, every sibling uses "pSource".
+XMVECTOR XMLoadInt2(_In_reads_(2) const uint32_t* pSource);
+XMVECTOR XMLoadInt2A(_In_reads_(2) const uint32_t* pSource);
+XMVECTOR XMLoadFloat2(_In_ const XMFLOAT2* pSource);
+XMVECTOR XMLoadFloat2A(_In_ const XMFLOAT2A* pSource);
+XMVECTOR XMLoadSInt2(_In_ const XMINT2* pSource);
+XMVECTOR XMLoadUInt2(_In_ const XMUINT2* pSource);
+
+XMVECTOR XMLoadInt3(_In_reads_(3) const uint32_t* pSource);
+XMVECTOR XMLoadInt3A(_In_reads_(3) const uint32_t* pSource);
+XMVECTOR XMLoadFloat3(_In_ const XMFLOAT3* pSource);
+XMVECTOR XMLoadFloat3A(_In_ const XMFLOAT3A* pSource);
+XMVECTOR XMLoadSInt3(_In_ const XMINT3* pSource);
+XMVECTOR XMLoadUInt3(_In_ const XMUINT3* pSource);
+
+XMVECTOR XMLoadInt4(_In_reads_(4) const uint32_t* pSource);
+XMVECTOR XMLoadInt4A(_In_reads_(4) const uint32_t* pSource);
+XMVECTOR XMLoadFloat4(_In_ const XMFLOAT4* pSource);
+XMVECTOR XMLoadFloat4A(_In_ const XMFLOAT4A* pSource);
+XMVECTOR XMLoadSInt4(_In_ const XMINT4* pSource);
+XMVECTOR XMLoadUInt4(_In_ const XMUINT4* pSource);
+
+// Matrix loads; 3x3 and 4x3 sources expand to a full 4x4 XMMATRIX.
+XMMATRIX XMLoadFloat3x3(_In_ const XMFLOAT3X3* pSource);
+XMMATRIX XMLoadFloat4x3(_In_ const XMFLOAT4X3* pSource);
+XMMATRIX XMLoadFloat4x3A(_In_ const XMFLOAT4X3A* pSource);
+XMMATRIX XMLoadFloat4x4(_In_ const XMFLOAT4X4* pSource);
+XMMATRIX XMLoadFloat4x4A(_In_ const XMFLOAT4X4A* pSource);
+
+/****************************************************************************
+ *
+ * Store operations
+ *
+ ****************************************************************************/
+
+// Store an XMVECTOR (or XMMATRIX) to memory. The "A" variants require
+// 16-byte-aligned destinations; SInt/UInt variants convert floats to integers.
+
+void XMStoreInt(_Out_ uint32_t* pDestination, _In_ FXMVECTOR V);
+void XMStoreFloat(_Out_ float* pDestination, _In_ FXMVECTOR V);
+
+void XMStoreInt2(_Out_writes_(2) uint32_t* pDestination, _In_ FXMVECTOR V);
+void XMStoreInt2A(_Out_writes_(2) uint32_t* pDestination, _In_ FXMVECTOR V);
+void XMStoreFloat2(_Out_ XMFLOAT2* pDestination, _In_ FXMVECTOR V);
+void XMStoreFloat2A(_Out_ XMFLOAT2A* pDestination, _In_ FXMVECTOR V);
+void XMStoreSInt2(_Out_ XMINT2* pDestination, _In_ FXMVECTOR V);
+void XMStoreUInt2(_Out_ XMUINT2* pDestination, _In_ FXMVECTOR V);
+
+void XMStoreInt3(_Out_writes_(3) uint32_t* pDestination, _In_ FXMVECTOR V);
+void XMStoreInt3A(_Out_writes_(3) uint32_t* pDestination, _In_ FXMVECTOR V);
+void XMStoreFloat3(_Out_ XMFLOAT3* pDestination, _In_ FXMVECTOR V);
+void XMStoreFloat3A(_Out_ XMFLOAT3A* pDestination, _In_ FXMVECTOR V);
+void XMStoreSInt3(_Out_ XMINT3* pDestination, _In_ FXMVECTOR V);
+void XMStoreUInt3(_Out_ XMUINT3* pDestination, _In_ FXMVECTOR V);
+
+void XMStoreInt4(_Out_writes_(4) uint32_t* pDestination, _In_ FXMVECTOR V);
+void XMStoreInt4A(_Out_writes_(4) uint32_t* pDestination, _In_ FXMVECTOR V);
+void XMStoreFloat4(_Out_ XMFLOAT4* pDestination, _In_ FXMVECTOR V);
+void XMStoreFloat4A(_Out_ XMFLOAT4A* pDestination, _In_ FXMVECTOR V);
+void XMStoreSInt4(_Out_ XMINT4* pDestination, _In_ FXMVECTOR V);
+void XMStoreUInt4(_Out_ XMUINT4* pDestination, _In_ FXMVECTOR V);
+
+// Matrix stores; 3x3 and 4x3 destinations receive the corresponding sub-matrix.
+void XMStoreFloat3x3(_Out_ XMFLOAT3X3* pDestination, _In_ CXMMATRIX M);
+void XMStoreFloat4x3(_Out_ XMFLOAT4X3* pDestination, _In_ CXMMATRIX M);
+void XMStoreFloat4x3A(_Out_ XMFLOAT4X3A* pDestination, _In_ CXMMATRIX M);
+void XMStoreFloat4x4(_Out_ XMFLOAT4X4* pDestination, _In_ CXMMATRIX M);
+void XMStoreFloat4x4A(_Out_ XMFLOAT4X4A* pDestination, _In_ CXMMATRIX M);
+
+/****************************************************************************
+ *
+ * General vector operations
+ *
+ ****************************************************************************/
+
+// Construction and broadcast (splat) helpers.
+XMVECTOR XMVectorZero();
+XMVECTOR XMVectorSet(float x, float y, float z, float w);
+XMVECTOR XMVectorSetInt(uint32_t x, uint32_t y, uint32_t z, uint32_t w);
+XMVECTOR XMVectorReplicate(float Value);
+XMVECTOR XMVectorReplicatePtr(_In_ const float *pValue);
+XMVECTOR XMVectorReplicateInt(uint32_t Value);
+XMVECTOR XMVectorReplicateIntPtr(_In_ const uint32_t *pValue);
+XMVECTOR XMVectorTrueInt();
+XMVECTOR XMVectorFalseInt();
+XMVECTOR XMVectorSplatX(FXMVECTOR V);
+XMVECTOR XMVectorSplatY(FXMVECTOR V);
+XMVECTOR XMVectorSplatZ(FXMVECTOR V);
+XMVECTOR XMVectorSplatW(FXMVECTOR V);
+XMVECTOR XMVectorSplatOne();
+XMVECTOR XMVectorSplatInfinity();
+XMVECTOR XMVectorSplatQNaN();
+XMVECTOR XMVectorSplatEpsilon();
+XMVECTOR XMVectorSplatSignMask();
+
+// Component accessors (by value); index i selects x=0, y=1, z=2, w=3.
+float XMVectorGetByIndex(FXMVECTOR V, size_t i);
+float XMVectorGetX(FXMVECTOR V);
+float XMVectorGetY(FXMVECTOR V);
+float XMVectorGetZ(FXMVECTOR V);
+float XMVectorGetW(FXMVECTOR V);
+
+// Component accessors (through a pointer output parameter).
+void XMVectorGetByIndexPtr(_Out_ float *f, _In_ FXMVECTOR V, _In_ size_t i);
+void XMVectorGetXPtr(_Out_ float *x, _In_ FXMVECTOR V);
+void XMVectorGetYPtr(_Out_ float *y, _In_ FXMVECTOR V);
+void XMVectorGetZPtr(_Out_ float *z, _In_ FXMVECTOR V);
+void XMVectorGetWPtr(_Out_ float *w, _In_ FXMVECTOR V);
+
+// Integer-lane accessors: read the raw 32-bit lane contents as uint32_t.
+uint32_t XMVectorGetIntByIndex(FXMVECTOR V, size_t i);
+uint32_t XMVectorGetIntX(FXMVECTOR V);
+uint32_t XMVectorGetIntY(FXMVECTOR V);
+uint32_t XMVectorGetIntZ(FXMVECTOR V);
+uint32_t XMVectorGetIntW(FXMVECTOR V);
+
+void XMVectorGetIntByIndexPtr(_Out_ uint32_t *x, _In_ FXMVECTOR V, _In_ size_t i);
+void XMVectorGetIntXPtr(_Out_ uint32_t *x, _In_ FXMVECTOR V);
+void XMVectorGetIntYPtr(_Out_ uint32_t *y, _In_ FXMVECTOR V);
+void XMVectorGetIntZPtr(_Out_ uint32_t *z, _In_ FXMVECTOR V);
+void XMVectorGetIntWPtr(_Out_ uint32_t *w, _In_ FXMVECTOR V);
+
+// Component setters: return a copy of V with one lane replaced.
+XMVECTOR XMVectorSetByIndex(FXMVECTOR V,float f, size_t i);
+XMVECTOR XMVectorSetX(FXMVECTOR V, float x);
+XMVECTOR XMVectorSetY(FXMVECTOR V, float y);
+XMVECTOR XMVectorSetZ(FXMVECTOR V, float z);
+XMVECTOR XMVectorSetW(FXMVECTOR V, float w);
+
+XMVECTOR XMVectorSetByIndexPtr(_In_ FXMVECTOR V, _In_ const float *f, _In_ size_t i);
+XMVECTOR XMVectorSetXPtr(_In_ FXMVECTOR V, _In_ const float *x);
+XMVECTOR XMVectorSetYPtr(_In_ FXMVECTOR V, _In_ const float *y);
+XMVECTOR XMVectorSetZPtr(_In_ FXMVECTOR V, _In_ const float *z);
+XMVECTOR XMVectorSetWPtr(_In_ FXMVECTOR V, _In_ const float *w);
+
+// Integer-lane setters: write the raw 32-bit lane contents.
+XMVECTOR XMVectorSetIntByIndex(FXMVECTOR V, uint32_t x, size_t i);
+XMVECTOR XMVectorSetIntX(FXMVECTOR V, uint32_t x);
+XMVECTOR XMVectorSetIntY(FXMVECTOR V, uint32_t y);
+XMVECTOR XMVectorSetIntZ(FXMVECTOR V, uint32_t z);
+XMVECTOR XMVectorSetIntW(FXMVECTOR V, uint32_t w);
+
+XMVECTOR XMVectorSetIntByIndexPtr(_In_ FXMVECTOR V, _In_ const uint32_t *x, _In_ size_t i);
+XMVECTOR XMVectorSetIntXPtr(_In_ FXMVECTOR V, _In_ const uint32_t *x);
+XMVECTOR XMVectorSetIntYPtr(_In_ FXMVECTOR V, _In_ const uint32_t *y);
+XMVECTOR XMVectorSetIntZPtr(_In_ FXMVECTOR V, _In_ const uint32_t *z);
+XMVECTOR XMVectorSetIntWPtr(_In_ FXMVECTOR V, _In_ const uint32_t *w);
+
+// XNAMath compatibility: its macro version would shadow this function.
+#if defined(__XNAMATH_H__) && defined(XMVectorSwizzle)
+#undef XMVectorSwizzle
+#endif
+
+// Lane rearrangement and selection.
+XMVECTOR XMVectorSwizzle(FXMVECTOR V, uint32_t E0, uint32_t E1, uint32_t E2, uint32_t E3);
+XMVECTOR XMVectorPermute(FXMVECTOR V1, FXMVECTOR V2, uint32_t PermuteX, uint32_t PermuteY, uint32_t PermuteZ, uint32_t PermuteW);
+XMVECTOR XMVectorSelectControl(uint32_t VectorIndex0, uint32_t VectorIndex1, uint32_t VectorIndex2, uint32_t VectorIndex3);
+XMVECTOR XMVectorSelect(FXMVECTOR V1, FXMVECTOR V2, FXMVECTOR Control);
+XMVECTOR XMVectorMergeXY(FXMVECTOR V1, FXMVECTOR V2);
+XMVECTOR XMVectorMergeZW(FXMVECTOR V1, FXMVECTOR V2);
+
+// XNAMath compatibility: macro versions would shadow these functions.
+#if defined(__XNAMATH_H__) && defined(XMVectorShiftLeft)
+#undef XMVectorShiftLeft
+#undef XMVectorRotateLeft
+#undef XMVectorRotateRight
+#undef XMVectorInsert
+#endif
+
+// Lane shifts/rotations; Elements counts whole 32-bit lanes.
+XMVECTOR XMVectorShiftLeft(FXMVECTOR V1, FXMVECTOR V2, uint32_t Elements);
+XMVECTOR XMVectorRotateLeft(FXMVECTOR V, uint32_t Elements);
+XMVECTOR XMVectorRotateRight(FXMVECTOR V, uint32_t Elements);
+XMVECTOR XMVectorInsert(FXMVECTOR VD, FXMVECTOR VS, uint32_t VSLeftRotateElements,
+                        uint32_t Select0, uint32_t Select1, uint32_t Select2, uint32_t Select3);
+
+// Per-lane comparisons: each result lane is all-ones (0xFFFFFFFF) where the
+// comparison holds, zero elsewhere. The "R" variants additionally return a
+// comparison-record bitmask through pCR (test with XMComparisonAllTrue etc.).
+XMVECTOR XMVectorEqual(FXMVECTOR V1, FXMVECTOR V2);
+XMVECTOR XMVectorEqualR(_Out_ uint32_t* pCR, _In_ FXMVECTOR V1, _In_ FXMVECTOR V2);
+XMVECTOR XMVectorEqualInt(FXMVECTOR V1, FXMVECTOR V2);
+// Renamed parameter "V" to "V1" for consistency with every sibling
+// declaration; declaration-only change, no caller impact.
+XMVECTOR XMVectorEqualIntR(_Out_ uint32_t* pCR, _In_ FXMVECTOR V1, _In_ FXMVECTOR V2);
+XMVECTOR XMVectorNearEqual(FXMVECTOR V1, FXMVECTOR V2, FXMVECTOR Epsilon);
+XMVECTOR XMVectorNotEqual(FXMVECTOR V1, FXMVECTOR V2);
+XMVECTOR XMVectorNotEqualInt(FXMVECTOR V1, FXMVECTOR V2);
+XMVECTOR XMVectorGreater(FXMVECTOR V1, FXMVECTOR V2);
+XMVECTOR XMVectorGreaterR(_Out_ uint32_t* pCR, _In_ FXMVECTOR V1, _In_ FXMVECTOR V2);
+XMVECTOR XMVectorGreaterOrEqual(FXMVECTOR V1, FXMVECTOR V2);
+XMVECTOR XMVectorGreaterOrEqualR(_Out_ uint32_t* pCR, _In_ FXMVECTOR V1, _In_ FXMVECTOR V2);
+XMVECTOR XMVectorLess(FXMVECTOR V1, FXMVECTOR V2);
+XMVECTOR XMVectorLessOrEqual(FXMVECTOR V1, FXMVECTOR V2);
+// In-bounds test: -Bounds <= V <= Bounds, per lane.
+XMVECTOR XMVectorInBounds(FXMVECTOR V, FXMVECTOR Bounds);
+XMVECTOR XMVectorInBoundsR(_Out_ uint32_t* pCR, _In_ FXMVECTOR V, _In_ FXMVECTOR Bounds);
+
+// Per-lane special-value tests (mask results, as with the comparisons above).
+XMVECTOR XMVectorIsNaN(FXMVECTOR V);
+XMVECTOR XMVectorIsInfinite(FXMVECTOR V);
+
+// Per-lane rounding and clamping.
+XMVECTOR XMVectorMin(FXMVECTOR V1,FXMVECTOR V2);
+XMVECTOR XMVectorMax(FXMVECTOR V1, FXMVECTOR V2);
+XMVECTOR XMVectorRound(FXMVECTOR V);
+XMVECTOR XMVectorTruncate(FXMVECTOR V);
+XMVECTOR XMVectorFloor(FXMVECTOR V);
+XMVECTOR XMVectorCeiling(FXMVECTOR V);
+XMVECTOR XMVectorClamp(FXMVECTOR V, FXMVECTOR Min, FXMVECTOR Max);
+XMVECTOR XMVectorSaturate(FXMVECTOR V);
+
+// Bitwise lane operations (AndC = V1 & ~V2).
+XMVECTOR XMVectorAndInt(FXMVECTOR V1, FXMVECTOR V2);
+XMVECTOR XMVectorAndCInt(FXMVECTOR V1, FXMVECTOR V2);
+XMVECTOR XMVectorOrInt(FXMVECTOR V1, FXMVECTOR V2);
+XMVECTOR XMVectorNorInt(FXMVECTOR V1, FXMVECTOR V2);
+XMVECTOR XMVectorXorInt(FXMVECTOR V1, FXMVECTOR V2);
+
+// Per-lane arithmetic; *Est variants trade accuracy for speed, *Angles
+// variants wrap results into the -Pi..Pi range.
+XMVECTOR XMVectorNegate(FXMVECTOR V);
+XMVECTOR XMVectorAdd(FXMVECTOR V1, FXMVECTOR V2);
+XMVECTOR XMVectorAddAngles(FXMVECTOR V1, FXMVECTOR V2);
+XMVECTOR XMVectorSubtract(FXMVECTOR V1, FXMVECTOR V2);
+XMVECTOR XMVectorSubtractAngles(FXMVECTOR V1, FXMVECTOR V2);
+XMVECTOR XMVectorMultiply(FXMVECTOR V1, FXMVECTOR V2);
+XMVECTOR XMVectorMultiplyAdd(FXMVECTOR V1, FXMVECTOR V2, FXMVECTOR V3);
+XMVECTOR XMVectorDivide(FXMVECTOR V1, FXMVECTOR V2);
+XMVECTOR XMVectorNegativeMultiplySubtract(FXMVECTOR V1, FXMVECTOR V2, FXMVECTOR V3);
+XMVECTOR XMVectorScale(FXMVECTOR V, float ScaleFactor);
+XMVECTOR XMVectorReciprocalEst(FXMVECTOR V);
+XMVECTOR XMVectorReciprocal(FXMVECTOR V);
+XMVECTOR XMVectorSqrtEst(FXMVECTOR V);
+XMVECTOR XMVectorSqrt(FXMVECTOR V);
+XMVECTOR XMVectorReciprocalSqrtEst(FXMVECTOR V);
+XMVECTOR XMVectorReciprocalSqrt(FXMVECTOR V);
+XMVECTOR XMVectorExp(FXMVECTOR V);
+XMVECTOR XMVectorLog(FXMVECTOR V);
+XMVECTOR XMVectorPow(FXMVECTOR V1, FXMVECTOR V2);
+XMVECTOR XMVectorAbs(FXMVECTOR V);
+XMVECTOR XMVectorMod(FXMVECTOR V1, FXMVECTOR V2);
+XMVECTOR XMVectorModAngles(FXMVECTOR Angles);
+// Per-lane trigonometric and inverse-trigonometric functions (radians).
+XMVECTOR XMVectorSin(FXMVECTOR V);
+XMVECTOR XMVectorSinEst(FXMVECTOR V);
+XMVECTOR XMVectorCos(FXMVECTOR V);
+XMVECTOR XMVectorCosEst(FXMVECTOR V);
+void XMVectorSinCos(_Out_ XMVECTOR* pSin, _Out_ XMVECTOR* pCos, _In_ FXMVECTOR V);
+void XMVectorSinCosEst(_Out_ XMVECTOR* pSin, _Out_ XMVECTOR* pCos, _In_ FXMVECTOR V);
+XMVECTOR XMVectorTan(FXMVECTOR V);
+XMVECTOR XMVectorTanEst(FXMVECTOR V);
+XMVECTOR XMVectorSinH(FXMVECTOR V);
+XMVECTOR XMVectorCosH(FXMVECTOR V);
+XMVECTOR XMVectorTanH(FXMVECTOR V);
+XMVECTOR XMVectorASin(FXMVECTOR V);
+XMVECTOR XMVectorASinEst(FXMVECTOR V);
+XMVECTOR XMVectorACos(FXMVECTOR V);
+XMVECTOR XMVectorACosEst(FXMVECTOR V);
+XMVECTOR XMVectorATan(FXMVECTOR V);
+XMVECTOR XMVectorATanEst(FXMVECTOR V);
+XMVECTOR XMVectorATan2(FXMVECTOR Y, FXMVECTOR X);
+XMVECTOR XMVectorATan2Est(FXMVECTOR Y, FXMVECTOR X);
+// Interpolation: lerp, Hermite spline, Catmull-Rom spline, barycentric;
+// the V-suffixed forms take the weights as vectors instead of scalars.
+XMVECTOR XMVectorLerp(FXMVECTOR V0, FXMVECTOR V1, float t);
+XMVECTOR XMVectorLerpV(FXMVECTOR V0, FXMVECTOR V1, FXMVECTOR T);
+XMVECTOR XMVectorHermite(FXMVECTOR Position0, FXMVECTOR Tangent0, FXMVECTOR Position1, GXMVECTOR Tangent1, float t);
+XMVECTOR XMVectorHermiteV(FXMVECTOR Position0, FXMVECTOR Tangent0, FXMVECTOR Position1, GXMVECTOR Tangent1, CXMVECTOR T);
+XMVECTOR XMVectorCatmullRom(FXMVECTOR Position0, FXMVECTOR Position1, FXMVECTOR Position2, GXMVECTOR Position3, float t);
+XMVECTOR XMVectorCatmullRomV(FXMVECTOR Position0, FXMVECTOR Position1, FXMVECTOR Position2, GXMVECTOR Position3, CXMVECTOR T);
+XMVECTOR XMVectorBaryCentric(FXMVECTOR Position0, FXMVECTOR Position1, FXMVECTOR Position2, float f, float g);
+XMVECTOR XMVectorBaryCentricV(FXMVECTOR Position0, FXMVECTOR Position1, FXMVECTOR Position2, GXMVECTOR F, CXMVECTOR G);
+
+/****************************************************************************
+ *
+ * 2D vector operations
+ *
+ ****************************************************************************/
+
+// 2D comparisons: only the x and y lanes participate; the "R" variants
+// return a comparison-record bitmask instead of a bool.
+bool XMVector2Equal(FXMVECTOR V1, FXMVECTOR V2);
+uint32_t XMVector2EqualR(FXMVECTOR V1, FXMVECTOR V2);
+bool XMVector2EqualInt(FXMVECTOR V1, FXMVECTOR V2);
+uint32_t XMVector2EqualIntR(FXMVECTOR V1, FXMVECTOR V2);
+bool XMVector2NearEqual(FXMVECTOR V1, FXMVECTOR V2, FXMVECTOR Epsilon);
+bool XMVector2NotEqual(FXMVECTOR V1, FXMVECTOR V2);
+bool XMVector2NotEqualInt(FXMVECTOR V1, FXMVECTOR V2);
+bool XMVector2Greater(FXMVECTOR V1, FXMVECTOR V2);
+uint32_t XMVector2GreaterR(FXMVECTOR V1, FXMVECTOR V2);
+bool XMVector2GreaterOrEqual(FXMVECTOR V1, FXMVECTOR V2);
+uint32_t XMVector2GreaterOrEqualR(FXMVECTOR V1, FXMVECTOR V2);
+bool XMVector2Less(FXMVECTOR V1, FXMVECTOR V2);
+bool XMVector2LessOrEqual(FXMVECTOR V1, FXMVECTOR V2);
+bool XMVector2InBounds(FXMVECTOR V, FXMVECTOR Bounds);
+
+bool XMVector2IsNaN(FXMVECTOR V);
+bool XMVector2IsInfinite(FXMVECTOR V);
+
+// 2D geometric operations; scalar results are replicated to all lanes.
+XMVECTOR XMVector2Dot(FXMVECTOR V1, FXMVECTOR V2);
+XMVECTOR XMVector2Cross(FXMVECTOR V1, FXMVECTOR V2);
+XMVECTOR XMVector2LengthSq(FXMVECTOR V);
+XMVECTOR XMVector2ReciprocalLengthEst(FXMVECTOR V);
+XMVECTOR XMVector2ReciprocalLength(FXMVECTOR V);
+XMVECTOR XMVector2LengthEst(FXMVECTOR V);
+XMVECTOR XMVector2Length(FXMVECTOR V);
+XMVECTOR XMVector2NormalizeEst(FXMVECTOR V);
+XMVECTOR XMVector2Normalize(FXMVECTOR V);
+XMVECTOR XMVector2ClampLength(FXMVECTOR V, float LengthMin, float LengthMax);
+XMVECTOR XMVector2ClampLengthV(FXMVECTOR V, FXMVECTOR LengthMin, FXMVECTOR LengthMax);
+XMVECTOR XMVector2Reflect(FXMVECTOR Incident, FXMVECTOR Normal);
+XMVECTOR XMVector2Refract(FXMVECTOR Incident, FXMVECTOR Normal, float RefractionIndex);
+XMVECTOR XMVector2RefractV(FXMVECTOR Incident, FXMVECTOR Normal, FXMVECTOR RefractionIndex);
+XMVECTOR XMVector2Orthogonal(FXMVECTOR V);
+XMVECTOR XMVector2AngleBetweenNormalsEst(FXMVECTOR N1, FXMVECTOR N2);
+XMVECTOR XMVector2AngleBetweenNormals(FXMVECTOR N1, FXMVECTOR N2);
+XMVECTOR XMVector2AngleBetweenVectors(FXMVECTOR V1, FXMVECTOR V2);
+XMVECTOR XMVector2LinePointDistance(FXMVECTOR LinePoint1, FXMVECTOR LinePoint2, FXMVECTOR Point);
+XMVECTOR XMVector2IntersectLine(FXMVECTOR Line1Point1, FXMVECTOR Line1Point2, FXMVECTOR Line2Point1, GXMVECTOR Line2Point2);
+// 2D transforms; the *Stream variants process VectorCount elements with the
+// given byte strides and return pOutputStream.
+XMVECTOR XMVector2Transform(FXMVECTOR V, CXMMATRIX M);
+XMFLOAT4* XMVector2TransformStream(_Out_writes_bytes_(sizeof(XMFLOAT4)+OutputStride*(VectorCount-1)) XMFLOAT4* pOutputStream,
+                                   _In_ size_t OutputStride,
+                                   _In_reads_bytes_(sizeof(XMFLOAT2)+InputStride*(VectorCount-1)) const XMFLOAT2* pInputStream,
+                                   _In_ size_t InputStride, _In_ size_t VectorCount, _In_ CXMMATRIX M);
+XMVECTOR XMVector2TransformCoord(FXMVECTOR V, CXMMATRIX M);
+XMFLOAT2* XMVector2TransformCoordStream(_Out_writes_bytes_(sizeof(XMFLOAT2)+OutputStride*(VectorCount-1)) XMFLOAT2* pOutputStream,
+                                        _In_ size_t OutputStride,
+                                        _In_reads_bytes_(sizeof(XMFLOAT2)+InputStride*(VectorCount-1)) const XMFLOAT2* pInputStream,
+                                        _In_ size_t InputStride, _In_ size_t VectorCount, _In_ CXMMATRIX M);
+XMVECTOR XMVector2TransformNormal(FXMVECTOR V, CXMMATRIX M);
+XMFLOAT2* XMVector2TransformNormalStream(_Out_writes_bytes_(sizeof(XMFLOAT2)+OutputStride*(VectorCount-1)) XMFLOAT2* pOutputStream,
+                                         _In_ size_t OutputStride,
+                                         _In_reads_bytes_(sizeof(XMFLOAT2)+InputStride*(VectorCount-1)) const XMFLOAT2* pInputStream,
+                                         _In_ size_t InputStride, _In_ size_t VectorCount, _In_ CXMMATRIX M);
+
+/****************************************************************************
+ *
+ * 3D vector operations
+ *
+ ****************************************************************************/
+
+// 3D comparisons: only the x, y and z lanes participate; the "R" variants
+// return a comparison-record bitmask instead of a bool.
+bool XMVector3Equal(FXMVECTOR V1, FXMVECTOR V2);
+uint32_t XMVector3EqualR(FXMVECTOR V1, FXMVECTOR V2);
+bool XMVector3EqualInt(FXMVECTOR V1, FXMVECTOR V2);
+uint32_t XMVector3EqualIntR(FXMVECTOR V1, FXMVECTOR V2);
+bool XMVector3NearEqual(FXMVECTOR V1, FXMVECTOR V2, FXMVECTOR Epsilon);
+bool XMVector3NotEqual(FXMVECTOR V1, FXMVECTOR V2);
+bool XMVector3NotEqualInt(FXMVECTOR V1, FXMVECTOR V2);
+bool XMVector3Greater(FXMVECTOR V1, FXMVECTOR V2);
+uint32_t XMVector3GreaterR(FXMVECTOR V1, FXMVECTOR V2);
+bool XMVector3GreaterOrEqual(FXMVECTOR V1, FXMVECTOR V2);
+uint32_t XMVector3GreaterOrEqualR(FXMVECTOR V1, FXMVECTOR V2);
+bool XMVector3Less(FXMVECTOR V1, FXMVECTOR V2);
+bool XMVector3LessOrEqual(FXMVECTOR V1, FXMVECTOR V2);
+bool XMVector3InBounds(FXMVECTOR V, FXMVECTOR Bounds);
+
+bool XMVector3IsNaN(FXMVECTOR V);
+bool XMVector3IsInfinite(FXMVECTOR V);
+
+// 3D geometric operations; scalar results are replicated to all lanes.
+XMVECTOR XMVector3Dot(FXMVECTOR V1, FXMVECTOR V2);
+XMVECTOR XMVector3Cross(FXMVECTOR V1, FXMVECTOR V2);
+XMVECTOR XMVector3LengthSq(FXMVECTOR V);
+XMVECTOR XMVector3ReciprocalLengthEst(FXMVECTOR V);
+XMVECTOR XMVector3ReciprocalLength(FXMVECTOR V);
+XMVECTOR XMVector3LengthEst(FXMVECTOR V);
+XMVECTOR XMVector3Length(FXMVECTOR V);
+XMVECTOR XMVector3NormalizeEst(FXMVECTOR V);
+XMVECTOR XMVector3Normalize(FXMVECTOR V);
+XMVECTOR XMVector3ClampLength(FXMVECTOR V, float LengthMin, float LengthMax);
+XMVECTOR XMVector3ClampLengthV(FXMVECTOR V, FXMVECTOR LengthMin, FXMVECTOR LengthMax);
+XMVECTOR XMVector3Reflect(FXMVECTOR Incident, FXMVECTOR Normal);
+XMVECTOR XMVector3Refract(FXMVECTOR Incident, FXMVECTOR Normal, float RefractionIndex);
+XMVECTOR XMVector3RefractV(FXMVECTOR Incident, FXMVECTOR Normal, FXMVECTOR RefractionIndex);
+XMVECTOR XMVector3Orthogonal(FXMVECTOR V);
+XMVECTOR XMVector3AngleBetweenNormalsEst(FXMVECTOR N1, FXMVECTOR N2);
+XMVECTOR XMVector3AngleBetweenNormals(FXMVECTOR N1, FXMVECTOR N2);
+XMVECTOR XMVector3AngleBetweenVectors(FXMVECTOR V1, FXMVECTOR V2);
+XMVECTOR XMVector3LinePointDistance(FXMVECTOR LinePoint1, FXMVECTOR LinePoint2, FXMVECTOR Point);
+void XMVector3ComponentsFromNormal(_Out_ XMVECTOR* pParallel, _Out_ XMVECTOR* pPerpendicular, _In_ FXMVECTOR V, _In_ FXMVECTOR Normal);
+// Quaternion rotation of a 3D vector.
+XMVECTOR XMVector3Rotate(FXMVECTOR V, FXMVECTOR RotationQuaternion);
+XMVECTOR XMVector3InverseRotate(FXMVECTOR V, FXMVECTOR RotationQuaternion);
+// 3D transforms; the *Stream variants process VectorCount elements with the
+// given byte strides and return pOutputStream.
+XMVECTOR XMVector3Transform(FXMVECTOR V, CXMMATRIX M);
+XMFLOAT4* XMVector3TransformStream(_Out_writes_bytes_(sizeof(XMFLOAT4)+OutputStride*(VectorCount-1)) XMFLOAT4* pOutputStream,
+                                   _In_ size_t OutputStride,
+                                   _In_reads_bytes_(sizeof(XMFLOAT3)+InputStride*(VectorCount-1)) const XMFLOAT3* pInputStream,
+                                   _In_ size_t InputStride, _In_ size_t VectorCount, _In_ CXMMATRIX M);
+XMVECTOR XMVector3TransformCoord(FXMVECTOR V, CXMMATRIX M);
+XMFLOAT3* XMVector3TransformCoordStream(_Out_writes_bytes_(sizeof(XMFLOAT3)+OutputStride*(VectorCount-1)) XMFLOAT3* pOutputStream,
+                                        _In_ size_t OutputStride,
+                                        _In_reads_bytes_(sizeof(XMFLOAT3)+InputStride*(VectorCount-1)) const XMFLOAT3* pInputStream,
+                                        _In_ size_t InputStride, _In_ size_t VectorCount, _In_ CXMMATRIX M);
+XMVECTOR XMVector3TransformNormal(FXMVECTOR V, CXMMATRIX M);
+XMFLOAT3* XMVector3TransformNormalStream(_Out_writes_bytes_(sizeof(XMFLOAT3)+OutputStride*(VectorCount-1)) XMFLOAT3* pOutputStream,
+                                         _In_ size_t OutputStride,
+                                         _In_reads_bytes_(sizeof(XMFLOAT3)+InputStride*(VectorCount-1)) const XMFLOAT3* pInputStream,
+                                         _In_ size_t InputStride, _In_ size_t VectorCount, _In_ CXMMATRIX M);
+// Project/unproject between object space and viewport (screen) space.
+XMVECTOR XMVector3Project(FXMVECTOR V, float ViewportX, float ViewportY, float ViewportWidth, float ViewportHeight, float ViewportMinZ, float ViewportMaxZ,
+                          CXMMATRIX Projection, CXMMATRIX View, CXMMATRIX World);
+XMFLOAT3* XMVector3ProjectStream(_Out_writes_bytes_(sizeof(XMFLOAT3)+OutputStride*(VectorCount-1)) XMFLOAT3* pOutputStream,
+                                 _In_ size_t OutputStride,
+                                 _In_reads_bytes_(sizeof(XMFLOAT3)+InputStride*(VectorCount-1)) const XMFLOAT3* pInputStream,
+                                 _In_ size_t InputStride, _In_ size_t VectorCount,
+                                 _In_ float ViewportX, _In_ float ViewportY, _In_ float ViewportWidth, _In_ float ViewportHeight, _In_ float ViewportMinZ, _In_ float ViewportMaxZ,
+                                 _In_ CXMMATRIX Projection, _In_ CXMMATRIX View, _In_ CXMMATRIX World);
+XMVECTOR XMVector3Unproject(FXMVECTOR V, float ViewportX, float ViewportY, float ViewportWidth, float ViewportHeight, float ViewportMinZ, float ViewportMaxZ,
+                            CXMMATRIX Projection, CXMMATRIX View, CXMMATRIX World);
+XMFLOAT3* XMVector3UnprojectStream(_Out_writes_bytes_(sizeof(XMFLOAT3)+OutputStride*(VectorCount-1)) XMFLOAT3* pOutputStream,
+                                   _In_ size_t OutputStride,
+                                   _In_reads_bytes_(sizeof(XMFLOAT3)+InputStride*(VectorCount-1)) const XMFLOAT3* pInputStream,
+                                   _In_ size_t InputStride, _In_ size_t VectorCount,
+                                   _In_ float ViewportX, _In_ float ViewportY, _In_ float ViewportWidth, _In_ float ViewportHeight, _In_ float ViewportMinZ, _In_ float ViewportMaxZ,
+                                   _In_ CXMMATRIX Projection, _In_ CXMMATRIX View, _In_ CXMMATRIX World);
+
+/****************************************************************************
+ *
+ * 4D vector operations
+ *
+ ****************************************************************************/
+
+// 4D comparisons: all four lanes participate; the "R" variants return a
+// comparison-record bitmask instead of a bool.
+bool XMVector4Equal(FXMVECTOR V1, FXMVECTOR V2);
+uint32_t XMVector4EqualR(FXMVECTOR V1, FXMVECTOR V2);
+bool XMVector4EqualInt(FXMVECTOR V1, FXMVECTOR V2);
+uint32_t XMVector4EqualIntR(FXMVECTOR V1, FXMVECTOR V2);
+bool XMVector4NearEqual(FXMVECTOR V1, FXMVECTOR V2, FXMVECTOR Epsilon);
+bool XMVector4NotEqual(FXMVECTOR V1, FXMVECTOR V2);
+bool XMVector4NotEqualInt(FXMVECTOR V1, FXMVECTOR V2);
+bool XMVector4Greater(FXMVECTOR V1, FXMVECTOR V2);
+uint32_t XMVector4GreaterR(FXMVECTOR V1, FXMVECTOR V2);
+bool XMVector4GreaterOrEqual(FXMVECTOR V1, FXMVECTOR V2);
+uint32_t XMVector4GreaterOrEqualR(FXMVECTOR V1, FXMVECTOR V2);
+bool XMVector4Less(FXMVECTOR V1, FXMVECTOR V2);
+bool XMVector4LessOrEqual(FXMVECTOR V1, FXMVECTOR V2);
+bool XMVector4InBounds(FXMVECTOR V, FXMVECTOR Bounds);
+
+bool XMVector4IsNaN(FXMVECTOR V);
+bool XMVector4IsInfinite(FXMVECTOR V);
+
+// 4D geometric operations; scalar results are replicated to all lanes.
+XMVECTOR XMVector4Dot(FXMVECTOR V1, FXMVECTOR V2);
+XMVECTOR XMVector4Cross(FXMVECTOR V1, FXMVECTOR V2, FXMVECTOR V3);
+XMVECTOR XMVector4LengthSq(FXMVECTOR V);
+XMVECTOR XMVector4ReciprocalLengthEst(FXMVECTOR V);
+XMVECTOR XMVector4ReciprocalLength(FXMVECTOR V);
+XMVECTOR XMVector4LengthEst(FXMVECTOR V);
+XMVECTOR XMVector4Length(FXMVECTOR V);
+XMVECTOR XMVector4NormalizeEst(FXMVECTOR V);
+XMVECTOR XMVector4Normalize(FXMVECTOR V);
+XMVECTOR XMVector4ClampLength(FXMVECTOR V, float LengthMin, float LengthMax);
+XMVECTOR XMVector4ClampLengthV(FXMVECTOR V, FXMVECTOR LengthMin, FXMVECTOR LengthMax);
+XMVECTOR XMVector4Reflect(FXMVECTOR Incident, FXMVECTOR Normal);
+XMVECTOR XMVector4Refract(FXMVECTOR Incident, FXMVECTOR Normal, float RefractionIndex);
+XMVECTOR XMVector4RefractV(FXMVECTOR Incident, FXMVECTOR Normal, FXMVECTOR RefractionIndex);
+XMVECTOR XMVector4Orthogonal(FXMVECTOR V);
+XMVECTOR XMVector4AngleBetweenNormalsEst(FXMVECTOR N1, FXMVECTOR N2);
+XMVECTOR XMVector4AngleBetweenNormals(FXMVECTOR N1, FXMVECTOR N2);
+XMVECTOR XMVector4AngleBetweenVectors(FXMVECTOR V1, FXMVECTOR V2);
+// 4D transforms; the *Stream variant processes VectorCount elements with the
+// given byte strides and returns pOutputStream.
+XMVECTOR XMVector4Transform(FXMVECTOR V, CXMMATRIX M);
+XMFLOAT4* XMVector4TransformStream(_Out_writes_bytes_(sizeof(XMFLOAT4)+OutputStride*(VectorCount-1)) XMFLOAT4* pOutputStream,
+                                   _In_ size_t OutputStride,
+                                   _In_reads_bytes_(sizeof(XMFLOAT4)+InputStride*(VectorCount-1)) const XMFLOAT4* pInputStream,
+                                   _In_ size_t InputStride, _In_ size_t VectorCount, _In_ CXMMATRIX M);
+
+/****************************************************************************
+ *
+ * Matrix operations
+ *
+ ****************************************************************************/
+
+bool XMMatrixIsNaN(CXMMATRIX M);
+bool XMMatrixIsInfinite(CXMMATRIX M);
+bool XMMatrixIsIdentity(CXMMATRIX M);
+
+XMMATRIX XMMatrixMultiply(CXMMATRIX M1, CXMMATRIX M2);
+XMMATRIX XMMatrixMultiplyTranspose(CXMMATRIX M1, CXMMATRIX M2);
+XMMATRIX XMMatrixTranspose(CXMMATRIX M);
+XMMATRIX XMMatrixInverse(_Out_opt_ XMVECTOR* pDeterminant, _In_ CXMMATRIX M);
+XMVECTOR XMMatrixDeterminant(CXMMATRIX M);
+_Success_(return)
+bool XMMatrixDecompose(_Out_ XMVECTOR *outScale, _Out_ XMVECTOR *outRotQuat, _Out_ XMVECTOR *outTrans, _In_ CXMMATRIX M);
+
+XMMATRIX XMMatrixIdentity();
+XMMATRIX XMMatrixSet(float m00, float m01, float m02, float m03,
+ float m10, float m11, float m12, float m13,
+ float m20, float m21, float m22, float m23,
+ float m30, float m31, float m32, float m33);
+XMMATRIX XMMatrixTranslation(float OffsetX, float OffsetY, float OffsetZ);
+XMMATRIX XMMatrixTranslationFromVector(FXMVECTOR Offset);
+XMMATRIX XMMatrixScaling(float ScaleX, float ScaleY, float ScaleZ);
+XMMATRIX XMMatrixScalingFromVector(FXMVECTOR Scale);
+XMMATRIX XMMatrixRotationX(float Angle);
+XMMATRIX XMMatrixRotationY(float Angle);
+XMMATRIX XMMatrixRotationZ(float Angle);
+XMMATRIX XMMatrixRotationRollPitchYaw(float Pitch, float Yaw, float Roll);
+XMMATRIX XMMatrixRotationRollPitchYawFromVector(FXMVECTOR Angles);
+XMMATRIX XMMatrixRotationNormal(FXMVECTOR NormalAxis, float Angle);
+XMMATRIX XMMatrixRotationAxis(FXMVECTOR Axis, float Angle);
+XMMATRIX XMMatrixRotationQuaternion(FXMVECTOR Quaternion);
+XMMATRIX XMMatrixTransformation2D(FXMVECTOR ScalingOrigin, float ScalingOrientation, FXMVECTOR Scaling,
+ FXMVECTOR RotationOrigin, float Rotation, GXMVECTOR Translation);
+XMMATRIX XMMatrixTransformation(FXMVECTOR ScalingOrigin, FXMVECTOR ScalingOrientationQuaternion, FXMVECTOR Scaling,
+ GXMVECTOR RotationOrigin, CXMVECTOR RotationQuaternion, CXMVECTOR Translation);
+XMMATRIX XMMatrixAffineTransformation2D(FXMVECTOR Scaling, FXMVECTOR RotationOrigin, float Rotation, FXMVECTOR Translation);
+XMMATRIX XMMatrixAffineTransformation(FXMVECTOR Scaling, FXMVECTOR RotationOrigin, FXMVECTOR RotationQuaternion, GXMVECTOR Translation);
+XMMATRIX XMMatrixReflect(FXMVECTOR ReflectionPlane);
+XMMATRIX XMMatrixShadow(FXMVECTOR ShadowPlane, FXMVECTOR LightPosition);
+
+XMMATRIX XMMatrixLookAtLH(FXMVECTOR EyePosition, FXMVECTOR FocusPosition, FXMVECTOR UpDirection);
+XMMATRIX XMMatrixLookAtRH(FXMVECTOR EyePosition, FXMVECTOR FocusPosition, FXMVECTOR UpDirection);
+XMMATRIX XMMatrixLookToLH(FXMVECTOR EyePosition, FXMVECTOR EyeDirection, FXMVECTOR UpDirection);
+XMMATRIX XMMatrixLookToRH(FXMVECTOR EyePosition, FXMVECTOR EyeDirection, FXMVECTOR UpDirection);
+XMMATRIX XMMatrixPerspectiveLH(float ViewWidth, float ViewHeight, float NearZ, float FarZ);
+XMMATRIX XMMatrixPerspectiveRH(float ViewWidth, float ViewHeight, float NearZ, float FarZ);
+XMMATRIX XMMatrixPerspectiveFovLH(float FovAngleY, float AspectHByW, float NearZ, float FarZ);
+XMMATRIX XMMatrixPerspectiveFovRH(float FovAngleY, float AspectHByW, float NearZ, float FarZ);
+XMMATRIX XMMatrixPerspectiveOffCenterLH(float ViewLeft, float ViewRight, float ViewBottom, float ViewTop, float NearZ, float FarZ);
+XMMATRIX XMMatrixPerspectiveOffCenterRH(float ViewLeft, float ViewRight, float ViewBottom, float ViewTop, float NearZ, float FarZ);
+XMMATRIX XMMatrixOrthographicLH(float ViewWidth, float ViewHeight, float NearZ, float FarZ);
+XMMATRIX XMMatrixOrthographicRH(float ViewWidth, float ViewHeight, float NearZ, float FarZ);
+XMMATRIX XMMatrixOrthographicOffCenterLH(float ViewLeft, float ViewRight, float ViewBottom, float ViewTop, float NearZ, float FarZ);
+XMMATRIX XMMatrixOrthographicOffCenterRH(float ViewLeft, float ViewRight, float ViewBottom, float ViewTop, float NearZ, float FarZ);
+
+
+/****************************************************************************
+ *
+ * Quaternion operations
+ *
+ ****************************************************************************/
+
+bool XMQuaternionEqual(FXMVECTOR Q1, FXMVECTOR Q2);
+bool XMQuaternionNotEqual(FXMVECTOR Q1, FXMVECTOR Q2);
+
+bool XMQuaternionIsNaN(FXMVECTOR Q);
+bool XMQuaternionIsInfinite(FXMVECTOR Q);
+bool XMQuaternionIsIdentity(FXMVECTOR Q);
+
+XMVECTOR XMQuaternionDot(FXMVECTOR Q1, FXMVECTOR Q2);
+XMVECTOR XMQuaternionMultiply(FXMVECTOR Q1, FXMVECTOR Q2);
+XMVECTOR XMQuaternionLengthSq(FXMVECTOR Q);
+XMVECTOR XMQuaternionReciprocalLength(FXMVECTOR Q);
+XMVECTOR XMQuaternionLength(FXMVECTOR Q);
+XMVECTOR XMQuaternionNormalizeEst(FXMVECTOR Q);
+XMVECTOR XMQuaternionNormalize(FXMVECTOR Q);
+XMVECTOR XMQuaternionConjugate(FXMVECTOR Q);
+XMVECTOR XMQuaternionInverse(FXMVECTOR Q);
+XMVECTOR XMQuaternionLn(FXMVECTOR Q);
+XMVECTOR XMQuaternionExp(FXMVECTOR Q);
+XMVECTOR XMQuaternionSlerp(FXMVECTOR Q0, FXMVECTOR Q1, float t);
+XMVECTOR XMQuaternionSlerpV(FXMVECTOR Q0, FXMVECTOR Q1, FXMVECTOR T);
+XMVECTOR XMQuaternionSquad(FXMVECTOR Q0, FXMVECTOR Q1, FXMVECTOR Q2, GXMVECTOR Q3, float t);
+XMVECTOR XMQuaternionSquadV(FXMVECTOR Q0, FXMVECTOR Q1, FXMVECTOR Q2, GXMVECTOR Q3, CXMVECTOR T);
+void XMQuaternionSquadSetup(_Out_ XMVECTOR* pA, _Out_ XMVECTOR* pB, _Out_ XMVECTOR* pC, _In_ FXMVECTOR Q0, _In_ FXMVECTOR Q1, _In_ FXMVECTOR Q2, _In_ GXMVECTOR Q3);
+XMVECTOR XMQuaternionBaryCentric(FXMVECTOR Q0, FXMVECTOR Q1, FXMVECTOR Q2, float f, float g);
+XMVECTOR XMQuaternionBaryCentricV(FXMVECTOR Q0, FXMVECTOR Q1, FXMVECTOR Q2, GXMVECTOR F, CXMVECTOR G);
+
+XMVECTOR XMQuaternionIdentity();
+XMVECTOR XMQuaternionRotationRollPitchYaw(float Pitch, float Yaw, float Roll);
+XMVECTOR XMQuaternionRotationRollPitchYawFromVector(FXMVECTOR Angles);
+XMVECTOR XMQuaternionRotationNormal(FXMVECTOR NormalAxis, float Angle);
+XMVECTOR XMQuaternionRotationAxis(FXMVECTOR Axis, float Angle);
+XMVECTOR XMQuaternionRotationMatrix(CXMMATRIX M);
+
+void XMQuaternionToAxisAngle(_Out_ XMVECTOR* pAxis, _Out_ float* pAngle, _In_ FXMVECTOR Q);
+
+/****************************************************************************
+ *
+ * Plane operations
+ *
+ ****************************************************************************/
+
+bool XMPlaneEqual(FXMVECTOR P1, FXMVECTOR P2);
+bool XMPlaneNearEqual(FXMVECTOR P1, FXMVECTOR P2, FXMVECTOR Epsilon);
+bool XMPlaneNotEqual(FXMVECTOR P1, FXMVECTOR P2);
+
+bool XMPlaneIsNaN(FXMVECTOR P);
+bool XMPlaneIsInfinite(FXMVECTOR P);
+
+XMVECTOR XMPlaneDot(FXMVECTOR P, FXMVECTOR V);
+XMVECTOR XMPlaneDotCoord(FXMVECTOR P, FXMVECTOR V);
+XMVECTOR XMPlaneDotNormal(FXMVECTOR P, FXMVECTOR V);
+XMVECTOR XMPlaneNormalizeEst(FXMVECTOR P);
+XMVECTOR XMPlaneNormalize(FXMVECTOR P);
+XMVECTOR XMPlaneIntersectLine(FXMVECTOR P, FXMVECTOR LinePoint1, FXMVECTOR LinePoint2);
+void XMPlaneIntersectPlane(_Out_ XMVECTOR* pLinePoint1, _Out_ XMVECTOR* pLinePoint2, _In_ FXMVECTOR P1, _In_ FXMVECTOR P2);
+XMVECTOR XMPlaneTransform(FXMVECTOR P, CXMMATRIX M);
+XMFLOAT4* XMPlaneTransformStream(_Out_writes_bytes_(sizeof(XMFLOAT4)+OutputStride*(PlaneCount-1)) XMFLOAT4* pOutputStream,
+ _In_ size_t OutputStride,
+ _In_reads_bytes_(sizeof(XMFLOAT4)+InputStride*(PlaneCount-1)) const XMFLOAT4* pInputStream,
+ _In_ size_t InputStride, _In_ size_t PlaneCount, _In_ CXMMATRIX M);
+
+XMVECTOR XMPlaneFromPointNormal(FXMVECTOR Point, FXMVECTOR Normal);
+XMVECTOR XMPlaneFromPoints(FXMVECTOR Point1, FXMVECTOR Point2, FXMVECTOR Point3);
+
+/****************************************************************************
+ *
+ * Color operations
+ *
+ ****************************************************************************/
+
+bool XMColorEqual(FXMVECTOR C1, FXMVECTOR C2);
+bool XMColorNotEqual(FXMVECTOR C1, FXMVECTOR C2);
+bool XMColorGreater(FXMVECTOR C1, FXMVECTOR C2);
+bool XMColorGreaterOrEqual(FXMVECTOR C1, FXMVECTOR C2);
+bool XMColorLess(FXMVECTOR C1, FXMVECTOR C2);
+bool XMColorLessOrEqual(FXMVECTOR C1, FXMVECTOR C2);
+
+bool XMColorIsNaN(FXMVECTOR C);
+bool XMColorIsInfinite(FXMVECTOR C);
+
+XMVECTOR XMColorNegative(FXMVECTOR C);
+XMVECTOR XMColorModulate(FXMVECTOR C1, FXMVECTOR C2);
+XMVECTOR XMColorAdjustSaturation(FXMVECTOR C, float Saturation);
+XMVECTOR XMColorAdjustContrast(FXMVECTOR C, float Contrast);
+
+XMVECTOR XMColorRGBToHSL( FXMVECTOR rgb );
+XMVECTOR XMColorHSLToRGB( FXMVECTOR hsl );
+
+XMVECTOR XMColorRGBToHSV( FXMVECTOR rgb );
+XMVECTOR XMColorHSVToRGB( FXMVECTOR hsv );
+
+XMVECTOR XMColorRGBToYUV( FXMVECTOR rgb );
+XMVECTOR XMColorYUVToRGB( FXMVECTOR yuv );
+
+XMVECTOR XMColorRGBToYUV_HD( FXMVECTOR rgb );
+XMVECTOR XMColorYUVToRGB_HD( FXMVECTOR yuv );
+
+XMVECTOR XMColorRGBToXYZ( FXMVECTOR rgb );
+XMVECTOR XMColorXYZToRGB( FXMVECTOR xyz );
+
+XMVECTOR XMColorXYZToSRGB( FXMVECTOR xyz );
+XMVECTOR XMColorSRGBToXYZ( FXMVECTOR srgb );
+
+/****************************************************************************
+ *
+ * Miscellaneous operations
+ *
+ ****************************************************************************/
+
+bool XMVerifyCPUSupport();
+
+XMVECTOR XMFresnelTerm(FXMVECTOR CosIncidentAngle, FXMVECTOR RefractionIndex);
+
+bool XMScalarNearEqual(float S1, float S2, float Epsilon);
+float XMScalarModAngle(float Value);
+
+float XMScalarSin(float Value);
+float XMScalarSinEst(float Value);
+
+float XMScalarCos(float Value);
+float XMScalarCosEst(float Value);
+
+void XMScalarSinCos(_Out_ float* pSin, _Out_ float* pCos, float Value);
+void XMScalarSinCosEst(_Out_ float* pSin, _Out_ float* pCos, float Value);
+
+float XMScalarASin(float Value);
+float XMScalarASinEst(float Value);
+
+float XMScalarACos(float Value);
+float XMScalarACosEst(float Value);
+
+/****************************************************************************
+ *
+ * Templates
+ *
+ ****************************************************************************/
+
+#if defined(__XNAMATH_H__) && defined(XMMin)
+#undef XMMin
+#undef XMMax
+#endif
+
// Returns the lesser of two values, comparing with operator<.
// When the values compare equal, the second argument is returned
// (identical to the original ternary form).
template<class T> inline T XMMin(T a, T b)
{
    if (a < b)
        return a;
    return b;
}
// Returns the greater of two values, comparing with operator>.
// When the values compare equal, the second argument is returned
// (identical to the original ternary form).
template<class T> inline T XMMax(T a, T b)
{
    if (a > b)
        return a;
    return b;
}
+
+//------------------------------------------------------------------------------
+
+#if defined(_XM_SSE_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
+
+#define XM_PERMUTE_PS( v, c ) _mm_shuffle_ps( v, v, c )
+
// PermuteHelper internal template (SSE only)
// Shuffle is an _MM_SHUFFLE-encoded component order; WhichX..WhichW select,
// per output component, whether the lane is read from v2 (true) or v1 (false).
// The specializations below replace the generic blend with a single shuffle
// when the component pattern permits it.
namespace Internal
{
    // Slow path fallback for permutes that do not map to a single SSE shuffle opcode.
    template<uint32_t Shuffle, bool WhichX, bool WhichY, bool WhichZ, bool WhichW> struct PermuteHelper
    {
        static XMVECTOR Permute(FXMVECTOR v1, FXMVECTOR v2)
        {
            // Per-lane blend mask: all-ones picks the lane from v2's shuffle,
            // zero keeps the lane from v1's shuffle.
            static const XMVECTORU32 selectMask =
            {
                WhichX ? 0xFFFFFFFF : 0,
                WhichY ? 0xFFFFFFFF : 0,
                WhichZ ? 0xFFFFFFFF : 0,
                WhichW ? 0xFFFFFFFF : 0,
            };

            // Shuffle both sources into the requested component order...
            XMVECTOR shuffled1 = XM_PERMUTE_PS(v1, Shuffle);
            XMVECTOR shuffled2 = XM_PERMUTE_PS(v2, Shuffle);

            // ...then merge them with an andnot/and/or select sequence.
            XMVECTOR masked1 = _mm_andnot_ps(selectMask, shuffled1);
            XMVECTOR masked2 = _mm_and_ps(selectMask, shuffled2);

            return _mm_or_ps(masked1, masked2);
        }
    };

    // Fast path for permutes that only read from the first vector.
    // The bare (v2)/(v1) expressions reference the unused parameter to
    // silence unused-argument warnings.
    template<uint32_t Shuffle> struct PermuteHelper<Shuffle, false, false, false, false>
    {
        static XMVECTOR Permute(FXMVECTOR v1, FXMVECTOR v2) { (v2); return XM_PERMUTE_PS(v1, Shuffle); }
    };

    // Fast path for permutes that only read from the second vector.
    template<uint32_t Shuffle> struct PermuteHelper<Shuffle, true, true, true, true>
    {
        static XMVECTOR Permute(FXMVECTOR v1, FXMVECTOR v2){ (v1); return XM_PERMUTE_PS(v2, Shuffle); }
    };

    // Fast path for permutes that read XY from the first vector, ZW from the second.
    template<uint32_t Shuffle> struct PermuteHelper<Shuffle, false, false, true, true>
    {
        static XMVECTOR Permute(FXMVECTOR v1, FXMVECTOR v2) { return _mm_shuffle_ps(v1, v2, Shuffle); }
    };

    // Fast path for permutes that read XY from the second vector, ZW from the first.
    template<uint32_t Shuffle> struct PermuteHelper<Shuffle, true, true, false, false>
    {
        static XMVECTOR Permute(FXMVECTOR v1, FXMVECTOR v2) { return _mm_shuffle_ps(v2, v1, Shuffle); }
    };
};
+
+#endif // _XM_SSE_INTRINSICS_ && !_XM_NO_INTRINSICS_
+
// General permute template
// Builds a vector from components of two sources: index 0-3 selects the
// corresponding component of V1, index 4-7 selects component 0-3 of V2.
template<uint32_t PermuteX, uint32_t PermuteY, uint32_t PermuteZ, uint32_t PermuteW>
    inline XMVECTOR XMVectorPermute(FXMVECTOR V1, FXMVECTOR V2)
{
    static_assert(PermuteX <= 7, "PermuteX template parameter out of range");
    static_assert(PermuteY <= 7, "PermuteY template parameter out of range");
    static_assert(PermuteZ <= 7, "PermuteZ template parameter out of range");
    static_assert(PermuteW <= 7, "PermuteW template parameter out of range");

#if defined(_XM_SSE_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
    // Low two bits of each index give the lane within its source vector;
    // bit 2 (value > 3) says the component comes from V2 rather than V1.
    const uint32_t Shuffle = _MM_SHUFFLE(PermuteW & 3, PermuteZ & 3, PermuteY & 3, PermuteX & 3);

    const bool WhichX = PermuteX > 3;
    const bool WhichY = PermuteY > 3;
    const bool WhichZ = PermuteZ > 3;
    const bool WhichW = PermuteW > 3;

    // PermuteHelper specializations collapse to a single shuffle when possible.
    return Internal::PermuteHelper<Shuffle, WhichX, WhichY, WhichZ, WhichW>::Permute(V1, V2);
#else

    // No compile-time intrinsic path: defer to the runtime (non-template) permute.
    return XMVectorPermute( V1, V2, PermuteX, PermuteY, PermuteZ, PermuteW );

#endif
}
+
// Special-case permute templates
// Identity permutes: the result is exactly one of the sources, so return it
// directly. The bare (V2)/(V1) expressions silence unused-parameter warnings.
template<> inline XMVECTOR XMVectorPermute<0,1,2,3>(FXMVECTOR V1, FXMVECTOR V2) { (V2); return V1; }
template<> inline XMVECTOR XMVectorPermute<4,5,6,7>(FXMVECTOR V1, FXMVECTOR V2) { (V1); return V2; }
+
#if defined(_XM_ARM_NEON_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)

// ARM NEON specializations: each of these index patterns maps onto one or two
// NEON instructions instead of the general permute path.

// If the indices are all in the range 0-3 or 4-7, then use XMVectorSwizzle instead
// The mirror cases are not spelled out here as the programmer can always swap the arguments
// (i.e. prefer permutes where the X element comes from the V1 vector instead of the V2 vector)

// Combine the low 64-bit halves of both vectors; vrev64 swaps a half's pair.
template<> inline XMVECTOR XMVectorPermute<0,1,4,5>(FXMVECTOR V1, FXMVECTOR V2) { return vcombine_f32( vget_low_f32(V1), vget_low_f32(V2) ); }
template<> inline XMVECTOR XMVectorPermute<1,0,4,5>(FXMVECTOR V1, FXMVECTOR V2) { return vcombine_f32( vrev64_f32( vget_low_f32(V1) ), vget_low_f32(V2) ); }
template<> inline XMVECTOR XMVectorPermute<0,1,5,4>(FXMVECTOR V1, FXMVECTOR V2) { return vcombine_f32( vget_low_f32(V1), vrev64_f32( vget_low_f32(V2) ) ); }
template<> inline XMVECTOR XMVectorPermute<1,0,5,4>(FXMVECTOR V1, FXMVECTOR V2) { return vcombine_f32( vrev64_f32( vget_low_f32(V1) ), vrev64_f32( vget_low_f32(V2) ) ); }

// Combine the high 64-bit halves of both vectors.
template<> inline XMVECTOR XMVectorPermute<2,3,6,7>(FXMVECTOR V1, FXMVECTOR V2) { return vcombine_f32( vget_high_f32(V1), vget_high_f32(V2) ); }
template<> inline XMVECTOR XMVectorPermute<3,2,6,7>(FXMVECTOR V1, FXMVECTOR V2) { return vcombine_f32( vrev64_f32( vget_high_f32(V1) ), vget_high_f32(V2) ); }
template<> inline XMVECTOR XMVectorPermute<2,3,7,6>(FXMVECTOR V1, FXMVECTOR V2) { return vcombine_f32( vget_high_f32(V1), vrev64_f32( vget_high_f32(V2) ) ); }
template<> inline XMVECTOR XMVectorPermute<3,2,7,6>(FXMVECTOR V1, FXMVECTOR V2) { return vcombine_f32( vrev64_f32( vget_high_f32(V1) ), vrev64_f32( vget_high_f32(V2) ) ); }

// Low half of V1 with high half of V2.
template<> inline XMVECTOR XMVectorPermute<0,1,6,7>(FXMVECTOR V1, FXMVECTOR V2) { return vcombine_f32( vget_low_f32(V1), vget_high_f32(V2) ); }
template<> inline XMVECTOR XMVectorPermute<1,0,6,7>(FXMVECTOR V1, FXMVECTOR V2) { return vcombine_f32( vrev64_f32( vget_low_f32(V1) ), vget_high_f32(V2) ); }
template<> inline XMVECTOR XMVectorPermute<0,1,7,6>(FXMVECTOR V1, FXMVECTOR V2) { return vcombine_f32( vget_low_f32(V1), vrev64_f32( vget_high_f32(V2) ) ); }
template<> inline XMVECTOR XMVectorPermute<1,0,7,6>(FXMVECTOR V1, FXMVECTOR V2) { return vcombine_f32( vrev64_f32( vget_low_f32(V1) ), vrev64_f32( vget_high_f32(V2) ) ); }

// High half of V1 with low half of V2.
template<> inline XMVECTOR XMVectorPermute<3,2,4,5>(FXMVECTOR V1, FXMVECTOR V2) { return vcombine_f32( vrev64_f32( vget_high_f32(V1) ), vget_low_f32(V2) ); }
template<> inline XMVECTOR XMVectorPermute<2,3,5,4>(FXMVECTOR V1, FXMVECTOR V2) { return vcombine_f32( vget_high_f32(V1), vrev64_f32( vget_low_f32(V2) ) ); }
template<> inline XMVECTOR XMVectorPermute<3,2,5,4>(FXMVECTOR V1, FXMVECTOR V2) { return vcombine_f32( vrev64_f32( vget_high_f32(V1) ), vrev64_f32( vget_low_f32(V2) ) ); }

// Transpose-style interleaves (vtrn), zip interleaves (vzip),
// unzip/deinterleaves (vuzp), and byte-rotating extracts (vext).
template<> inline XMVECTOR XMVectorPermute<0,4,2,6>(FXMVECTOR V1, FXMVECTOR V2) { return vtrnq_f32(V1,V2).val[0]; }
template<> inline XMVECTOR XMVectorPermute<1,5,3,7>(FXMVECTOR V1, FXMVECTOR V2) { return vtrnq_f32(V1,V2).val[1]; }

template<> inline XMVECTOR XMVectorPermute<0,4,1,5>(FXMVECTOR V1, FXMVECTOR V2) { return vzipq_f32(V1,V2).val[0]; }
template<> inline XMVECTOR XMVectorPermute<2,6,3,7>(FXMVECTOR V1, FXMVECTOR V2) { return vzipq_f32(V1,V2).val[1]; }

template<> inline XMVECTOR XMVectorPermute<0,2,4,6>(FXMVECTOR V1, FXMVECTOR V2) { return vuzpq_f32(V1,V2).val[0]; }
template<> inline XMVECTOR XMVectorPermute<1,3,5,7>(FXMVECTOR V1, FXMVECTOR V2) { return vuzpq_f32(V1,V2).val[1]; }

template<> inline XMVECTOR XMVectorPermute<1,2,3,4>(FXMVECTOR V1, FXMVECTOR V2) { return vextq_f32(V1, V2, 1); }
template<> inline XMVECTOR XMVectorPermute<2,3,4,5>(FXMVECTOR V1, FXMVECTOR V2) { return vextq_f32(V1, V2, 2); }
template<> inline XMVECTOR XMVectorPermute<3,4,5,6>(FXMVECTOR V1, FXMVECTOR V2) { return vextq_f32(V1, V2, 3); }

#endif // _XM_ARM_NEON_INTRINSICS_ && !_XM_NO_INTRINSICS_
+
+//------------------------------------------------------------------------------
+
// General swizzle template
// Reorders the components of a single vector: each template index (0-3)
// selects which component of V supplies that slot of the result.
template<uint32_t SwizzleX, uint32_t SwizzleY, uint32_t SwizzleZ, uint32_t SwizzleW>
    inline XMVECTOR XMVectorSwizzle(FXMVECTOR V)
{
    static_assert(SwizzleX <= 3, "SwizzleX template parameter out of range");
    static_assert(SwizzleY <= 3, "SwizzleY template parameter out of range");
    static_assert(SwizzleZ <= 3, "SwizzleZ template parameter out of range");
    static_assert(SwizzleW <= 3, "SwizzleW template parameter out of range");

#if defined(_XM_SSE_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
    return XM_PERMUTE_PS( V, _MM_SHUFFLE( SwizzleW, SwizzleZ, SwizzleY, SwizzleX ) );
#elif defined(_XM_VMX128_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
    // VMX128: pack the four 2-bit lane indices into the __vpermwi immediate.
    return __vpermwi(V, ((SwizzleX & 3) << 6) | ((SwizzleY & 3) << 4) | ((SwizzleZ & 3) << 2) | (SwizzleW & 3) );
#else

    // No compile-time intrinsic path: defer to the runtime (non-template) swizzle.
    return XMVectorSwizzle( V, SwizzleX, SwizzleY, SwizzleZ, SwizzleW );

#endif
}
+
// Specialized swizzles
// Identity swizzle: nothing to reorder, return the input unchanged.
template<> inline XMVECTOR XMVectorSwizzle<0,1,2,3>(FXMVECTOR V) { return V; }
+
+
#if defined(_XM_ARM_NEON_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)

// ARM NEON specializations: each of these component patterns maps onto one
// or two NEON instructions instead of the general swizzle path.

// Broadcast a single lane to all four components.
template<> inline XMVECTOR XMVectorSwizzle<0,0,0,0>(FXMVECTOR V) { return vdupq_lane_f32( vget_low_f32(V), 0); }
template<> inline XMVECTOR XMVectorSwizzle<1,1,1,1>(FXMVECTOR V) { return vdupq_lane_f32( vget_low_f32(V), 1); }
template<> inline XMVECTOR XMVectorSwizzle<2,2,2,2>(FXMVECTOR V) { return vdupq_lane_f32( vget_high_f32(V), 0); }
template<> inline XMVECTOR XMVectorSwizzle<3,3,3,3>(FXMVECTOR V) { return vdupq_lane_f32( vget_high_f32(V), 1); }

// Swap each 64-bit pair in place.
template<> inline XMVECTOR XMVectorSwizzle<1,0,3,2>(FXMVECTOR V) { return vrev64q_f32(V); }

// Duplicate or swap-and-duplicate one 64-bit half.
template<> inline XMVECTOR XMVectorSwizzle<0,1,0,1>(FXMVECTOR V) { __n64 vt = vget_low_f32(V); return vcombine_f32( vt, vt ); }
template<> inline XMVECTOR XMVectorSwizzle<2,3,2,3>(FXMVECTOR V) { __n64 vt = vget_high_f32(V); return vcombine_f32( vt, vt ); }
template<> inline XMVECTOR XMVectorSwizzle<1,0,1,0>(FXMVECTOR V) { __n64 vt = vrev64_f32( vget_low_f32(V) ); return vcombine_f32( vt, vt ); }
template<> inline XMVECTOR XMVectorSwizzle<3,2,3,2>(FXMVECTOR V) { __n64 vt = vrev64_f32( vget_high_f32(V) ); return vcombine_f32( vt, vt ); }

// Recombine the two halves with optional per-half pair swaps.
template<> inline XMVECTOR XMVectorSwizzle<0,1,3,2>(FXMVECTOR V) { return vcombine_f32( vget_low_f32(V), vrev64_f32( vget_high_f32(V) ) ); }
template<> inline XMVECTOR XMVectorSwizzle<1,0,2,3>(FXMVECTOR V) { return vcombine_f32( vrev64_f32( vget_low_f32(V) ), vget_high_f32(V) ); }
template<> inline XMVECTOR XMVectorSwizzle<2,3,1,0>(FXMVECTOR V) { return vcombine_f32( vget_high_f32(V), vrev64_f32( vget_low_f32(V) ) ); }
template<> inline XMVECTOR XMVectorSwizzle<3,2,0,1>(FXMVECTOR V) { return vcombine_f32( vrev64_f32( vget_high_f32(V) ), vget_low_f32(V) ); }
template<> inline XMVECTOR XMVectorSwizzle<3,2,1,0>(FXMVECTOR V) { return vcombine_f32( vrev64_f32( vget_high_f32(V) ), vrev64_f32( vget_low_f32(V) ) ); }

// Transpose-style interleaves (vtrn), zip interleaves (vzip),
// unzip/deinterleaves (vuzp), and lane rotations (vext) of V with itself.
template<> inline XMVECTOR XMVectorSwizzle<0,0,2,2>(FXMVECTOR V) { return vtrnq_f32(V,V).val[0]; }
template<> inline XMVECTOR XMVectorSwizzle<1,1,3,3>(FXMVECTOR V) { return vtrnq_f32(V,V).val[1]; }

template<> inline XMVECTOR XMVectorSwizzle<0,0,1,1>(FXMVECTOR V) { return vzipq_f32(V,V).val[0]; }
template<> inline XMVECTOR XMVectorSwizzle<2,2,3,3>(FXMVECTOR V) { return vzipq_f32(V,V).val[1]; }

template<> inline XMVECTOR XMVectorSwizzle<0,2,0,2>(FXMVECTOR V) { return vuzpq_f32(V,V).val[0]; }
template<> inline XMVECTOR XMVectorSwizzle<1,3,1,3>(FXMVECTOR V) { return vuzpq_f32(V,V).val[1]; }

template<> inline XMVECTOR XMVectorSwizzle<1,2,3,0>(FXMVECTOR V) { return vextq_f32(V, V, 1); }
template<> inline XMVECTOR XMVectorSwizzle<2,3,0,1>(FXMVECTOR V) { return vextq_f32(V, V, 2); }
template<> inline XMVECTOR XMVectorSwizzle<3,0,1,2>(FXMVECTOR V) { return vextq_f32(V, V, 3); }

#endif // _XM_ARM_NEON_INTRINSICS_ && !_XM_NO_INTRINSICS_
+
+//------------------------------------------------------------------------------
+
+template<uint32_t Elements>
+ inline XMVECTOR XMVectorShiftLeft(FXMVECTOR V1, FXMVECTOR V2)
+{
+ static_assert( Elements < 4, "Elements template parameter out of range" );
+#if defined(_XM_VMX128_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
+#else
+ return XMVectorPermute<Elements, (Elements + 1), (Elements + 2), (Elements + 3)>(V1, V2);
+#endif
+}
+
+template<uint32_t Elements>
+ inline XMVECTOR XMVectorRotateLeft(FXMVECTOR V)
+{
+ static_assert( Elements < 4, "Elements template parameter out of range" );
+#if defined(_XM_VMX128_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
+#else
+ return XMVectorSwizzle<Elements & 3, (Elements + 1) & 3, (Elements + 2) & 3, (Elements + 3) & 3>(V);
+#endif
+}
+
+template<uint32_t Elements>
+ inline XMVECTOR XMVectorRotateRight(FXMVECTOR V)
+{
+ static_assert( Elements < 4, "Elements template parameter out of range" );
+#if defined(_XM_VMX128_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
+#else
+ return XMVectorSwizzle<(4 - Elements) & 3, (5 - Elements) & 3, (6 - Elements) & 3, (7 - Elements) & 3>(V);
+#endif
+}
+
+template<uint32_t VSLeftRotateElements, uint32_t Select0, uint32_t Select1, uint32_t Select2, uint32_t Select3>
+ inline XMVECTOR XMVectorInsert(FXMVECTOR VD, FXMVECTOR VS)
+{
+#if defined(_XM_VMX128_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
+#else
+ XMVECTOR Control = XMVectorSelectControl(Select0&1, Select1&1, Select2&1, Select3&1);
+ return XMVectorSelect( VD, XMVectorRotateLeft<VSLeftRotateElements>(VS), Control );
+#endif
+}
+
+/****************************************************************************
+ *
+ * Globals
+ *
+ ****************************************************************************/
+
+// The purpose of the following global constants is to prevent redundant
+// reloading of the constants when they are referenced by more than one
+// separate inline math routine called within the same function. Declaring
+// a constant locally within a routine is sufficient to prevent redundant
+// reloads of that constant when that single routine is called multiple
+// times in a function, but if the constant is used (and declared) in a
+// separate math routine it would be reloaded.
+
#ifndef XMGLOBALCONST
#define XMGLOBALCONST static const // extern const // MGH - __declspec(selectany)
#endif

// Minimax polynomial coefficients used by the trig / inverse-trig routines
// (the /*EstN*/ entries are the shorter "estimate" series).
XMGLOBALCONST XMVECTORF32 g_XMSinCoefficients0 = {-0.16666667f, +0.0083333310f, -0.00019840874f, +2.7525562e-06f};
XMGLOBALCONST XMVECTORF32 g_XMSinCoefficients1 = {-2.3889859e-08f, -0.16665852f /*Est1*/, +0.0083139502f /*Est2*/, -0.00018524670f /*Est3*/};
XMGLOBALCONST XMVECTORF32 g_XMCosCoefficients0 = {-0.5f, +0.041666638f, -0.0013888378f, +2.4760495e-05f};
XMGLOBALCONST XMVECTORF32 g_XMCosCoefficients1 = {-2.6051615e-07f, -0.49992746f /*Est1*/, +0.041493919f /*Est2*/, -0.0012712436f /*Est3*/};
XMGLOBALCONST XMVECTORF32 g_XMTanCoefficients0 = {1.0f, 0.333333333f, 0.133333333f, 5.396825397e-2f};
XMGLOBALCONST XMVECTORF32 g_XMTanCoefficients1 = {2.186948854e-2f, 8.863235530e-3f, 3.592128167e-3f, 1.455834485e-3f};
XMGLOBALCONST XMVECTORF32 g_XMTanCoefficients2 = {5.900274264e-4f, 2.391290764e-4f, 9.691537707e-5f, 3.927832950e-5f};
XMGLOBALCONST XMVECTORF32 g_XMArcCoefficients0 = {+1.5707963050f, -0.2145988016f, +0.0889789874f, -0.0501743046f};
XMGLOBALCONST XMVECTORF32 g_XMArcCoefficients1 = {+0.0308918810f, -0.0170881256f, +0.0066700901f, -0.0012624911f};
XMGLOBALCONST XMVECTORF32 g_XMATanCoefficients0 = {-0.3333314528f, +0.1999355085f, -0.1420889944f, +0.1065626393f};
XMGLOBALCONST XMVECTORF32 g_XMATanCoefficients1 = {-0.0752896400f, +0.0429096138f, -0.0161657367f, +0.0028662257f};
XMGLOBALCONST XMVECTORF32 g_XMATanEstCoefficients0 = {+0.999866f, +0.999866f, +0.999866f, +0.999866f};
XMGLOBALCONST XMVECTORF32 g_XMATanEstCoefficients1 = {-0.3302995f, +0.180141f, -0.085133f, +0.0208351f};
XMGLOBALCONST XMVECTORF32 g_XMTanEstCoefficients = {2.484f, -1.954923183e-1f, 2.467401101f, XM_1DIVPI};
XMGLOBALCONST XMVECTORF32 g_XMArcEstCoefficients = {+1.5707288f,-0.2121144f,+0.0742610f,-0.0187293f};
XMGLOBALCONST XMVECTORF32 g_XMPiConstants0 = {XM_PI, XM_2PI, XM_1DIVPI, XM_1DIV2PI};
// Identity matrix rows and their negations.
XMGLOBALCONST XMVECTORF32 g_XMIdentityR0 = {1.0f, 0.0f, 0.0f, 0.0f};
XMGLOBALCONST XMVECTORF32 g_XMIdentityR1 = {0.0f, 1.0f, 0.0f, 0.0f};
XMGLOBALCONST XMVECTORF32 g_XMIdentityR2 = {0.0f, 0.0f, 1.0f, 0.0f};
XMGLOBALCONST XMVECTORF32 g_XMIdentityR3 = {0.0f, 0.0f, 0.0f, 1.0f};
XMGLOBALCONST XMVECTORF32 g_XMNegIdentityR0 = {-1.0f,0.0f, 0.0f, 0.0f};
XMGLOBALCONST XMVECTORF32 g_XMNegIdentityR1 = {0.0f,-1.0f, 0.0f, 0.0f};
XMGLOBALCONST XMVECTORF32 g_XMNegIdentityR2 = {0.0f, 0.0f,-1.0f, 0.0f};
XMGLOBALCONST XMVECTORF32 g_XMNegIdentityR3 = {0.0f, 0.0f, 0.0f,-1.0f};
// Bit masks for sign manipulation and per-component selection.
XMGLOBALCONST XMVECTORI32 g_XMNegativeZero = {0x80000000, 0x80000000, 0x80000000, 0x80000000};
XMGLOBALCONST XMVECTORI32 g_XMNegate3 = {0x80000000, 0x80000000, 0x80000000, 0x00000000};
XMGLOBALCONST XMVECTORI32 g_XMMask3 = {0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000};
XMGLOBALCONST XMVECTORI32 g_XMMaskX = {0xFFFFFFFF, 0x00000000, 0x00000000, 0x00000000};
XMGLOBALCONST XMVECTORI32 g_XMMaskY = {0x00000000, 0xFFFFFFFF, 0x00000000, 0x00000000};
XMGLOBALCONST XMVECTORI32 g_XMMaskZ = {0x00000000, 0x00000000, 0xFFFFFFFF, 0x00000000};
XMGLOBALCONST XMVECTORI32 g_XMMaskW = {0x00000000, 0x00000000, 0x00000000, 0xFFFFFFFF};
// Common scalar broadcast constants.
XMGLOBALCONST XMVECTORF32 g_XMOne = { 1.0f, 1.0f, 1.0f, 1.0f};
XMGLOBALCONST XMVECTORF32 g_XMOne3 = { 1.0f, 1.0f, 1.0f, 0.0f};
XMGLOBALCONST XMVECTORF32 g_XMZero = { 0.0f, 0.0f, 0.0f, 0.0f};
XMGLOBALCONST XMVECTORF32 g_XMTwo = { 2.f, 2.f, 2.f, 2.f };
XMGLOBALCONST XMVECTORF32 g_XMFour = { 4.f, 4.f, 4.f, 4.f };
XMGLOBALCONST XMVECTORF32 g_XMSix = { 6.f, 6.f, 6.f, 6.f };
XMGLOBALCONST XMVECTORF32 g_XMNegativeOne = {-1.0f,-1.0f,-1.0f,-1.0f};
XMGLOBALCONST XMVECTORF32 g_XMOneHalf = { 0.5f, 0.5f, 0.5f, 0.5f};
XMGLOBALCONST XMVECTORF32 g_XMNegativeOneHalf = {-0.5f,-0.5f,-0.5f,-0.5f};
XMGLOBALCONST XMVECTORF32 g_XMNegativeTwoPi = {-XM_2PI, -XM_2PI, -XM_2PI, -XM_2PI};
XMGLOBALCONST XMVECTORF32 g_XMNegativePi = {-XM_PI, -XM_PI, -XM_PI, -XM_PI};
XMGLOBALCONST XMVECTORF32 g_XMHalfPi = {XM_PIDIV2, XM_PIDIV2, XM_PIDIV2, XM_PIDIV2};
XMGLOBALCONST XMVECTORF32 g_XMPi = {XM_PI, XM_PI, XM_PI, XM_PI};
XMGLOBALCONST XMVECTORF32 g_XMReciprocalPi = {XM_1DIVPI, XM_1DIVPI, XM_1DIVPI, XM_1DIVPI};
XMGLOBALCONST XMVECTORF32 g_XMTwoPi = {XM_2PI, XM_2PI, XM_2PI, XM_2PI};
XMGLOBALCONST XMVECTORF32 g_XMReciprocalTwoPi = {XM_1DIV2PI, XM_1DIV2PI, XM_1DIV2PI, XM_1DIV2PI};
// IEEE-754 special values and float bit-pattern limits.
XMGLOBALCONST XMVECTORF32 g_XMEpsilon = {1.192092896e-7f, 1.192092896e-7f, 1.192092896e-7f, 1.192092896e-7f};
XMGLOBALCONST XMVECTORI32 g_XMInfinity = {0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000};
XMGLOBALCONST XMVECTORI32 g_XMQNaN = {0x7FC00000, 0x7FC00000, 0x7FC00000, 0x7FC00000};
XMGLOBALCONST XMVECTORI32 g_XMQNaNTest = {0x007FFFFF, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF};
XMGLOBALCONST XMVECTORI32 g_XMAbsMask = {0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF};
XMGLOBALCONST XMVECTORI32 g_XMFltMin = {0x00800000, 0x00800000, 0x00800000, 0x00800000};
XMGLOBALCONST XMVECTORI32 g_XMFltMax = {0x7F7FFFFF, 0x7F7FFFFF, 0x7F7FFFFF, 0x7F7FFFFF};
XMGLOBALCONST XMVECTORI32 g_XMNegOneMask = {0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF};
// Packed pixel/vertex format (A8R8G8B8, A2B10G10R10, X16Y16...) constants.
XMGLOBALCONST XMVECTORI32 g_XMMaskA8R8G8B8 = {0x00FF0000, 0x0000FF00, 0x000000FF, 0xFF000000};
XMGLOBALCONST XMVECTORI32 g_XMFlipA8R8G8B8 = {0x00000000, 0x00000000, 0x00000000, 0x80000000};
XMGLOBALCONST XMVECTORF32 g_XMFixAA8R8G8B8 = {0.0f,0.0f,0.0f,(float)(0x80000000U)};
XMGLOBALCONST XMVECTORF32 g_XMNormalizeA8R8G8B8 = {1.0f/(255.0f*(float)(0x10000)),1.0f/(255.0f*(float)(0x100)),1.0f/255.0f,1.0f/(255.0f*(float)(0x1000000))};
XMGLOBALCONST XMVECTORI32 g_XMMaskA2B10G10R10 = {0x000003FF, 0x000FFC00, 0x3FF00000, 0xC0000000};
XMGLOBALCONST XMVECTORI32 g_XMFlipA2B10G10R10 = {0x00000200, 0x00080000, 0x20000000, 0x80000000};
XMGLOBALCONST XMVECTORF32 g_XMFixAA2B10G10R10 = {-512.0f,-512.0f*(float)(0x400),-512.0f*(float)(0x100000),(float)(0x80000000U)};
XMGLOBALCONST XMVECTORF32 g_XMNormalizeA2B10G10R10 = {1.0f/511.0f,1.0f/(511.0f*(float)(0x400)),1.0f/(511.0f*(float)(0x100000)),1.0f/(3.0f*(float)(0x40000000))};
XMGLOBALCONST XMVECTORI32 g_XMMaskX16Y16 = {0x0000FFFF, 0xFFFF0000, 0x00000000, 0x00000000};
XMGLOBALCONST XMVECTORI32 g_XMFlipX16Y16 = {0x00008000, 0x00000000, 0x00000000, 0x00000000};
XMGLOBALCONST XMVECTORF32 g_XMFixX16Y16 = {-32768.0f,0.0f,0.0f,0.0f};
XMGLOBALCONST XMVECTORF32 g_XMNormalizeX16Y16 = {1.0f/32767.0f,1.0f/(32767.0f*65536.0f),0.0f,0.0f};
XMGLOBALCONST XMVECTORI32 g_XMMaskX16Y16Z16W16 = {0x0000FFFF, 0x0000FFFF, 0xFFFF0000, 0xFFFF0000};
XMGLOBALCONST XMVECTORI32 g_XMFlipX16Y16Z16W16 = {0x00008000, 0x00008000, 0x00000000, 0x00000000};
XMGLOBALCONST XMVECTORF32 g_XMFixX16Y16Z16W16 = {-32768.0f,-32768.0f,0.0f,0.0f};
XMGLOBALCONST XMVECTORF32 g_XMNormalizeX16Y16Z16W16 = {1.0f/32767.0f,1.0f/32767.0f,1.0f/(32767.0f*65536.0f),1.0f/(32767.0f*65536.0f)};
XMGLOBALCONST XMVECTORF32 g_XMNoFraction = {8388608.0f,8388608.0f,8388608.0f,8388608.0f};
XMGLOBALCONST XMVECTORI32 g_XMMaskByte = {0x000000FF, 0x000000FF, 0x000000FF, 0x000000FF};
// Single-component negation and XMVectorSelect control constants.
XMGLOBALCONST XMVECTORF32 g_XMNegateX = {-1.0f, 1.0f, 1.0f, 1.0f};
XMGLOBALCONST XMVECTORF32 g_XMNegateY = { 1.0f,-1.0f, 1.0f, 1.0f};
XMGLOBALCONST XMVECTORF32 g_XMNegateZ = { 1.0f, 1.0f,-1.0f, 1.0f};
XMGLOBALCONST XMVECTORF32 g_XMNegateW = { 1.0f, 1.0f, 1.0f,-1.0f};
XMGLOBALCONST XMVECTORI32 g_XMSelect0101 = {XM_SELECT_0, XM_SELECT_1, XM_SELECT_0, XM_SELECT_1};
XMGLOBALCONST XMVECTORI32 g_XMSelect1010 = {XM_SELECT_1, XM_SELECT_0, XM_SELECT_1, XM_SELECT_0};
XMGLOBALCONST XMVECTORI32 g_XMOneHalfMinusEpsilon = { 0x3EFFFFFD, 0x3EFFFFFD, 0x3EFFFFFD, 0x3EFFFFFD};
XMGLOBALCONST XMVECTORI32 g_XMSelect1000 = {XM_SELECT_1, XM_SELECT_0, XM_SELECT_0, XM_SELECT_0};
XMGLOBALCONST XMVECTORI32 g_XMSelect1100 = {XM_SELECT_1, XM_SELECT_1, XM_SELECT_0, XM_SELECT_0};
XMGLOBALCONST XMVECTORI32 g_XMSelect1110 = {XM_SELECT_1, XM_SELECT_1, XM_SELECT_1, XM_SELECT_0};
XMGLOBALCONST XMVECTORI32 g_XMSelect1011 = { XM_SELECT_1, XM_SELECT_0, XM_SELECT_1, XM_SELECT_1 };
XMGLOBALCONST XMVECTORF32 g_XMFixupY16 = {1.0f,1.0f/65536.0f,0.0f,0.0f};
XMGLOBALCONST XMVECTORF32 g_XMFixupY16W16 = {1.0f,1.0f,1.0f/65536.0f,1.0f/65536.0f};
// Per-component sign-flip masks.
XMGLOBALCONST XMVECTORI32 g_XMFlipY = {0,0x80000000,0,0};
XMGLOBALCONST XMVECTORI32 g_XMFlipZ = {0,0,0x80000000,0};
XMGLOBALCONST XMVECTORI32 g_XMFlipW = {0,0,0,0x80000000};
XMGLOBALCONST XMVECTORI32 g_XMFlipYZ = {0,0x80000000,0x80000000,0};
XMGLOBALCONST XMVECTORI32 g_XMFlipZW = {0,0,0x80000000,0x80000000};
XMGLOBALCONST XMVECTORI32 g_XMFlipYW = {0,0x80000000,0,0x80000000};
+XMGLOBALCONST XMVECTORI32 g_XMMaskDec4 = {0x3FF,0x3FF<<10,0x3FF<<20,0x3<<30};
+XMGLOBALCONST XMVECTORI32 g_XMXorDec4 = {0x200,0x200<<10,0x200<<20,0};
+XMGLOBALCONST XMVECTORF32 g_XMAddUDec4 = {0,0,0,32768.0f*65536.0f};
+XMGLOBALCONST XMVECTORF32 g_XMAddDec4 = {-512.0f,-512.0f*1024.0f,-512.0f*1024.0f*1024.0f,0};
+XMGLOBALCONST XMVECTORF32 g_XMMulDec4 = {1.0f,1.0f/1024.0f,1.0f/(1024.0f*1024.0f),1.0f/(1024.0f*1024.0f*1024.0f)};
+XMGLOBALCONST XMVECTORI32 g_XMMaskByte4 = {0xFF,0xFF00,0xFF0000,0xFF000000};
+XMGLOBALCONST XMVECTORI32 g_XMXorByte4 = {0x80,0x8000,0x800000,0x00000000};
+XMGLOBALCONST XMVECTORF32 g_XMAddByte4 = {-128.0f,-128.0f*256.0f,-128.0f*65536.0f,0};
+XMGLOBALCONST XMVECTORF32 g_XMFixUnsigned = {32768.0f*65536.0f,32768.0f*65536.0f,32768.0f*65536.0f,32768.0f*65536.0f};
+XMGLOBALCONST XMVECTORF32 g_XMMaxInt = {65536.0f*32768.0f-128.0f,65536.0f*32768.0f-128.0f,65536.0f*32768.0f-128.0f,65536.0f*32768.0f-128.0f};
+XMGLOBALCONST XMVECTORF32 g_XMMaxUInt = {65536.0f*65536.0f-256.0f,65536.0f*65536.0f-256.0f,65536.0f*65536.0f-256.0f,65536.0f*65536.0f-256.0f};
+XMGLOBALCONST XMVECTORF32 g_XMUnsignedFix = {32768.0f*65536.0f,32768.0f*65536.0f,32768.0f*65536.0f,32768.0f*65536.0f};
+XMGLOBALCONST XMVECTORF32 g_XMsrgbScale = { 12.92f, 12.92f, 12.92f, 1.0f };
+XMGLOBALCONST XMVECTORF32 g_XMsrgbA = { 0.055f, 0.055f, 0.055f, 0.0f };
+XMGLOBALCONST XMVECTORF32 g_XMsrgbA1 = { 1.055f, 1.055f, 1.055f, 1.0f };
+
+/****************************************************************************
+ *
+ * Implementation
+ *
+ ****************************************************************************/
+
+#pragma warning(push)
+#pragma warning(disable:4068 4214 4204 4365 4616 4640 6001)
+
+#pragma prefast(push)
+#pragma prefast(disable : 25000, "FXMVECTOR is 16 bytes")
+
+//------------------------------------------------------------------------------
+
+#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
+
+// Returns a vector whose lanes are 1.0f where bit 0 of the corresponding
+// parameter (C0..C3) is set and 0.0f where it is clear.  Only the low bit of
+// each parameter is examined.
+inline XMVECTOR XMVectorSetBinaryConstant(uint32_t C0, uint32_t C1, uint32_t C2, uint32_t C3)
+{
+#if defined(_XM_NO_INTRINSICS_)
+    XMVECTORU32 vResult;
+    // (0-(C&1)) yields 0xFFFFFFFF when bit 0 is set, 0 otherwise; ANDing with
+    // 0x3F800000 (the IEEE-754 bit pattern of 1.0f) selects 1.0f or 0.0f.
+    vResult.u[0] = (0-(C0&1)) & 0x3F800000;
+    vResult.u[1] = (0-(C1&1)) & 0x3F800000;
+    vResult.u[2] = (0-(C2&1)) & 0x3F800000;
+    vResult.u[3] = (0-(C3&1)) & 0x3F800000;
+    return vResult.v;
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+    // Same scalar bit trick as above; the result is built in memory and
+    // returned through the vector union.
+    XMVECTORU32 vResult;
+    vResult.u[0] = (0-(C0&1)) & 0x3F800000;
+    vResult.u[1] = (0-(C1&1)) & 0x3F800000;
+    vResult.u[2] = (0-(C2&1)) & 0x3F800000;
+    vResult.u[3] = (0-(C3&1)) & 0x3F800000;
+    return vResult.v;
+#else // XM_SSE_INTRINSICS_
+    static const XMVECTORU32 g_vMask1 = {1,1,1,1};
+    // Move the parms to a vector
+    __m128i vTemp = _mm_set_epi32(C3,C2,C1,C0);
+    // Mask off the low bits
+    vTemp = _mm_and_si128(vTemp,g_vMask1);
+    // 0xFFFFFFFF on true bits
+    vTemp = _mm_cmpeq_epi32(vTemp,g_vMask1);
+    // 0xFFFFFFFF -> 1.0f, 0x00000000 -> 0.0f
+    vTemp = _mm_and_si128(vTemp,g_XMOne);
+    return _mm_castsi128_ps(vTemp);
+#endif
+}
+
+//------------------------------------------------------------------------------
+
+// Splats IntConstant (which must fit in a 5-bit signed immediate, [-16,15])
+// into all four lanes and divides by 2^DivExponent, returning the result as
+// floats.  The division is performed by multiplying with a reciprocal whose
+// bit pattern is built directly: subtracting (DivExponent << 23) from
+// 0x3F800000 (1.0f) decrements the float's exponent field by DivExponent.
+inline XMVECTOR XMVectorSplatConstant(int32_t IntConstant, uint32_t DivExponent)
+{
+    assert( IntConstant >= -16 && IntConstant <= 15 );
+    assert( DivExponent < 32 );
+#if defined(_XM_NO_INTRINSICS_)
+
+    using DirectX::XMConvertVectorIntToFloat;
+
+    XMVECTORI32 V = { IntConstant, IntConstant, IntConstant, IntConstant };
+    return XMConvertVectorIntToFloat( V.v, DivExponent);
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+    // Splat the int
+    int32x4_t vScale = vdupq_n_s32(IntConstant);
+    // Convert to a float
+    XMVECTOR vResult = vcvtq_f32_s32(vScale);
+    // Convert DivExponent into 1.0f/(1<<DivExponent)
+    uint32_t uScale = 0x3F800000U - (DivExponent << 23);
+    // Splat the scalar value (It's really a float)
+    vScale = vdupq_n_s32(uScale);
+    // Multiply by the reciprocal (Perform a right shift by DivExponent)
+    vResult = vmulq_f32(vResult,reinterpret_cast<const float32x4_t *>(&vScale)[0]);
+    return vResult;
+#else // XM_SSE_INTRINSICS_
+    // Splat the int
+    __m128i vScale = _mm_set1_epi32(IntConstant);
+    // Convert to a float
+    XMVECTOR vResult = _mm_cvtepi32_ps(vScale);
+    // Convert DivExponent into 1.0f/(1<<DivExponent)
+    uint32_t uScale = 0x3F800000U - (DivExponent << 23);
+    // Splat the scalar value (It's really a float)
+    vScale = _mm_set1_epi32(uScale);
+    // Multiply by the reciprocal (Perform a right shift by DivExponent)
+    vResult = _mm_mul_ps(vResult,_mm_castsi128_ps(vScale));
+    return vResult;
+#endif
+}
+
+//------------------------------------------------------------------------------
+
+// Splats IntConstant ([-16,15], a 5-bit signed immediate) into all four lanes
+// as a raw integer bit pattern; the lanes are NOT converted to float.
+inline XMVECTOR XMVectorSplatConstantInt(int32_t IntConstant)
+{
+    assert( IntConstant >= -16 && IntConstant <= 15 );
+#if defined(_XM_NO_INTRINSICS_)
+
+    XMVECTORI32 V = { IntConstant, IntConstant, IntConstant, IntConstant };
+    return V.v;
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+    // Reinterpret (not convert) the integer lanes as the float return type.
+    int32x4_t V = vdupq_n_s32( IntConstant );
+    return reinterpret_cast<float32x4_t *>(&V)[0];
+#else // XM_SSE_INTRINSICS_
+    __m128i V = _mm_set1_epi32( IntConstant );
+    return reinterpret_cast<__m128 *>(&V)[0];
+#endif
+}
+
+// Implemented for VMX128 intrinsics as #defines aboves
+#endif // _XM_NO_INTRINSICS_ || _XM_SSE_INTRINSICS_ || _XM_ARM_NEON_INTRINSICS_
+
+#include "DirectXMathConvert.inl"
+#include "DirectXMathVector.inl"
+#include "DirectXMathMatrix.inl"
+#include "DirectXMathMisc.inl"
+
+
+#pragma prefast(pop)
+#pragma warning(pop)
+
+}; // namespace DirectX
+
diff --git a/Minecraft.Client/PS3/PS3Extras/DirectX/DirectXMathConvert.inl b/Minecraft.Client/PS3/PS3Extras/DirectX/DirectXMathConvert.inl
new file mode 100644
index 00000000..c8e39352
--- /dev/null
+++ b/Minecraft.Client/PS3/PS3Extras/DirectX/DirectXMathConvert.inl
@@ -0,0 +1,1962 @@
+//-------------------------------------------------------------------------------------
+// DirectXMathConvert.inl -- SIMD C++ Math library
+//
+// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
+// ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
+// PARTICULAR PURPOSE.
+//
+// Copyright (c) Microsoft Corporation. All rights reserved.
+//-------------------------------------------------------------------------------------
+
+#ifdef _MSC_VER
+#pragma once
+#endif
+
+/****************************************************************************
+ *
+ * Data conversion
+ *
+ ****************************************************************************/
+
+//------------------------------------------------------------------------------
+
+#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
+// For VMX128, these routines are all defines in the main header
+
+#pragma warning(push)
+#pragma warning(disable:4701) // Prevent warnings about 'Result' potentially being used without having been initialized
+
+// Converts each signed 32-bit lane of VInt to float and divides by
+// 2^DivExponent.  The division is a multiply by a reciprocal whose bit
+// pattern is 0x3F800000 (1.0f) with DivExponent subtracted from the
+// exponent field.
+inline XMVECTOR XMConvertVectorIntToFloat
+(
+    FXMVECTOR VInt,
+    uint32_t DivExponent
+)
+{
+    assert(DivExponent<32);
+#if defined(_XM_NO_INTRINSICS_)
+    float fScale = 1.0f / (float)(1U << DivExponent);
+    uint32_t ElementIndex = 0;
+    // Result is fully written by the loop below (see the C4701 suppression
+    // at the top of this section).
+    XMVECTOR Result;
+    do {
+        int32_t iTemp = (int32_t)VInt.vector4_u32[ElementIndex];
+        Result.vector4_f32[ElementIndex] = ((float)iTemp) * fScale;
+    } while (++ElementIndex<4);
+    return Result;
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+    __n128 vResult = vcvtq_f32_s32( VInt );
+    uint32_t uScale = 0x3F800000U - (DivExponent << 23);
+    __n128 vScale = vdupq_n_u32( uScale );
+    return vmulq_f32( vResult, vScale );
+#else // _XM_SSE_INTRINSICS_
+    // Convert to floats
+    XMVECTOR vResult = _mm_cvtepi32_ps(_mm_castps_si128(VInt));
+    // Convert DivExponent into 1.0f/(1<<DivExponent)
+    uint32_t uScale = 0x3F800000U - (DivExponent << 23);
+    // Splat the scalar value
+    __m128i vScale = _mm_set1_epi32(uScale);
+    vResult = _mm_mul_ps(vResult,_mm_castsi128_ps(vScale));
+    return vResult;
+#endif
+}
+
+//------------------------------------------------------------------------------
+
+// Multiplies each float lane of VFloat by 2^MulExponent and converts to a
+// signed 32-bit integer with saturation: values at or below INT32_MIN clamp
+// to 0x80000000 and values above the largest exactly-representable positive
+// float clamp to 0x7FFFFFFF.
+inline XMVECTOR XMConvertVectorFloatToInt
+(
+    FXMVECTOR VFloat,
+    uint32_t MulExponent
+)
+{
+    assert(MulExponent<32);
+#if defined(_XM_NO_INTRINSICS_)
+    // Get the scalar factor.
+    float fScale = (float)(1U << MulExponent);
+    uint32_t ElementIndex = 0;
+    XMVECTOR Result;
+    do {
+        int32_t iResult;
+        float fTemp = VFloat.vector4_f32[ElementIndex]*fScale;
+        if (fTemp <= -(65536.0f*32768.0f)) {
+            // Clamp to INT32_MIN (written this way to avoid a literal
+            // that overflows int32_t).
+            iResult = (-0x7FFFFFFF)-1;
+        } else if (fTemp > (65536.0f*32768.0f)-128.0f) {
+            // Clamp to INT32_MAX.
+            iResult = 0x7FFFFFFF;
+        } else {
+            iResult = (int32_t)fTemp;
+        }
+        Result.vector4_u32[ElementIndex] = (uint32_t)iResult;
+    } while (++ElementIndex<4);
+    return Result;
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+    __n128 vResult = vdupq_n_f32((float)(1U << MulExponent));
+    vResult = vmulq_f32(vResult,VFloat);
+    // In case of positive overflow, detect it
+    __n128 vOverflow = vcgtq_f32(vResult,g_XMMaxInt);
+    // Float to int conversion
+    __n128 vResulti = vcvtq_s32_f32(vResult);
+    // If there was positive overflow, set to 0x7FFFFFFF
+    vResult = vandq_u32(vOverflow,g_XMAbsMask);
+    vOverflow = vbicq_u32(vResulti,vOverflow);
+    vOverflow = vorrq_u32(vOverflow,vResult);
+    return vOverflow;
+#else // _XM_SSE_INTRINSICS_
+    XMVECTOR vResult = _mm_set_ps1((float)(1U << MulExponent));
+    vResult = _mm_mul_ps(vResult,VFloat);
+    // In case of positive overflow, detect it
+    XMVECTOR vOverflow = _mm_cmpgt_ps(vResult,g_XMMaxInt);
+    // Float to int conversion
+    // (negative overflow already yields 0x80000000 from cvttps)
+    __m128i vResulti = _mm_cvttps_epi32(vResult);
+    // If there was positive overflow, set to 0x7FFFFFFF
+    vResult = _mm_and_ps(vOverflow,g_XMAbsMask);
+    vOverflow = _mm_andnot_ps(vOverflow,_mm_castsi128_ps(vResulti));
+    vOverflow = _mm_or_ps(vOverflow,vResult);
+    return vOverflow;
+#endif
+}
+
+//------------------------------------------------------------------------------
+
+// Converts each unsigned 32-bit lane of VUInt to float and divides by
+// 2^DivExponent.  SSE has no unsigned convert, so lanes with the top bit set
+// are converted as signed after clearing the sign bit and then fixed up by
+// adding 2^31 (g_XMFixUnsigned).
+inline XMVECTOR XMConvertVectorUIntToFloat
+(
+    FXMVECTOR VUInt,
+    uint32_t DivExponent
+)
+{
+    assert(DivExponent<32);
+#if defined(_XM_NO_INTRINSICS_)
+    float fScale = 1.0f / (float)(1U << DivExponent);
+    uint32_t ElementIndex = 0;
+    XMVECTOR Result;
+    do {
+        Result.vector4_f32[ElementIndex] = (float)VUInt.vector4_u32[ElementIndex] * fScale;
+    } while (++ElementIndex<4);
+    return Result;
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+    __n128 vResult = vcvtq_f32_u32( VUInt );
+    uint32_t uScale = 0x3F800000U - (DivExponent << 23);
+    __n128 vScale = vdupq_n_u32( uScale );
+    return vmulq_f32( vResult, vScale );
+#else // _XM_SSE_INTRINSICS_
+    // For the values that are higher than 0x7FFFFFFF, a fixup is needed
+    // Determine which ones need the fix.
+    XMVECTOR vMask = _mm_and_ps(VUInt,g_XMNegativeZero);
+    // Force all values positive
+    XMVECTOR vResult = _mm_xor_ps(VUInt,vMask);
+    // Convert to floats
+    vResult = _mm_cvtepi32_ps(_mm_castps_si128(vResult));
+    // Convert 0x80000000 -> 0xFFFFFFFF
+    __m128i iMask = _mm_srai_epi32(_mm_castps_si128(vMask),31);
+    // For only the ones that are too big, add the fixup
+    vMask = _mm_and_ps(_mm_castsi128_ps(iMask),g_XMFixUnsigned);
+    vResult = _mm_add_ps(vResult,vMask);
+    // Convert DivExponent into 1.0f/(1<<DivExponent)
+    uint32_t uScale = 0x3F800000U - (DivExponent << 23);
+    // Splat
+    iMask = _mm_set1_epi32(uScale);
+    vResult = _mm_mul_ps(vResult,_mm_castsi128_ps(iMask));
+    return vResult;
+#endif
+}
+
+//------------------------------------------------------------------------------
+
+// Multiplies each float lane of VFloat by 2^MulExponent and converts to an
+// unsigned 32-bit integer with saturation: negative values clamp to 0 and
+// values at or above 2^32 clamp to 0xFFFFFFFF.  SSE has no unsigned convert,
+// so lanes >= 2^31 are biased down by 2^31 (g_XMUnsignedFix), converted
+// signed, then have the top bit XORed back in.
+inline XMVECTOR XMConvertVectorFloatToUInt
+(
+    FXMVECTOR VFloat,
+    uint32_t MulExponent
+)
+{
+    assert(MulExponent<32);
+#if defined(_XM_NO_INTRINSICS_)
+    // Get the scalar factor.
+    float fScale = (float)(1U << MulExponent);
+    uint32_t ElementIndex = 0;
+    XMVECTOR Result;
+    do {
+        uint32_t uResult;
+        float fTemp = VFloat.vector4_f32[ElementIndex]*fScale;
+        if (fTemp <= 0.0f) {
+            uResult = 0;
+        } else if (fTemp >= (65536.0f*65536.0f)) {
+            uResult = 0xFFFFFFFFU;
+        } else {
+            uResult = (uint32_t)fTemp;
+        }
+        Result.vector4_u32[ElementIndex] = uResult;
+    } while (++ElementIndex<4);
+    return Result;
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+    __n128 vResult = vdupq_n_f32((float)(1U << MulExponent));
+    vResult = vmulq_f32(vResult,VFloat);
+    // In case of overflow, detect it
+    __n128 vOverflow = vcgtq_f32(vResult,g_XMMaxUInt);
+    // Float to int conversion
+    __n128 vResulti = vcvtq_u32_f32(vResult);
+    // If there was overflow, set to 0xFFFFFFFFU
+    vResult = vbicq_u32(vResulti,vOverflow);
+    vOverflow = vorrq_u32(vOverflow,vResult);
+    return vOverflow;
+#else // _XM_SSE_INTRINSICS_
+    XMVECTOR vResult = _mm_set_ps1(static_cast<float>(1U << MulExponent));
+    vResult = _mm_mul_ps(vResult,VFloat);
+    // Clamp to >=0
+    vResult = _mm_max_ps(vResult,g_XMZero);
+    // Any numbers that are too big, set to 0xFFFFFFFFU
+    XMVECTOR vOverflow = _mm_cmpgt_ps(vResult,g_XMMaxUInt);
+    XMVECTOR vValue = g_XMUnsignedFix;
+    // Too large for a signed integer?
+    XMVECTOR vMask = _mm_cmpge_ps(vResult,vValue);
+    // Zero for number's lower than 0x80000000, 32768.0f*65536.0f otherwise
+    vValue = _mm_and_ps(vValue,vMask);
+    // Perform fixup only on numbers too large (Keeps low bit precision)
+    vResult = _mm_sub_ps(vResult,vValue);
+    __m128i vResulti = _mm_cvttps_epi32(vResult);
+    // Convert from signed to unsigned only if greater than 0x80000000
+    vMask = _mm_and_ps(vMask,g_XMNegativeZero);
+    vResult = _mm_xor_ps(_mm_castsi128_ps(vResulti),vMask);
+    // On those that are too large, set to 0xFFFFFFFF
+    vResult = _mm_or_ps(vResult,vOverflow);
+    return vResult;
+#endif
+}
+
+#pragma warning(pop)
+
+#endif // _XM_NO_INTRINSICS_ || _XM_SSE_INTRINSICS_ || _XM_ARM_NEON_INTRINSICS_
+
+/****************************************************************************
+ *
+ * Vector and matrix load operations
+ *
+ ****************************************************************************/
+
+//------------------------------------------------------------------------------
+// Loads a single uint32_t into lane x; lanes y/z/w are zeroed.
+// No 16-byte alignment of pSource is required.
+_Use_decl_annotations_
+inline XMVECTOR XMLoadInt(const uint32_t* pSource)
+{
+    assert(pSource);
+#if defined(_XM_NO_INTRINSICS_)
+    XMVECTOR V;
+    V.vector4_u32[0] = *pSource;
+    V.vector4_u32[1] = 0;
+    V.vector4_u32[2] = 0;
+    V.vector4_u32[3] = 0;
+    return V;
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+    __n128 zero = vdupq_n_u32(0);
+    return vld1q_lane_u32( pSource, zero, 0 );
+#elif defined(_XM_SSE_INTRINSICS_)
+    return _mm_load_ss( reinterpret_cast<const float*>(pSource) );
+#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
+    // NOTE(review): empty branch inherited from upstream; one of the paths
+    // above is expected to be selected by the platform configuration.
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+// Loads a single float into lane x; lanes y/z/w are zeroed.
+// No 16-byte alignment of pSource is required.
+_Use_decl_annotations_
+inline XMVECTOR XMLoadFloat(const float* pSource)
+{
+    assert(pSource);
+#if defined(_XM_NO_INTRINSICS_)
+    XMVECTOR V;
+    V.vector4_f32[0] = *pSource;
+    V.vector4_f32[1] = 0.f;
+    V.vector4_f32[2] = 0.f;
+    V.vector4_f32[3] = 0.f;
+    return V;
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+    __n128 zero = vdupq_n_u32(0);
+    return vld1q_lane_f32( pSource, zero, 0 );
+#elif defined(_XM_SSE_INTRINSICS_)
+    return _mm_load_ss( pSource );
+#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+// Loads two consecutive uint32_t values into lanes x and y; z and w are
+// zeroed.  No 16-byte alignment of pSource is required.
+_Use_decl_annotations_
+inline XMVECTOR XMLoadInt2
+(
+    const uint32_t* pSource
+)
+{
+    assert(pSource);
+#if defined(_XM_NO_INTRINSICS_)
+    XMVECTOR Result;
+    Result.vector4_u32[0] = pSource[0];
+    Result.vector4_u32[1] = pSource[1];
+    Result.vector4_u32[2] = Result.vector4_u32[3] = 0;
+    return Result;
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+    __n64 zero = vdup_n_u32(0);
+    __n64 lo = vld1_u32( pSource );
+    return vcombine_u32( lo, zero );
+#elif defined(_XM_SSE_INTRINSICS_)
+    // Two scalar loads interleaved into {x, y, 0, 0}.
+    __m128 vx = _mm_load_ss( reinterpret_cast<const float*>(pSource) );
+    __m128 vy = _mm_load_ss( reinterpret_cast<const float*>(pSource+1) );
+    return _mm_unpacklo_ps( vx, vy );
+#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+// Loads two consecutive uint32_t values into lanes x and y; z and w are
+// zeroed.  pSource must be 16-byte aligned (asserted below).
+_Use_decl_annotations_
+inline XMVECTOR XMLoadInt2A
+(
+    const uint32_t* pSource
+)
+{
+    assert(pSource);
+    assert(((uintptr_t)pSource & 0xF) == 0);
+#if defined(_XM_NO_INTRINSICS_)
+    XMVECTOR V;
+    V.vector4_u32[0] = pSource[0];
+    V.vector4_u32[1] = pSource[1];
+    V.vector4_u32[2] = 0;
+    V.vector4_u32[3] = 0;
+    return V;
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+    __n64 x = vld1_u32_ex( pSource, 64 );
+    __n64 zero = vdup_n_u32(0);
+    return vcombine_u32( x, zero );
+#elif defined(_XM_SSE_INTRINSICS_)
+    // 64-bit load into the low half; the high half is zeroed by the intrinsic.
+    __m128i V = _mm_loadl_epi64( reinterpret_cast<const __m128i*>(pSource) );
+    return reinterpret_cast<__m128 *>(&V)[0];
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+// Loads an XMFLOAT2 into lanes x and y; z and w are zeroed.
+// No 16-byte alignment of pSource is required.
+_Use_decl_annotations_
+inline XMVECTOR XMLoadFloat2
+(
+    const XMFLOAT2* pSource
+)
+{
+    assert(pSource);
+#if defined(_XM_NO_INTRINSICS_)
+    XMVECTOR Result;
+    Result.vector4_f32[0] = pSource->x;
+    Result.vector4_f32[1] = pSource->y;
+    Result.vector4_f32[2] = 0.f;
+    Result.vector4_f32[3] = 0.f;
+    return Result;
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+    __n64 zero = vdup_n_u32(0);
+    __n64 lo = vld1_f32( reinterpret_cast<const float*>(pSource) );
+    return vcombine_f32( lo, zero );
+#elif defined(_XM_SSE_INTRINSICS_)
+    // Two scalar loads interleaved into {x, y, 0, 0}.
+    __m128 vx = _mm_load_ss( &pSource->x );
+    __m128 vy = _mm_load_ss( &pSource->y );
+    return _mm_unpacklo_ps( vx, vy );
+#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+// Loads a 16-byte-aligned XMFLOAT2A into lanes x and y; z and w are zeroed.
+_Use_decl_annotations_
+inline XMVECTOR XMLoadFloat2A
+(
+    const XMFLOAT2A* pSource
+)
+{
+    assert(pSource);
+    assert(((uintptr_t)pSource & 0xF) == 0);
+#if defined(_XM_NO_INTRINSICS_)
+    XMVECTOR V;
+    V.vector4_f32[0] = pSource->x;
+    V.vector4_f32[1] = pSource->y;
+    V.vector4_f32[2] = 0.f;
+    V.vector4_f32[3] = 0.f;
+    return V;
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+    __n64 x = vld1_f32_ex( reinterpret_cast<const float*>(pSource), 64 );
+    __n64 zero = vdup_n_u32(0);
+    return vcombine_f32( x, zero );
+#elif defined(_XM_SSE_INTRINSICS_)
+    // 64-bit load into the low half; the high half is zeroed by the intrinsic.
+    __m128i V = _mm_loadl_epi64( reinterpret_cast<const __m128i*>(pSource) );
+    return reinterpret_cast<__m128 *>(&V)[0];
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+// Loads an XMINT2 (two signed 32-bit ints) and converts to floats in lanes
+// x and y; z and w are zeroed.
+_Use_decl_annotations_
+inline XMVECTOR XMLoadSInt2
+(
+    const XMINT2* pSource
+)
+{
+    assert(pSource);
+#if defined(_XM_NO_INTRINSICS_)
+    XMVECTOR V;
+    V.vector4_f32[0] = (float)pSource->x;
+    V.vector4_f32[1] = (float)pSource->y;
+    V.vector4_f32[2] = 0.f;
+    V.vector4_f32[3] = 0.f;
+    return V;
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+    __n64 x = vld1_s32( reinterpret_cast<const int32_t*>(pSource) );
+    __n64 v = vcvt_f32_s32( x );
+    __n64 zero = vdup_n_u32(0);
+    return vcombine_s32( v, zero );
+#elif defined(_XM_SSE_INTRINSICS_)
+    // Load the raw int bits into {x, y, 0, 0}, then convert all four lanes
+    // (the zero lanes convert to 0.0f).
+    __m128 x = _mm_load_ss( reinterpret_cast<const float*>(&pSource->x) );
+    __m128 y = _mm_load_ss( reinterpret_cast<const float*>(&pSource->y) );
+    __m128 V = _mm_unpacklo_ps( x, y );
+    return _mm_cvtepi32_ps(_mm_castps_si128(V));
+#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+// Loads an XMUINT2 (two unsigned 32-bit ints) and converts to floats in lanes
+// x and y; z and w are zeroed.  SSE has no unsigned convert, so lanes with the
+// top bit set are converted as signed and then fixed up by adding 2^31.
+_Use_decl_annotations_
+inline XMVECTOR XMLoadUInt2
+(
+    const XMUINT2* pSource
+)
+{
+    assert(pSource);
+#if defined(_XM_NO_INTRINSICS_)
+    XMVECTOR V;
+    V.vector4_f32[0] = (float)pSource->x;
+    V.vector4_f32[1] = (float)pSource->y;
+    V.vector4_f32[2] = 0.f;
+    V.vector4_f32[3] = 0.f;
+    return V;
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+    __n64 x = vld1_u32( reinterpret_cast<const uint32_t*>(pSource) );
+    __n64 v = vcvt_f32_u32( x );
+    __n64 zero = vdup_n_u32(0);
+    return vcombine_u32( v, zero );
+#elif defined(_XM_SSE_INTRINSICS_)
+    __m128 x = _mm_load_ss( reinterpret_cast<const float*>(&pSource->x) );
+    __m128 y = _mm_load_ss( reinterpret_cast<const float*>(&pSource->y) );
+    __m128 V = _mm_unpacklo_ps( x, y );
+    // For the values that are higher than 0x7FFFFFFF, a fixup is needed
+    // Determine which ones need the fix.
+    XMVECTOR vMask = _mm_and_ps(V,g_XMNegativeZero);
+    // Force all values positive
+    XMVECTOR vResult = _mm_xor_ps(V,vMask);
+    // Convert to floats
+    vResult = _mm_cvtepi32_ps(_mm_castps_si128(vResult));
+    // Convert 0x80000000 -> 0xFFFFFFFF
+    __m128i iMask = _mm_srai_epi32(_mm_castps_si128(vMask),31);
+    // For only the ones that are too big, add the fixup
+    vMask = _mm_and_ps(_mm_castsi128_ps(iMask),g_XMFixUnsigned);
+    vResult = _mm_add_ps(vResult,vMask);
+    return vResult;
+#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+// Loads three consecutive uint32_t values into lanes x, y, z; w is zeroed.
+// No 16-byte alignment of pSource is required.
+_Use_decl_annotations_
+inline XMVECTOR XMLoadInt3
+(
+    const uint32_t* pSource
+)
+{
+    assert(pSource);
+#if defined(_XM_NO_INTRINSICS_)
+    XMVECTOR V;
+    V.vector4_u32[0] = pSource[0];
+    V.vector4_u32[1] = pSource[1];
+    V.vector4_u32[2] = pSource[2];
+    V.vector4_u32[3] = 0;
+    return V;
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+    __n64 x = vld1_u32( pSource );
+    __n64 zero = vdup_n_u32(0);
+    __n64 y = vld1_lane_u32( pSource+2, zero, 0 );
+    return vcombine_u32( x, y );
+#elif defined(_XM_SSE_INTRINSICS_)
+    // Three scalar loads combined into {x, y, z, 0}.
+    __m128 x = _mm_load_ss( reinterpret_cast<const float*>(pSource) );
+    __m128 y = _mm_load_ss( reinterpret_cast<const float*>(pSource+1) );
+    __m128 z = _mm_load_ss( reinterpret_cast<const float*>(pSource+2) );
+    __m128 xy = _mm_unpacklo_ps( x, y );
+    return _mm_movelh_ps( xy, z );
+#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+// Loads three uint32_t values from a 16-byte-aligned source into lanes
+// x, y, z; w is forced to zero.  Note: the intrinsic paths read a full
+// 16 bytes, so the source allocation must extend at least one extra element.
+_Use_decl_annotations_
+inline XMVECTOR XMLoadInt3A
+(
+    const uint32_t* pSource
+)
+{
+    assert(pSource);
+    assert(((uintptr_t)pSource & 0xF) == 0);
+#if defined(_XM_NO_INTRINSICS_)
+    XMVECTOR V;
+    V.vector4_u32[0] = pSource[0];
+    V.vector4_u32[1] = pSource[1];
+    V.vector4_u32[2] = pSource[2];
+    V.vector4_u32[3] = 0;
+    return V;
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+    // Reads an extra integer which is zero'd
+    __n128 V = vld1q_u32_ex( pSource, 128 );
+    return vsetq_lane_u32( 0, V, 3 );
+#elif defined(_XM_SSE_INTRINSICS_)
+    // Reads an extra integer which is zero'd
+    __m128i V = _mm_load_si128( reinterpret_cast<const __m128i*>(pSource) );
+    V = _mm_and_si128( V, g_XMMask3 );
+    return reinterpret_cast<__m128 *>(&V)[0];
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+// Loads an XMFLOAT3 into lanes x, y, z; w is zeroed.
+// No 16-byte alignment of pSource is required.
+_Use_decl_annotations_
+inline XMVECTOR XMLoadFloat3
+(
+    const XMFLOAT3* pSource
+)
+{
+    assert(pSource);
+#if defined(_XM_NO_INTRINSICS_)
+    XMVECTOR V;
+    V.vector4_f32[0] = pSource->x;
+    V.vector4_f32[1] = pSource->y;
+    V.vector4_f32[2] = pSource->z;
+    V.vector4_f32[3] = 0.f;
+    return V;
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+    __n64 x = vld1_f32( reinterpret_cast<const float*>(pSource) );
+    __n64 zero = vdup_n_u32(0);
+    __n64 y = vld1_lane_f32( reinterpret_cast<const float*>(pSource)+2, zero, 0 );
+    return vcombine_f32( x, y );
+#elif defined(_XM_SSE_INTRINSICS_)
+    // Three scalar loads combined into {x, y, z, 0}.
+    __m128 x = _mm_load_ss( &pSource->x );
+    __m128 y = _mm_load_ss( &pSource->y );
+    __m128 z = _mm_load_ss( &pSource->z );
+    __m128 xy = _mm_unpacklo_ps( x, y );
+    return _mm_movelh_ps( xy, z );
+#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+// Loads a 16-byte-aligned XMFLOAT3A into lanes x, y, z; w is forced to zero.
+// Note: the intrinsic paths read a full 16 bytes, so the source allocation
+// must extend at least one extra float.
+_Use_decl_annotations_
+inline XMVECTOR XMLoadFloat3A
+(
+    const XMFLOAT3A* pSource
+)
+{
+    assert(pSource);
+    assert(((uintptr_t)pSource & 0xF) == 0);
+#if defined(_XM_NO_INTRINSICS_)
+    XMVECTOR V;
+    V.vector4_f32[0] = pSource->x;
+    V.vector4_f32[1] = pSource->y;
+    V.vector4_f32[2] = pSource->z;
+    V.vector4_f32[3] = 0.f;
+    return V;
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+    // Reads an extra float which is zero'd
+    __n128 V = vld1q_f32_ex( reinterpret_cast<const float*>(pSource), 128 );
+    return vsetq_lane_f32( 0, V, 3 );
+#elif defined(_XM_SSE_INTRINSICS_)
+    // Reads an extra float which is zero'd
+    __m128 V = _mm_load_ps( &pSource->x );
+    return _mm_and_ps( V, g_XMMask3 );
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+// Loads an XMINT3 (three signed 32-bit ints) and converts to floats in lanes
+// x, y, z; w is zeroed.
+_Use_decl_annotations_
+inline XMVECTOR XMLoadSInt3
+(
+    const XMINT3* pSource
+)
+{
+    assert(pSource);
+#if defined(_XM_NO_INTRINSICS_)
+
+    XMVECTOR V;
+    V.vector4_f32[0] = (float)pSource->x;
+    V.vector4_f32[1] = (float)pSource->y;
+    V.vector4_f32[2] = (float)pSource->z;
+    V.vector4_f32[3] = 0.f;
+    return V;
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+    __n64 x = vld1_s32( reinterpret_cast<const int32_t*>(pSource) );
+    __n64 zero = vdup_n_u32(0);
+    __n64 y = vld1_lane_s32( reinterpret_cast<const int32_t*>(pSource)+2, zero, 0 );
+    __n128 v = vcombine_s32( x, y );
+    return vcvtq_f32_s32( v );
+#elif defined(_XM_SSE_INTRINSICS_)
+    // Load the raw int bits into {x, y, z, 0}, then convert all four lanes
+    // (the zero lane converts to 0.0f).
+    __m128 x = _mm_load_ss( reinterpret_cast<const float*>(&pSource->x) );
+    __m128 y = _mm_load_ss( reinterpret_cast<const float*>(&pSource->y) );
+    __m128 z = _mm_load_ss( reinterpret_cast<const float*>(&pSource->z) );
+    __m128 xy = _mm_unpacklo_ps( x, y );
+    __m128 V = _mm_movelh_ps( xy, z );
+    return _mm_cvtepi32_ps(_mm_castps_si128(V));
+#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+// Loads an XMUINT3 (three unsigned 32-bit ints) and converts to floats in
+// lanes x, y, z; w is zeroed.  SSE has no unsigned convert, so lanes with the
+// top bit set are converted as signed and then fixed up by adding 2^31.
+_Use_decl_annotations_
+inline XMVECTOR XMLoadUInt3
+(
+    const XMUINT3* pSource
+)
+{
+    assert(pSource);
+#if defined(_XM_NO_INTRINSICS_)
+    XMVECTOR V;
+    V.vector4_f32[0] = (float)pSource->x;
+    V.vector4_f32[1] = (float)pSource->y;
+    V.vector4_f32[2] = (float)pSource->z;
+    V.vector4_f32[3] = 0.f;
+    return V;
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+    __n64 x = vld1_u32( reinterpret_cast<const uint32_t*>(pSource) );
+    __n64 zero = vdup_n_u32(0);
+    __n64 y = vld1_lane_u32( reinterpret_cast<const uint32_t*>(pSource)+2, zero, 0 );
+    __n128 v = vcombine_u32( x, y );
+    return vcvtq_f32_u32( v );
+#elif defined(_XM_SSE_INTRINSICS_)
+    __m128 x = _mm_load_ss( reinterpret_cast<const float*>(&pSource->x) );
+    __m128 y = _mm_load_ss( reinterpret_cast<const float*>(&pSource->y) );
+    __m128 z = _mm_load_ss( reinterpret_cast<const float*>(&pSource->z) );
+    __m128 xy = _mm_unpacklo_ps( x, y );
+    __m128 V = _mm_movelh_ps( xy, z );
+    // For the values that are higher than 0x7FFFFFFF, a fixup is needed
+    // Determine which ones need the fix.
+    XMVECTOR vMask = _mm_and_ps(V,g_XMNegativeZero);
+    // Force all values positive
+    XMVECTOR vResult = _mm_xor_ps(V,vMask);
+    // Convert to floats
+    vResult = _mm_cvtepi32_ps(_mm_castps_si128(vResult));
+    // Convert 0x80000000 -> 0xFFFFFFFF
+    __m128i iMask = _mm_srai_epi32(_mm_castps_si128(vMask),31);
+    // For only the ones that are too big, add the fixup
+    vMask = _mm_and_ps(_mm_castsi128_ps(iMask),g_XMFixUnsigned);
+    vResult = _mm_add_ps(vResult,vMask);
+    return vResult;
+
+#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+// Loads four consecutive uint32_t values into lanes x, y, z, w.
+// No 16-byte alignment of pSource is required.
+_Use_decl_annotations_
+inline XMVECTOR XMLoadInt4
+(
+    const uint32_t* pSource
+)
+{
+    assert(pSource);
+
+#if defined(_XM_NO_INTRINSICS_)
+    XMVECTOR Result;
+    for (uint32_t i = 0; i < 4; ++i)
+    {
+        Result.vector4_u32[i] = pSource[i];
+    }
+    return Result;
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+    return vld1q_u32( pSource );
+#elif defined(_XM_SSE_INTRINSICS_)
+    // Unaligned integer load, reinterpreted as the float vector type.
+    __m128i vInt = _mm_loadu_si128( reinterpret_cast<const __m128i*>(pSource) );
+    return reinterpret_cast<__m128 *>(&vInt)[0];
+#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+// Loads four uint32_t values from a 16-byte-aligned source into lanes
+// x, y, z, w.
+_Use_decl_annotations_
+inline XMVECTOR XMLoadInt4A
+(
+    const uint32_t* pSource
+)
+{
+    assert(pSource);
+    assert(((uintptr_t)pSource & 0xF) == 0);
+#if defined(_XM_NO_INTRINSICS_)
+    XMVECTOR V;
+    V.vector4_u32[0] = pSource[0];
+    V.vector4_u32[1] = pSource[1];
+    V.vector4_u32[2] = pSource[2];
+    V.vector4_u32[3] = pSource[3];
+    return V;
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+    return vld1q_u32_ex( pSource, 128 );
+#elif defined(_XM_SSE_INTRINSICS_)
+    __m128i V = _mm_load_si128( reinterpret_cast<const __m128i*>(pSource) );
+    return reinterpret_cast<__m128 *>(&V)[0];
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+// Loads an XMFLOAT4 into lanes x, y, z, w.
+// No 16-byte alignment of pSource is required.
+_Use_decl_annotations_
+inline XMVECTOR XMLoadFloat4
+(
+    const XMFLOAT4* pSource
+)
+{
+    assert(pSource);
+#if defined(_XM_NO_INTRINSICS_)
+    // XMFLOAT4 is four contiguous floats, so copy it element-wise
+    // (the NEON branch below relies on the same layout).
+    const float* pElems = reinterpret_cast<const float*>(pSource);
+    XMVECTOR Result;
+    for (uint32_t i = 0; i < 4; ++i)
+    {
+        Result.vector4_f32[i] = pElems[i];
+    }
+    return Result;
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+    return vld1q_f32( reinterpret_cast<const float*>(pSource) );
+#elif defined(_XM_SSE_INTRINSICS_)
+    return _mm_loadu_ps( &pSource->x );
+#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+// Loads a 16-byte-aligned XMFLOAT4A into lanes x, y, z, w.
+_Use_decl_annotations_
+inline XMVECTOR XMLoadFloat4A
+(
+    const XMFLOAT4A* pSource
+)
+{
+    assert(pSource);
+    assert(((uintptr_t)pSource & 0xF) == 0);
+#if defined(_XM_NO_INTRINSICS_)
+    XMVECTOR V;
+    V.vector4_f32[0] = pSource->x;
+    V.vector4_f32[1] = pSource->y;
+    V.vector4_f32[2] = pSource->z;
+    V.vector4_f32[3] = pSource->w;
+    return V;
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+    return vld1q_f32_ex( reinterpret_cast<const float*>(pSource), 128 );
+#elif defined(_XM_SSE_INTRINSICS_)
+    return _mm_load_ps( &pSource->x );
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+// Loads an XMINT4 (four signed 32-bit ints) and converts all four lanes to
+// floats.
+_Use_decl_annotations_
+inline XMVECTOR XMLoadSInt4
+(
+    const XMINT4* pSource
+)
+{
+    assert(pSource);
+#if defined(_XM_NO_INTRINSICS_)
+
+    XMVECTOR V;
+    V.vector4_f32[0] = (float)pSource->x;
+    V.vector4_f32[1] = (float)pSource->y;
+    V.vector4_f32[2] = (float)pSource->z;
+    V.vector4_f32[3] = (float)pSource->w;
+    return V;
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+    __n128 v = vld1q_s32( reinterpret_cast<const int32_t*>(pSource) );
+    return vcvtq_f32_s32( v );
+#elif defined(_XM_SSE_INTRINSICS_)
+    __m128i V = _mm_loadu_si128( reinterpret_cast<const __m128i*>(pSource) );
+    return _mm_cvtepi32_ps(V);
+#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+// Load an XMUINT4 and convert its four unsigned 32-bit integers to floats.
+// SSE only provides a *signed* int->float conversion, so values with the top
+// bit set (>= 0x80000000) need an explicit fixup, performed below.
+_Use_decl_annotations_
+inline XMVECTOR XMLoadUInt4
+(
+ const XMUINT4* pSource
+)
+{
+ assert(pSource);
+#if defined(_XM_NO_INTRINSICS_)
+ XMVECTOR V;
+ V.vector4_f32[0] = (float)pSource->x;
+ V.vector4_f32[1] = (float)pSource->y;
+ V.vector4_f32[2] = (float)pSource->z;
+ V.vector4_f32[3] = (float)pSource->w;
+ return V;
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ __n128 v = vld1q_u32( reinterpret_cast<const uint32_t*>(pSource) );
+ return vcvtq_f32_u32( v );
+#elif defined(_XM_SSE_INTRINSICS_)
+ __m128i V = _mm_loadu_si128( reinterpret_cast<const __m128i*>(pSource) );
+ // For the values that are higher than 0x7FFFFFFF, a fixup is needed
+ // Determine which ones need the fix.
+ XMVECTOR vMask = _mm_and_ps(_mm_castsi128_ps(V),g_XMNegativeZero);
+ // Force all values positive
+ XMVECTOR vResult = _mm_xor_ps(_mm_castsi128_ps(V),vMask);
+ // Convert to floats
+ vResult = _mm_cvtepi32_ps(_mm_castps_si128(vResult));
+ // Convert 0x80000000 -> 0xFFFFFFFF
+ __m128i iMask = _mm_srai_epi32(_mm_castps_si128(vMask),31);
+ // For only the ones that are too big, add the fixup
+ vMask = _mm_and_ps(_mm_castsi128_ps(iMask),g_XMFixUnsigned);
+ vResult = _mm_add_ps(vResult,vMask);
+ return vResult;
+#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+// Load a row-major 3x3 float matrix into an XMMATRIX. The w component of the
+// first three rows is set to 0 and the fourth row to (0,0,0,1), i.e. the
+// result is the 3x3 matrix embedded in a 4x4 identity.
+// The intrinsic paths read the 9 floats as overlapping 4-float loads and
+// shuffle them back into row layout.
+_Use_decl_annotations_
+inline XMMATRIX XMLoadFloat3x3
+(
+ const XMFLOAT3X3* pSource
+)
+{
+ assert(pSource);
+#if defined(_XM_NO_INTRINSICS_)
+
+ XMMATRIX M;
+ M.r[0].vector4_f32[0] = pSource->m[0][0];
+ M.r[0].vector4_f32[1] = pSource->m[0][1];
+ M.r[0].vector4_f32[2] = pSource->m[0][2];
+ M.r[0].vector4_f32[3] = 0.0f;
+
+ M.r[1].vector4_f32[0] = pSource->m[1][0];
+ M.r[1].vector4_f32[1] = pSource->m[1][1];
+ M.r[1].vector4_f32[2] = pSource->m[1][2];
+ M.r[1].vector4_f32[3] = 0.0f;
+
+ M.r[2].vector4_f32[0] = pSource->m[2][0];
+ M.r[2].vector4_f32[1] = pSource->m[2][1];
+ M.r[2].vector4_f32[2] = pSource->m[2][2];
+ M.r[2].vector4_f32[3] = 0.0f;
+ M.r[3].vector4_f32[0] = 0.0f;
+ M.r[3].vector4_f32[1] = 0.0f;
+ M.r[3].vector4_f32[2] = 0.0f;
+ M.r[3].vector4_f32[3] = 1.0f;
+ return M;
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ // v0 = m00 m01 m02 m10, v1 = m11 m12 m20 m21, v2 = m22 (scalar in low lane)
+ __n128 v0 = vld1q_f32( &pSource->m[0][0] );
+ __n128 v1 = vld1q_f32( &pSource->m[1][1] );
+ __n64 v2 = vcreate_f32( (uint64_t)*(const uint32_t*)&pSource->m[2][2] );
+ __n128 T = vextq_f32( v0, v1, 3 );
+
+ XMMATRIX M;
+ M.r[0] = vandq_u32( v0, g_XMMask3 );
+ M.r[1] = vandq_u32( T, g_XMMask3 );
+ M.r[2] = vcombine_f32( vget_high_f32(v1), v2 );
+ M.r[3] = g_XMIdentityR3;
+ return M;
+#elif defined(_XM_SSE_INTRINSICS_)
+ __m128 Z = _mm_setzero_ps();
+
+ // V1 = m00 m01 m02 m10, V2 = m11 m12 m20 m21, V3 = m22 0 0 0
+ __m128 V1 = _mm_loadu_ps( &pSource->m[0][0] );
+ __m128 V2 = _mm_loadu_ps( &pSource->m[1][1] );
+ __m128 V3 = _mm_load_ss( &pSource->m[2][2] );
+
+ __m128 T1 = _mm_unpackhi_ps( V1, Z );
+ __m128 T2 = _mm_unpacklo_ps( V2, Z );
+ __m128 T3 = _mm_shuffle_ps( V3, T2, _MM_SHUFFLE( 0, 1, 0, 0 ) );
+ __m128 T4 = _mm_movehl_ps( T2, T3 );
+ __m128 T5 = _mm_movehl_ps( Z, T1 );
+
+ XMMATRIX M;
+ M.r[0] = _mm_movelh_ps( V1, T1 );
+ M.r[1] = _mm_add_ps( T4, T5 );
+ M.r[2] = _mm_shuffle_ps( V2, V3, _MM_SHUFFLE(1, 0, 3, 2) );
+ M.r[3] = g_XMIdentityR3;
+ return M;
+#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+// Load a row-major 4x3 float matrix (12 floats, unaligned) into an XMMATRIX.
+// Rows 0-2 get w = 0; row 3 is (m30, m31, m32, 1), the usual affine layout.
+_Use_decl_annotations_
+inline XMMATRIX XMLoadFloat4x3
+(
+ const XMFLOAT4X3* pSource
+)
+{
+ assert(pSource);
+#if defined(_XM_NO_INTRINSICS_)
+
+ XMMATRIX M;
+ M.r[0].vector4_f32[0] = pSource->m[0][0];
+ M.r[0].vector4_f32[1] = pSource->m[0][1];
+ M.r[0].vector4_f32[2] = pSource->m[0][2];
+ M.r[0].vector4_f32[3] = 0.0f;
+
+ M.r[1].vector4_f32[0] = pSource->m[1][0];
+ M.r[1].vector4_f32[1] = pSource->m[1][1];
+ M.r[1].vector4_f32[2] = pSource->m[1][2];
+ M.r[1].vector4_f32[3] = 0.0f;
+
+ M.r[2].vector4_f32[0] = pSource->m[2][0];
+ M.r[2].vector4_f32[1] = pSource->m[2][1];
+ M.r[2].vector4_f32[2] = pSource->m[2][2];
+ M.r[2].vector4_f32[3] = 0.0f;
+
+ M.r[3].vector4_f32[0] = pSource->m[3][0];
+ M.r[3].vector4_f32[1] = pSource->m[3][1];
+ M.r[3].vector4_f32[2] = pSource->m[3][2];
+ M.r[3].vector4_f32[3] = 1.0f;
+ return M;
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ // Three overlapping 4-float loads cover the 12 source floats.
+ __n128 v0 = vld1q_f32( &pSource->m[0][0] );
+ __n128 v1 = vld1q_f32( &pSource->m[1][1] );
+ __n128 v2 = vld1q_f32( &pSource->m[2][2] );
+
+ __n128 T1 = vextq_f32( v0, v1, 3 );
+ __n128 T2 = vcombine_f32( vget_high_f32(v1), vget_low_f32(v2) );
+ __n128 T3 = vextq_f32( v2, v2, 1 );
+
+ XMMATRIX M;
+ M.r[0] = vandq_u32( v0, g_XMMask3 );
+ M.r[1] = vandq_u32( T1, g_XMMask3 );
+ M.r[2] = vandq_u32( T2, g_XMMask3 );
+ M.r[3] = vsetq_lane_f32( 1.f, T3, 3 );
+ return M;
+#elif defined(_XM_SSE_INTRINSICS_)
+ // Use unaligned load instructions to
+ // load the 12 floats
+ // vTemp1 = x1,y1,z1,x2
+ XMVECTOR vTemp1 = _mm_loadu_ps(&pSource->m[0][0]);
+ // vTemp2 = y2,z2,x3,y3
+ XMVECTOR vTemp2 = _mm_loadu_ps(&pSource->m[1][1]);
+ // vTemp4 = z3,x4,y4,z4
+ XMVECTOR vTemp4 = _mm_loadu_ps(&pSource->m[2][2]);
+ // vTemp3 = x3,y3,z3,z3
+ XMVECTOR vTemp3 = _mm_shuffle_ps(vTemp2,vTemp4,_MM_SHUFFLE(0,0,3,2));
+ // vTemp2 = y2,z2,x2,x2
+ vTemp2 = _mm_shuffle_ps(vTemp2,vTemp1,_MM_SHUFFLE(3,3,1,0));
+ // vTemp2 = x2,y2,z2,z2
+ vTemp2 = XM_PERMUTE_PS(vTemp2,_MM_SHUFFLE(1,1,0,2));
+ // vTemp1 = x1,y1,z1,0
+ vTemp1 = _mm_and_ps(vTemp1,g_XMMask3);
+ // vTemp2 = x2,y2,z2,0
+ vTemp2 = _mm_and_ps(vTemp2,g_XMMask3);
+ // vTemp3 = x3,y3,z3,0
+ vTemp3 = _mm_and_ps(vTemp3,g_XMMask3);
+ // vTemp4i = x4,y4,z4,0
+ __m128i vTemp4i = _mm_srli_si128(_mm_castps_si128(vTemp4),32/8);
+ // vTemp4i = x4,y4,z4,1.0f
+ vTemp4i = _mm_or_si128(vTemp4i,g_XMIdentityR3);
+ XMMATRIX M(vTemp1,
+ vTemp2,
+ vTemp3,
+ _mm_castsi128_ps(vTemp4i));
+ return M;
+#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+// Aligned variant of XMLoadFloat4x3: identical unpacking, but pSource must be
+// 16-byte aligned, which permits aligned 4-float loads.
+_Use_decl_annotations_
+inline XMMATRIX XMLoadFloat4x3A
+(
+ const XMFLOAT4X3A* pSource
+)
+{
+ assert(pSource);
+ assert(((uintptr_t)pSource & 0xF) == 0);
+#if defined(_XM_NO_INTRINSICS_)
+
+ XMMATRIX M;
+ M.r[0].vector4_f32[0] = pSource->m[0][0];
+ M.r[0].vector4_f32[1] = pSource->m[0][1];
+ M.r[0].vector4_f32[2] = pSource->m[0][2];
+ M.r[0].vector4_f32[3] = 0.0f;
+
+ M.r[1].vector4_f32[0] = pSource->m[1][0];
+ M.r[1].vector4_f32[1] = pSource->m[1][1];
+ M.r[1].vector4_f32[2] = pSource->m[1][2];
+ M.r[1].vector4_f32[3] = 0.0f;
+
+ M.r[2].vector4_f32[0] = pSource->m[2][0];
+ M.r[2].vector4_f32[1] = pSource->m[2][1];
+ M.r[2].vector4_f32[2] = pSource->m[2][2];
+ M.r[2].vector4_f32[3] = 0.0f;
+
+ M.r[3].vector4_f32[0] = pSource->m[3][0];
+ M.r[3].vector4_f32[1] = pSource->m[3][1];
+ M.r[3].vector4_f32[2] = pSource->m[3][2];
+ M.r[3].vector4_f32[3] = 1.0f;
+ return M;
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ // NOTE: only &m[0][0] is 16-byte aligned; the other two loads start at
+ // offsets 16 and 32 bytes... the _ex hint of 128 bits matches upstream.
+ __n128 v0 = vld1q_f32_ex( &pSource->m[0][0], 128 );
+ __n128 v1 = vld1q_f32_ex( &pSource->m[1][1], 128 );
+ __n128 v2 = vld1q_f32_ex( &pSource->m[2][2], 128 );
+
+ __n128 T1 = vextq_f32( v0, v1, 3 );
+ __n128 T2 = vcombine_f32( vget_high_f32(v1), vget_low_f32(v2) );
+ __n128 T3 = vextq_f32( v2, v2, 1 );
+
+ XMMATRIX M;
+ M.r[0] = vandq_u32( v0, g_XMMask3 );
+ M.r[1] = vandq_u32( T1, g_XMMask3 );
+ M.r[2] = vandq_u32( T2, g_XMMask3 );
+ M.r[3] = vsetq_lane_f32( 1.f, T3, 3 );
+ return M;
+#elif defined(_XM_SSE_INTRINSICS_)
+ // Use aligned load instructions to
+ // load the 12 floats
+ // vTemp1 = x1,y1,z1,x2
+ XMVECTOR vTemp1 = _mm_load_ps(&pSource->m[0][0]);
+ // vTemp2 = y2,z2,x3,y3
+ XMVECTOR vTemp2 = _mm_load_ps(&pSource->m[1][1]);
+ // vTemp4 = z3,x4,y4,z4
+ XMVECTOR vTemp4 = _mm_load_ps(&pSource->m[2][2]);
+ // vTemp3 = x3,y3,z3,z3
+ XMVECTOR vTemp3 = _mm_shuffle_ps(vTemp2,vTemp4,_MM_SHUFFLE(0,0,3,2));
+ // vTemp2 = y2,z2,x2,x2
+ vTemp2 = _mm_shuffle_ps(vTemp2,vTemp1,_MM_SHUFFLE(3,3,1,0));
+ // vTemp2 = x2,y2,z2,z2
+ vTemp2 = XM_PERMUTE_PS(vTemp2,_MM_SHUFFLE(1,1,0,2));
+ // vTemp1 = x1,y1,z1,0
+ vTemp1 = _mm_and_ps(vTemp1,g_XMMask3);
+ // vTemp2 = x2,y2,z2,0
+ vTemp2 = _mm_and_ps(vTemp2,g_XMMask3);
+ // vTemp3 = x3,y3,z3,0
+ vTemp3 = _mm_and_ps(vTemp3,g_XMMask3);
+ // vTemp4i = x4,y4,z4,0
+ __m128i vTemp4i = _mm_srli_si128(_mm_castps_si128(vTemp4),32/8);
+ // vTemp4i = x4,y4,z4,1.0f
+ vTemp4i = _mm_or_si128(vTemp4i,g_XMIdentityR3);
+ XMMATRIX M(vTemp1,
+ vTemp2,
+ vTemp3,
+ _mm_castsi128_ps(vTemp4i));
+ return M;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+// Load a full row-major 4x4 float matrix (unaligned) into an XMMATRIX,
+// one row per 4-float load.
+_Use_decl_annotations_
+inline XMMATRIX XMLoadFloat4x4
+(
+ const XMFLOAT4X4* pSource
+)
+{
+ assert(pSource);
+#if defined(_XM_NO_INTRINSICS_)
+
+ XMMATRIX M;
+ M.r[0].vector4_f32[0] = pSource->m[0][0];
+ M.r[0].vector4_f32[1] = pSource->m[0][1];
+ M.r[0].vector4_f32[2] = pSource->m[0][2];
+ M.r[0].vector4_f32[3] = pSource->m[0][3];
+
+ M.r[1].vector4_f32[0] = pSource->m[1][0];
+ M.r[1].vector4_f32[1] = pSource->m[1][1];
+ M.r[1].vector4_f32[2] = pSource->m[1][2];
+ M.r[1].vector4_f32[3] = pSource->m[1][3];
+
+ M.r[2].vector4_f32[0] = pSource->m[2][0];
+ M.r[2].vector4_f32[1] = pSource->m[2][1];
+ M.r[2].vector4_f32[2] = pSource->m[2][2];
+ M.r[2].vector4_f32[3] = pSource->m[2][3];
+
+ M.r[3].vector4_f32[0] = pSource->m[3][0];
+ M.r[3].vector4_f32[1] = pSource->m[3][1];
+ M.r[3].vector4_f32[2] = pSource->m[3][2];
+ M.r[3].vector4_f32[3] = pSource->m[3][3];
+ return M;
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ XMMATRIX M;
+ M.r[0] = vld1q_f32( reinterpret_cast<const float*>(&pSource->_11) );
+ M.r[1] = vld1q_f32( reinterpret_cast<const float*>(&pSource->_21) );
+ M.r[2] = vld1q_f32( reinterpret_cast<const float*>(&pSource->_31) );
+ M.r[3] = vld1q_f32( reinterpret_cast<const float*>(&pSource->_41) );
+ return M;
+#elif defined(_XM_SSE_INTRINSICS_)
+ XMMATRIX M;
+ M.r[0] = _mm_loadu_ps( &pSource->_11 );
+ M.r[1] = _mm_loadu_ps( &pSource->_21 );
+ M.r[2] = _mm_loadu_ps( &pSource->_31 );
+ M.r[3] = _mm_loadu_ps( &pSource->_41 );
+ return M;
+#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+// Aligned variant of XMLoadFloat4x4: pSource must be 16-byte aligned,
+// allowing one aligned 4-float load per row.
+_Use_decl_annotations_
+inline XMMATRIX XMLoadFloat4x4A
+(
+ const XMFLOAT4X4A* pSource
+)
+{
+ assert(pSource);
+ assert(((uintptr_t)pSource & 0xF) == 0);
+#if defined(_XM_NO_INTRINSICS_)
+
+ XMMATRIX M;
+ M.r[0].vector4_f32[0] = pSource->m[0][0];
+ M.r[0].vector4_f32[1] = pSource->m[0][1];
+ M.r[0].vector4_f32[2] = pSource->m[0][2];
+ M.r[0].vector4_f32[3] = pSource->m[0][3];
+
+ M.r[1].vector4_f32[0] = pSource->m[1][0];
+ M.r[1].vector4_f32[1] = pSource->m[1][1];
+ M.r[1].vector4_f32[2] = pSource->m[1][2];
+ M.r[1].vector4_f32[3] = pSource->m[1][3];
+
+ M.r[2].vector4_f32[0] = pSource->m[2][0];
+ M.r[2].vector4_f32[1] = pSource->m[2][1];
+ M.r[2].vector4_f32[2] = pSource->m[2][2];
+ M.r[2].vector4_f32[3] = pSource->m[2][3];
+
+ M.r[3].vector4_f32[0] = pSource->m[3][0];
+ M.r[3].vector4_f32[1] = pSource->m[3][1];
+ M.r[3].vector4_f32[2] = pSource->m[3][2];
+ M.r[3].vector4_f32[3] = pSource->m[3][3];
+ return M;
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ XMMATRIX M;
+ M.r[0] = vld1q_f32_ex( reinterpret_cast<const float*>(&pSource->_11), 128 );
+ M.r[1] = vld1q_f32_ex( reinterpret_cast<const float*>(&pSource->_21), 128 );
+ M.r[2] = vld1q_f32_ex( reinterpret_cast<const float*>(&pSource->_31), 128 );
+ M.r[3] = vld1q_f32_ex( reinterpret_cast<const float*>(&pSource->_41), 128 );
+ return M;
+#elif defined(_XM_SSE_INTRINSICS_)
+ XMMATRIX M;
+ M.r[0] = _mm_load_ps( &pSource->_11 );
+ M.r[1] = _mm_load_ps( &pSource->_21 );
+ M.r[2] = _mm_load_ps( &pSource->_31 );
+ M.r[3] = _mm_load_ps( &pSource->_41 );
+ return M;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+/****************************************************************************
+ *
+ * Vector and matrix store operations
+ *
+ ****************************************************************************/
+// Store lane 0 of V to *pDestination as a 32-bit unsigned integer
+// (raw bit copy; no float->int conversion).
+_Use_decl_annotations_
+inline void XMStoreInt
+(
+ uint32_t* pDestination,
+ FXMVECTOR V
+)
+{
+ assert(pDestination);
+#if defined(_XM_NO_INTRINSICS_)
+ *pDestination = XMVectorGetIntX( V );
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ vst1q_lane_u32( pDestination, V, 0 );
+#elif defined(_XM_SSE_INTRINSICS_)
+ // store_ss writes the low 32 bits regardless of their interpretation.
+ _mm_store_ss( reinterpret_cast<float*>(pDestination), V );
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+// Store lane 0 of V to *pDestination as a float.
+_Use_decl_annotations_
+inline void XMStoreFloat
+(
+ float* pDestination,
+ FXMVECTOR V
+)
+{
+ assert(pDestination);
+#if defined(_XM_NO_INTRINSICS_)
+ *pDestination = XMVectorGetX( V );
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ vst1q_lane_f32( pDestination, V, 0 );
+#elif defined(_XM_SSE_INTRINSICS_)
+ _mm_store_ss( pDestination, V );
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+// Store lanes 0 and 1 of V as two 32-bit unsigned integers (raw bits),
+// no alignment requirement on pDestination.
+_Use_decl_annotations_
+inline void XMStoreInt2
+(
+ uint32_t* pDestination,
+ FXMVECTOR V
+)
+{
+ assert(pDestination);
+#if defined(_XM_NO_INTRINSICS_)
+ pDestination[0] = V.vector4_u32[0];
+ pDestination[1] = V.vector4_u32[1];
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ __n64 VL = vget_low_u32(V);
+ vst1_u32( pDestination, VL );
+#elif defined(_XM_SSE_INTRINSICS_)
+ // Two scalar stores; T holds lane 1 splatted into lane 0.
+ XMVECTOR T = XM_PERMUTE_PS( V, _MM_SHUFFLE( 1, 1, 1, 1 ) );
+ _mm_store_ss( reinterpret_cast<float*>(&pDestination[0]), V );
+ _mm_store_ss( reinterpret_cast<float*>(&pDestination[1]), T );
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+// Aligned variant of XMStoreInt2: pDestination must be 16-byte aligned,
+// allowing a single 64-bit store.
+_Use_decl_annotations_
+inline void XMStoreInt2A
+(
+ uint32_t* pDestination,
+ FXMVECTOR V
+)
+{
+ assert(pDestination);
+ assert(((uintptr_t)pDestination & 0xF) == 0);
+#if defined(_XM_NO_INTRINSICS_)
+ pDestination[0] = V.vector4_u32[0];
+ pDestination[1] = V.vector4_u32[1];
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ __n64 VL = vget_low_u32(V);
+ vst1_u32_ex( pDestination, VL, 64 );
+#elif defined(_XM_SSE_INTRINSICS_)
+ _mm_storel_epi64( reinterpret_cast<__m128i*>(pDestination), _mm_castps_si128(V) );
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+// Store lanes 0 and 1 of V into an (unaligned) XMFLOAT2 as (x, y).
+_Use_decl_annotations_
+inline void XMStoreFloat2
+(
+ XMFLOAT2* pDestination,
+ FXMVECTOR V
+)
+{
+ assert(pDestination);
+#if defined(_XM_NO_INTRINSICS_)
+ pDestination->x = V.vector4_f32[0];
+ pDestination->y = V.vector4_f32[1];
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ __n64 VL = vget_low_f32(V);
+ vst1_f32( reinterpret_cast<float*>(pDestination), VL );
+#elif defined(_XM_SSE_INTRINSICS_)
+ XMVECTOR T = XM_PERMUTE_PS( V, _MM_SHUFFLE( 1, 1, 1, 1 ) );
+ _mm_store_ss( &pDestination->x, V );
+ _mm_store_ss( &pDestination->y, T );
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+// Aligned variant of XMStoreFloat2: pDestination must be 16-byte aligned,
+// so the pair can go out as one 64-bit store.
+_Use_decl_annotations_
+inline void XMStoreFloat2A
+(
+ XMFLOAT2A* pDestination,
+ FXMVECTOR V
+)
+{
+ assert(pDestination);
+ assert(((uintptr_t)pDestination & 0xF) == 0);
+#if defined(_XM_NO_INTRINSICS_)
+ pDestination->x = V.vector4_f32[0];
+ pDestination->y = V.vector4_f32[1];
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ __n64 VL = vget_low_f32(V);
+ vst1_f32_ex( reinterpret_cast<float*>(pDestination), VL, 64 );
+#elif defined(_XM_SSE_INTRINSICS_)
+ _mm_storel_epi64( reinterpret_cast<__m128i*>(pDestination), _mm_castps_si128(V) );
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+// Convert lanes 0 and 1 of V (floats) to signed 32-bit ints (truncation) and
+// store them into an XMINT2. On SSE, positive overflow saturates to 0x7FFFFFFF
+// instead of the hardware's 0x80000000 "integer indefinite" result.
+_Use_decl_annotations_
+inline void XMStoreSInt2
+(
+ XMINT2* pDestination,
+ FXMVECTOR V
+)
+{
+ assert(pDestination);
+#if defined(_XM_NO_INTRINSICS_)
+ pDestination->x = (int32_t)V.vector4_f32[0];
+ pDestination->y = (int32_t)V.vector4_f32[1];
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ __n64 v = vget_low_s32(V);
+ v = vcvt_s32_f32( v );
+ vst1_s32( reinterpret_cast<int32_t*>(pDestination), v );
+#elif defined(_XM_SSE_INTRINSICS_)
+ // In case of positive overflow, detect it
+ XMVECTOR vOverflow = _mm_cmpgt_ps(V,g_XMMaxInt);
+ // Float to int conversion
+ __m128i vResulti = _mm_cvttps_epi32(V);
+ // If there was positive overflow, set to 0x7FFFFFFF
+ XMVECTOR vResult = _mm_and_ps(vOverflow,g_XMAbsMask);
+ vOverflow = _mm_andnot_ps(vOverflow,_mm_castsi128_ps(vResulti));
+ vOverflow = _mm_or_ps(vOverflow,vResult);
+ // Write two ints
+ XMVECTOR T = XM_PERMUTE_PS( vOverflow, _MM_SHUFFLE( 1, 1, 1, 1 ) );
+ _mm_store_ss( reinterpret_cast<float*>(&pDestination->x), vOverflow );
+ _mm_store_ss( reinterpret_cast<float*>(&pDestination->y), T );
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+// Convert lanes 0 and 1 of V (floats) to unsigned 32-bit ints and store into
+// an XMUINT2. SSE lacks a float->uint conversion, so: clamp negatives to 0,
+// rebase values >= 2^31 before the signed convert, then undo the rebase;
+// values too large for uint32 saturate to 0xFFFFFFFF.
+_Use_decl_annotations_
+inline void XMStoreUInt2
+(
+ XMUINT2* pDestination,
+ FXMVECTOR V
+)
+{
+ assert(pDestination);
+#if defined(_XM_NO_INTRINSICS_)
+ pDestination->x = (uint32_t)V.vector4_f32[0];
+ pDestination->y = (uint32_t)V.vector4_f32[1];
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ __n64 v = vget_low_u32(V);
+ v = vcvt_u32_f32( v );
+ vst1_u32( reinterpret_cast<uint32_t*>(pDestination), v );
+#elif defined(_XM_SSE_INTRINSICS_)
+ // Clamp to >=0
+ XMVECTOR vResult = _mm_max_ps(V,g_XMZero);
+ // Any numbers that are too big, set to 0xFFFFFFFFU
+ XMVECTOR vOverflow = _mm_cmpgt_ps(vResult,g_XMMaxUInt);
+ XMVECTOR vValue = g_XMUnsignedFix;
+ // Too large for a signed integer?
+ XMVECTOR vMask = _mm_cmpge_ps(vResult,vValue);
+ // Zero for numbers lower than 0x80000000, 32768.0f*65536.0f otherwise
+ vValue = _mm_and_ps(vValue,vMask);
+ // Perform fixup only on numbers too large (Keeps low bit precision)
+ vResult = _mm_sub_ps(vResult,vValue);
+ __m128i vResulti = _mm_cvttps_epi32(vResult);
+ // Convert from signed to unsigned only if greater than 0x80000000
+ vMask = _mm_and_ps(vMask,g_XMNegativeZero);
+ vResult = _mm_xor_ps(_mm_castsi128_ps(vResulti),vMask);
+ // On those that are too large, set to 0xFFFFFFFF
+ vResult = _mm_or_ps(vResult,vOverflow);
+ // Write two uints
+ XMVECTOR T = XM_PERMUTE_PS( vResult, _MM_SHUFFLE( 1, 1, 1, 1 ) );
+ _mm_store_ss( reinterpret_cast<float*>(&pDestination->x), vResult );
+ _mm_store_ss( reinterpret_cast<float*>(&pDestination->y), T );
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+// Store lanes 0-2 of V as three 32-bit unsigned integers (raw bits),
+// no alignment requirement on pDestination.
+_Use_decl_annotations_
+inline void XMStoreInt3
+(
+ uint32_t* pDestination,
+ FXMVECTOR V
+)
+{
+ assert(pDestination);
+#if defined(_XM_NO_INTRINSICS_)
+ pDestination[0] = V.vector4_u32[0];
+ pDestination[1] = V.vector4_u32[1];
+ pDestination[2] = V.vector4_u32[2];
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ __n64 VL = vget_low_u32(V);
+ vst1_u32( pDestination, VL );
+ vst1q_lane_u32( pDestination+2, V, 2 );
+#elif defined(_XM_SSE_INTRINSICS_)
+ // Three scalar stores; T1/T2 splat lanes 1 and 2 into lane 0.
+ XMVECTOR T1 = XM_PERMUTE_PS(V,_MM_SHUFFLE(1,1,1,1));
+ XMVECTOR T2 = XM_PERMUTE_PS(V,_MM_SHUFFLE(2,2,2,2));
+ _mm_store_ss( reinterpret_cast<float*>(pDestination), V );
+ _mm_store_ss( reinterpret_cast<float*>(&pDestination[1]), T1 );
+ _mm_store_ss( reinterpret_cast<float*>(&pDestination[2]), T2 );
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+// Aligned variant of XMStoreInt3: pDestination must be 16-byte aligned, so
+// the first two lanes go out as one 64-bit store plus one scalar store.
+_Use_decl_annotations_
+inline void XMStoreInt3A
+(
+ uint32_t* pDestination,
+ FXMVECTOR V
+)
+{
+ assert(pDestination);
+ assert(((uintptr_t)pDestination & 0xF) == 0);
+#if defined(_XM_NO_INTRINSICS_)
+ pDestination[0] = V.vector4_u32[0];
+ pDestination[1] = V.vector4_u32[1];
+ pDestination[2] = V.vector4_u32[2];
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ __n64 VL = vget_low_u32(V);
+ vst1_u32_ex( pDestination, VL, 64 );
+ vst1q_lane_u32( pDestination+2, V, 2 );
+#elif defined(_XM_SSE_INTRINSICS_)
+ XMVECTOR T = XM_PERMUTE_PS(V,_MM_SHUFFLE(2,2,2,2));
+ _mm_storel_epi64( reinterpret_cast<__m128i*>(pDestination), _mm_castps_si128(V) );
+ _mm_store_ss( reinterpret_cast<float*>(&pDestination[2]), T );
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+// Store lanes 0-2 of V into an (unaligned) XMFLOAT3 as (x, y, z).
+_Use_decl_annotations_
+inline void XMStoreFloat3
+(
+ XMFLOAT3* pDestination,
+ FXMVECTOR V
+)
+{
+ assert(pDestination);
+#if defined(_XM_NO_INTRINSICS_)
+ pDestination->x = V.vector4_f32[0];
+ pDestination->y = V.vector4_f32[1];
+ pDestination->z = V.vector4_f32[2];
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ __n64 VL = vget_low_f32(V);
+ vst1_f32( reinterpret_cast<float*>(pDestination), VL );
+ vst1q_lane_f32( reinterpret_cast<float*>(pDestination)+2, V, 2 );
+#elif defined(_XM_SSE_INTRINSICS_)
+ XMVECTOR T1 = XM_PERMUTE_PS(V,_MM_SHUFFLE(1,1,1,1));
+ XMVECTOR T2 = XM_PERMUTE_PS(V,_MM_SHUFFLE(2,2,2,2));
+ _mm_store_ss( &pDestination->x, V );
+ _mm_store_ss( &pDestination->y, T1 );
+ _mm_store_ss( &pDestination->z, T2 );
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+// Aligned variant of XMStoreFloat3: pDestination must be 16-byte aligned;
+// x and y go out as one 64-bit store, z as a scalar store.
+_Use_decl_annotations_
+inline void XMStoreFloat3A
+(
+ XMFLOAT3A* pDestination,
+ FXMVECTOR V
+)
+{
+ assert(pDestination);
+ assert(((uintptr_t)pDestination & 0xF) == 0);
+#if defined(_XM_NO_INTRINSICS_)
+ pDestination->x = V.vector4_f32[0];
+ pDestination->y = V.vector4_f32[1];
+ pDestination->z = V.vector4_f32[2];
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ __n64 VL = vget_low_f32(V);
+ vst1_f32_ex( reinterpret_cast<float*>(pDestination), VL, 64 );
+ vst1q_lane_f32( reinterpret_cast<float*>(pDestination)+2, V, 2 );
+#elif defined(_XM_SSE_INTRINSICS_)
+ XMVECTOR T = XM_PERMUTE_PS(V,_MM_SHUFFLE(2,2,2,2));
+ _mm_storel_epi64( reinterpret_cast<__m128i*>(pDestination), _mm_castps_si128(V) );
+ _mm_store_ss( &pDestination->z, T );
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+// Convert lanes 0-2 of V (floats) to signed 32-bit ints (truncation) and
+// store them into an XMINT3. On SSE, positive overflow saturates to
+// 0x7FFFFFFF instead of the hardware's 0x80000000 indefinite value.
+_Use_decl_annotations_
+inline void XMStoreSInt3
+(
+ XMINT3* pDestination,
+ FXMVECTOR V
+)
+{
+ assert(pDestination);
+#if defined(_XM_NO_INTRINSICS_)
+ pDestination->x = (int32_t)V.vector4_f32[0];
+ pDestination->y = (int32_t)V.vector4_f32[1];
+ pDestination->z = (int32_t)V.vector4_f32[2];
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ __n128 v = vcvtq_s32_f32(V);
+ __n64 vL = vget_low_s32(v);
+ vst1_s32( reinterpret_cast<int32_t*>(pDestination), vL );
+ vst1q_lane_s32( reinterpret_cast<int32_t*>(pDestination)+2, v, 2 );
+#elif defined(_XM_SSE_INTRINSICS_)
+ // In case of positive overflow, detect it
+ XMVECTOR vOverflow = _mm_cmpgt_ps(V,g_XMMaxInt);
+ // Float to int conversion
+ __m128i vResulti = _mm_cvttps_epi32(V);
+ // If there was positive overflow, set to 0x7FFFFFFF
+ XMVECTOR vResult = _mm_and_ps(vOverflow,g_XMAbsMask);
+ vOverflow = _mm_andnot_ps(vOverflow,_mm_castsi128_ps(vResulti));
+ vOverflow = _mm_or_ps(vOverflow,vResult);
+ // Write 3 uints
+ XMVECTOR T1 = XM_PERMUTE_PS(vOverflow,_MM_SHUFFLE(1,1,1,1));
+ XMVECTOR T2 = XM_PERMUTE_PS(vOverflow,_MM_SHUFFLE(2,2,2,2));
+ _mm_store_ss( reinterpret_cast<float*>(&pDestination->x), vOverflow );
+ _mm_store_ss( reinterpret_cast<float*>(&pDestination->y), T1 );
+ _mm_store_ss( reinterpret_cast<float*>(&pDestination->z), T2 );
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+// Convert lanes 0-2 of V (floats) to unsigned 32-bit ints and store into an
+// XMUINT3. Same SSE strategy as XMStoreUInt2: clamp negatives to 0, rebase
+// values >= 2^31 around the signed convert, saturate overflow to 0xFFFFFFFF.
+_Use_decl_annotations_
+inline void XMStoreUInt3
+(
+ XMUINT3* pDestination,
+ FXMVECTOR V
+)
+{
+ assert(pDestination);
+#if defined(_XM_NO_INTRINSICS_)
+ pDestination->x = (uint32_t)V.vector4_f32[0];
+ pDestination->y = (uint32_t)V.vector4_f32[1];
+ pDestination->z = (uint32_t)V.vector4_f32[2];
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ __n128 v = vcvtq_u32_f32(V);
+ __n64 vL = vget_low_u32(v);
+ vst1_u32( reinterpret_cast<uint32_t*>(pDestination), vL );
+ vst1q_lane_u32( reinterpret_cast<uint32_t*>(pDestination)+2, v, 2 );
+#elif defined(_XM_SSE_INTRINSICS_)
+ // Clamp to >=0
+ XMVECTOR vResult = _mm_max_ps(V,g_XMZero);
+ // Any numbers that are too big, set to 0xFFFFFFFFU
+ XMVECTOR vOverflow = _mm_cmpgt_ps(vResult,g_XMMaxUInt);
+ XMVECTOR vValue = g_XMUnsignedFix;
+ // Too large for a signed integer?
+ XMVECTOR vMask = _mm_cmpge_ps(vResult,vValue);
+ // Zero for numbers lower than 0x80000000, 32768.0f*65536.0f otherwise
+ vValue = _mm_and_ps(vValue,vMask);
+ // Perform fixup only on numbers too large (Keeps low bit precision)
+ vResult = _mm_sub_ps(vResult,vValue);
+ __m128i vResulti = _mm_cvttps_epi32(vResult);
+ // Convert from signed to unsigned only if greater than 0x80000000
+ vMask = _mm_and_ps(vMask,g_XMNegativeZero);
+ vResult = _mm_xor_ps(_mm_castsi128_ps(vResulti),vMask);
+ // On those that are too large, set to 0xFFFFFFFF
+ vResult = _mm_or_ps(vResult,vOverflow);
+ // Write 3 uints
+ XMVECTOR T1 = XM_PERMUTE_PS(vResult,_MM_SHUFFLE(1,1,1,1));
+ XMVECTOR T2 = XM_PERMUTE_PS(vResult,_MM_SHUFFLE(2,2,2,2));
+ _mm_store_ss( reinterpret_cast<float*>(&pDestination->x), vResult );
+ _mm_store_ss( reinterpret_cast<float*>(&pDestination->y), T1 );
+ _mm_store_ss( reinterpret_cast<float*>(&pDestination->z), T2 );
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+// Store all four lanes of V as 32-bit unsigned integers (raw bits),
+// no alignment requirement on pDestination.
+_Use_decl_annotations_
+inline void XMStoreInt4
+(
+ uint32_t* pDestination,
+ FXMVECTOR V
+)
+{
+ assert(pDestination);
+#if defined(_XM_NO_INTRINSICS_)
+ pDestination[0] = V.vector4_u32[0];
+ pDestination[1] = V.vector4_u32[1];
+ pDestination[2] = V.vector4_u32[2];
+ pDestination[3] = V.vector4_u32[3];
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ vst1q_u32( pDestination, V );
+#elif defined(_XM_SSE_INTRINSICS_)
+ _mm_storeu_si128( reinterpret_cast<__m128i*>(pDestination), _mm_castps_si128(V) );
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+// Aligned variant of XMStoreInt4: pDestination must be 16-byte aligned,
+// allowing a single aligned 128-bit store.
+_Use_decl_annotations_
+inline void XMStoreInt4A
+(
+ uint32_t* pDestination,
+ FXMVECTOR V
+)
+{
+ assert(pDestination);
+ assert(((uintptr_t)pDestination & 0xF) == 0);
+#if defined(_XM_NO_INTRINSICS_)
+ pDestination[0] = V.vector4_u32[0];
+ pDestination[1] = V.vector4_u32[1];
+ pDestination[2] = V.vector4_u32[2];
+ pDestination[3] = V.vector4_u32[3];
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ vst1q_u32_ex( pDestination, V, 128 );
+#elif defined(_XM_SSE_INTRINSICS_)
+ _mm_store_si128( reinterpret_cast<__m128i*>(pDestination), _mm_castps_si128(V) );
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+
+//------------------------------------------------------------------------------
+_Use_decl_annotations_
+inline void XMStoreFloat4
+(
+ XMFLOAT4* pDestination,
+ FXMVECTOR V
+)
+{
+ assert(pDestination);
+#if defined(_XM_NO_INTRINSICS_)
+ pDestination->x = V.vector4_f32[0];
+ pDestination->y = V.vector4_f32[1];
+ pDestination->z = V.vector4_f32[2];
+ pDestination->w = V.vector4_f32[3];
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ vst1q_f32( reinterpret_cast<float*>(pDestination), V );
+#elif defined(_XM_SSE_INTRINSICS_)
+ _mm_storeu_ps( &pDestination->x, V );
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+// Aligned variant of XMStoreFloat4: pDestination must be 16-byte aligned,
+// allowing a single aligned 128-bit store.
+_Use_decl_annotations_
+inline void XMStoreFloat4A
+(
+ XMFLOAT4A* pDestination,
+ FXMVECTOR V
+)
+{
+ assert(pDestination);
+ assert(((uintptr_t)pDestination & 0xF) == 0);
+#if defined(_XM_NO_INTRINSICS_)
+ pDestination->x = V.vector4_f32[0];
+ pDestination->y = V.vector4_f32[1];
+ pDestination->z = V.vector4_f32[2];
+ pDestination->w = V.vector4_f32[3];
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ vst1q_f32_ex( reinterpret_cast<float*>(pDestination), V, 128 );
+#elif defined(_XM_SSE_INTRINSICS_)
+ _mm_store_ps( &pDestination->x, V );
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+
+//------------------------------------------------------------------------------
+_Use_decl_annotations_
+inline void XMStoreSInt4
+(
+ XMINT4* pDestination,
+ FXMVECTOR V
+)
+{
+ assert(pDestination);
+#if defined(_XM_NO_INTRINSICS_)
+ pDestination->x = (int32_t)V.vector4_f32[0];
+ pDestination->y = (int32_t)V.vector4_f32[1];
+ pDestination->z = (int32_t)V.vector4_f32[2];
+ pDestination->w = (int32_t)V.vector4_f32[3];
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ __n128 v = vcvtq_s32_f32(V);
+ vst1q_s32( reinterpret_cast<int32_t*>(pDestination), v );
+#elif defined(_XM_SSE_INTRINSICS_)
+ // In case of positive overflow, detect it
+ XMVECTOR vOverflow = _mm_cmpgt_ps(V,g_XMMaxInt);
+ // Float to int conversion
+ __m128i vResulti = _mm_cvttps_epi32(V);
+ // If there was positive overflow, set to 0x7FFFFFFF
+ XMVECTOR vResult = _mm_and_ps(vOverflow,g_XMAbsMask);
+ vOverflow = _mm_andnot_ps(vOverflow,_mm_castsi128_ps(vResulti));
+ vOverflow = _mm_or_ps(vOverflow,vResult);
+ _mm_storeu_si128( reinterpret_cast<__m128i*>(pDestination), _mm_castps_si128(vOverflow) );
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+// Convert all four lanes of V (floats) to unsigned 32-bit ints and store into
+// an XMUINT4. Same SSE strategy as XMStoreUInt2/3: clamp negatives to 0,
+// rebase values >= 2^31 around the signed convert, saturate to 0xFFFFFFFF.
+_Use_decl_annotations_
+inline void XMStoreUInt4
+(
+ XMUINT4* pDestination,
+ FXMVECTOR V
+)
+{
+ assert(pDestination);
+#if defined(_XM_NO_INTRINSICS_)
+ pDestination->x = (uint32_t)V.vector4_f32[0];
+ pDestination->y = (uint32_t)V.vector4_f32[1];
+ pDestination->z = (uint32_t)V.vector4_f32[2];
+ pDestination->w = (uint32_t)V.vector4_f32[3];
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ __n128 v = vcvtq_u32_f32(V);
+ vst1q_u32( reinterpret_cast<uint32_t*>(pDestination), v );
+#elif defined(_XM_SSE_INTRINSICS_)
+ // Clamp to >=0
+ XMVECTOR vResult = _mm_max_ps(V,g_XMZero);
+ // Any numbers that are too big, set to 0xFFFFFFFFU
+ XMVECTOR vOverflow = _mm_cmpgt_ps(vResult,g_XMMaxUInt);
+ XMVECTOR vValue = g_XMUnsignedFix;
+ // Too large for a signed integer?
+ XMVECTOR vMask = _mm_cmpge_ps(vResult,vValue);
+ // Zero for numbers lower than 0x80000000, 32768.0f*65536.0f otherwise
+ vValue = _mm_and_ps(vValue,vMask);
+ // Perform fixup only on numbers too large (Keeps low bit precision)
+ vResult = _mm_sub_ps(vResult,vValue);
+ __m128i vResulti = _mm_cvttps_epi32(vResult);
+ // Convert from signed to unsigned only if greater than 0x80000000
+ vMask = _mm_and_ps(vMask,g_XMNegativeZero);
+ vResult = _mm_xor_ps(_mm_castsi128_ps(vResulti),vMask);
+ // On those that are too large, set to 0xFFFFFFFF
+ vResult = _mm_or_ps(vResult,vOverflow);
+ _mm_storeu_si128( reinterpret_cast<__m128i*>(pDestination), _mm_castps_si128(vResult) );
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+// Store the upper-left 3x3 of M into a packed row-major XMFLOAT3X3 (9 floats).
+// The w lanes of M's rows and its fourth row are discarded. The intrinsic
+// paths shuffle the rows into two 4-float stores plus one scalar store, so
+// stores intentionally straddle the packed row boundaries.
+_Use_decl_annotations_
+inline void XMStoreFloat3x3
+(
+ XMFLOAT3X3* pDestination,
+ CXMMATRIX M
+)
+{
+ assert(pDestination);
+#if defined(_XM_NO_INTRINSICS_)
+
+ pDestination->m[0][0] = M.r[0].vector4_f32[0];
+ pDestination->m[0][1] = M.r[0].vector4_f32[1];
+ pDestination->m[0][2] = M.r[0].vector4_f32[2];
+
+ pDestination->m[1][0] = M.r[1].vector4_f32[0];
+ pDestination->m[1][1] = M.r[1].vector4_f32[1];
+ pDestination->m[1][2] = M.r[1].vector4_f32[2];
+
+ pDestination->m[2][0] = M.r[2].vector4_f32[0];
+ pDestination->m[2][1] = M.r[2].vector4_f32[1];
+ pDestination->m[2][2] = M.r[2].vector4_f32[2];
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ __n128 T1 = vextq_f32( M.r[0], M.r[1], 1 );
+ __n128 T2 = vbslq_f32( g_XMMask3, M.r[0], T1 );
+ vst1q_f32( &pDestination->m[0][0], T2 );
+
+ T1 = vextq_f32( M.r[1], M.r[1], 1 );
+ T2 = vcombine_f32( vget_low_f32(T1), vget_low_f32(M.r[2]) );
+ vst1q_f32( &pDestination->m[1][1], T2 );
+
+ vst1q_lane_f32( &pDestination->m[2][2], M.r[2], 2 );
+#elif defined(_XM_SSE_INTRINSICS_)
+ XMVECTOR vTemp1 = M.r[0];
+ XMVECTOR vTemp2 = M.r[1];
+ XMVECTOR vTemp3 = M.r[2];
+ // Pack m00 m01 m02 m10 into one vector, store at offset 0.
+ XMVECTOR vWork = _mm_shuffle_ps(vTemp1,vTemp2,_MM_SHUFFLE(0,0,2,2));
+ vTemp1 = _mm_shuffle_ps(vTemp1,vWork,_MM_SHUFFLE(2,0,1,0));
+ _mm_storeu_ps(&pDestination->m[0][0],vTemp1);
+ // Pack m11 m12 m20 m21, store at offset 4 floats.
+ vTemp2 = _mm_shuffle_ps(vTemp2,vTemp3,_MM_SHUFFLE(1,0,2,1));
+ _mm_storeu_ps(&pDestination->m[1][1],vTemp2);
+ // Final scalar m22.
+ vTemp3 = XM_PERMUTE_PS(vTemp3,_MM_SHUFFLE(2,2,2,2));
+ _mm_store_ss(&pDestination->m[2][2],vTemp3);
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+
+//------------------------------------------------------------------------------
+_Use_decl_annotations_
+// Store matrix M into an XMFLOAT4X3 (twelve contiguous floats, row-major);
+// the w component of each row is dropped. pDestination does not need to be
+// 16-byte aligned. The SIMD paths shuffle the twelve values into exactly
+// three 4-float stores that cross the row boundaries.
+inline void XMStoreFloat4x3
+(
+    XMFLOAT4X3* pDestination,
+    CXMMATRIX M
+)
+{
+    assert(pDestination);
+#if defined(_XM_NO_INTRINSICS_)
+
+    pDestination->m[0][0] = M.r[0].vector4_f32[0];
+    pDestination->m[0][1] = M.r[0].vector4_f32[1];
+    pDestination->m[0][2] = M.r[0].vector4_f32[2];
+
+    pDestination->m[1][0] = M.r[1].vector4_f32[0];
+    pDestination->m[1][1] = M.r[1].vector4_f32[1];
+    pDestination->m[1][2] = M.r[1].vector4_f32[2];
+
+    pDestination->m[2][0] = M.r[2].vector4_f32[0];
+    pDestination->m[2][1] = M.r[2].vector4_f32[1];
+    pDestination->m[2][2] = M.r[2].vector4_f32[2];
+
+    pDestination->m[3][0] = M.r[3].vector4_f32[0];
+    pDestination->m[3][1] = M.r[3].vector4_f32[1];
+    pDestination->m[3][2] = M.r[3].vector4_f32[2];
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+    // x1,y1,z1,x2 stored at &m[0][0]
+    __n128 T1 = vextq_f32( M.r[0], M.r[1], 1 );
+    __n128 T2 = vbslq_f32( g_XMMask3, M.r[0], T1 );
+    vst1q_f32( &pDestination->m[0][0], T2 );
+
+    // y2,z2,x3,y3 stored at &m[1][1]
+    T1 = vextq_f32( M.r[1], M.r[1], 1 );
+    T2 = vcombine_f32( vget_low_f32(T1), vget_low_f32(M.r[2]) );
+    vst1q_f32( &pDestination->m[1][1], T2 );
+
+    // z3,x4,y4,z4 stored at &m[2][2]
+    T1 = vdupq_lane_f32( vget_high_f32( M.r[2] ), 0 );
+    T2 = vextq_f32( T1, M.r[3], 3 );
+    vst1q_f32( &pDestination->m[2][2], T2 );
+#elif defined(_XM_SSE_INTRINSICS_)
+    XMVECTOR vTemp1 = M.r[0];
+    XMVECTOR vTemp2 = M.r[1];
+    XMVECTOR vTemp3 = M.r[2];
+    XMVECTOR vTemp4 = M.r[3];
+    // y2,z2,x3,y3 (final second store)
+    XMVECTOR vTemp2x = _mm_shuffle_ps(vTemp2,vTemp3,_MM_SHUFFLE(1,0,2,1));
+    // x2,x2,z1,z1
+    vTemp2 = _mm_shuffle_ps(vTemp2,vTemp1,_MM_SHUFFLE(2,2,0,0));
+    // x1,y1,z1,x2 (final first store)
+    vTemp1 = _mm_shuffle_ps(vTemp1,vTemp2,_MM_SHUFFLE(0,2,1,0));
+    // z3,z3,x4,x4
+    vTemp3 = _mm_shuffle_ps(vTemp3,vTemp4,_MM_SHUFFLE(0,0,2,2));
+    // z3,x4,y4,z4 (final third store)
+    vTemp3 = _mm_shuffle_ps(vTemp3,vTemp4,_MM_SHUFFLE(2,1,2,0));
+    _mm_storeu_ps(&pDestination->m[0][0],vTemp1);
+    _mm_storeu_ps(&pDestination->m[1][1],vTemp2x);
+    _mm_storeu_ps(&pDestination->m[2][2],vTemp3);
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+_Use_decl_annotations_
+// Aligned variant of XMStoreFloat4x3: store matrix M into a 16-byte aligned
+// XMFLOAT4X3A (twelve contiguous floats, row-major; the w component of each
+// row is dropped) using aligned stores.
+inline void XMStoreFloat4x3A
+(
+    XMFLOAT4X3A* pDestination,
+    CXMMATRIX M
+)
+{
+    assert(pDestination);
+    // Aligned stores below require the destination on a 16-byte boundary.
+    assert(((uintptr_t)pDestination & 0xF) == 0);
+#if defined(_XM_NO_INTRINSICS_)
+
+    pDestination->m[0][0] = M.r[0].vector4_f32[0];
+    pDestination->m[0][1] = M.r[0].vector4_f32[1];
+    pDestination->m[0][2] = M.r[0].vector4_f32[2];
+
+    pDestination->m[1][0] = M.r[1].vector4_f32[0];
+    pDestination->m[1][1] = M.r[1].vector4_f32[1];
+    pDestination->m[1][2] = M.r[1].vector4_f32[2];
+
+    pDestination->m[2][0] = M.r[2].vector4_f32[0];
+    pDestination->m[2][1] = M.r[2].vector4_f32[1];
+    pDestination->m[2][2] = M.r[2].vector4_f32[2];
+
+    pDestination->m[3][0] = M.r[3].vector4_f32[0];
+    pDestination->m[3][1] = M.r[3].vector4_f32[1];
+    pDestination->m[3][2] = M.r[3].vector4_f32[2];
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+    // x1,y1,z1,x2 stored at &m[0][0]
+    __n128 T1 = vextq_f32( M.r[0], M.r[1], 1 );
+    __n128 T2 = vbslq_f32( g_XMMask3, M.r[0], T1 );
+    vst1q_f32_ex( &pDestination->m[0][0], T2, 128 );
+
+    // y2,z2,x3,y3 stored at &m[1][1]
+    T1 = vextq_f32( M.r[1], M.r[1], 1 );
+    T2 = vcombine_f32( vget_low_f32(T1), vget_low_f32(M.r[2]) );
+    vst1q_f32_ex( &pDestination->m[1][1], T2, 128 );
+
+    // z3,x4,y4,z4 stored at &m[2][2]
+    T1 = vdupq_lane_f32( vget_high_f32( M.r[2] ), 0 );
+    T2 = vextq_f32( T1, M.r[3], 3 );
+    vst1q_f32_ex( &pDestination->m[2][2], T2, 128 );
+#elif defined(_XM_SSE_INTRINSICS_)
+    // x1,y1,z1,w1
+    XMVECTOR vTemp1 = M.r[0];
+    // x2,y2,z2,w2
+    XMVECTOR vTemp2 = M.r[1];
+    // x3,y3,z3,w3
+    XMVECTOR vTemp3 = M.r[2];
+    // x4,y4,z4,w4
+    XMVECTOR vTemp4 = M.r[3];
+    // z1,z1,x2,y2
+    XMVECTOR vTemp = _mm_shuffle_ps(vTemp1,vTemp2,_MM_SHUFFLE(1,0,2,2));
+    // y2,z2,x3,y3 (Final)
+    vTemp2 = _mm_shuffle_ps(vTemp2,vTemp3,_MM_SHUFFLE(1,0,2,1));
+    // x1,y1,z1,x2 (Final)
+    vTemp1 = _mm_shuffle_ps(vTemp1,vTemp,_MM_SHUFFLE(2,0,1,0));
+    // z3,z3,x4,x4
+    vTemp3 = _mm_shuffle_ps(vTemp3,vTemp4,_MM_SHUFFLE(0,0,2,2));
+    // z3,x4,y4,z4 (Final)
+    vTemp3 = _mm_shuffle_ps(vTemp3,vTemp4,_MM_SHUFFLE(2,1,2,0));
+    // Store in 3 operations
+    _mm_store_ps(&pDestination->m[0][0],vTemp1);
+    _mm_store_ps(&pDestination->m[1][1],vTemp2);
+    _mm_store_ps(&pDestination->m[2][2],vTemp3);
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+
+//------------------------------------------------------------------------------
+_Use_decl_annotations_
+// Store matrix M into an XMFLOAT4X4 (sixteen contiguous floats, row-major)
+// using one unaligned 4-float store per row. pDestination does not need to
+// be 16-byte aligned.
+inline void XMStoreFloat4x4
+(
+    XMFLOAT4X4* pDestination,
+    CXMMATRIX M
+)
+{
+    assert(pDestination);
+#if defined(_XM_NO_INTRINSICS_)
+
+    pDestination->m[0][0] = M.r[0].vector4_f32[0];
+    pDestination->m[0][1] = M.r[0].vector4_f32[1];
+    pDestination->m[0][2] = M.r[0].vector4_f32[2];
+    pDestination->m[0][3] = M.r[0].vector4_f32[3];
+
+    pDestination->m[1][0] = M.r[1].vector4_f32[0];
+    pDestination->m[1][1] = M.r[1].vector4_f32[1];
+    pDestination->m[1][2] = M.r[1].vector4_f32[2];
+    pDestination->m[1][3] = M.r[1].vector4_f32[3];
+
+    pDestination->m[2][0] = M.r[2].vector4_f32[0];
+    pDestination->m[2][1] = M.r[2].vector4_f32[1];
+    pDestination->m[2][2] = M.r[2].vector4_f32[2];
+    pDestination->m[2][3] = M.r[2].vector4_f32[3];
+
+    pDestination->m[3][0] = M.r[3].vector4_f32[0];
+    pDestination->m[3][1] = M.r[3].vector4_f32[1];
+    pDestination->m[3][2] = M.r[3].vector4_f32[2];
+    pDestination->m[3][3] = M.r[3].vector4_f32[3];
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+    vst1q_f32( reinterpret_cast<float*>(&pDestination->_11), M.r[0] );
+    vst1q_f32( reinterpret_cast<float*>(&pDestination->_21), M.r[1] );
+    vst1q_f32( reinterpret_cast<float*>(&pDestination->_31), M.r[2] );
+    vst1q_f32( reinterpret_cast<float*>(&pDestination->_41), M.r[3] );
+#elif defined(_XM_SSE_INTRINSICS_)
+    _mm_storeu_ps( &pDestination->_11, M.r[0] );
+    _mm_storeu_ps( &pDestination->_21, M.r[1] );
+    _mm_storeu_ps( &pDestination->_31, M.r[2] );
+    _mm_storeu_ps( &pDestination->_41, M.r[3] );
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+_Use_decl_annotations_
+// Aligned variant of XMStoreFloat4x4: store matrix M into a 16-byte aligned
+// XMFLOAT4X4A using one aligned 4-float store per row.
+inline void XMStoreFloat4x4A
+(
+    XMFLOAT4X4A* pDestination,
+    CXMMATRIX M
+)
+{
+    assert(pDestination);
+    // Aligned stores below require the destination on a 16-byte boundary.
+    assert(((uintptr_t)pDestination & 0xF) == 0);
+#if defined(_XM_NO_INTRINSICS_)
+
+    pDestination->m[0][0] = M.r[0].vector4_f32[0];
+    pDestination->m[0][1] = M.r[0].vector4_f32[1];
+    pDestination->m[0][2] = M.r[0].vector4_f32[2];
+    pDestination->m[0][3] = M.r[0].vector4_f32[3];
+
+    pDestination->m[1][0] = M.r[1].vector4_f32[0];
+    pDestination->m[1][1] = M.r[1].vector4_f32[1];
+    pDestination->m[1][2] = M.r[1].vector4_f32[2];
+    pDestination->m[1][3] = M.r[1].vector4_f32[3];
+
+    pDestination->m[2][0] = M.r[2].vector4_f32[0];
+    pDestination->m[2][1] = M.r[2].vector4_f32[1];
+    pDestination->m[2][2] = M.r[2].vector4_f32[2];
+    pDestination->m[2][3] = M.r[2].vector4_f32[3];
+
+    pDestination->m[3][0] = M.r[3].vector4_f32[0];
+    pDestination->m[3][1] = M.r[3].vector4_f32[1];
+    pDestination->m[3][2] = M.r[3].vector4_f32[2];
+    pDestination->m[3][3] = M.r[3].vector4_f32[3];
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+    vst1q_f32_ex( reinterpret_cast<float*>(&pDestination->_11), M.r[0], 128 );
+    vst1q_f32_ex( reinterpret_cast<float*>(&pDestination->_21), M.r[1], 128 );
+    vst1q_f32_ex( reinterpret_cast<float*>(&pDestination->_31), M.r[2], 128 );
+    vst1q_f32_ex( reinterpret_cast<float*>(&pDestination->_41), M.r[3], 128 );
+#elif defined(_XM_SSE_INTRINSICS_)
+    _mm_store_ps( &pDestination->_11, M.r[0] );
+    _mm_store_ps( &pDestination->_21, M.r[1] );
+    _mm_store_ps( &pDestination->_31, M.r[2] );
+    _mm_store_ps( &pDestination->_41, M.r[3] );
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
diff --git a/Minecraft.Client/PS3/PS3Extras/DirectX/DirectXMathMatrix.inl b/Minecraft.Client/PS3/PS3Extras/DirectX/DirectXMathMatrix.inl
new file mode 100644
index 00000000..d665d333
--- /dev/null
+++ b/Minecraft.Client/PS3/PS3Extras/DirectX/DirectXMathMatrix.inl
@@ -0,0 +1,3414 @@
+//-------------------------------------------------------------------------------------
+// DirectXMathMatrix.inl -- SIMD C++ Math library
+//
+// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
+// ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
+// PARTICULAR PURPOSE.
+//
+// Copyright (c) Microsoft Corporation. All rights reserved.
+//-------------------------------------------------------------------------------------
+
+#ifdef _MSC_VER
+#pragma once
+#endif
+
+/****************************************************************************
+ *
+ * Matrix
+ *
+ ****************************************************************************/
+
+//------------------------------------------------------------------------------
+// Comparison operations
+//------------------------------------------------------------------------------
+
+//------------------------------------------------------------------------------
+
+// Return true if any entry in the matrix is NaN
+inline bool XMMatrixIsNaN
+(
+    CXMMATRIX M
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+    // Scan all 16 entries as raw bits: a float is NaN when, after clearing
+    // the sign bit, its pattern lies in [0x7F800001, 0x7FFFFFFF].
+    size_t i = 16;
+    const uint32_t *pWork = (const uint32_t *)(&M.m[0][0]);
+    do {
+        // Fetch value into integer unit
+        uint32_t uTest = pWork[0];
+        // Remove sign
+        uTest &= 0x7FFFFFFFU;
+        // NaN is 0x7F800001 through 0x7FFFFFFF inclusive
+        uTest -= 0x7F800001U;
+        if (uTest<0x007FFFFFU) {
+            break;  // NaN found
+        }
+        ++pWork;    // Next entry
+    } while (--i);
+    return (i!=0);      // i == 0 if nothing matched
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+    // Load in registers
+    XMVECTOR vX = M.r[0];
+    XMVECTOR vY = M.r[1];
+    XMVECTOR vZ = M.r[2];
+    XMVECTOR vW = M.r[3];
+    // Test themselves to check for NaN (NaN != NaN)
+    vX = vmvnq_u32(vceqq_f32(vX, vX));
+    vY = vmvnq_u32(vceqq_f32(vY, vY));
+    vZ = vmvnq_u32(vceqq_f32(vZ, vZ));
+    vW = vmvnq_u32(vceqq_f32(vW, vW));
+    // Or all the results
+    vX = vorrq_u32(vX,vZ);
+    vY = vorrq_u32(vY,vW);
+    vX = vorrq_u32(vX,vY);
+    // If any tested true, return true.
+    // The zips collapse the 128-bit lane mask into a 32-bit value that is
+    // nonzero when at least one lane was all-ones (movemask emulation).
+    int8x8x2_t vTemp = vzip_u8(vget_low_u8(vX), vget_high_u8(vX));
+    vTemp = vzip_u16(vTemp.val[0], vTemp.val[1]);
+    uint32_t r = vget_lane_u32(vTemp.val[1], 1);
+    return (r != 0);
+#elif defined(_XM_SSE_INTRINSICS_)
+    // Load in registers
+    XMVECTOR vX = M.r[0];
+    XMVECTOR vY = M.r[1];
+    XMVECTOR vZ = M.r[2];
+    XMVECTOR vW = M.r[3];
+    // Test themselves to check for NaN (NaN != NaN)
+    vX = _mm_cmpneq_ps(vX,vX);
+    vY = _mm_cmpneq_ps(vY,vY);
+    vZ = _mm_cmpneq_ps(vZ,vZ);
+    vW = _mm_cmpneq_ps(vW,vW);
+    // Or all the results
+    vX = _mm_or_ps(vX,vZ);
+    vY = _mm_or_ps(vY,vW);
+    vX = _mm_or_ps(vX,vY);
+    // If any tested true, return true
+    return (_mm_movemask_ps(vX)!=0);
+#else
+#endif
+}
+
+//------------------------------------------------------------------------------
+
+// Return true if any entry in the matrix is +/-INF
+inline bool XMMatrixIsInfinite
+(
+    CXMMATRIX M
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+    // Scan all 16 entries as raw bits: an infinity is exactly 0x7F800000
+    // once the sign bit is cleared.
+    size_t i = 16;
+    const uint32_t *pWork = (const uint32_t *)(&M.m[0][0]);
+    do {
+        // Fetch value into integer unit
+        uint32_t uTest = pWork[0];
+        // Remove sign
+        uTest &= 0x7FFFFFFFU;
+        // INF is 0x7F800000
+        if (uTest==0x7F800000U) {
+            break;  // INF found
+        }
+        ++pWork;    // Next entry
+    } while (--i);
+    return (i!=0);      // i == 0 if nothing matched
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+    // Mask off the sign bits
+    XMVECTOR vTemp1 = vandq_u32(M.r[0],g_XMAbsMask);
+    XMVECTOR vTemp2 = vandq_u32(M.r[1],g_XMAbsMask);
+    XMVECTOR vTemp3 = vandq_u32(M.r[2],g_XMAbsMask);
+    XMVECTOR vTemp4 = vandq_u32(M.r[3],g_XMAbsMask);
+    // Compare to infinity
+    vTemp1 = vceqq_f32(vTemp1,g_XMInfinity);
+    vTemp2 = vceqq_f32(vTemp2,g_XMInfinity);
+    vTemp3 = vceqq_f32(vTemp3,g_XMInfinity);
+    vTemp4 = vceqq_f32(vTemp4,g_XMInfinity);
+    // Or the answers together
+    vTemp1 = vorrq_u32(vTemp1,vTemp2);
+    vTemp3 = vorrq_u32(vTemp3,vTemp4);
+    vTemp1 = vorrq_u32(vTemp1,vTemp3);
+    // A lane that compared equal to infinity is all-ones; the zips collapse
+    // the 128-bit mask into a 32-bit value (movemask emulation).
+    int8x8x2_t vTemp = vzip_u8(vget_low_u8(vTemp1), vget_high_u8(vTemp1));
+    vTemp = vzip_u16(vTemp.val[0], vTemp.val[1]);
+    uint32_t r = vget_lane_u32(vTemp.val[1], 1);
+    return (r != 0);
+#elif defined(_XM_SSE_INTRINSICS_)
+    // Mask off the sign bits
+    XMVECTOR vTemp1 = _mm_and_ps(M.r[0],g_XMAbsMask);
+    XMVECTOR vTemp2 = _mm_and_ps(M.r[1],g_XMAbsMask);
+    XMVECTOR vTemp3 = _mm_and_ps(M.r[2],g_XMAbsMask);
+    XMVECTOR vTemp4 = _mm_and_ps(M.r[3],g_XMAbsMask);
+    // Compare to infinity
+    vTemp1 = _mm_cmpeq_ps(vTemp1,g_XMInfinity);
+    vTemp2 = _mm_cmpeq_ps(vTemp2,g_XMInfinity);
+    vTemp3 = _mm_cmpeq_ps(vTemp3,g_XMInfinity);
+    vTemp4 = _mm_cmpeq_ps(vTemp4,g_XMInfinity);
+    // Or the answers together
+    vTemp1 = _mm_or_ps(vTemp1,vTemp2);
+    vTemp3 = _mm_or_ps(vTemp3,vTemp4);
+    vTemp1 = _mm_or_ps(vTemp1,vTemp3);
+    // Any lane that matched infinity sets a bit in the movemask
+    return (_mm_movemask_ps(vTemp1)!=0);
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Return true if the XMMatrix is equal to identity
+inline bool XMMatrixIsIdentity
+(
+    CXMMATRIX M
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+    // Use the integer pipeline to reduce branching to a minimum:
+    // accumulate diagonal entries XORed with the bit pattern of 1.0f
+    // (0x3F800000) into uOne, and off-diagonal entries into uZero.
+    // Both accumulators end up zero only for an exact identity matrix.
+    const uint32_t *pWork = (const uint32_t*)(&M.m[0][0]);
+    // Convert 1.0f to zero and or them together
+    uOne = pWork[0]^0x3F800000U;
+    // Or all the 0.0f entries together
+    uint32_t uZero = pWork[1];
+    uZero |= pWork[2];
+    uZero |= pWork[3];
+    // 2nd row
+    uZero |= pWork[4];
+    uOne |= pWork[5]^0x3F800000U;
+    uZero |= pWork[6];
+    uZero |= pWork[7];
+    // 3rd row
+    uZero |= pWork[8];
+    uZero |= pWork[9];
+    uOne |= pWork[10]^0x3F800000U;
+    uZero |= pWork[11];
+    // 4th row
+    uZero |= pWork[12];
+    uZero |= pWork[13];
+    uZero |= pWork[14];
+    uOne |= pWork[15]^0x3F800000U;
+    // If all zero entries are zero, the uZero==0
+    uZero &= 0x7FFFFFFF;    // Allow -0.0f
+    // If all 1.0f entries are 1.0f, then uOne==0
+    uOne |= uZero;
+    return (uOne==0);
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+    // Compare each row against the corresponding identity row
+    XMVECTOR vTemp1 = vceqq_f32(M.r[0],g_XMIdentityR0);
+    XMVECTOR vTemp2 = vceqq_f32(M.r[1],g_XMIdentityR1);
+    XMVECTOR vTemp3 = vceqq_f32(M.r[2],g_XMIdentityR2);
+    XMVECTOR vTemp4 = vceqq_f32(M.r[3],g_XMIdentityR3);
+    vTemp1 = vandq_u32(vTemp1,vTemp2);
+    vTemp3 = vandq_u32(vTemp3,vTemp4);
+    vTemp1 = vandq_u32(vTemp1,vTemp3);
+    // Collapse the mask; all lanes must have compared equal
+    int8x8x2_t vTemp = vzip_u8(vget_low_u8(vTemp1), vget_high_u8(vTemp1));
+    vTemp = vzip_u16(vTemp.val[0], vTemp.val[1]);
+    uint32_t r = vget_lane_u32(vTemp.val[1], 1);
+    return ( r == 0xFFFFFFFFU );
+#elif defined(_XM_SSE_INTRINSICS_)
+    // Compare each row against the corresponding identity row
+    XMVECTOR vTemp1 = _mm_cmpeq_ps(M.r[0],g_XMIdentityR0);
+    XMVECTOR vTemp2 = _mm_cmpeq_ps(M.r[1],g_XMIdentityR1);
+    XMVECTOR vTemp3 = _mm_cmpeq_ps(M.r[2],g_XMIdentityR2);
+    XMVECTOR vTemp4 = _mm_cmpeq_ps(M.r[3],g_XMIdentityR3);
+    vTemp1 = _mm_and_ps(vTemp1,vTemp2);
+    vTemp3 = _mm_and_ps(vTemp3,vTemp4);
+    vTemp1 = _mm_and_ps(vTemp1,vTemp3);
+    // All four mask bits must be set
+    return (_mm_movemask_ps(vTemp1)==0x0f);
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+// Computation operations
+//------------------------------------------------------------------------------
+
+//------------------------------------------------------------------------------
+// Perform a 4x4 matrix multiply by a 4x4 matrix
+// Computes M1 * M2: row i of the result is the linear combination of the
+// rows of M2 weighted by the four components of row i of M1.
+inline XMMATRIX XMMatrixMultiply
+(
+    CXMMATRIX M1,
+    CXMMATRIX M2
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+    XMMATRIX mResult;
+    // Cache the invariants in registers
+    float x = M1.m[0][0];
+    float y = M1.m[0][1];
+    float z = M1.m[0][2];
+    float w = M1.m[0][3];
+    // Perform the operation on the first row
+    mResult.m[0][0] = (M2.m[0][0]*x)+(M2.m[1][0]*y)+(M2.m[2][0]*z)+(M2.m[3][0]*w);
+    mResult.m[0][1] = (M2.m[0][1]*x)+(M2.m[1][1]*y)+(M2.m[2][1]*z)+(M2.m[3][1]*w);
+    mResult.m[0][2] = (M2.m[0][2]*x)+(M2.m[1][2]*y)+(M2.m[2][2]*z)+(M2.m[3][2]*w);
+    mResult.m[0][3] = (M2.m[0][3]*x)+(M2.m[1][3]*y)+(M2.m[2][3]*z)+(M2.m[3][3]*w);
+    // Repeat for all the other rows
+    x = M1.m[1][0];
+    y = M1.m[1][1];
+    z = M1.m[1][2];
+    w = M1.m[1][3];
+    mResult.m[1][0] = (M2.m[0][0]*x)+(M2.m[1][0]*y)+(M2.m[2][0]*z)+(M2.m[3][0]*w);
+    mResult.m[1][1] = (M2.m[0][1]*x)+(M2.m[1][1]*y)+(M2.m[2][1]*z)+(M2.m[3][1]*w);
+    mResult.m[1][2] = (M2.m[0][2]*x)+(M2.m[1][2]*y)+(M2.m[2][2]*z)+(M2.m[3][2]*w);
+    mResult.m[1][3] = (M2.m[0][3]*x)+(M2.m[1][3]*y)+(M2.m[2][3]*z)+(M2.m[3][3]*w);
+    x = M1.m[2][0];
+    y = M1.m[2][1];
+    z = M1.m[2][2];
+    w = M1.m[2][3];
+    mResult.m[2][0] = (M2.m[0][0]*x)+(M2.m[1][0]*y)+(M2.m[2][0]*z)+(M2.m[3][0]*w);
+    mResult.m[2][1] = (M2.m[0][1]*x)+(M2.m[1][1]*y)+(M2.m[2][1]*z)+(M2.m[3][1]*w);
+    mResult.m[2][2] = (M2.m[0][2]*x)+(M2.m[1][2]*y)+(M2.m[2][2]*z)+(M2.m[3][2]*w);
+    mResult.m[2][3] = (M2.m[0][3]*x)+(M2.m[1][3]*y)+(M2.m[2][3]*z)+(M2.m[3][3]*w);
+    x = M1.m[3][0];
+    y = M1.m[3][1];
+    z = M1.m[3][2];
+    w = M1.m[3][3];
+    mResult.m[3][0] = (M2.m[0][0]*x)+(M2.m[1][0]*y)+(M2.m[2][0]*z)+(M2.m[3][0]*w);
+    mResult.m[3][1] = (M2.m[0][1]*x)+(M2.m[1][1]*y)+(M2.m[2][1]*z)+(M2.m[3][1]*w);
+    mResult.m[3][2] = (M2.m[0][2]*x)+(M2.m[1][2]*y)+(M2.m[2][2]*z)+(M2.m[3][2]*w);
+    mResult.m[3][3] = (M2.m[0][3]*x)+(M2.m[1][3]*y)+(M2.m[2][3]*z)+(M2.m[3][3]*w);
+    return mResult;
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+    XMMATRIX mResult;
+    __n64 VL = vget_low_f32( M1.r[0] );
+    __n64 VH = vget_high_f32( M1.r[0] );
+    // Splat the component X,Y,Z then W
+    XMVECTOR vX = vdupq_lane_f32(VL, 0);
+    XMVECTOR vY = vdupq_lane_f32(VL, 1);
+    XMVECTOR vZ = vdupq_lane_f32(VH, 0);
+    XMVECTOR vW = vdupq_lane_f32(VH, 1);
+    // Perform the operation on the first row.
+    // The multiply-accumulates fold x*r0 + z*r2 and y*r1 + w*r3 before the
+    // final add (a binary-tree sum, matching the SSE path's pairing).
+    vX = vmulq_f32(vX,M2.r[0]);
+    vY = vmulq_f32(vY,M2.r[1]);
+    vZ = vmlaq_f32(vX,vZ,M2.r[2]);
+    vW = vmlaq_f32(vY,vW,M2.r[3]);
+    mResult.r[0] = vaddq_f32( vZ, vW );
+    // Repeat for the other 3 rows
+    VL = vget_low_f32( M1.r[1] );
+    VH = vget_high_f32( M1.r[1] );
+    vX = vdupq_lane_f32(VL, 0);
+    vY = vdupq_lane_f32(VL, 1);
+    vZ = vdupq_lane_f32(VH, 0);
+    vW = vdupq_lane_f32(VH, 1);
+    vX = vmulq_f32(vX,M2.r[0]);
+    vY = vmulq_f32(vY,M2.r[1]);
+    vZ = vmlaq_f32(vX,vZ,M2.r[2]);
+    vW = vmlaq_f32(vY,vW,M2.r[3]);
+    mResult.r[1] = vaddq_f32( vZ, vW );
+    VL = vget_low_f32( M1.r[2] );
+    VH = vget_high_f32( M1.r[2] );
+    vX = vdupq_lane_f32(VL, 0);
+    vY = vdupq_lane_f32(VL, 1);
+    vZ = vdupq_lane_f32(VH, 0);
+    vW = vdupq_lane_f32(VH, 1);
+    vX = vmulq_f32(vX,M2.r[0]);
+    vY = vmulq_f32(vY,M2.r[1]);
+    vZ = vmlaq_f32(vX,vZ,M2.r[2]);
+    vW = vmlaq_f32(vY,vW,M2.r[3]);
+    mResult.r[2] = vaddq_f32( vZ, vW );
+    VL = vget_low_f32( M1.r[3] );
+    VH = vget_high_f32( M1.r[3] );
+    vX = vdupq_lane_f32(VL, 0);
+    vY = vdupq_lane_f32(VL, 1);
+    vZ = vdupq_lane_f32(VH, 0);
+    vW = vdupq_lane_f32(VH, 1);
+    vX = vmulq_f32(vX,M2.r[0]);
+    vY = vmulq_f32(vY,M2.r[1]);
+    vZ = vmlaq_f32(vX,vZ,M2.r[2]);
+    vW = vmlaq_f32(vY,vW,M2.r[3]);
+    mResult.r[3] = vaddq_f32( vZ, vW );
+    return mResult;
+#elif defined(_XM_SSE_INTRINSICS_)
+    XMMATRIX mResult;
+    // Use vW to hold the original row
+    XMVECTOR vW = M1.r[0];
+    // Splat the component X,Y,Z then W
+    XMVECTOR vX = XM_PERMUTE_PS(vW,_MM_SHUFFLE(0,0,0,0));
+    XMVECTOR vY = XM_PERMUTE_PS(vW,_MM_SHUFFLE(1,1,1,1));
+    XMVECTOR vZ = XM_PERMUTE_PS(vW,_MM_SHUFFLE(2,2,2,2));
+    vW = XM_PERMUTE_PS(vW,_MM_SHUFFLE(3,3,3,3));
+    // Perform the operation on the first row
+    vX = _mm_mul_ps(vX,M2.r[0]);
+    vY = _mm_mul_ps(vY,M2.r[1]);
+    vZ = _mm_mul_ps(vZ,M2.r[2]);
+    vW = _mm_mul_ps(vW,M2.r[3]);
+    // Perform a binary add to reduce cumulative errors
+    vX = _mm_add_ps(vX,vZ);
+    vY = _mm_add_ps(vY,vW);
+    vX = _mm_add_ps(vX,vY);
+    mResult.r[0] = vX;
+    // Repeat for the other 3 rows
+    vW = M1.r[1];
+    vX = XM_PERMUTE_PS(vW,_MM_SHUFFLE(0,0,0,0));
+    vY = XM_PERMUTE_PS(vW,_MM_SHUFFLE(1,1,1,1));
+    vZ = XM_PERMUTE_PS(vW,_MM_SHUFFLE(2,2,2,2));
+    vW = XM_PERMUTE_PS(vW,_MM_SHUFFLE(3,3,3,3));
+    vX = _mm_mul_ps(vX,M2.r[0]);
+    vY = _mm_mul_ps(vY,M2.r[1]);
+    vZ = _mm_mul_ps(vZ,M2.r[2]);
+    vW = _mm_mul_ps(vW,M2.r[3]);
+    vX = _mm_add_ps(vX,vZ);
+    vY = _mm_add_ps(vY,vW);
+    vX = _mm_add_ps(vX,vY);
+    mResult.r[1] = vX;
+    vW = M1.r[2];
+    vX = XM_PERMUTE_PS(vW,_MM_SHUFFLE(0,0,0,0));
+    vY = XM_PERMUTE_PS(vW,_MM_SHUFFLE(1,1,1,1));
+    vZ = XM_PERMUTE_PS(vW,_MM_SHUFFLE(2,2,2,2));
+    vW = XM_PERMUTE_PS(vW,_MM_SHUFFLE(3,3,3,3));
+    vX = _mm_mul_ps(vX,M2.r[0]);
+    vY = _mm_mul_ps(vY,M2.r[1]);
+    vZ = _mm_mul_ps(vZ,M2.r[2]);
+    vW = _mm_mul_ps(vW,M2.r[3]);
+    vX = _mm_add_ps(vX,vZ);
+    vY = _mm_add_ps(vY,vW);
+    vX = _mm_add_ps(vX,vY);
+    mResult.r[2] = vX;
+    vW = M1.r[3];
+    vX = XM_PERMUTE_PS(vW,_MM_SHUFFLE(0,0,0,0));
+    vY = XM_PERMUTE_PS(vW,_MM_SHUFFLE(1,1,1,1));
+    vZ = XM_PERMUTE_PS(vW,_MM_SHUFFLE(2,2,2,2));
+    vW = XM_PERMUTE_PS(vW,_MM_SHUFFLE(3,3,3,3));
+    vX = _mm_mul_ps(vX,M2.r[0]);
+    vY = _mm_mul_ps(vY,M2.r[1]);
+    vZ = _mm_mul_ps(vZ,M2.r[2]);
+    vW = _mm_mul_ps(vW,M2.r[3]);
+    vX = _mm_add_ps(vX,vZ);
+    vY = _mm_add_ps(vY,vW);
+    vX = _mm_add_ps(vX,vY);
+    mResult.r[3] = vX;
+    return mResult;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Compute the transpose of the product M1 * M2 in one pass, i.e.
+// transpose(XMMatrixMultiply(M1, M2)). The scalar path writes transposed
+// results directly; the SIMD paths compute the product rows and then
+// transpose them with shuffles/zips.
+inline XMMATRIX XMMatrixMultiplyTranspose
+(
+    CXMMATRIX M1,
+    CXMMATRIX M2
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+    XMMATRIX mResult;
+    // Cache the invariants in registers (column 0 of M2)
+    float x = M2.m[0][0];
+    float y = M2.m[1][0];
+    float z = M2.m[2][0];
+    float w = M2.m[3][0];
+    // Perform the operation on the first row
+    mResult.m[0][0] = (M1.m[0][0]*x)+(M1.m[0][1]*y)+(M1.m[0][2]*z)+(M1.m[0][3]*w);
+    mResult.m[0][1] = (M1.m[1][0]*x)+(M1.m[1][1]*y)+(M1.m[1][2]*z)+(M1.m[1][3]*w);
+    mResult.m[0][2] = (M1.m[2][0]*x)+(M1.m[2][1]*y)+(M1.m[2][2]*z)+(M1.m[2][3]*w);
+    mResult.m[0][3] = (M1.m[3][0]*x)+(M1.m[3][1]*y)+(M1.m[3][2]*z)+(M1.m[3][3]*w);
+    // Repeat for all the other rows
+    x = M2.m[0][1];
+    y = M2.m[1][1];
+    z = M2.m[2][1];
+    w = M2.m[3][1];
+    mResult.m[1][0] = (M1.m[0][0]*x)+(M1.m[0][1]*y)+(M1.m[0][2]*z)+(M1.m[0][3]*w);
+    mResult.m[1][1] = (M1.m[1][0]*x)+(M1.m[1][1]*y)+(M1.m[1][2]*z)+(M1.m[1][3]*w);
+    mResult.m[1][2] = (M1.m[2][0]*x)+(M1.m[2][1]*y)+(M1.m[2][2]*z)+(M1.m[2][3]*w);
+    mResult.m[1][3] = (M1.m[3][0]*x)+(M1.m[3][1]*y)+(M1.m[3][2]*z)+(M1.m[3][3]*w);
+    x = M2.m[0][2];
+    y = M2.m[1][2];
+    z = M2.m[2][2];
+    w = M2.m[3][2];
+    mResult.m[2][0] = (M1.m[0][0]*x)+(M1.m[0][1]*y)+(M1.m[0][2]*z)+(M1.m[0][3]*w);
+    mResult.m[2][1] = (M1.m[1][0]*x)+(M1.m[1][1]*y)+(M1.m[1][2]*z)+(M1.m[1][3]*w);
+    mResult.m[2][2] = (M1.m[2][0]*x)+(M1.m[2][1]*y)+(M1.m[2][2]*z)+(M1.m[2][3]*w);
+    mResult.m[2][3] = (M1.m[3][0]*x)+(M1.m[3][1]*y)+(M1.m[3][2]*z)+(M1.m[3][3]*w);
+    x = M2.m[0][3];
+    y = M2.m[1][3];
+    z = M2.m[2][3];
+    w = M2.m[3][3];
+    mResult.m[3][0] = (M1.m[0][0]*x)+(M1.m[0][1]*y)+(M1.m[0][2]*z)+(M1.m[0][3]*w);
+    mResult.m[3][1] = (M1.m[1][0]*x)+(M1.m[1][1]*y)+(M1.m[1][2]*z)+(M1.m[1][3]*w);
+    mResult.m[3][2] = (M1.m[2][0]*x)+(M1.m[2][1]*y)+(M1.m[2][2]*z)+(M1.m[2][3]*w);
+    mResult.m[3][3] = (M1.m[3][0]*x)+(M1.m[3][1]*y)+(M1.m[3][2]*z)+(M1.m[3][3]*w);
+    return mResult;
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+    // Same per-row product as XMMatrixMultiply's NEON path...
+    __n64 VL = vget_low_f32( M1.r[0] );
+    __n64 VH = vget_high_f32( M1.r[0] );
+    // Splat the component X,Y,Z then W
+    XMVECTOR vX = vdupq_lane_f32(VL, 0);
+    XMVECTOR vY = vdupq_lane_f32(VL, 1);
+    XMVECTOR vZ = vdupq_lane_f32(VH, 0);
+    XMVECTOR vW = vdupq_lane_f32(VH, 1);
+    // Perform the operation on the first row
+    vX = vmulq_f32(vX,M2.r[0]);
+    vY = vmulq_f32(vY,M2.r[1]);
+    vZ = vmlaq_f32(vX,vZ,M2.r[2]);
+    vW = vmlaq_f32(vY,vW,M2.r[3]);
+    __n128 r0 = vaddq_f32( vZ, vW );
+    // Repeat for the other 3 rows
+    VL = vget_low_f32( M1.r[1] );
+    VH = vget_high_f32( M1.r[1] );
+    vX = vdupq_lane_f32(VL, 0);
+    vY = vdupq_lane_f32(VL, 1);
+    vZ = vdupq_lane_f32(VH, 0);
+    vW = vdupq_lane_f32(VH, 1);
+    vX = vmulq_f32(vX,M2.r[0]);
+    vY = vmulq_f32(vY,M2.r[1]);
+    vZ = vmlaq_f32(vX,vZ,M2.r[2]);
+    vW = vmlaq_f32(vY,vW,M2.r[3]);
+    __n128 r1 = vaddq_f32( vZ, vW );
+    VL = vget_low_f32( M1.r[2] );
+    VH = vget_high_f32( M1.r[2] );
+    vX = vdupq_lane_f32(VL, 0);
+    vY = vdupq_lane_f32(VL, 1);
+    vZ = vdupq_lane_f32(VH, 0);
+    vW = vdupq_lane_f32(VH, 1);
+    vX = vmulq_f32(vX,M2.r[0]);
+    vY = vmulq_f32(vY,M2.r[1]);
+    vZ = vmlaq_f32(vX,vZ,M2.r[2]);
+    vW = vmlaq_f32(vY,vW,M2.r[3]);
+    __n128 r2 = vaddq_f32( vZ, vW );
+    VL = vget_low_f32( M1.r[3] );
+    VH = vget_high_f32( M1.r[3] );
+    vX = vdupq_lane_f32(VL, 0);
+    vY = vdupq_lane_f32(VL, 1);
+    vZ = vdupq_lane_f32(VH, 0);
+    vW = vdupq_lane_f32(VH, 1);
+    vX = vmulq_f32(vX,M2.r[0]);
+    vY = vmulq_f32(vY,M2.r[1]);
+    vZ = vmlaq_f32(vX,vZ,M2.r[2]);
+    vW = vmlaq_f32(vY,vW,M2.r[3]);
+    __n128 r3 = vaddq_f32( vZ, vW );
+
+    // ...then transpose the result with the same zip network as
+    // XMMatrixTranspose's NEON path.
+    float32x4x2_t P0 = vzipq_f32( r0, r2 );
+    float32x4x2_t P1 = vzipq_f32( r1, r3 );
+
+    float32x4x2_t T0 = vzipq_f32( P0.val[0], P1.val[0] );
+    float32x4x2_t T1 = vzipq_f32( P0.val[1], P1.val[1] );
+
+    XMMATRIX mResult;
+    mResult.r[0] = T0.val[0];
+    mResult.r[1] = T0.val[1];
+    mResult.r[2] = T1.val[0];
+    mResult.r[3] = T1.val[1];
+    return mResult;
+#elif defined(_XM_SSE_INTRINSICS_)
+    // Same per-row product as XMMatrixMultiply's SSE path...
+    // Use vW to hold the original row
+    XMVECTOR vW = M1.r[0];
+    // Splat the component X,Y,Z then W
+    XMVECTOR vX = XM_PERMUTE_PS(vW,_MM_SHUFFLE(0,0,0,0));
+    XMVECTOR vY = XM_PERMUTE_PS(vW,_MM_SHUFFLE(1,1,1,1));
+    XMVECTOR vZ = XM_PERMUTE_PS(vW,_MM_SHUFFLE(2,2,2,2));
+    vW = XM_PERMUTE_PS(vW,_MM_SHUFFLE(3,3,3,3));
+    // Perform the operation on the first row
+    vX = _mm_mul_ps(vX,M2.r[0]);
+    vY = _mm_mul_ps(vY,M2.r[1]);
+    vZ = _mm_mul_ps(vZ,M2.r[2]);
+    vW = _mm_mul_ps(vW,M2.r[3]);
+    // Perform a binary add to reduce cumulative errors
+    vX = _mm_add_ps(vX,vZ);
+    vY = _mm_add_ps(vY,vW);
+    vX = _mm_add_ps(vX,vY);
+    __m128 r0 = vX;
+    // Repeat for the other 3 rows
+    vW = M1.r[1];
+    vX = XM_PERMUTE_PS(vW,_MM_SHUFFLE(0,0,0,0));
+    vY = XM_PERMUTE_PS(vW,_MM_SHUFFLE(1,1,1,1));
+    vZ = XM_PERMUTE_PS(vW,_MM_SHUFFLE(2,2,2,2));
+    vW = XM_PERMUTE_PS(vW,_MM_SHUFFLE(3,3,3,3));
+    vX = _mm_mul_ps(vX,M2.r[0]);
+    vY = _mm_mul_ps(vY,M2.r[1]);
+    vZ = _mm_mul_ps(vZ,M2.r[2]);
+    vW = _mm_mul_ps(vW,M2.r[3]);
+    vX = _mm_add_ps(vX,vZ);
+    vY = _mm_add_ps(vY,vW);
+    vX = _mm_add_ps(vX,vY);
+    __m128 r1 = vX;
+    vW = M1.r[2];
+    vX = XM_PERMUTE_PS(vW,_MM_SHUFFLE(0,0,0,0));
+    vY = XM_PERMUTE_PS(vW,_MM_SHUFFLE(1,1,1,1));
+    vZ = XM_PERMUTE_PS(vW,_MM_SHUFFLE(2,2,2,2));
+    vW = XM_PERMUTE_PS(vW,_MM_SHUFFLE(3,3,3,3));
+    vX = _mm_mul_ps(vX,M2.r[0]);
+    vY = _mm_mul_ps(vY,M2.r[1]);
+    vZ = _mm_mul_ps(vZ,M2.r[2]);
+    vW = _mm_mul_ps(vW,M2.r[3]);
+    vX = _mm_add_ps(vX,vZ);
+    vY = _mm_add_ps(vY,vW);
+    vX = _mm_add_ps(vX,vY);
+    __m128 r2 = vX;
+    vW = M1.r[3];
+    vX = XM_PERMUTE_PS(vW,_MM_SHUFFLE(0,0,0,0));
+    vY = XM_PERMUTE_PS(vW,_MM_SHUFFLE(1,1,1,1));
+    vZ = XM_PERMUTE_PS(vW,_MM_SHUFFLE(2,2,2,2));
+    vW = XM_PERMUTE_PS(vW,_MM_SHUFFLE(3,3,3,3));
+    vX = _mm_mul_ps(vX,M2.r[0]);
+    vY = _mm_mul_ps(vY,M2.r[1]);
+    vZ = _mm_mul_ps(vZ,M2.r[2]);
+    vW = _mm_mul_ps(vW,M2.r[3]);
+    vX = _mm_add_ps(vX,vZ);
+    vY = _mm_add_ps(vY,vW);
+    vX = _mm_add_ps(vX,vY);
+    __m128 r3 = vX;
+
+    // ...then transpose with the same shuffle network as XMMatrixTranspose.
+    // x.x,x.y,y.x,y.y
+    XMVECTOR vTemp1 = _mm_shuffle_ps(r0,r1,_MM_SHUFFLE(1,0,1,0));
+    // x.z,x.w,y.z,y.w
+    XMVECTOR vTemp3 = _mm_shuffle_ps(r0,r1,_MM_SHUFFLE(3,2,3,2));
+    // z.x,z.y,w.x,w.y
+    XMVECTOR vTemp2 = _mm_shuffle_ps(r2,r3,_MM_SHUFFLE(1,0,1,0));
+    // z.z,z.w,w.z,w.w
+    XMVECTOR vTemp4 = _mm_shuffle_ps(r2,r3,_MM_SHUFFLE(3,2,3,2));
+
+    XMMATRIX mResult;
+    // x.x,y.x,z.x,w.x
+    mResult.r[0] = _mm_shuffle_ps(vTemp1, vTemp2,_MM_SHUFFLE(2,0,2,0));
+    // x.y,y.y,z.y,w.y
+    mResult.r[1] = _mm_shuffle_ps(vTemp1, vTemp2,_MM_SHUFFLE(3,1,3,1));
+    // x.z,y.z,z.z,w.z
+    mResult.r[2] = _mm_shuffle_ps(vTemp3, vTemp4,_MM_SHUFFLE(2,0,2,0));
+    // x.w,y.w,z.w,w.w
+    mResult.r[3] = _mm_shuffle_ps(vTemp3, vTemp4,_MM_SHUFFLE(3,1,3,1));
+    return mResult;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Return the transpose of matrix M (rows become columns). All paths use a
+// two-stage interleave network rather than per-element moves.
+inline XMMATRIX XMMatrixTranspose
+(
+    CXMMATRIX M
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+
+    // Original matrix:
+    //
+    //     m00m01m02m03
+    //     m10m11m12m13
+    //     m20m21m22m23
+    //     m30m31m32m33
+
+    XMMATRIX P;
+    P.r[0] = XMVectorMergeXY(M.r[0], M.r[2]); // m00m20m01m21
+    P.r[1] = XMVectorMergeXY(M.r[1], M.r[3]); // m10m30m11m31
+    P.r[2] = XMVectorMergeZW(M.r[0], M.r[2]); // m02m22m03m23
+    P.r[3] = XMVectorMergeZW(M.r[1], M.r[3]); // m12m32m13m33
+
+    XMMATRIX MT;
+    MT.r[0] = XMVectorMergeXY(P.r[0], P.r[1]); // m00m10m20m30
+    MT.r[1] = XMVectorMergeZW(P.r[0], P.r[1]); // m01m11m21m31
+    MT.r[2] = XMVectorMergeXY(P.r[2], P.r[3]); // m02m12m22m32
+    MT.r[3] = XMVectorMergeZW(P.r[2], P.r[3]); // m03m13m23m33
+    return MT;
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+    // Two rounds of zips: interleave rows 0/2 and 1/3, then interleave the
+    // intermediate pairs to produce the four transposed rows.
+    float32x4x2_t P0 = vzipq_f32( M.r[0], M.r[2] );
+    float32x4x2_t P1 = vzipq_f32( M.r[1], M.r[3] );
+
+    float32x4x2_t T0 = vzipq_f32( P0.val[0], P1.val[0] );
+    float32x4x2_t T1 = vzipq_f32( P0.val[1], P1.val[1] );
+
+    XMMATRIX mResult;
+    mResult.r[0] = T0.val[0];
+    mResult.r[1] = T0.val[1];
+    mResult.r[2] = T1.val[0];
+    mResult.r[3] = T1.val[1];
+    return mResult;
+#elif defined(_XM_SSE_INTRINSICS_)
+    // x.x,x.y,y.x,y.y
+    XMVECTOR vTemp1 = _mm_shuffle_ps(M.r[0],M.r[1],_MM_SHUFFLE(1,0,1,0));
+    // x.z,x.w,y.z,y.w
+    XMVECTOR vTemp3 = _mm_shuffle_ps(M.r[0],M.r[1],_MM_SHUFFLE(3,2,3,2));
+    // z.x,z.y,w.x,w.y
+    XMVECTOR vTemp2 = _mm_shuffle_ps(M.r[2],M.r[3],_MM_SHUFFLE(1,0,1,0));
+    // z.z,z.w,w.z,w.w
+    XMVECTOR vTemp4 = _mm_shuffle_ps(M.r[2],M.r[3],_MM_SHUFFLE(3,2,3,2));
+    XMMATRIX mResult;
+
+    // x.x,y.x,z.x,w.x
+    mResult.r[0] = _mm_shuffle_ps(vTemp1, vTemp2,_MM_SHUFFLE(2,0,2,0));
+    // x.y,y.y,z.y,w.y
+    mResult.r[1] = _mm_shuffle_ps(vTemp1, vTemp2,_MM_SHUFFLE(3,1,3,1));
+    // x.z,y.z,z.z,w.z
+    mResult.r[2] = _mm_shuffle_ps(vTemp3, vTemp4,_MM_SHUFFLE(2,0,2,0));
+    // x.w,y.w,z.w,w.w
+    mResult.r[3] = _mm_shuffle_ps(vTemp3, vTemp4,_MM_SHUFFLE(3,1,3,1));
+    return mResult;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+// Return the inverse and the determinant of a 4x4 matrix
_Use_decl_annotations_
// Computes the inverse of M via the adjugate (cofactor) matrix divided by the
// determinant. If pDeterminant is non-NULL, the determinant is also written
// through it (replicated into all four lanes).
// NOTE(review): there is no singular-matrix guard -- a zero determinant
// produces INF/NaN in the returned matrix.
inline XMMATRIX XMMatrixInverse
(
    XMVECTOR* pDeterminant,
    CXMMATRIX M
)
{
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)

    // Work on the transpose so cofactors can be gathered row-wise.
    XMMATRIX MT = XMMatrixTranspose(M);

    // D0/D1/D2 accumulate the 2x2 sub-determinants formed from pairs of rows.
    XMVECTOR V0[4], V1[4];
    V0[0] = XMVectorSwizzle<XM_SWIZZLE_X, XM_SWIZZLE_X, XM_SWIZZLE_Y, XM_SWIZZLE_Y>(MT.r[2]);
    V1[0] = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_W, XM_SWIZZLE_Z, XM_SWIZZLE_W>(MT.r[3]);
    V0[1] = XMVectorSwizzle<XM_SWIZZLE_X, XM_SWIZZLE_X, XM_SWIZZLE_Y, XM_SWIZZLE_Y>(MT.r[0]);
    V1[1] = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_W, XM_SWIZZLE_Z, XM_SWIZZLE_W>(MT.r[1]);
    V0[2] = XMVectorPermute<XM_PERMUTE_0X, XM_PERMUTE_0Z, XM_PERMUTE_1X, XM_PERMUTE_1Z>(MT.r[2], MT.r[0]);
    V1[2] = XMVectorPermute<XM_PERMUTE_0Y, XM_PERMUTE_0W, XM_PERMUTE_1Y, XM_PERMUTE_1W>(MT.r[3], MT.r[1]);

    XMVECTOR D0 = XMVectorMultiply(V0[0], V1[0]);
    XMVECTOR D1 = XMVectorMultiply(V0[1], V1[1]);
    XMVECTOR D2 = XMVectorMultiply(V0[2], V1[2]);

    V0[0] = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_W, XM_SWIZZLE_Z, XM_SWIZZLE_W>(MT.r[2]);
    V1[0] = XMVectorSwizzle<XM_SWIZZLE_X, XM_SWIZZLE_X, XM_SWIZZLE_Y, XM_SWIZZLE_Y>(MT.r[3]);
    V0[1] = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_W, XM_SWIZZLE_Z, XM_SWIZZLE_W>(MT.r[0]);
    V1[1] = XMVectorSwizzle<XM_SWIZZLE_X, XM_SWIZZLE_X, XM_SWIZZLE_Y, XM_SWIZZLE_Y>(MT.r[1]);
    V0[2] = XMVectorPermute<XM_PERMUTE_0Y, XM_PERMUTE_0W, XM_PERMUTE_1Y, XM_PERMUTE_1W>(MT.r[2], MT.r[0]);
    V1[2] = XMVectorPermute<XM_PERMUTE_0X, XM_PERMUTE_0Z, XM_PERMUTE_1X, XM_PERMUTE_1Z>(MT.r[3], MT.r[1]);

    // Complete the sub-determinants: D = a*d - b*c per lane.
    D0 = XMVectorNegativeMultiplySubtract(V0[0], V1[0], D0);
    D1 = XMVectorNegativeMultiplySubtract(V0[1], V1[1], D1);
    D2 = XMVectorNegativeMultiplySubtract(V0[2], V1[2], D2);

    // Build the eight cofactor accumulators (C0..C7) from the sub-determinants.
    V0[0] = XMVectorSwizzle<XM_SWIZZLE_Y, XM_SWIZZLE_Z, XM_SWIZZLE_X, XM_SWIZZLE_Y>(MT.r[1]);
    V1[0] = XMVectorPermute<XM_PERMUTE_1Y, XM_PERMUTE_0Y, XM_PERMUTE_0W, XM_PERMUTE_0X>(D0, D2);
    V0[1] = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_X, XM_SWIZZLE_Y, XM_SWIZZLE_X>(MT.r[0]);
    V1[1] = XMVectorPermute<XM_PERMUTE_0W, XM_PERMUTE_1Y, XM_PERMUTE_0Y, XM_PERMUTE_0Z>(D0, D2);
    V0[2] = XMVectorSwizzle<XM_SWIZZLE_Y, XM_SWIZZLE_Z, XM_SWIZZLE_X, XM_SWIZZLE_Y>(MT.r[3]);
    V1[2] = XMVectorPermute<XM_PERMUTE_1W, XM_PERMUTE_0Y, XM_PERMUTE_0W, XM_PERMUTE_0X>(D1, D2);
    V0[3] = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_X, XM_SWIZZLE_Y, XM_SWIZZLE_X>(MT.r[2]);
    V1[3] = XMVectorPermute<XM_PERMUTE_0W, XM_PERMUTE_1W, XM_PERMUTE_0Y, XM_PERMUTE_0Z>(D1, D2);

    XMVECTOR C0 = XMVectorMultiply(V0[0], V1[0]);
    XMVECTOR C2 = XMVectorMultiply(V0[1], V1[1]);
    XMVECTOR C4 = XMVectorMultiply(V0[2], V1[2]);
    XMVECTOR C6 = XMVectorMultiply(V0[3], V1[3]);

    V0[0] = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_W, XM_SWIZZLE_Y, XM_SWIZZLE_Z>(MT.r[1]);
    V1[0] = XMVectorPermute<XM_PERMUTE_0W, XM_PERMUTE_0X, XM_PERMUTE_0Y, XM_PERMUTE_1X>(D0, D2);
    V0[1] = XMVectorSwizzle<XM_SWIZZLE_W, XM_SWIZZLE_Z, XM_SWIZZLE_W, XM_SWIZZLE_Y>(MT.r[0]);
    V1[1] = XMVectorPermute<XM_PERMUTE_0Z, XM_PERMUTE_0Y, XM_PERMUTE_1X, XM_PERMUTE_0X>(D0, D2);
    V0[2] = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_W, XM_SWIZZLE_Y, XM_SWIZZLE_Z>(MT.r[3]);
    V1[2] = XMVectorPermute<XM_PERMUTE_0W, XM_PERMUTE_0X, XM_PERMUTE_0Y, XM_PERMUTE_1Z>(D1, D2);
    V0[3] = XMVectorSwizzle<XM_SWIZZLE_W, XM_SWIZZLE_Z, XM_SWIZZLE_W, XM_SWIZZLE_Y>(MT.r[2]);
    V1[3] = XMVectorPermute<XM_PERMUTE_0Z, XM_PERMUTE_0Y, XM_PERMUTE_1Z, XM_PERMUTE_0X>(D1, D2);

    C0 = XMVectorNegativeMultiplySubtract(V0[0], V1[0], C0);
    C2 = XMVectorNegativeMultiplySubtract(V0[1], V1[1], C2);
    C4 = XMVectorNegativeMultiplySubtract(V0[2], V1[2], C4);
    C6 = XMVectorNegativeMultiplySubtract(V0[3], V1[3], C6);

    V0[0] = XMVectorSwizzle<XM_SWIZZLE_W, XM_SWIZZLE_X, XM_SWIZZLE_W, XM_SWIZZLE_X>(MT.r[1]);
    V1[0] = XMVectorPermute<XM_PERMUTE_0Z, XM_PERMUTE_1Y, XM_PERMUTE_1X, XM_PERMUTE_0Z>(D0, D2);
    V0[1] = XMVectorSwizzle<XM_SWIZZLE_Y, XM_SWIZZLE_W, XM_SWIZZLE_X, XM_SWIZZLE_Z>(MT.r[0]);
    V1[1] = XMVectorPermute<XM_PERMUTE_1Y, XM_PERMUTE_0X, XM_PERMUTE_0W, XM_PERMUTE_1X>(D0, D2);
    V0[2] = XMVectorSwizzle<XM_SWIZZLE_W, XM_SWIZZLE_X, XM_SWIZZLE_W, XM_SWIZZLE_X>(MT.r[3]);
    V1[2] = XMVectorPermute<XM_PERMUTE_0Z, XM_PERMUTE_1W, XM_PERMUTE_1Z, XM_PERMUTE_0Z>(D1, D2);
    V0[3] = XMVectorSwizzle<XM_SWIZZLE_Y, XM_SWIZZLE_W, XM_SWIZZLE_X, XM_SWIZZLE_Z>(MT.r[2]);
    V1[3] = XMVectorPermute<XM_PERMUTE_1W, XM_PERMUTE_0X, XM_PERMUTE_0W, XM_PERMUTE_1Z>(D1, D2);

    // Each Cn/Cn+1 pair holds the same products added vs. subtracted; the
    // select below interleaves them to realize the cofactor sign pattern.
    XMVECTOR C1 = XMVectorNegativeMultiplySubtract(V0[0], V1[0], C0);
    C0 = XMVectorMultiplyAdd(V0[0], V1[0], C0);
    XMVECTOR C3 = XMVectorMultiplyAdd(V0[1], V1[1], C2);
    C2 = XMVectorNegativeMultiplySubtract(V0[1], V1[1], C2);
    XMVECTOR C5 = XMVectorNegativeMultiplySubtract(V0[2], V1[2], C4);
    C4 = XMVectorMultiplyAdd(V0[2], V1[2], C4);
    XMVECTOR C7 = XMVectorMultiplyAdd(V0[3], V1[3], C6);
    C6 = XMVectorNegativeMultiplySubtract(V0[3], V1[3], C6);

    // R is the adjugate (transposed cofactor matrix).
    XMMATRIX R;
    R.r[0] = XMVectorSelect(C0, C1, g_XMSelect0101.v);
    R.r[1] = XMVectorSelect(C2, C3, g_XMSelect0101.v);
    R.r[2] = XMVectorSelect(C4, C5, g_XMSelect0101.v);
    R.r[3] = XMVectorSelect(C6, C7, g_XMSelect0101.v);

    // det(M) = first adjugate row dotted with the first transposed row.
    XMVECTOR Determinant = XMVector4Dot(R.r[0], MT.r[0]);

    if (pDeterminant != NULL)
        *pDeterminant = Determinant;

    XMVECTOR Reciprocal = XMVectorReciprocal(Determinant);

    // Inverse = adjugate / determinant.
    XMMATRIX Result;
    Result.r[0] = XMVectorMultiply(R.r[0], Reciprocal);
    Result.r[1] = XMVectorMultiply(R.r[1], Reciprocal);
    Result.r[2] = XMVectorMultiply(R.r[2], Reciprocal);
    Result.r[3] = XMVectorMultiply(R.r[3], Reciprocal);
    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Same adjugate/determinant algorithm as above, expressed with raw SSE
    // shuffles; the shuffle masks are order-critical.
    XMMATRIX MT = XMMatrixTranspose(M);
    XMVECTOR V00 = XM_PERMUTE_PS(MT.r[2],_MM_SHUFFLE(1,1,0,0));
    XMVECTOR V10 = XM_PERMUTE_PS(MT.r[3],_MM_SHUFFLE(3,2,3,2));
    XMVECTOR V01 = XM_PERMUTE_PS(MT.r[0],_MM_SHUFFLE(1,1,0,0));
    XMVECTOR V11 = XM_PERMUTE_PS(MT.r[1],_MM_SHUFFLE(3,2,3,2));
    XMVECTOR V02 = _mm_shuffle_ps(MT.r[2], MT.r[0],_MM_SHUFFLE(2,0,2,0));
    XMVECTOR V12 = _mm_shuffle_ps(MT.r[3], MT.r[1],_MM_SHUFFLE(3,1,3,1));

    // 2x2 sub-determinants.
    XMVECTOR D0 = _mm_mul_ps(V00,V10);
    XMVECTOR D1 = _mm_mul_ps(V01,V11);
    XMVECTOR D2 = _mm_mul_ps(V02,V12);

    V00 = XM_PERMUTE_PS(MT.r[2],_MM_SHUFFLE(3,2,3,2));
    V10 = XM_PERMUTE_PS(MT.r[3],_MM_SHUFFLE(1,1,0,0));
    V01 = XM_PERMUTE_PS(MT.r[0],_MM_SHUFFLE(3,2,3,2));
    V11 = XM_PERMUTE_PS(MT.r[1],_MM_SHUFFLE(1,1,0,0));
    V02 = _mm_shuffle_ps(MT.r[2],MT.r[0],_MM_SHUFFLE(3,1,3,1));
    V12 = _mm_shuffle_ps(MT.r[3],MT.r[1],_MM_SHUFFLE(2,0,2,0));

    V00 = _mm_mul_ps(V00,V10);
    V01 = _mm_mul_ps(V01,V11);
    V02 = _mm_mul_ps(V02,V12);
    D0 = _mm_sub_ps(D0,V00);
    D1 = _mm_sub_ps(D1,V01);
    D2 = _mm_sub_ps(D2,V02);
    // V11 = D0Y,D0W,D2Y,D2Y
    V11 = _mm_shuffle_ps(D0,D2,_MM_SHUFFLE(1,1,3,1));
    V00 = XM_PERMUTE_PS(MT.r[1], _MM_SHUFFLE(1,0,2,1));
    V10 = _mm_shuffle_ps(V11,D0,_MM_SHUFFLE(0,3,0,2));
    V01 = XM_PERMUTE_PS(MT.r[0], _MM_SHUFFLE(0,1,0,2));
    V11 = _mm_shuffle_ps(V11,D0,_MM_SHUFFLE(2,1,2,1));
    // V13 = D1Y,D1W,D2W,D2W
    XMVECTOR V13 = _mm_shuffle_ps(D1,D2,_MM_SHUFFLE(3,3,3,1));
    V02 = XM_PERMUTE_PS(MT.r[3], _MM_SHUFFLE(1,0,2,1));
    V12 = _mm_shuffle_ps(V13,D1,_MM_SHUFFLE(0,3,0,2));
    XMVECTOR V03 = XM_PERMUTE_PS(MT.r[2],_MM_SHUFFLE(0,1,0,2));
    V13 = _mm_shuffle_ps(V13,D1,_MM_SHUFFLE(2,1,2,1));

    // Cofactor accumulators.
    XMVECTOR C0 = _mm_mul_ps(V00,V10);
    XMVECTOR C2 = _mm_mul_ps(V01,V11);
    XMVECTOR C4 = _mm_mul_ps(V02,V12);
    XMVECTOR C6 = _mm_mul_ps(V03,V13);

    // V11 = D0X,D0Y,D2X,D2X
    V11 = _mm_shuffle_ps(D0,D2,_MM_SHUFFLE(0,0,1,0));
    V00 = XM_PERMUTE_PS(MT.r[1], _MM_SHUFFLE(2,1,3,2));
    V10 = _mm_shuffle_ps(D0,V11,_MM_SHUFFLE(2,1,0,3));
    V01 = XM_PERMUTE_PS(MT.r[0], _MM_SHUFFLE(1,3,2,3));
    V11 = _mm_shuffle_ps(D0,V11,_MM_SHUFFLE(0,2,1,2));
    // V13 = D1X,D1Y,D2Z,D2Z
    V13 = _mm_shuffle_ps(D1,D2,_MM_SHUFFLE(2,2,1,0));
    V02 = XM_PERMUTE_PS(MT.r[3], _MM_SHUFFLE(2,1,3,2));
    V12 = _mm_shuffle_ps(D1,V13,_MM_SHUFFLE(2,1,0,3));
    V03 = XM_PERMUTE_PS(MT.r[2],_MM_SHUFFLE(1,3,2,3));
    V13 = _mm_shuffle_ps(D1,V13,_MM_SHUFFLE(0,2,1,2));

    V00 = _mm_mul_ps(V00,V10);
    V01 = _mm_mul_ps(V01,V11);
    V02 = _mm_mul_ps(V02,V12);
    V03 = _mm_mul_ps(V03,V13);
    C0 = _mm_sub_ps(C0,V00);
    C2 = _mm_sub_ps(C2,V01);
    C4 = _mm_sub_ps(C4,V02);
    C6 = _mm_sub_ps(C6,V03);

    V00 = XM_PERMUTE_PS(MT.r[1],_MM_SHUFFLE(0,3,0,3));
    // V10 = D0Z,D0Z,D2X,D2Y
    V10 = _mm_shuffle_ps(D0,D2,_MM_SHUFFLE(1,0,2,2));
    V10 = XM_PERMUTE_PS(V10,_MM_SHUFFLE(0,2,3,0));
    V01 = XM_PERMUTE_PS(MT.r[0],_MM_SHUFFLE(2,0,3,1));
    // V11 = D0X,D0W,D2X,D2Y
    V11 = _mm_shuffle_ps(D0,D2,_MM_SHUFFLE(1,0,3,0));
    V11 = XM_PERMUTE_PS(V11,_MM_SHUFFLE(2,1,0,3));
    V02 = XM_PERMUTE_PS(MT.r[3],_MM_SHUFFLE(0,3,0,3));
    // V12 = D1Z,D1Z,D2Z,D2W
    V12 = _mm_shuffle_ps(D1,D2,_MM_SHUFFLE(3,2,2,2));
    V12 = XM_PERMUTE_PS(V12,_MM_SHUFFLE(0,2,3,0));
    V03 = XM_PERMUTE_PS(MT.r[2],_MM_SHUFFLE(2,0,3,1));
    // V13 = D1X,D1W,D2Z,D2W
    V13 = _mm_shuffle_ps(D1,D2,_MM_SHUFFLE(3,2,3,0));
    V13 = XM_PERMUTE_PS(V13,_MM_SHUFFLE(2,1,0,3));

    V00 = _mm_mul_ps(V00,V10);
    V01 = _mm_mul_ps(V01,V11);
    V02 = _mm_mul_ps(V02,V12);
    V03 = _mm_mul_ps(V03,V13);
    // Paired add/sub realizes the alternating cofactor signs.
    XMVECTOR C1 = _mm_sub_ps(C0,V00);
    C0 = _mm_add_ps(C0,V00);
    XMVECTOR C3 = _mm_add_ps(C2,V01);
    C2 = _mm_sub_ps(C2,V01);
    XMVECTOR C5 = _mm_sub_ps(C4,V02);
    C4 = _mm_add_ps(C4,V02);
    XMVECTOR C7 = _mm_add_ps(C6,V03);
    C6 = _mm_sub_ps(C6,V03);

    // Interleave the even/odd lanes of each pair into the adjugate rows.
    C0 = _mm_shuffle_ps(C0,C1,_MM_SHUFFLE(3,1,2,0));
    C2 = _mm_shuffle_ps(C2,C3,_MM_SHUFFLE(3,1,2,0));
    C4 = _mm_shuffle_ps(C4,C5,_MM_SHUFFLE(3,1,2,0));
    C6 = _mm_shuffle_ps(C6,C7,_MM_SHUFFLE(3,1,2,0));
    C0 = XM_PERMUTE_PS(C0,_MM_SHUFFLE(3,1,2,0));
    C2 = XM_PERMUTE_PS(C2,_MM_SHUFFLE(3,1,2,0));
    C4 = XM_PERMUTE_PS(C4,_MM_SHUFFLE(3,1,2,0));
    C6 = XM_PERMUTE_PS(C6,_MM_SHUFFLE(3,1,2,0));
    // Get the determinant
    XMVECTOR vTemp = XMVector4Dot(C0,MT.r[0]);
    if (pDeterminant != NULL)
        *pDeterminant = vTemp;
    // Full-precision divide (not a reciprocal estimate) of the adjugate.
    vTemp = _mm_div_ps(g_XMOne,vTemp);
    XMMATRIX mResult;
    mResult.r[0] = _mm_mul_ps(C0,vTemp);
    mResult.r[1] = _mm_mul_ps(C2,vTemp);
    mResult.r[2] = _mm_mul_ps(C4,vTemp);
    mResult.r[3] = _mm_mul_ps(C6,vTemp);
    return mResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
+//------------------------------------------------------------------------------
+
// Computes the determinant of the 4x4 matrix M and returns it replicated into
// all four lanes of an XMVECTOR.
inline XMVECTOR XMMatrixDeterminant
(
    CXMMATRIX M
)
{
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)

    // Cofactor expansion along row 0: P0/P1/P2 hold 2x2 sub-determinants
    // built from rows 2 and 3; they are combined with swizzles of row 1 and
    // finally dotted against the sign-alternated row 0.
    static const XMVECTORF32 Sign = {1.0f, -1.0f, 1.0f, -1.0f};

    XMVECTOR V0 = XMVectorSwizzle<XM_SWIZZLE_Y, XM_SWIZZLE_X, XM_SWIZZLE_X, XM_SWIZZLE_X>(M.r[2]);
    XMVECTOR V1 = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_Z, XM_SWIZZLE_Y, XM_SWIZZLE_Y>(M.r[3]);
    XMVECTOR V2 = XMVectorSwizzle<XM_SWIZZLE_Y, XM_SWIZZLE_X, XM_SWIZZLE_X, XM_SWIZZLE_X>(M.r[2]);
    XMVECTOR V3 = XMVectorSwizzle<XM_SWIZZLE_W, XM_SWIZZLE_W, XM_SWIZZLE_W, XM_SWIZZLE_Z>(M.r[3]);
    XMVECTOR V4 = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_Z, XM_SWIZZLE_Y, XM_SWIZZLE_Y>(M.r[2]);
    XMVECTOR V5 = XMVectorSwizzle<XM_SWIZZLE_W, XM_SWIZZLE_W, XM_SWIZZLE_W, XM_SWIZZLE_Z>(M.r[3]);

    XMVECTOR P0 = XMVectorMultiply(V0, V1);
    XMVECTOR P1 = XMVectorMultiply(V2, V3);
    XMVECTOR P2 = XMVectorMultiply(V4, V5);

    V0 = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_Z, XM_SWIZZLE_Y, XM_SWIZZLE_Y>(M.r[2]);
    V1 = XMVectorSwizzle<XM_SWIZZLE_Y, XM_SWIZZLE_X, XM_SWIZZLE_X, XM_SWIZZLE_X>(M.r[3]);
    V2 = XMVectorSwizzle<XM_SWIZZLE_W, XM_SWIZZLE_W, XM_SWIZZLE_W, XM_SWIZZLE_Z>(M.r[2]);
    V3 = XMVectorSwizzle<XM_SWIZZLE_Y, XM_SWIZZLE_X, XM_SWIZZLE_X, XM_SWIZZLE_X>(M.r[3]);
    V4 = XMVectorSwizzle<XM_SWIZZLE_W, XM_SWIZZLE_W, XM_SWIZZLE_W, XM_SWIZZLE_Z>(M.r[2]);
    V5 = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_Z, XM_SWIZZLE_Y, XM_SWIZZLE_Y>(M.r[3]);

    // P = a*d - b*c for each 2x2 block of rows 2 and 3.
    P0 = XMVectorNegativeMultiplySubtract(V0, V1, P0);
    P1 = XMVectorNegativeMultiplySubtract(V2, V3, P1);
    P2 = XMVectorNegativeMultiplySubtract(V4, V5, P2);

    V0 = XMVectorSwizzle<XM_SWIZZLE_W, XM_SWIZZLE_W, XM_SWIZZLE_W, XM_SWIZZLE_Z>(M.r[1]);
    V1 = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_Z, XM_SWIZZLE_Y, XM_SWIZZLE_Y>(M.r[1]);
    V2 = XMVectorSwizzle<XM_SWIZZLE_Y, XM_SWIZZLE_X, XM_SWIZZLE_X, XM_SWIZZLE_X>(M.r[1]);

    // R holds the four 3x3 minors; S applies the (+,-,+,-) cofactor signs.
    XMVECTOR S = XMVectorMultiply(M.r[0], Sign.v);
    XMVECTOR R = XMVectorMultiply(V0, P0);
    R = XMVectorNegativeMultiplySubtract(V1, P1, R);
    R = XMVectorMultiplyAdd(V2, P2, R);

    return XMVector4Dot(S, R);

#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
// Orders three scalar values (x, y, z): after expansion, index (a) names the
// largest of the three, (b) the middle, and (c) the smallest (indices 0/1/2
// correspond to x/y/z). Used by XMMatrixDecompose to process the most
// numerically reliable (largest-scale) basis axis first. Kept as a macro so
// it can bind the size_t index variables by name; comments cannot be placed
// inside the body because of the line continuations.
#define XM3RANKDECOMPOSE(a, b, c, x, y, z) \
    if((x) < (y)) \
    { \
        if((y) < (z)) \
        { \
            (a) = 2; \
            (b) = 1; \
            (c) = 0; \
        } \
        else \
        { \
            (a) = 1; \
            \
            if((x) < (z)) \
            { \
                (b) = 2; \
                (c) = 0; \
            } \
            else \
            { \
                (b) = 0; \
                (c) = 2; \
            } \
        } \
    } \
    else \
    { \
        if((x) < (z)) \
        { \
            (a) = 2; \
            (b) = 0; \
            (c) = 1; \
        } \
        else \
        { \
            (a) = 0; \
            \
            if((y) < (z)) \
            { \
                (b) = 2; \
                (c) = 1; \
            } \
            else \
            { \
                (b) = 1; \
                (c) = 2; \
            } \
        } \
    }

// Tolerance used by XMMatrixDecompose both for detecting near-zero scale
// factors and for verifying the rotation part is orthonormal (determinant
// within epsilon of 1). Undefined again right after XMMatrixDecompose.
#define XM3_DECOMP_EPSILON 0.0001f
+
_Use_decl_annotations_
// Factors an affine transformation M into scale * rotation * translation.
// On success writes the per-axis scale to *outScale, the rotation (as a
// quaternion) to *outRotQuat, and the translation to *outTrans, and returns
// true. Returns false if M is not a scale-rotation-translation matrix
// (e.g. it contains shear or projection). All three output pointers must be
// non-NULL (asserted below).
inline bool XMMatrixDecompose
(
    XMVECTOR *outScale,
    XMVECTOR *outRotQuat,
    XMVECTOR *outTrans,
    CXMMATRIX M
)
{
    // Unit axes used to rebuild degenerate (near-zero scale) basis vectors.
    static const XMVECTOR *pvCanonicalBasis[3] = {
        &g_XMIdentityR0.v,
        &g_XMIdentityR1.v,
        &g_XMIdentityR2.v
    };

    assert( outScale != NULL );
    assert( outRotQuat != NULL );
    assert( outTrans != NULL );

    // Get the translation
    outTrans[0] = M.r[3];

    // ppvBasis aliases the rows of matTemp, so normalizing through
    // ppvBasis[i][0] below updates matTemp in place.
    XMVECTOR *ppvBasis[3];
    XMMATRIX matTemp;
    ppvBasis[0] = &matTemp.r[0];
    ppvBasis[1] = &matTemp.r[1];
    ppvBasis[2] = &matTemp.r[2];

    // Copy the upper 3x3 (rotation*scale) part; last row forced to identity.
    matTemp.r[0] = M.r[0];
    matTemp.r[1] = M.r[1];
    matTemp.r[2] = M.r[2];
    matTemp.r[3] = g_XMIdentityR3.v;

    // C-style cast: treat the output scale vector as four writable floats.
    float *pfScales = (float *)outScale;

    // Per-axis scale = length of each basis row; w lane zeroed.
    size_t a, b, c;
    XMVectorGetXPtr(&pfScales[0],XMVector3Length(ppvBasis[0][0]));
    XMVectorGetXPtr(&pfScales[1],XMVector3Length(ppvBasis[1][0]));
    XMVectorGetXPtr(&pfScales[2],XMVector3Length(ppvBasis[2][0]));
    pfScales[3] = 0.f;

    // a/b/c index the largest/middle/smallest scale respectively.
    XM3RANKDECOMPOSE(a, b, c, pfScales[0], pfScales[1], pfScales[2])

    // Rebuild any axis whose scale is too small to normalize reliably,
    // starting from the largest axis so the fallbacks stay orthogonal.
    if(pfScales[a] < XM3_DECOMP_EPSILON)
    {
        ppvBasis[a][0] = pvCanonicalBasis[a][0];
    }
    ppvBasis[a][0] = XMVector3Normalize(ppvBasis[a][0]);

    if(pfScales[b] < XM3_DECOMP_EPSILON)
    {
        // Replace the degenerate middle axis with a vector orthogonal to the
        // largest axis, chosen via the largest axis' smallest component.
        size_t aa, bb, cc;
        float fAbsX, fAbsY, fAbsZ;

        fAbsX = fabsf(XMVectorGetX(ppvBasis[a][0]));
        fAbsY = fabsf(XMVectorGetY(ppvBasis[a][0]));
        fAbsZ = fabsf(XMVectorGetZ(ppvBasis[a][0]));

        XM3RANKDECOMPOSE(aa, bb, cc, fAbsX, fAbsY, fAbsZ)

        ppvBasis[b][0] = XMVector3Cross(ppvBasis[a][0],pvCanonicalBasis[cc][0]);
    }

    ppvBasis[b][0] = XMVector3Normalize(ppvBasis[b][0]);

    if(pfScales[c] < XM3_DECOMP_EPSILON)
    {
        // The last axis must complete a right-handed orthogonal set.
        ppvBasis[c][0] = XMVector3Cross(ppvBasis[a][0],ppvBasis[b][0]);
    }

    ppvBasis[c][0] = XMVector3Normalize(ppvBasis[c][0]);

    float fDet = XMVectorGetX(XMMatrixDeterminant(matTemp));

    // use Cramer's rule to check for handedness of coordinate system
    if(fDet < 0.0f)
    {
        // switch coordinate system by negating the scale and inverting the basis vector on the x-axis
        pfScales[a] = -pfScales[a];
        ppvBasis[a][0] = XMVectorNegate(ppvBasis[a][0]);

        fDet = -fDet;
    }

    // After normalization a pure rotation has determinant 1; reject anything
    // that deviates by more than the epsilon (shear/projection present).
    fDet -= 1.0f;
    fDet *= fDet;

    if(XM3_DECOMP_EPSILON < fDet)
    {
        // Non-SRT matrix encountered
        return false;
    }

    // generate the quaternion from the matrix
    outRotQuat[0] = XMQuaternionRotationMatrix(matTemp);
    return true;
}
+
+#undef XM3_DECOMP_EPSILON
+#undef XM3RANKDECOMPOSE
+
+//------------------------------------------------------------------------------
+// Transformation operations
+//------------------------------------------------------------------------------
+
+//------------------------------------------------------------------------------
+
+inline XMMATRIX XMMatrixIdentity()
+{
+#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
+
+ XMMATRIX M;
+ M.r[0] = g_XMIdentityR0.v;
+ M.r[1] = g_XMIdentityR1.v;
+ M.r[2] = g_XMIdentityR2.v;
+ M.r[3] = g_XMIdentityR3.v;
+ return M;
+
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+inline XMMATRIX XMMatrixSet
+(
+ float m00, float m01, float m02, float m03,
+ float m10, float m11, float m12, float m13,
+ float m20, float m21, float m22, float m23,
+ float m30, float m31, float m32, float m33
+)
+{
+ XMMATRIX M;
+#if defined(_XM_NO_INTRINSICS_)
+ M.m[0][0] = m00; M.m[0][1] = m01; M.m[0][2] = m02; M.m[0][3] = m03;
+ M.m[1][0] = m10; M.m[1][1] = m11; M.m[1][2] = m12; M.m[1][3] = m13;
+ M.m[2][0] = m20; M.m[2][1] = m21; M.m[2][2] = m22; M.m[2][3] = m23;
+ M.m[3][0] = m30; M.m[3][1] = m31; M.m[3][2] = m32; M.m[3][3] = m33;
+#else
+ M.r[0] = XMVectorSet(m00, m01, m02, m03);
+ M.r[1] = XMVectorSet(m10, m11, m12, m13);
+ M.r[2] = XMVectorSet(m20, m21, m22, m23);
+ M.r[3] = XMVectorSet(m30, m31, m32, m33);
+#endif
+ return M;
+}
+
+//------------------------------------------------------------------------------
+
+inline XMMATRIX XMMatrixTranslation
+(
+ float OffsetX,
+ float OffsetY,
+ float OffsetZ
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+
+ XMMATRIX M;
+ M.m[0][0] = 1.0f;
+ M.m[0][1] = 0.0f;
+ M.m[0][2] = 0.0f;
+ M.m[0][3] = 0.0f;
+
+ M.m[1][0] = 0.0f;
+ M.m[1][1] = 1.0f;
+ M.m[1][2] = 0.0f;
+ M.m[1][3] = 0.0f;
+
+ M.m[2][0] = 0.0f;
+ M.m[2][1] = 0.0f;
+ M.m[2][2] = 1.0f;
+ M.m[2][3] = 0.0f;
+
+ M.m[3][0] = OffsetX;
+ M.m[3][1] = OffsetY;
+ M.m[3][2] = OffsetZ;
+ M.m[3][3] = 1.0f;
+ return M;
+
+#elif defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
+ XMMATRIX M;
+ M.r[0] = g_XMIdentityR0.v;
+ M.r[1] = g_XMIdentityR1.v;
+ M.r[2] = g_XMIdentityR2.v;
+ M.r[3] = XMVectorSet(OffsetX, OffsetY, OffsetZ, 1.f );
+ return M;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+
+//------------------------------------------------------------------------------
+
+inline XMMATRIX XMMatrixTranslationFromVector
+(
+ FXMVECTOR Offset
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+
+ XMMATRIX M;
+ M.m[0][0] = 1.0f;
+ M.m[0][1] = 0.0f;
+ M.m[0][2] = 0.0f;
+ M.m[0][3] = 0.0f;
+
+ M.m[1][0] = 0.0f;
+ M.m[1][1] = 1.0f;
+ M.m[1][2] = 0.0f;
+ M.m[1][3] = 0.0f;
+
+ M.m[2][0] = 0.0f;
+ M.m[2][1] = 0.0f;
+ M.m[2][2] = 1.0f;
+ M.m[2][3] = 0.0f;
+
+ M.m[3][0] = Offset.vector4_f32[0];
+ M.m[3][1] = Offset.vector4_f32[1];
+ M.m[3][2] = Offset.vector4_f32[2];
+ M.m[3][3] = 1.0f;
+ return M;
+
+#elif defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
+ XMMATRIX M;
+ M.r[0] = g_XMIdentityR0.v;
+ M.r[1] = g_XMIdentityR1.v;
+ M.r[2] = g_XMIdentityR2.v;
+ M.r[3] = XMVectorSelect( g_XMIdentityR3.v, Offset, g_XMSelect1110.v );
+ return M;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+inline XMMATRIX XMMatrixScaling
+(
+ float ScaleX,
+ float ScaleY,
+ float ScaleZ
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+
+ XMMATRIX M;
+ M.m[0][0] = ScaleX;
+ M.m[0][1] = 0.0f;
+ M.m[0][2] = 0.0f;
+ M.m[0][3] = 0.0f;
+
+ M.m[1][0] = 0.0f;
+ M.m[1][1] = ScaleY;
+ M.m[1][2] = 0.0f;
+ M.m[1][3] = 0.0f;
+
+ M.m[2][0] = 0.0f;
+ M.m[2][1] = 0.0f;
+ M.m[2][2] = ScaleZ;
+ M.m[2][3] = 0.0f;
+
+ M.m[3][0] = 0.0f;
+ M.m[3][1] = 0.0f;
+ M.m[3][2] = 0.0f;
+ M.m[3][3] = 1.0f;
+ return M;
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ const XMVECTOR Zero = vdupq_n_f32(0);
+ XMMATRIX M;
+ M.r[0] = vsetq_lane_f32( ScaleX, Zero, 0 );
+ M.r[1] = vsetq_lane_f32( ScaleY, Zero, 1 );
+ M.r[2] = vsetq_lane_f32( ScaleZ, Zero, 2 );
+ M.r[3] = g_XMIdentityR3.v;
+ return M;
+#elif defined(_XM_SSE_INTRINSICS_)
+ XMMATRIX M;
+ M.r[0] = _mm_set_ps( 0, 0, 0, ScaleX );
+ M.r[1] = _mm_set_ps( 0, 0, ScaleY, 0 );
+ M.r[2] = _mm_set_ps( 0, ScaleZ, 0, 0 );
+ M.r[3] = g_XMIdentityR3.v;
+ return M;
+#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
// Builds a scaling matrix with the x/y/z components of Scale placed on the
// diagonal (Scale's w component is ignored; M[3][3] is set to 1).
inline XMMATRIX XMMatrixScalingFromVector
(
    FXMVECTOR Scale
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMMATRIX M;
    M.m[0][0] = Scale.vector4_f32[0];
    M.m[0][1] = 0.0f;
    M.m[0][2] = 0.0f;
    M.m[0][3] = 0.0f;

    M.m[1][0] = 0.0f;
    M.m[1][1] = Scale.vector4_f32[1];
    M.m[1][2] = 0.0f;
    M.m[1][3] = 0.0f;

    M.m[2][0] = 0.0f;
    M.m[2][1] = 0.0f;
    M.m[2][2] = Scale.vector4_f32[2];
    M.m[2][3] = 0.0f;

    M.m[3][0] = 0.0f;
    M.m[3][1] = 0.0f;
    M.m[3][2] = 0.0f;
    M.m[3][3] = 1.0f;
    return M;

#elif defined(_XM_ARM_NEON_INTRINSICS_)
    // Masking isolates one lane of Scale per row, zeroing the others.
    // NOTE(review): passing a float vector to vandq_u32 relies on the
    // toolchain's implicit float32x4_t/uint32x4_t conversions -- confirm on
    // strict NEON compilers.
    XMMATRIX M;
    M.r[0] = vandq_u32(Scale,g_XMMaskX);
    M.r[1] = vandq_u32(Scale,g_XMMaskY);
    M.r[2] = vandq_u32(Scale,g_XMMaskZ);
    M.r[3] = g_XMIdentityR3.v;
    return M;
#elif defined(_XM_SSE_INTRINSICS_)
    // Bitwise AND with a single-lane mask keeps one diagonal element per row.
    XMMATRIX M;
    M.r[0] = _mm_and_ps(Scale,g_XMMaskX);
    M.r[1] = _mm_and_ps(Scale,g_XMMaskY);
    M.r[2] = _mm_and_ps(Scale,g_XMMaskZ);
    M.r[3] = g_XMIdentityR3.v;
    return M;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
+//------------------------------------------------------------------------------
+
// Builds a matrix that rotates around the x-axis by Angle radians
// (counter-clockwise when viewed along +x toward the origin, row-vector
// convention: v' = v * M).
inline XMMATRIX XMMatrixRotationX
(
    float Angle
)
{
#if defined(_XM_NO_INTRINSICS_)

    float fSinAngle;
    float fCosAngle;
    XMScalarSinCos(&fSinAngle, &fCosAngle, Angle);

    // Row 0 and row 3 stay identity; sin/cos fill the y/z 2x2 block.
    XMMATRIX M;
    M.m[0][0] = 1.0f;
    M.m[0][1] = 0.0f;
    M.m[0][2] = 0.0f;
    M.m[0][3] = 0.0f;

    M.m[1][0] = 0.0f;
    M.m[1][1] = fCosAngle;
    M.m[1][2] = fSinAngle;
    M.m[1][3] = 0.0f;

    M.m[2][0] = 0.0f;
    M.m[2][1] = -fSinAngle;
    M.m[2][2] = fCosAngle;
    M.m[2][3] = 0.0f;

    M.m[3][0] = 0.0f;
    M.m[3][1] = 0.0f;
    M.m[3][2] = 0.0f;
    M.m[3][3] = 1.0f;
    return M;

#elif defined(_XM_ARM_NEON_INTRINSICS_)
    float fSinAngle;
    float fCosAngle;
    XMScalarSinCos(&fSinAngle, &fCosAngle, Angle);

    const XMVECTOR Zero = vdupq_n_f32(0);

    // T1 = (0, cos, sin, 0) -- row 1.
    XMVECTOR T1 = vsetq_lane_f32( fCosAngle, Zero, 1 );
    T1 = vsetq_lane_f32( fSinAngle, T1, 2 );

    // T2 = (0, -sin, cos, 0) -- row 2.
    XMVECTOR T2 = vsetq_lane_f32( -fSinAngle, Zero, 1 );
    T2 = vsetq_lane_f32( fCosAngle, T2, 2 );

    XMMATRIX M;
    M.r[0] = g_XMIdentityR0.v;
    M.r[1] = T1;
    M.r[2] = T2;
    M.r[3] = g_XMIdentityR3.v;
    return M;
#elif defined(_XM_SSE_INTRINSICS_)
    float SinAngle;
    float CosAngle;
    XMScalarSinCos(&SinAngle, &CosAngle, Angle);

    XMVECTOR vSin = _mm_set_ss(SinAngle);
    XMVECTOR vCos = _mm_set_ss(CosAngle);
    // x = 0,y = cos,z = sin, w = 0
    vCos = _mm_shuffle_ps(vCos,vSin,_MM_SHUFFLE(3,0,0,3));
    XMMATRIX M;
    M.r[0] = g_XMIdentityR0;
    M.r[1] = vCos;
    // x = 0,y = sin,z = cos, w = 0
    vCos = XM_PERMUTE_PS(vCos,_MM_SHUFFLE(3,1,2,0));
    // x = 0,y = -sin,z = cos, w = 0
    vCos = _mm_mul_ps(vCos,g_XMNegateY);
    M.r[2] = vCos;
    M.r[3] = g_XMIdentityR3;
    return M;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
+//------------------------------------------------------------------------------
+
// Builds a matrix that rotates around the y-axis by Angle radians
// (counter-clockwise when viewed along +y toward the origin, row-vector
// convention: v' = v * M).
inline XMMATRIX XMMatrixRotationY
(
    float Angle
)
{
#if defined(_XM_NO_INTRINSICS_)

    float fSinAngle;
    float fCosAngle;
    XMScalarSinCos(&fSinAngle, &fCosAngle, Angle);

    // Row 1 and row 3 stay identity; sin/cos fill the x/z 2x2 block.
    XMMATRIX M;
    M.m[0][0] = fCosAngle;
    M.m[0][1] = 0.0f;
    M.m[0][2] = -fSinAngle;
    M.m[0][3] = 0.0f;

    M.m[1][0] = 0.0f;
    M.m[1][1] = 1.0f;
    M.m[1][2] = 0.0f;
    M.m[1][3] = 0.0f;

    M.m[2][0] = fSinAngle;
    M.m[2][1] = 0.0f;
    M.m[2][2] = fCosAngle;
    M.m[2][3] = 0.0f;

    M.m[3][0] = 0.0f;
    M.m[3][1] = 0.0f;
    M.m[3][2] = 0.0f;
    M.m[3][3] = 1.0f;
    return M;

#elif defined(_XM_ARM_NEON_INTRINSICS_)
    float fSinAngle;
    float fCosAngle;
    XMScalarSinCos(&fSinAngle, &fCosAngle, Angle);

    const XMVECTOR Zero = vdupq_n_f32(0);

    // T0 = (cos, 0, -sin, 0) -- row 0.
    XMVECTOR T0 = vsetq_lane_f32( fCosAngle, Zero, 0 );
    T0 = vsetq_lane_f32( -fSinAngle, T0, 2 );

    // T2 = (sin, 0, cos, 0) -- row 2.
    XMVECTOR T2 = vsetq_lane_f32( fSinAngle, Zero, 0 );
    T2 = vsetq_lane_f32( fCosAngle, T2, 2 );

    XMMATRIX M;
    M.r[0] = T0;
    M.r[1] = g_XMIdentityR1.v;
    M.r[2] = T2;
    M.r[3] = g_XMIdentityR3.v;
    return M;
#elif defined(_XM_SSE_INTRINSICS_)
    float SinAngle;
    float CosAngle;
    XMScalarSinCos(&SinAngle, &CosAngle, Angle);

    XMVECTOR vSin = _mm_set_ss(SinAngle);
    XMVECTOR vCos = _mm_set_ss(CosAngle);
    // x = sin,y = 0,z = cos, w = 0
    vSin = _mm_shuffle_ps(vSin,vCos,_MM_SHUFFLE(3,0,3,0));
    XMMATRIX M;
    M.r[2] = vSin;
    M.r[1] = g_XMIdentityR1;
    // x = cos,y = 0,z = sin, w = 0
    vSin = XM_PERMUTE_PS(vSin,_MM_SHUFFLE(3,0,1,2));
    // x = cos,y = 0,z = -sin, w = 0
    vSin = _mm_mul_ps(vSin,g_XMNegateZ);
    M.r[0] = vSin;
    M.r[3] = g_XMIdentityR3;
    return M;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
+//------------------------------------------------------------------------------
+
// Builds a matrix that rotates around the z-axis by Angle radians
// (counter-clockwise when viewed along +z toward the origin, row-vector
// convention: v' = v * M).
inline XMMATRIX XMMatrixRotationZ
(
    float Angle
)
{
#if defined(_XM_NO_INTRINSICS_)

    float fSinAngle;
    float fCosAngle;
    XMScalarSinCos(&fSinAngle, &fCosAngle, Angle);

    // Row 2 and row 3 stay identity; sin/cos fill the x/y 2x2 block.
    XMMATRIX M;
    M.m[0][0] = fCosAngle;
    M.m[0][1] = fSinAngle;
    M.m[0][2] = 0.0f;
    M.m[0][3] = 0.0f;

    M.m[1][0] = -fSinAngle;
    M.m[1][1] = fCosAngle;
    M.m[1][2] = 0.0f;
    M.m[1][3] = 0.0f;

    M.m[2][0] = 0.0f;
    M.m[2][1] = 0.0f;
    M.m[2][2] = 1.0f;
    M.m[2][3] = 0.0f;

    M.m[3][0] = 0.0f;
    M.m[3][1] = 0.0f;
    M.m[3][2] = 0.0f;
    M.m[3][3] = 1.0f;
    return M;

#elif defined(_XM_ARM_NEON_INTRINSICS_)
    float fSinAngle;
    float fCosAngle;
    XMScalarSinCos(&fSinAngle, &fCosAngle, Angle);

    const XMVECTOR Zero = vdupq_n_f32(0);

    // T0 = (cos, sin, 0, 0) -- row 0.
    XMVECTOR T0 = vsetq_lane_f32( fCosAngle, Zero, 0 );
    T0 = vsetq_lane_f32( fSinAngle, T0, 1 );

    // T1 = (-sin, cos, 0, 0) -- row 1.
    XMVECTOR T1 = vsetq_lane_f32( -fSinAngle, Zero, 0 );
    T1 = vsetq_lane_f32( fCosAngle, T1, 1 );

    XMMATRIX M;
    M.r[0] = T0;
    M.r[1] = T1;
    M.r[2] = g_XMIdentityR2.v;
    M.r[3] = g_XMIdentityR3.v;
    return M;
#elif defined(_XM_SSE_INTRINSICS_)
    float SinAngle;
    float CosAngle;
    XMScalarSinCos(&SinAngle, &CosAngle, Angle);

    XMVECTOR vSin = _mm_set_ss(SinAngle);
    XMVECTOR vCos = _mm_set_ss(CosAngle);
    // x = cos,y = sin,z = 0, w = 0
    vCos = _mm_unpacklo_ps(vCos,vSin);
    XMMATRIX M;
    M.r[0] = vCos;
    // x = sin,y = cos,z = 0, w = 0
    vCos = XM_PERMUTE_PS(vCos,_MM_SHUFFLE(3,2,0,1));
    // x = -sin,y = cos,z = 0, w = 0
    vCos = _mm_mul_ps(vCos,g_XMNegateX);
    M.r[1] = vCos;
    M.r[2] = g_XMIdentityR2;
    M.r[3] = g_XMIdentityR3;
    return M;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
+//------------------------------------------------------------------------------
+
+inline XMMATRIX XMMatrixRotationRollPitchYaw
+(
+ float Pitch,
+ float Yaw,
+ float Roll
+)
+{
+ XMVECTOR Angles = XMVectorSet(Pitch, Yaw, Roll, 0.0f);
+ return XMMatrixRotationRollPitchYawFromVector(Angles);
+}
+
+//------------------------------------------------------------------------------
+
+inline XMMATRIX XMMatrixRotationRollPitchYawFromVector
+(
+ FXMVECTOR Angles // <Pitch, Yaw, Roll, undefined>
+)
+{
+ XMVECTOR Q = XMQuaternionRotationRollPitchYawFromVector(Angles);
+ return XMMatrixRotationQuaternion(Q);
+}
+
+//------------------------------------------------------------------------------
+
// Builds a rotation matrix of Angle radians around an axis that is assumed
// to already be normalized (use XMMatrixRotationAxis for arbitrary axes).
// The construction combines cos*I, (1-cos)*n*n^T, and sin*[n]x terms
// (axis-angle / Rodrigues-style expansion).
inline XMMATRIX XMMatrixRotationNormal
(
    FXMVECTOR NormalAxis,
    float Angle
)
{
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)

    float fSinAngle;
    float fCosAngle;
    XMScalarSinCos(&fSinAngle, &fCosAngle, Angle);

    // A = (sin, cos, 1-cos, 0); the three coefficients of the expansion.
    XMVECTOR A = XMVectorSet(fSinAngle, fCosAngle, 1.0f - fCosAngle, 0.0f);

    XMVECTOR C2 = XMVectorSplatZ(A);
    XMVECTOR C1 = XMVectorSplatY(A);
    XMVECTOR C0 = XMVectorSplatX(A);

    XMVECTOR N0 = XMVectorSwizzle<XM_SWIZZLE_Y, XM_SWIZZLE_Z, XM_SWIZZLE_X, XM_SWIZZLE_W>(NormalAxis);
    XMVECTOR N1 = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_X, XM_SWIZZLE_Y, XM_SWIZZLE_W>(NormalAxis);

    // V0 = (1-cos) * (ny*nz, nz*nx, nx*ny) -- the off-diagonal products.
    XMVECTOR V0 = XMVectorMultiply(C2, N0);
    V0 = XMVectorMultiply(V0, N1);

    // R0 = (1-cos)*n*n + cos -- the diagonal terms.
    XMVECTOR R0 = XMVectorMultiply(C2, NormalAxis);
    R0 = XMVectorMultiplyAdd(R0, NormalAxis, C1);

    // R1/R2 = off-diagonal terms with +sin*n and -sin*n respectively.
    XMVECTOR R1 = XMVectorMultiplyAdd(C0, NormalAxis, V0);
    XMVECTOR R2 = XMVectorNegativeMultiplySubtract(C0, NormalAxis, V0);

    // Scatter the diagonal and off-diagonal terms into the three rows.
    V0 = XMVectorSelect(A, R0, g_XMSelect1110.v);
    XMVECTOR V1 = XMVectorPermute<XM_PERMUTE_0Z, XM_PERMUTE_1Y, XM_PERMUTE_1Z, XM_PERMUTE_0X>(R1, R2);
    XMVECTOR V2 = XMVectorPermute<XM_PERMUTE_0Y, XM_PERMUTE_1X, XM_PERMUTE_0Y, XM_PERMUTE_1X>(R1, R2);

    XMMATRIX M;
    M.r[0] = XMVectorPermute<XM_PERMUTE_0X, XM_PERMUTE_1X, XM_PERMUTE_1Y, XM_PERMUTE_0W>(V0, V1);
    M.r[1] = XMVectorPermute<XM_PERMUTE_1Z, XM_PERMUTE_0Y, XM_PERMUTE_1W, XM_PERMUTE_0W>(V0, V1);
    M.r[2] = XMVectorPermute<XM_PERMUTE_1X, XM_PERMUTE_1Y, XM_PERMUTE_0Z, XM_PERMUTE_0W>(V0, V2);
    M.r[3] = g_XMIdentityR3.v;
    return M;

#elif defined(_XM_SSE_INTRINSICS_)
    float fSinAngle;
    float fCosAngle;
    XMScalarSinCos(&fSinAngle, &fCosAngle, Angle);

    // Broadcast the three coefficients of the axis-angle expansion.
    XMVECTOR C2 = _mm_set_ps1(1.0f - fCosAngle);
    XMVECTOR C1 = _mm_set_ps1(fCosAngle);
    XMVECTOR C0 = _mm_set_ps1(fSinAngle);

    XMVECTOR N0 = XM_PERMUTE_PS(NormalAxis,_MM_SHUFFLE(3,0,2,1));
    XMVECTOR N1 = XM_PERMUTE_PS(NormalAxis,_MM_SHUFFLE(3,1,0,2));

    // Off-diagonal products scaled by (1-cos).
    XMVECTOR V0 = _mm_mul_ps(C2, N0);
    V0 = _mm_mul_ps(V0, N1);

    // Diagonal terms: (1-cos)*n*n + cos.
    XMVECTOR R0 = _mm_mul_ps(C2, NormalAxis);
    R0 = _mm_mul_ps(R0, NormalAxis);
    R0 = _mm_add_ps(R0, C1);

    // Off-diagonal terms with +sin*n (R1) and -sin*n (R2).
    XMVECTOR R1 = _mm_mul_ps(C0, NormalAxis);
    R1 = _mm_add_ps(R1, V0);
    XMVECTOR R2 = _mm_mul_ps(C0, NormalAxis);
    R2 = _mm_sub_ps(V0,R2);

    // Scatter the terms into the three rotation rows; w lanes cleared.
    V0 = _mm_and_ps(R0,g_XMMask3);
    XMVECTOR V1 = _mm_shuffle_ps(R1,R2,_MM_SHUFFLE(2,1,2,0));
    V1 = XM_PERMUTE_PS(V1,_MM_SHUFFLE(0,3,2,1));
    XMVECTOR V2 = _mm_shuffle_ps(R1,R2,_MM_SHUFFLE(0,0,1,1));
    V2 = XM_PERMUTE_PS(V2,_MM_SHUFFLE(2,0,2,0));

    R2 = _mm_shuffle_ps(V0,V1,_MM_SHUFFLE(1,0,3,0));
    R2 = XM_PERMUTE_PS(R2,_MM_SHUFFLE(1,3,2,0));

    XMMATRIX M;
    M.r[0] = R2;

    R2 = _mm_shuffle_ps(V0,V1,_MM_SHUFFLE(3,2,3,1));
    R2 = XM_PERMUTE_PS(R2,_MM_SHUFFLE(1,3,0,2));
    M.r[1] = R2;

    V2 = _mm_shuffle_ps(V2,V0,_MM_SHUFFLE(3,2,1,0));
    M.r[2] = V2;
    M.r[3] = g_XMIdentityR3.v;
    return M;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
+//------------------------------------------------------------------------------
+
+inline XMMATRIX XMMatrixRotationAxis
+(
+ FXMVECTOR Axis,
+ float Angle
+)
+{
+ assert(!XMVector3Equal(Axis, XMVectorZero()));
+ assert(!XMVector3IsInfinite(Axis));
+
+#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
+
+ XMVECTOR Normal = XMVector3Normalize(Axis);
+ return XMMatrixRotationNormal(Normal, Angle);
+
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
// Builds a rotation matrix from a quaternion <x, y, z, w>.
// NOTE(review): the standard quaternion-to-matrix expansion used here only
// yields a pure rotation for a unit quaternion -- callers are expected to
// pass a normalized value.
inline XMMATRIX XMMatrixRotationQuaternion
(
    FXMVECTOR Quaternion
)
{
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)

    static const XMVECTORF32 Constant1110 = {1.0f, 1.0f, 1.0f, 0.0f};

    // Q0 = 2q, Q1 = 2q*q: the doubled squares used by the expansion.
    XMVECTOR Q0 = XMVectorAdd(Quaternion, Quaternion);
    XMVECTOR Q1 = XMVectorMultiply(Quaternion, Q0);

    // R0 = diagonal terms 1 - 2(b^2 + c^2) per axis; w lane forced to 0.
    XMVECTOR V0 = XMVectorPermute<XM_PERMUTE_0Y, XM_PERMUTE_0X, XM_PERMUTE_0X, XM_PERMUTE_1W>(Q1, Constant1110.v);
    XMVECTOR V1 = XMVectorPermute<XM_PERMUTE_0Z, XM_PERMUTE_0Z, XM_PERMUTE_0Y, XM_PERMUTE_1W>(Q1, Constant1110.v);
    XMVECTOR R0 = XMVectorSubtract(Constant1110, V0);
    R0 = XMVectorSubtract(R0, V1);

    // V0 = 2*(xz, xy, yz) cross products of distinct components.
    V0 = XMVectorSwizzle<XM_SWIZZLE_X, XM_SWIZZLE_X, XM_SWIZZLE_Y, XM_SWIZZLE_W>(Quaternion);
    V1 = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_Y, XM_SWIZZLE_Z, XM_SWIZZLE_W>(Q0);
    V0 = XMVectorMultiply(V0, V1);

    // V1 = 2*w*(y, z, x) -- the terms scaled by the scalar part.
    V1 = XMVectorSplatW(Quaternion);
    XMVECTOR V2 = XMVectorSwizzle<XM_SWIZZLE_Y, XM_SWIZZLE_Z, XM_SWIZZLE_X, XM_SWIZZLE_W>(Q0);
    V1 = XMVectorMultiply(V1, V2);

    // Sums and differences give the six off-diagonal entries.
    XMVECTOR R1 = XMVectorAdd(V0, V1);
    XMVECTOR R2 = XMVectorSubtract(V0, V1);

    V0 = XMVectorPermute<XM_PERMUTE_0Y, XM_PERMUTE_1X, XM_PERMUTE_1Y, XM_PERMUTE_0Z>(R1, R2);
    V1 = XMVectorPermute<XM_PERMUTE_0X, XM_PERMUTE_1Z, XM_PERMUTE_0X, XM_PERMUTE_1Z>(R1, R2);

    // Assemble the three rotation rows; last row is identity.
    XMMATRIX M;
    M.r[0] = XMVectorPermute<XM_PERMUTE_0X, XM_PERMUTE_1X, XM_PERMUTE_1Y, XM_PERMUTE_0W>(R0, V0);
    M.r[1] = XMVectorPermute<XM_PERMUTE_1Z, XM_PERMUTE_0Y, XM_PERMUTE_1W, XM_PERMUTE_0W>(R0, V0);
    M.r[2] = XMVectorPermute<XM_PERMUTE_1X, XM_PERMUTE_1Y, XM_PERMUTE_0Z, XM_PERMUTE_0W>(R0, V1);
    M.r[3] = g_XMIdentityR3.v;
    return M;

#elif defined(_XM_SSE_INTRINSICS_)
    // Same construction as above using raw SSE shuffles.
    static const XMVECTORF32 Constant1110 = {1.0f, 1.0f, 1.0f, 0.0f};

    XMVECTOR Q0 = _mm_add_ps(Quaternion,Quaternion);
    XMVECTOR Q1 = _mm_mul_ps(Quaternion,Q0);

    XMVECTOR V0 = XM_PERMUTE_PS(Q1,_MM_SHUFFLE(3,0,0,1));
    V0 = _mm_and_ps(V0,g_XMMask3);
    XMVECTOR V1 = XM_PERMUTE_PS(Q1,_MM_SHUFFLE(3,1,2,2));
    V1 = _mm_and_ps(V1,g_XMMask3);
    XMVECTOR R0 = _mm_sub_ps(Constant1110,V0);
    R0 = _mm_sub_ps(R0, V1);

    V0 = XM_PERMUTE_PS(Quaternion,_MM_SHUFFLE(3,1,0,0));
    V1 = XM_PERMUTE_PS(Q0,_MM_SHUFFLE(3,2,1,2));
    V0 = _mm_mul_ps(V0, V1);

    V1 = XM_PERMUTE_PS(Quaternion,_MM_SHUFFLE(3,3,3,3));
    XMVECTOR V2 = XM_PERMUTE_PS(Q0,_MM_SHUFFLE(3,0,2,1));
    V1 = _mm_mul_ps(V1, V2);

    XMVECTOR R1 = _mm_add_ps(V0, V1);
    XMVECTOR R2 = _mm_sub_ps(V0, V1);

    V0 = _mm_shuffle_ps(R1,R2,_MM_SHUFFLE(1,0,2,1));
    V0 = XM_PERMUTE_PS(V0,_MM_SHUFFLE(1,3,2,0));
    V1 = _mm_shuffle_ps(R1,R2,_MM_SHUFFLE(2,2,0,0));
    V1 = XM_PERMUTE_PS(V1,_MM_SHUFFLE(2,0,2,0));

    Q1 = _mm_shuffle_ps(R0,V0,_MM_SHUFFLE(1,0,3,0));
    Q1 = XM_PERMUTE_PS(Q1,_MM_SHUFFLE(1,3,2,0));

    XMMATRIX M;
    M.r[0] = Q1;

    Q1 = _mm_shuffle_ps(R0,V0,_MM_SHUFFLE(3,2,3,1));
    Q1 = XM_PERMUTE_PS(Q1,_MM_SHUFFLE(1,3,0,2));
    M.r[1] = Q1;

    Q1 = _mm_shuffle_ps(V1,R0,_MM_SHUFFLE(3,2,1,0));
    M.r[2] = Q1;
    M.r[3] = g_XMIdentityR3;
    return M;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
+//------------------------------------------------------------------------------
+
+inline XMMATRIX XMMatrixTransformation2D
+(
+ FXMVECTOR ScalingOrigin,
+ float ScalingOrientation,
+ FXMVECTOR Scaling,
+ FXMVECTOR RotationOrigin,
+ float Rotation,
+ GXMVECTOR Translation
+)
+{
+ // M = Inverse(MScalingOrigin) * Transpose(MScalingOrientation) * MScaling * MScalingOrientation *
+ // MScalingOrigin * Inverse(MRotationOrigin) * MRotation * MRotationOrigin * MTranslation;
+
+ XMVECTOR VScalingOrigin = XMVectorSelect(g_XMSelect1100.v, ScalingOrigin, g_XMSelect1100.v);
+ XMVECTOR NegScalingOrigin = XMVectorNegate(VScalingOrigin);
+
+ XMMATRIX MScalingOriginI = XMMatrixTranslationFromVector(NegScalingOrigin);
+ XMMATRIX MScalingOrientation = XMMatrixRotationZ(ScalingOrientation);
+ XMMATRIX MScalingOrientationT = XMMatrixTranspose(MScalingOrientation);
+ XMVECTOR VScaling = XMVectorSelect(g_XMOne.v, Scaling, g_XMSelect1100.v);
+ XMMATRIX MScaling = XMMatrixScalingFromVector(VScaling);
+ XMVECTOR VRotationOrigin = XMVectorSelect(g_XMSelect1100.v, RotationOrigin, g_XMSelect1100.v);
+ XMMATRIX MRotation = XMMatrixRotationZ(Rotation);
+ XMVECTOR VTranslation = XMVectorSelect(g_XMSelect1100.v, Translation,g_XMSelect1100.v);
+
+ XMMATRIX M = XMMatrixMultiply(MScalingOriginI, MScalingOrientationT);
+ M = XMMatrixMultiply(M, MScaling);
+ M = XMMatrixMultiply(M, MScalingOrientation);
+ M.r[3] = XMVectorAdd(M.r[3], VScalingOrigin);
+ M.r[3] = XMVectorSubtract(M.r[3], VRotationOrigin);
+ M = XMMatrixMultiply(M, MRotation);
+ M.r[3] = XMVectorAdd(M.r[3], VRotationOrigin);
+ M.r[3] = XMVectorAdd(M.r[3], VTranslation);
+
+ return M;
+}
+
+//------------------------------------------------------------------------------
+
+inline XMMATRIX XMMatrixTransformation
+(
+ FXMVECTOR ScalingOrigin,
+ FXMVECTOR ScalingOrientationQuaternion,
+ FXMVECTOR Scaling,
+ GXMVECTOR RotationOrigin,
+ CXMVECTOR RotationQuaternion,
+ CXMVECTOR Translation
+)
+{
+ // M = Inverse(MScalingOrigin) * Transpose(MScalingOrientation) * MScaling * MScalingOrientation *
+ // MScalingOrigin * Inverse(MRotationOrigin) * MRotation * MRotationOrigin * MTranslation;
+
+ XMVECTOR VScalingOrigin = XMVectorSelect(g_XMSelect1110.v, ScalingOrigin, g_XMSelect1110.v);
+ XMVECTOR NegScalingOrigin = XMVectorNegate(ScalingOrigin);
+
+ XMMATRIX MScalingOriginI = XMMatrixTranslationFromVector(NegScalingOrigin);
+ XMMATRIX MScalingOrientation = XMMatrixRotationQuaternion(ScalingOrientationQuaternion);
+ XMMATRIX MScalingOrientationT = XMMatrixTranspose(MScalingOrientation);
+ XMMATRIX MScaling = XMMatrixScalingFromVector(Scaling);
+ XMVECTOR VRotationOrigin = XMVectorSelect(g_XMSelect1110.v, RotationOrigin, g_XMSelect1110.v);
+ XMMATRIX MRotation = XMMatrixRotationQuaternion(RotationQuaternion);
+ XMVECTOR VTranslation = XMVectorSelect(g_XMSelect1110.v, Translation, g_XMSelect1110.v);
+
+ XMMATRIX M;
+ M = XMMatrixMultiply(MScalingOriginI, MScalingOrientationT);
+ M = XMMatrixMultiply(M, MScaling);
+ M = XMMatrixMultiply(M, MScalingOrientation);
+ M.r[3] = XMVectorAdd(M.r[3], VScalingOrigin);
+ M.r[3] = XMVectorSubtract(M.r[3], VRotationOrigin);
+ M = XMMatrixMultiply(M, MRotation);
+ M.r[3] = XMVectorAdd(M.r[3], VRotationOrigin);
+ M.r[3] = XMVectorAdd(M.r[3], VTranslation);
+ return M;
+}
+
+//------------------------------------------------------------------------------
+
+inline XMMATRIX XMMatrixAffineTransformation2D
+(
+ FXMVECTOR Scaling,
+ FXMVECTOR RotationOrigin,
+ float Rotation,
+ FXMVECTOR Translation
+)
+{
+ // M = MScaling * Inverse(MRotationOrigin) * MRotation * MRotationOrigin * MTranslation;
+
+ XMVECTOR VScaling = XMVectorSelect(g_XMOne.v, Scaling, g_XMSelect1100.v);
+ XMMATRIX MScaling = XMMatrixScalingFromVector(VScaling);
+ XMVECTOR VRotationOrigin = XMVectorSelect(g_XMSelect1100.v, RotationOrigin, g_XMSelect1100.v);
+ XMMATRIX MRotation = XMMatrixRotationZ(Rotation);
+ XMVECTOR VTranslation = XMVectorSelect(g_XMSelect1100.v, Translation,g_XMSelect1100.v);
+
+ XMMATRIX M;
+ M = MScaling;
+ M.r[3] = XMVectorSubtract(M.r[3], VRotationOrigin);
+ M = XMMatrixMultiply(M, MRotation);
+ M.r[3] = XMVectorAdd(M.r[3], VRotationOrigin);
+ M.r[3] = XMVectorAdd(M.r[3], VTranslation);
+ return M;
+}
+
+//------------------------------------------------------------------------------
+
+inline XMMATRIX XMMatrixAffineTransformation
+(
+ FXMVECTOR Scaling,
+ FXMVECTOR RotationOrigin,
+ FXMVECTOR RotationQuaternion,
+ GXMVECTOR Translation
+)
+{
+ // M = MScaling * Inverse(MRotationOrigin) * MRotation * MRotationOrigin * MTranslation;
+
+ XMMATRIX MScaling = XMMatrixScalingFromVector(Scaling);
+ XMVECTOR VRotationOrigin = XMVectorSelect(g_XMSelect1110.v, RotationOrigin,g_XMSelect1110.v);
+ XMMATRIX MRotation = XMMatrixRotationQuaternion(RotationQuaternion);
+ XMVECTOR VTranslation = XMVectorSelect(g_XMSelect1110.v, Translation,g_XMSelect1110.v);
+
+ XMMATRIX M;
+ M = MScaling;
+ M.r[3] = XMVectorSubtract(M.r[3], VRotationOrigin);
+ M = XMMatrixMultiply(M, MRotation);
+ M.r[3] = XMVectorAdd(M.r[3], VRotationOrigin);
+ M.r[3] = XMVectorAdd(M.r[3], VTranslation);
+ return M;
+}
+
+//------------------------------------------------------------------------------
+
// Builds a matrix that reflects points across an arbitrary plane.
// ReflectionPlane holds the plane coefficients (a,b,c,d) of ax+by+cz+d=0;
// it must be non-degenerate (asserted below) and need not be normalized.
inline XMMATRIX XMMatrixReflect
(
    FXMVECTOR ReflectionPlane
)
{
    assert(!XMVector3Equal(ReflectionPlane, XMVectorZero()));
    assert(!XMPlaneIsInfinite(ReflectionPlane));

#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)

    // w stays 0 so the plane's d term only contributes to the translation row.
    static const XMVECTORF32 NegativeTwo = {-2.0f, -2.0f, -2.0f, 0.0f};

    XMVECTOR P = XMPlaneNormalize(ReflectionPlane);
    XMVECTOR S = XMVectorMultiply(P, NegativeTwo);   // S = -2*(a,b,c,0)

    XMVECTOR A = XMVectorSplatX(P);
    XMVECTOR B = XMVectorSplatY(P);
    XMVECTOR C = XMVectorSplatZ(P);
    XMVECTOR D = XMVectorSplatW(P);

    // Householder form: row i = Identity[i] + P[i] * S, i.e. I - 2*n*n^T
    // plus the translation row carrying -2*d*n.
    XMMATRIX M;
    M.r[0] = XMVectorMultiplyAdd(A, S, g_XMIdentityR0.v);
    M.r[1] = XMVectorMultiplyAdd(B, S, g_XMIdentityR1.v);
    M.r[2] = XMVectorMultiplyAdd(C, S, g_XMIdentityR2.v);
    M.r[3] = XMVectorMultiplyAdd(D, S, g_XMIdentityR3.v);
    return M;

#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
+//------------------------------------------------------------------------------
+
// Builds a matrix that flattens geometry onto ShadowPlane as lit from
// LightPosition. ShadowPlane must be non-degenerate (asserted); it is
// normalized before use.
inline XMMATRIX XMMatrixShadow
(
    FXMVECTOR ShadowPlane,
    FXMVECTOR LightPosition
)
{
    static const XMVECTORU32 Select0001 = {XM_SELECT_0, XM_SELECT_0, XM_SELECT_0, XM_SELECT_1};

    assert(!XMVector3Equal(ShadowPlane, XMVectorZero()));
    assert(!XMPlaneIsInfinite(ShadowPlane));

    XMVECTOR P = XMPlaneNormalize(ShadowPlane);
    XMVECTOR Dot = XMPlaneDot(P, LightPosition);   // dot(P, L), splatted
    P = XMVectorNegate(P);
    XMVECTOR D = XMVectorSplatW(P);
    XMVECTOR C = XMVectorSplatZ(P);
    XMVECTOR B = XMVectorSplatY(P);
    XMVECTOR A = XMVectorSplatX(P);
    // Keep dot(P,L) only in the w lane; the rotate-lefts below move it to
    // z, then y, then x so each row gets it on the matrix diagonal.
    Dot = XMVectorSelect(Select0001.v, Dot, Select0001.v);

    // Row i = -P[i] * LightPosition, with dot(P,L) added on the diagonal.
    // Rows are built from 3 upward so a single rotate-left per step
    // repositions the diagonal term. Order matters here.
    XMMATRIX M;
    M.r[3] = XMVectorMultiplyAdd(D, LightPosition, Dot);
    Dot = XMVectorRotateLeft(Dot, 1);
    M.r[2] = XMVectorMultiplyAdd(C, LightPosition, Dot);
    Dot = XMVectorRotateLeft(Dot, 1);
    M.r[1] = XMVectorMultiplyAdd(B, LightPosition, Dot);
    Dot = XMVectorRotateLeft(Dot, 1);
    M.r[0] = XMVectorMultiplyAdd(A, LightPosition, Dot);
    return M;
}
+
+//------------------------------------------------------------------------------
+// View and projection initialization operations
+//------------------------------------------------------------------------------
+
+inline XMMATRIX XMMatrixLookAtLH
+(
+ FXMVECTOR EyePosition,
+ FXMVECTOR FocusPosition,
+ FXMVECTOR UpDirection
+)
+{
+ XMVECTOR EyeDirection = XMVectorSubtract(FocusPosition, EyePosition);
+ return XMMatrixLookToLH(EyePosition, EyeDirection, UpDirection);
+}
+
+//------------------------------------------------------------------------------
+
+inline XMMATRIX XMMatrixLookAtRH
+(
+ FXMVECTOR EyePosition,
+ FXMVECTOR FocusPosition,
+ FXMVECTOR UpDirection
+)
+{
+ XMVECTOR NegEyeDirection = XMVectorSubtract(EyePosition, FocusPosition);
+ return XMMatrixLookToLH(EyePosition, NegEyeDirection, UpDirection);
+}
+
+//------------------------------------------------------------------------------
+
// Builds a left-handed view matrix from an eye position, a view direction,
// and an up hint. EyeDirection and UpDirection must be non-zero and finite
// (asserted); neither needs to be normalized or mutually orthogonal.
inline XMMATRIX XMMatrixLookToLH
(
    FXMVECTOR EyePosition,
    FXMVECTOR EyeDirection,
    FXMVECTOR UpDirection
)
{
    assert(!XMVector3Equal(EyeDirection, XMVectorZero()));
    assert(!XMVector3IsInfinite(EyeDirection));
    assert(!XMVector3Equal(UpDirection, XMVectorZero()));
    assert(!XMVector3IsInfinite(UpDirection));

#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)

    // Build an orthonormal camera basis: R2 = forward, R0 = right, R1 = up.
    XMVECTOR R2 = XMVector3Normalize(EyeDirection);

    XMVECTOR R0 = XMVector3Cross(UpDirection, R2);
    R0 = XMVector3Normalize(R0);

    XMVECTOR R1 = XMVector3Cross(R2, R0);

    // Translation terms: the projection of -eye onto each basis axis.
    XMVECTOR NegEyePosition = XMVectorNegate(EyePosition);

    XMVECTOR D0 = XMVector3Dot(R0, NegEyePosition);
    XMVECTOR D1 = XMVector3Dot(R1, NegEyePosition);
    XMVECTOR D2 = XMVector3Dot(R2, NegEyePosition);

    // Each row holds (basis.xyz, offset); the transpose then yields the
    // world-to-view matrix with the offsets in the fourth row.
    XMMATRIX M;
    M.r[0] = XMVectorSelect(D0, R0, g_XMSelect1110.v);
    M.r[1] = XMVectorSelect(D1, R1, g_XMSelect1110.v);
    M.r[2] = XMVectorSelect(D2, R2, g_XMSelect1110.v);
    M.r[3] = g_XMIdentityR3.v;

    M = XMMatrixTranspose(M);

    return M;

#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
+//------------------------------------------------------------------------------
+
+inline XMMATRIX XMMatrixLookToRH
+(
+ FXMVECTOR EyePosition,
+ FXMVECTOR EyeDirection,
+ FXMVECTOR UpDirection
+)
+{
+ XMVECTOR NegEyeDirection = XMVectorNegate(EyeDirection);
+ return XMMatrixLookToLH(EyePosition, NegEyeDirection, UpDirection);
+}
+
+//------------------------------------------------------------------------------
+
// Builds a left-handed perspective projection matrix from the view-volume
// width/height at the near clipping plane and the near/far clip distances.
// Depth maps z=NearZ to 0 and z=FarZ to 1 after the perspective divide.
// Degenerate frusta (zero-size view plane, NearZ == FarZ) are asserted out.
inline XMMATRIX XMMatrixPerspectiveLH
(
    float ViewWidth,
    float ViewHeight,
    float NearZ,
    float FarZ
)
{
    assert(!XMScalarNearEqual(ViewWidth, 0.0f, 0.00001f));
    assert(!XMScalarNearEqual(ViewHeight, 0.0f, 0.00001f));
    assert(!XMScalarNearEqual(FarZ, NearZ, 0.00001f));

#if defined(_XM_NO_INTRINSICS_)

    float TwoNearZ = NearZ + NearZ;
    float fRange = FarZ / (FarZ - NearZ);   // depth scale mapping [NearZ,FarZ] -> [0,1]

    XMMATRIX M;
    M.m[0][0] = TwoNearZ / ViewWidth;
    M.m[0][1] = 0.0f;
    M.m[0][2] = 0.0f;
    M.m[0][3] = 0.0f;

    M.m[1][0] = 0.0f;
    M.m[1][1] = TwoNearZ / ViewHeight;
    M.m[1][2] = 0.0f;
    M.m[1][3] = 0.0f;

    M.m[2][0] = 0.0f;
    M.m[2][1] = 0.0f;
    M.m[2][2] = fRange;
    M.m[2][3] = 1.0f;       // w' = z (perspective divide)

    M.m[3][0] = 0.0f;
    M.m[3][1] = 0.0f;
    M.m[3][2] = -fRange * NearZ;
    M.m[3][3] = 0.0f;
    return M;

#elif defined(_XM_ARM_NEON_INTRINSICS_)
    float TwoNearZ = NearZ + NearZ;
    float fRange = FarZ / (FarZ - NearZ);
    const XMVECTOR Zero = vdupq_n_f32(0);
    XMMATRIX M;
    M.r[0] = vsetq_lane_f32( TwoNearZ / ViewWidth, Zero, 0 );
    M.r[1] = vsetq_lane_f32( TwoNearZ / ViewHeight, Zero, 1 );
    // Row 2 starts from (0,0,0,1) and gets fRange in the z lane.
    M.r[2] = vsetq_lane_f32( fRange, g_XMIdentityR3.v, 2 );
    M.r[3] = vsetq_lane_f32( -fRange * NearZ, Zero, 2 );
    return M;
#elif defined(_XM_SSE_INTRINSICS_)
    XMMATRIX M;
    float TwoNearZ = NearZ + NearZ;
    float fRange = FarZ / (FarZ - NearZ);
    // Note: This is recorded on the stack
    XMVECTOR rMem = {
        TwoNearZ / ViewWidth,
        TwoNearZ / ViewHeight,
        fRange,
        -fRange * NearZ
    };
    // Copy from memory to SSE register
    XMVECTOR vValues = rMem;
    XMVECTOR vTemp = _mm_setzero_ps();
    // Copy x only
    vTemp = _mm_move_ss(vTemp,vValues);
    // TwoNearZ / ViewWidth,0,0,0
    M.r[0] = vTemp;
    // 0,TwoNearZ / ViewHeight,0,0
    vTemp = vValues;
    vTemp = _mm_and_ps(vTemp,g_XMMaskY);
    M.r[1] = vTemp;
    // x=fRange,y=-fRange * NearZ,0,1.0f
    vValues = _mm_shuffle_ps(vValues,g_XMIdentityR3,_MM_SHUFFLE(3,2,3,2));
    // 0,0,fRange,1.0f
    vTemp = _mm_setzero_ps();
    vTemp = _mm_shuffle_ps(vTemp,vValues,_MM_SHUFFLE(3,0,0,0));
    M.r[2] = vTemp;
    // 0,0,-fRange * NearZ,0
    vTemp = _mm_shuffle_ps(vTemp,vValues,_MM_SHUFFLE(2,1,0,0));
    M.r[3] = vTemp;

    return M;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
+//------------------------------------------------------------------------------
+
// Builds a right-handed perspective projection matrix from the view-volume
// width/height at the near plane and the near/far clip distances. The camera
// looks down -z; depth maps to [0,1] after the divide (note w' = -z below).
// Degenerate frusta are asserted out.
inline XMMATRIX XMMatrixPerspectiveRH
(
    float ViewWidth,
    float ViewHeight,
    float NearZ,
    float FarZ
)
{
    assert(!XMScalarNearEqual(ViewWidth, 0.0f, 0.00001f));
    assert(!XMScalarNearEqual(ViewHeight, 0.0f, 0.00001f));
    assert(!XMScalarNearEqual(FarZ, NearZ, 0.00001f));

#if defined(_XM_NO_INTRINSICS_)

    float TwoNearZ = NearZ + NearZ;
    float fRange = FarZ / (NearZ - FarZ);   // negative depth scale (RH convention)

    XMMATRIX M;
    M.m[0][0] = TwoNearZ / ViewWidth;
    M.m[0][1] = 0.0f;
    M.m[0][2] = 0.0f;
    M.m[0][3] = 0.0f;

    M.m[1][0] = 0.0f;
    M.m[1][1] = TwoNearZ / ViewHeight;
    M.m[1][2] = 0.0f;
    M.m[1][3] = 0.0f;

    M.m[2][0] = 0.0f;
    M.m[2][1] = 0.0f;
    M.m[2][2] = fRange;
    M.m[2][3] = -1.0f;      // w' = -z (camera looks down -z)

    M.m[3][0] = 0.0f;
    M.m[3][1] = 0.0f;
    M.m[3][2] = fRange * NearZ;
    M.m[3][3] = 0.0f;
    return M;

#elif defined(_XM_ARM_NEON_INTRINSICS_)
    float TwoNearZ = NearZ + NearZ;
    float fRange = FarZ / (NearZ - FarZ);
    const XMVECTOR Zero = vdupq_n_f32(0);

    XMMATRIX M;
    M.r[0] = vsetq_lane_f32( TwoNearZ / ViewWidth, Zero, 0 );
    M.r[1] = vsetq_lane_f32( TwoNearZ / ViewHeight, Zero, 1 );
    // Row 2 starts from (0,0,0,-1) and gets fRange in the z lane.
    M.r[2] = vsetq_lane_f32( fRange, g_XMNegIdentityR3.v, 2 );
    M.r[3] = vsetq_lane_f32( fRange * NearZ, Zero, 2 );
    return M;
#elif defined(_XM_SSE_INTRINSICS_)
    XMMATRIX M;
    float TwoNearZ = NearZ + NearZ;
    float fRange = FarZ / (NearZ-FarZ);
    // Note: This is recorded on the stack
    XMVECTOR rMem = {
        TwoNearZ / ViewWidth,
        TwoNearZ / ViewHeight,
        fRange,
        fRange * NearZ
    };
    // Copy from memory to SSE register
    XMVECTOR vValues = rMem;
    XMVECTOR vTemp = _mm_setzero_ps();
    // Copy x only
    vTemp = _mm_move_ss(vTemp,vValues);
    // TwoNearZ / ViewWidth,0,0,0
    M.r[0] = vTemp;
    // 0,TwoNearZ / ViewHeight,0,0
    vTemp = vValues;
    vTemp = _mm_and_ps(vTemp,g_XMMaskY);
    M.r[1] = vTemp;
    // x=fRange,y=fRange * NearZ,0,-1.0f
    vValues = _mm_shuffle_ps(vValues,g_XMNegIdentityR3,_MM_SHUFFLE(3,2,3,2));
    // 0,0,fRange,-1.0f
    vTemp = _mm_setzero_ps();
    vTemp = _mm_shuffle_ps(vTemp,vValues,_MM_SHUFFLE(3,0,0,0));
    M.r[2] = vTemp;
    // 0,0,fRange * NearZ,0
    vTemp = _mm_shuffle_ps(vTemp,vValues,_MM_SHUFFLE(2,1,0,0));
    M.r[3] = vTemp;
    return M;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
+//------------------------------------------------------------------------------
+
// Builds a left-handed perspective projection matrix from a vertical field of
// view (radians), height/width aspect ratio, and near/far clip distances.
// Height scale = cot(FovAngleY/2); width scale = height / aspect. Depth maps
// z=NearZ to 0 and z=FarZ to 1. Degenerate inputs are asserted out.
inline XMMATRIX XMMatrixPerspectiveFovLH
(
    float FovAngleY,
    float AspectHByW,
    float NearZ,
    float FarZ
)
{
    assert(!XMScalarNearEqual(FovAngleY, 0.0f, 0.00001f * 2.0f));
    assert(!XMScalarNearEqual(AspectHByW, 0.0f, 0.00001f));
    assert(!XMScalarNearEqual(FarZ, NearZ, 0.00001f));

#if defined(_XM_NO_INTRINSICS_)

    float SinFov;
    float CosFov;
    XMScalarSinCos(&SinFov, &CosFov, 0.5f * FovAngleY);

    float Height = CosFov / SinFov;     // cot(FovAngleY / 2)
    float Width = Height / AspectHByW;
    float fRange = FarZ / (FarZ-NearZ);

    XMMATRIX M;
    M.m[0][0] = Width;
    M.m[0][1] = 0.0f;
    M.m[0][2] = 0.0f;
    M.m[0][3] = 0.0f;

    M.m[1][0] = 0.0f;
    M.m[1][1] = Height;
    M.m[1][2] = 0.0f;
    M.m[1][3] = 0.0f;

    M.m[2][0] = 0.0f;
    M.m[2][1] = 0.0f;
    M.m[2][2] = fRange;
    M.m[2][3] = 1.0f;       // w' = z

    M.m[3][0] = 0.0f;
    M.m[3][1] = 0.0f;
    M.m[3][2] = -fRange * NearZ;
    M.m[3][3] = 0.0f;
    return M;

#elif defined(_XM_ARM_NEON_INTRINSICS_)
    float SinFov;
    float CosFov;
    XMScalarSinCos(&SinFov, &CosFov, 0.5f * FovAngleY);

    float fRange = FarZ / (FarZ-NearZ);
    float Height = CosFov / SinFov;
    float Width = Height / AspectHByW;
    const XMVECTOR Zero = vdupq_n_f32(0);

    XMMATRIX M;
    M.r[0] = vsetq_lane_f32( Width, Zero, 0 );
    M.r[1] = vsetq_lane_f32( Height, Zero, 1 );
    M.r[2] = vsetq_lane_f32( fRange, g_XMIdentityR3.v, 2 );
    M.r[3] = vsetq_lane_f32( -fRange * NearZ, Zero, 2 );
    return M;
#elif defined(_XM_SSE_INTRINSICS_)
    float SinFov;
    float CosFov;
    XMScalarSinCos(&SinFov, &CosFov, 0.5f * FovAngleY);

    float fRange = FarZ / (FarZ-NearZ);
    // Note: This is recorded on the stack
    float Height = CosFov / SinFov;
    XMVECTOR rMem = {
        Height / AspectHByW,
        Height,
        fRange,
        -fRange * NearZ
    };
    // Copy from memory to SSE register
    XMVECTOR vValues = rMem;
    XMVECTOR vTemp = _mm_setzero_ps();
    // Copy x only
    vTemp = _mm_move_ss(vTemp,vValues);
    // Height / AspectHByW,0,0,0
    XMMATRIX M;
    M.r[0] = vTemp;
    // 0,Height,0,0
    vTemp = vValues;
    vTemp = _mm_and_ps(vTemp,g_XMMaskY);
    M.r[1] = vTemp;
    // x=fRange,y=-fRange * NearZ,0,1.0f
    vTemp = _mm_setzero_ps();
    vValues = _mm_shuffle_ps(vValues,g_XMIdentityR3,_MM_SHUFFLE(3,2,3,2));
    // 0,0,fRange,1.0f
    vTemp = _mm_shuffle_ps(vTemp,vValues,_MM_SHUFFLE(3,0,0,0));
    M.r[2] = vTemp;
    // 0,0,-fRange * NearZ,0.0f
    vTemp = _mm_shuffle_ps(vTemp,vValues,_MM_SHUFFLE(2,1,0,0));
    M.r[3] = vTemp;
    return M;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
+//------------------------------------------------------------------------------
+
// Builds a right-handed perspective projection matrix from a vertical field
// of view (radians), height/width aspect ratio, and near/far clip distances.
// Height scale = cot(FovAngleY/2); the camera looks down -z (w' = -z).
// Degenerate inputs are asserted out.
inline XMMATRIX XMMatrixPerspectiveFovRH
(
    float FovAngleY,
    float AspectHByW,
    float NearZ,
    float FarZ
)
{
    assert(!XMScalarNearEqual(FovAngleY, 0.0f, 0.00001f * 2.0f));
    assert(!XMScalarNearEqual(AspectHByW, 0.0f, 0.00001f));
    assert(!XMScalarNearEqual(FarZ, NearZ, 0.00001f));

#if defined(_XM_NO_INTRINSICS_)

    float SinFov;
    float CosFov;
    XMScalarSinCos(&SinFov, &CosFov, 0.5f * FovAngleY);

    float Height = CosFov / SinFov;     // cot(FovAngleY / 2)
    float Width = Height / AspectHByW;
    float fRange = FarZ / (NearZ-FarZ);

    XMMATRIX M;
    M.m[0][0] = Width;
    M.m[0][1] = 0.0f;
    M.m[0][2] = 0.0f;
    M.m[0][3] = 0.0f;

    M.m[1][0] = 0.0f;
    M.m[1][1] = Height;
    M.m[1][2] = 0.0f;
    M.m[1][3] = 0.0f;

    M.m[2][0] = 0.0f;
    M.m[2][1] = 0.0f;
    M.m[2][2] = fRange;
    M.m[2][3] = -1.0f;      // w' = -z

    M.m[3][0] = 0.0f;
    M.m[3][1] = 0.0f;
    M.m[3][2] = fRange * NearZ;
    M.m[3][3] = 0.0f;
    return M;

#elif defined(_XM_ARM_NEON_INTRINSICS_)
    float SinFov;
    float CosFov;
    XMScalarSinCos(&SinFov, &CosFov, 0.5f * FovAngleY);
    float fRange = FarZ / (NearZ-FarZ);
    float Height = CosFov / SinFov;
    float Width = Height / AspectHByW;
    const XMVECTOR Zero = vdupq_n_f32(0);

    XMMATRIX M;
    M.r[0] = vsetq_lane_f32( Width, Zero, 0 );
    M.r[1] = vsetq_lane_f32( Height, Zero, 1 );
    M.r[2] = vsetq_lane_f32( fRange, g_XMNegIdentityR3.v, 2 );
    M.r[3] = vsetq_lane_f32( fRange * NearZ, Zero, 2 );
    return M;
#elif defined(_XM_SSE_INTRINSICS_)
    float SinFov;
    float CosFov;
    XMScalarSinCos(&SinFov, &CosFov, 0.5f * FovAngleY);
    float fRange = FarZ / (NearZ-FarZ);
    // Note: This is recorded on the stack
    float Height = CosFov / SinFov;
    XMVECTOR rMem = {
        Height / AspectHByW,
        Height,
        fRange,
        fRange * NearZ
    };
    // Copy from memory to SSE register
    XMVECTOR vValues = rMem;
    XMVECTOR vTemp = _mm_setzero_ps();
    // Copy x only
    vTemp = _mm_move_ss(vTemp,vValues);
    // Height / AspectHByW,0,0,0
    XMMATRIX M;
    M.r[0] = vTemp;
    // 0,Height,0,0
    vTemp = vValues;
    vTemp = _mm_and_ps(vTemp,g_XMMaskY);
    M.r[1] = vTemp;
    // x=fRange,y=fRange * NearZ,0,-1.0f
    vTemp = _mm_setzero_ps();
    vValues = _mm_shuffle_ps(vValues,g_XMNegIdentityR3,_MM_SHUFFLE(3,2,3,2));
    // 0,0,fRange,-1.0f
    vTemp = _mm_shuffle_ps(vTemp,vValues,_MM_SHUFFLE(3,0,0,0));
    M.r[2] = vTemp;
    // 0,0,fRange * NearZ,0.0f
    vTemp = _mm_shuffle_ps(vTemp,vValues,_MM_SHUFFLE(2,1,0,0));
    M.r[3] = vTemp;
    return M;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
+//------------------------------------------------------------------------------
+
// Builds a left-handed perspective projection matrix for an off-center
// (possibly asymmetric) view volume given the near-plane rectangle
// [ViewLeft,ViewRight] x [ViewBottom,ViewTop] and near/far clip distances.
// The rectangle offsets land in row 2 (the z row, divided by w' = z).
// Degenerate volumes are asserted out.
inline XMMATRIX XMMatrixPerspectiveOffCenterLH
(
    float ViewLeft,
    float ViewRight,
    float ViewBottom,
    float ViewTop,
    float NearZ,
    float FarZ
)
{
    assert(!XMScalarNearEqual(ViewRight, ViewLeft, 0.00001f));
    assert(!XMScalarNearEqual(ViewTop, ViewBottom, 0.00001f));
    assert(!XMScalarNearEqual(FarZ, NearZ, 0.00001f));

#if defined(_XM_NO_INTRINSICS_)

    float TwoNearZ = NearZ + NearZ;
    float ReciprocalWidth = 1.0f / (ViewRight - ViewLeft);
    float ReciprocalHeight = 1.0f / (ViewTop - ViewBottom);
    float fRange = FarZ / (FarZ-NearZ);

    XMMATRIX M;
    M.m[0][0] = TwoNearZ * ReciprocalWidth;
    M.m[0][1] = 0.0f;
    M.m[0][2] = 0.0f;
    M.m[0][3] = 0.0f;

    M.m[1][0] = 0.0f;
    M.m[1][1] = TwoNearZ * ReciprocalHeight;
    M.m[1][2] = 0.0f;
    M.m[1][3] = 0.0f;

    // Off-center offsets go in the z row so they are scaled by 1/z.
    M.m[2][0] = -(ViewLeft + ViewRight) * ReciprocalWidth;
    M.m[2][1] = -(ViewTop + ViewBottom) * ReciprocalHeight;
    M.m[2][2] = fRange;
    M.m[2][3] = 1.0f;

    M.m[3][0] = 0.0f;
    M.m[3][1] = 0.0f;
    M.m[3][2] = -fRange * NearZ;
    M.m[3][3] = 0.0f;
    return M;

#elif defined(_XM_ARM_NEON_INTRINSICS_)
    float TwoNearZ = NearZ + NearZ;
    float ReciprocalWidth = 1.0f / (ViewRight - ViewLeft);
    float ReciprocalHeight = 1.0f / (ViewTop - ViewBottom);
    float fRange = FarZ / (FarZ-NearZ);
    const XMVECTOR Zero = vdupq_n_f32(0);

    XMMATRIX M;
    M.r[0] = vsetq_lane_f32( TwoNearZ * ReciprocalWidth, Zero, 0 );
    M.r[1] = vsetq_lane_f32( TwoNearZ * ReciprocalHeight, Zero, 1 );
    M.r[2] = XMVectorSet(-(ViewLeft + ViewRight) * ReciprocalWidth,
                         -(ViewTop + ViewBottom) * ReciprocalHeight,
                         fRange,
                         1.0f);
    M.r[3] = vsetq_lane_f32( -fRange * NearZ, Zero, 2 );
    return M;
#elif defined(_XM_SSE_INTRINSICS_)
    XMMATRIX M;
    float TwoNearZ = NearZ+NearZ;
    float ReciprocalWidth = 1.0f / (ViewRight - ViewLeft);
    float ReciprocalHeight = 1.0f / (ViewTop - ViewBottom);
    float fRange = FarZ / (FarZ-NearZ);
    // Note: This is recorded on the stack
    XMVECTOR rMem = {
        TwoNearZ*ReciprocalWidth,
        TwoNearZ*ReciprocalHeight,
        -fRange * NearZ,
        0
    };
    // Copy from memory to SSE register
    XMVECTOR vValues = rMem;
    XMVECTOR vTemp = _mm_setzero_ps();
    // Copy x only
    vTemp = _mm_move_ss(vTemp,vValues);
    // TwoNearZ*ReciprocalWidth,0,0,0
    M.r[0] = vTemp;
    // 0,TwoNearZ*ReciprocalHeight,0,0
    vTemp = vValues;
    vTemp = _mm_and_ps(vTemp,g_XMMaskY);
    M.r[1] = vTemp;
    // -(ViewLeft + ViewRight)*ReciprocalWidth,-(ViewTop + ViewBottom)*ReciprocalHeight,fRange,1.0f
    M.r[2] = XMVectorSet( -(ViewLeft + ViewRight) * ReciprocalWidth,
                          -(ViewTop + ViewBottom) * ReciprocalHeight,
                          fRange,
                          1.0f );
    // 0,0,-fRange * NearZ,0.0f
    vValues = _mm_and_ps(vValues,g_XMMaskZ);
    M.r[3] = vValues;
    return M;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
+//------------------------------------------------------------------------------
+
// Builds a right-handed perspective projection matrix for an off-center
// (possibly asymmetric) view volume given the near-plane rectangle
// [ViewLeft,ViewRight] x [ViewBottom,ViewTop] and near/far clip distances.
// The camera looks down -z (w' = -z); the rectangle offsets sit in the z row.
// Degenerate volumes are asserted out.
inline XMMATRIX XMMatrixPerspectiveOffCenterRH
(
    float ViewLeft,
    float ViewRight,
    float ViewBottom,
    float ViewTop,
    float NearZ,
    float FarZ
)
{
    assert(!XMScalarNearEqual(ViewRight, ViewLeft, 0.00001f));
    assert(!XMScalarNearEqual(ViewTop, ViewBottom, 0.00001f));
    assert(!XMScalarNearEqual(FarZ, NearZ, 0.00001f));

#if defined(_XM_NO_INTRINSICS_)

    float TwoNearZ = NearZ + NearZ;
    float ReciprocalWidth = 1.0f / (ViewRight - ViewLeft);
    float ReciprocalHeight = 1.0f / (ViewTop - ViewBottom);
    float fRange = FarZ / (NearZ-FarZ);

    XMMATRIX M;
    M.m[0][0] = TwoNearZ * ReciprocalWidth;
    M.m[0][1] = 0.0f;
    M.m[0][2] = 0.0f;
    M.m[0][3] = 0.0f;

    M.m[1][0] = 0.0f;
    M.m[1][1] = TwoNearZ * ReciprocalHeight;
    M.m[1][2] = 0.0f;
    M.m[1][3] = 0.0f;

    // Off-center offsets go in the z row (sign flipped vs. the LH version).
    M.m[2][0] = (ViewLeft + ViewRight) * ReciprocalWidth;
    M.m[2][1] = (ViewTop + ViewBottom) * ReciprocalHeight;
    M.m[2][2] = fRange;
    M.m[2][3] = -1.0f;

    M.m[3][0] = 0.0f;
    M.m[3][1] = 0.0f;
    M.m[3][2] = fRange * NearZ;
    M.m[3][3] = 0.0f;
    return M;

#elif defined(_XM_ARM_NEON_INTRINSICS_)
    float TwoNearZ = NearZ + NearZ;
    float ReciprocalWidth = 1.0f / (ViewRight - ViewLeft);
    float ReciprocalHeight = 1.0f / (ViewTop - ViewBottom);
    float fRange = FarZ / (NearZ-FarZ);
    const XMVECTOR Zero = vdupq_n_f32(0);

    XMMATRIX M;
    M.r[0] = vsetq_lane_f32( TwoNearZ * ReciprocalWidth, Zero, 0 );
    M.r[1] = vsetq_lane_f32( TwoNearZ * ReciprocalHeight, Zero, 1 );
    M.r[2] = XMVectorSet((ViewLeft + ViewRight) * ReciprocalWidth,
                         (ViewTop + ViewBottom) * ReciprocalHeight,
                         fRange,
                         -1.0f);
    M.r[3] = vsetq_lane_f32( fRange * NearZ, Zero, 2 );
    return M;
#elif defined(_XM_SSE_INTRINSICS_)
    XMMATRIX M;
    float TwoNearZ = NearZ+NearZ;
    float ReciprocalWidth = 1.0f / (ViewRight - ViewLeft);
    float ReciprocalHeight = 1.0f / (ViewTop - ViewBottom);
    float fRange = FarZ / (NearZ-FarZ);
    // Note: This is recorded on the stack
    XMVECTOR rMem = {
        TwoNearZ*ReciprocalWidth,
        TwoNearZ*ReciprocalHeight,
        fRange * NearZ,
        0
    };
    // Copy from memory to SSE register
    XMVECTOR vValues = rMem;
    XMVECTOR vTemp = _mm_setzero_ps();
    // Copy x only
    vTemp = _mm_move_ss(vTemp,vValues);
    // TwoNearZ*ReciprocalWidth,0,0,0
    M.r[0] = vTemp;
    // 0,TwoNearZ*ReciprocalHeight,0,0
    vTemp = vValues;
    vTemp = _mm_and_ps(vTemp,g_XMMaskY);
    M.r[1] = vTemp;
    // (ViewLeft + ViewRight)*ReciprocalWidth,(ViewTop + ViewBottom)*ReciprocalHeight,fRange,-1.0f
    M.r[2] = XMVectorSet( (ViewLeft + ViewRight) * ReciprocalWidth,
                          (ViewTop + ViewBottom) * ReciprocalHeight,
                          fRange,
                          -1.0f );
    // 0,0,fRange * NearZ,0.0f
    vValues = _mm_and_ps(vValues,g_XMMaskZ);
    M.r[3] = vValues;
    return M;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
+//------------------------------------------------------------------------------
+
// Builds a left-handed orthographic projection matrix from the view-volume
// width/height and near/far clip distances. No perspective divide: x/y are
// scaled linearly and depth maps z=NearZ to 0, z=FarZ to 1.
// Degenerate volumes are asserted out.
inline XMMATRIX XMMatrixOrthographicLH
(
    float ViewWidth,
    float ViewHeight,
    float NearZ,
    float FarZ
)
{
    assert(!XMScalarNearEqual(ViewWidth, 0.0f, 0.00001f));
    assert(!XMScalarNearEqual(ViewHeight, 0.0f, 0.00001f));
    assert(!XMScalarNearEqual(FarZ, NearZ, 0.00001f));

#if defined(_XM_NO_INTRINSICS_)

    float fRange = 1.0f / (FarZ-NearZ);

    XMMATRIX M;
    M.m[0][0] = 2.0f / ViewWidth;
    M.m[0][1] = 0.0f;
    M.m[0][2] = 0.0f;
    M.m[0][3] = 0.0f;

    M.m[1][0] = 0.0f;
    M.m[1][1] = 2.0f / ViewHeight;
    M.m[1][2] = 0.0f;
    M.m[1][3] = 0.0f;

    M.m[2][0] = 0.0f;
    M.m[2][1] = 0.0f;
    M.m[2][2] = fRange;
    M.m[2][3] = 0.0f;

    M.m[3][0] = 0.0f;
    M.m[3][1] = 0.0f;
    M.m[3][2] = -fRange * NearZ;
    M.m[3][3] = 1.0f;       // affine: w' = 1, no divide
    return M;

#elif defined(_XM_ARM_NEON_INTRINSICS_)
    float fRange = 1.0f / (FarZ-NearZ);

    const XMVECTOR Zero = vdupq_n_f32(0);
    XMMATRIX M;
    M.r[0] = vsetq_lane_f32( 2.0f / ViewWidth, Zero, 0 );
    M.r[1] = vsetq_lane_f32( 2.0f / ViewHeight, Zero, 1 );
    M.r[2] = vsetq_lane_f32( fRange, Zero, 2 );
    // Row 3 starts from (0,0,0,1) and gets the depth offset in the z lane.
    M.r[3] = vsetq_lane_f32( -fRange * NearZ, g_XMIdentityR3.v, 2 );
    return M;
#elif defined(_XM_SSE_INTRINSICS_)
    XMMATRIX M;
    float fRange = 1.0f / (FarZ-NearZ);
    // Note: This is recorded on the stack
    XMVECTOR rMem = {
        2.0f / ViewWidth,
        2.0f / ViewHeight,
        fRange,
        -fRange * NearZ
    };
    // Copy from memory to SSE register
    XMVECTOR vValues = rMem;
    XMVECTOR vTemp = _mm_setzero_ps();
    // Copy x only
    vTemp = _mm_move_ss(vTemp,vValues);
    // 2.0f / ViewWidth,0,0,0
    M.r[0] = vTemp;
    // 0,2.0f / ViewHeight,0,0
    vTemp = vValues;
    vTemp = _mm_and_ps(vTemp,g_XMMaskY);
    M.r[1] = vTemp;
    // x=fRange,y=-fRange * NearZ,0,1.0f
    vTemp = _mm_setzero_ps();
    vValues = _mm_shuffle_ps(vValues,g_XMIdentityR3,_MM_SHUFFLE(3,2,3,2));
    // 0,0,fRange,0.0f
    vTemp = _mm_shuffle_ps(vTemp,vValues,_MM_SHUFFLE(2,0,0,0));
    M.r[2] = vTemp;
    // 0,0,-fRange * NearZ,1.0f
    vTemp = _mm_shuffle_ps(vTemp,vValues,_MM_SHUFFLE(3,1,0,0));
    M.r[3] = vTemp;
    return M;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
+//------------------------------------------------------------------------------
+
// Builds a right-handed orthographic projection matrix from the view-volume
// width/height and near/far clip distances. No perspective divide; the depth
// scale uses 1/(NearZ-FarZ) per the RH convention (camera looks down -z).
// Degenerate volumes are asserted out.
inline XMMATRIX XMMatrixOrthographicRH
(
    float ViewWidth,
    float ViewHeight,
    float NearZ,
    float FarZ
)
{
    assert(!XMScalarNearEqual(ViewWidth, 0.0f, 0.00001f));
    assert(!XMScalarNearEqual(ViewHeight, 0.0f, 0.00001f));
    assert(!XMScalarNearEqual(FarZ, NearZ, 0.00001f));

#if defined(_XM_NO_INTRINSICS_)

    float fRange = 1.0f / (NearZ-FarZ);

    XMMATRIX M;
    M.m[0][0] = 2.0f / ViewWidth;
    M.m[0][1] = 0.0f;
    M.m[0][2] = 0.0f;
    M.m[0][3] = 0.0f;

    M.m[1][0] = 0.0f;
    M.m[1][1] = 2.0f / ViewHeight;
    M.m[1][2] = 0.0f;
    M.m[1][3] = 0.0f;

    M.m[2][0] = 0.0f;
    M.m[2][1] = 0.0f;
    M.m[2][2] = fRange;
    M.m[2][3] = 0.0f;

    M.m[3][0] = 0.0f;
    M.m[3][1] = 0.0f;
    M.m[3][2] = fRange * NearZ;
    M.m[3][3] = 1.0f;       // affine: w' = 1, no divide
    return M;

#elif defined(_XM_ARM_NEON_INTRINSICS_)
    float fRange = 1.0f / (NearZ-FarZ);

    const XMVECTOR Zero = vdupq_n_f32(0);
    XMMATRIX M;
    M.r[0] = vsetq_lane_f32( 2.0f / ViewWidth, Zero, 0 );
    M.r[1] = vsetq_lane_f32( 2.0f / ViewHeight, Zero, 1 );
    M.r[2] = vsetq_lane_f32( fRange, Zero, 2 );
    // Row 3 starts from (0,0,0,1) and gets the depth offset in the z lane.
    M.r[3] = vsetq_lane_f32( fRange * NearZ, g_XMIdentityR3.v, 2 );
    return M;
#elif defined(_XM_SSE_INTRINSICS_)
    XMMATRIX M;
    float fRange = 1.0f / (NearZ-FarZ);
    // Note: This is recorded on the stack
    XMVECTOR rMem = {
        2.0f / ViewWidth,
        2.0f / ViewHeight,
        fRange,
        fRange * NearZ
    };
    // Copy from memory to SSE register
    XMVECTOR vValues = rMem;
    XMVECTOR vTemp = _mm_setzero_ps();
    // Copy x only
    vTemp = _mm_move_ss(vTemp,vValues);
    // 2.0f / ViewWidth,0,0,0
    M.r[0] = vTemp;
    // 0,2.0f / ViewHeight,0,0
    vTemp = vValues;
    vTemp = _mm_and_ps(vTemp,g_XMMaskY);
    M.r[1] = vTemp;
    // x=fRange,y=fRange * NearZ,0,1.0f
    vTemp = _mm_setzero_ps();
    vValues = _mm_shuffle_ps(vValues,g_XMIdentityR3,_MM_SHUFFLE(3,2,3,2));
    // 0,0,fRange,0.0f
    vTemp = _mm_shuffle_ps(vTemp,vValues,_MM_SHUFFLE(2,0,0,0));
    M.r[2] = vTemp;
    // 0,0,fRange * NearZ,1.0f
    vTemp = _mm_shuffle_ps(vTemp,vValues,_MM_SHUFFLE(3,1,0,0));
    M.r[3] = vTemp;
    return M;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
+//------------------------------------------------------------------------------
+
// Builds a left-handed orthographic projection matrix for an off-center
// view volume [ViewLeft,ViewRight] x [ViewBottom,ViewTop] x [NearZ,FarZ].
// The centering offsets live in the translation row (row 3), unlike the
// perspective variants where they sit in the z row. Degenerate volumes are
// asserted out.
inline XMMATRIX XMMatrixOrthographicOffCenterLH
(
    float ViewLeft,
    float ViewRight,
    float ViewBottom,
    float ViewTop,
    float NearZ,
    float FarZ
)
{
    assert(!XMScalarNearEqual(ViewRight, ViewLeft, 0.00001f));
    assert(!XMScalarNearEqual(ViewTop, ViewBottom, 0.00001f));
    assert(!XMScalarNearEqual(FarZ, NearZ, 0.00001f));

#if defined(_XM_NO_INTRINSICS_)

    float ReciprocalWidth = 1.0f / (ViewRight - ViewLeft);
    float ReciprocalHeight = 1.0f / (ViewTop - ViewBottom);
    float fRange = 1.0f / (FarZ-NearZ);

    XMMATRIX M;
    M.m[0][0] = ReciprocalWidth + ReciprocalWidth;
    M.m[0][1] = 0.0f;
    M.m[0][2] = 0.0f;
    M.m[0][3] = 0.0f;

    M.m[1][0] = 0.0f;
    M.m[1][1] = ReciprocalHeight + ReciprocalHeight;
    M.m[1][2] = 0.0f;
    M.m[1][3] = 0.0f;

    M.m[2][0] = 0.0f;
    M.m[2][1] = 0.0f;
    M.m[2][2] = fRange;
    M.m[2][3] = 0.0f;

    // Translation row recenters the volume onto the canonical clip box.
    M.m[3][0] = -(ViewLeft + ViewRight) * ReciprocalWidth;
    M.m[3][1] = -(ViewTop + ViewBottom) * ReciprocalHeight;
    M.m[3][2] = -fRange * NearZ;
    M.m[3][3] = 1.0f;
    return M;

#elif defined(_XM_ARM_NEON_INTRINSICS_)
    float ReciprocalWidth = 1.0f / (ViewRight - ViewLeft);
    float ReciprocalHeight = 1.0f / (ViewTop - ViewBottom);
    float fRange = 1.0f / (FarZ-NearZ);
    const XMVECTOR Zero = vdupq_n_f32(0);
    XMMATRIX M;
    M.r[0] = vsetq_lane_f32( ReciprocalWidth + ReciprocalWidth, Zero, 0 );
    M.r[1] = vsetq_lane_f32( ReciprocalHeight + ReciprocalHeight, Zero, 1 );
    M.r[2] = vsetq_lane_f32( fRange, Zero, 2 );
    M.r[3] = XMVectorSet(-(ViewLeft + ViewRight) * ReciprocalWidth,
                         -(ViewTop + ViewBottom) * ReciprocalHeight,
                         -fRange * NearZ,
                         1.0f);
    return M;
#elif defined(_XM_SSE_INTRINSICS_)
    XMMATRIX M;
    float fReciprocalWidth = 1.0f / (ViewRight - ViewLeft);
    float fReciprocalHeight = 1.0f / (ViewTop - ViewBottom);
    float fRange = 1.0f / (FarZ-NearZ);
    // Note: This is recorded on the stack
    XMVECTOR rMem = {
        fReciprocalWidth,
        fReciprocalHeight,
        fRange,
        1.0f
    };
    // rMem2 holds the numerators so one multiply yields the translation row.
    XMVECTOR rMem2 = {
        -(ViewLeft + ViewRight),
        -(ViewTop + ViewBottom),
        -NearZ,
        1.0f
    };
    // Copy from memory to SSE register
    XMVECTOR vValues = rMem;
    XMVECTOR vTemp = _mm_setzero_ps();
    // Copy x only
    vTemp = _mm_move_ss(vTemp,vValues);
    // fReciprocalWidth*2,0,0,0
    vTemp = _mm_add_ss(vTemp,vTemp);
    M.r[0] = vTemp;
    // 0,fReciprocalHeight*2,0,0
    vTemp = vValues;
    vTemp = _mm_and_ps(vTemp,g_XMMaskY);
    vTemp = _mm_add_ps(vTemp,vTemp);
    M.r[1] = vTemp;
    // 0,0,fRange,0.0f
    vTemp = vValues;
    vTemp = _mm_and_ps(vTemp,g_XMMaskZ);
    M.r[2] = vTemp;
    // -(ViewLeft + ViewRight)*fReciprocalWidth,-(ViewTop + ViewBottom)*fReciprocalHeight,fRange*-NearZ,1.0f
    vValues = _mm_mul_ps(vValues,rMem2);
    M.r[3] = vValues;
    return M;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
+//------------------------------------------------------------------------------
+
// Builds a right-handed orthographic projection matrix for an off-center view
// volume.  Maps x in [ViewLeft, ViewRight] and y in [ViewBottom, ViewTop] to
// [-1, 1].  Unlike the LH variant, fRange = 1/(NearZ - FarZ), so z maps with
// the opposite sign and the translation term is +fRange*NearZ.
inline XMMATRIX XMMatrixOrthographicOffCenterRH
(
    float ViewLeft,
    float ViewRight,
    float ViewBottom,
    float ViewTop,
    float NearZ,
    float FarZ
)
{
    // The view volume must not be degenerate in any dimension.
    assert(!XMScalarNearEqual(ViewRight, ViewLeft, 0.00001f));
    assert(!XMScalarNearEqual(ViewTop, ViewBottom, 0.00001f));
    assert(!XMScalarNearEqual(FarZ, NearZ, 0.00001f));

#if defined(_XM_NO_INTRINSICS_)

    float ReciprocalWidth = 1.0f / (ViewRight - ViewLeft);
    float ReciprocalHeight = 1.0f / (ViewTop - ViewBottom);
    float fRange = 1.0f / (NearZ-FarZ);  // negative of the LH range

    XMMATRIX M;
    M.m[0][0] = ReciprocalWidth + ReciprocalWidth;
    M.m[0][1] = 0.0f;
    M.m[0][2] = 0.0f;
    M.m[0][3] = 0.0f;

    M.m[1][0] = 0.0f;
    M.m[1][1] = ReciprocalHeight + ReciprocalHeight;
    M.m[1][2] = 0.0f;
    M.m[1][3] = 0.0f;

    M.m[2][0] = 0.0f;
    M.m[2][1] = 0.0f;
    M.m[2][2] = fRange;
    M.m[2][3] = 0.0f;

    M.r[3] = XMVectorSet(-(ViewLeft + ViewRight) * ReciprocalWidth,
                         -(ViewTop + ViewBottom) * ReciprocalHeight,
                         fRange * NearZ,
                         1.0f);
    return M;

#elif defined(_XM_ARM_NEON_INTRINSICS_)
    float ReciprocalWidth = 1.0f / (ViewRight - ViewLeft);
    float ReciprocalHeight = 1.0f / (ViewTop - ViewBottom);
    float fRange = 1.0f / (NearZ-FarZ);
    const XMVECTOR Zero = vdupq_n_f32(0);
    XMMATRIX M;
    // Each of the first three rows has a single non-zero diagonal element.
    M.r[0] = vsetq_lane_f32( ReciprocalWidth + ReciprocalWidth, Zero, 0 );
    M.r[1] = vsetq_lane_f32( ReciprocalHeight + ReciprocalHeight, Zero, 1 );
    M.r[2] = vsetq_lane_f32( fRange, Zero, 2 );
    M.r[3] = XMVectorSet(-(ViewLeft + ViewRight) * ReciprocalWidth,
                         -(ViewTop + ViewBottom) * ReciprocalHeight,
                         fRange * NearZ,
                         1.0f);
    return M;
#elif defined(_XM_SSE_INTRINSICS_)
    XMMATRIX M;
    float fReciprocalWidth = 1.0f / (ViewRight - ViewLeft);
    float fReciprocalHeight = 1.0f / (ViewTop - ViewBottom);
    float fRange = 1.0f / (NearZ-FarZ);
    // Note: This is recorded on the stack
    XMVECTOR rMem = {
        fReciprocalWidth,
        fReciprocalHeight,
        fRange,
        1.0f
    };
    // Factors for the translation row; multiplied by rMem below.
    XMVECTOR rMem2 = {
        -(ViewLeft + ViewRight),
        -(ViewTop + ViewBottom),
        NearZ,
        1.0f
    };
    // Copy from memory to SSE register
    XMVECTOR vValues = rMem;
    XMVECTOR vTemp = _mm_setzero_ps();
    // Copy x only
    vTemp = _mm_move_ss(vTemp,vValues);
    // fReciprocalWidth*2,0,0,0
    vTemp = _mm_add_ss(vTemp,vTemp);
    M.r[0] = vTemp;
    // 0,fReciprocalHeight*2,0,0
    vTemp = vValues;
    vTemp = _mm_and_ps(vTemp,g_XMMaskY);
    vTemp = _mm_add_ps(vTemp,vTemp);
    M.r[1] = vTemp;
    // 0,0,fRange,0.0f
    vTemp = vValues;
    vTemp = _mm_and_ps(vTemp,g_XMMaskZ);
    M.r[2] = vTemp;
    // -(ViewLeft + ViewRight)*fReciprocalWidth,-(ViewTop + ViewBottom)*fReciprocalHeight,fRange*NearZ,1.0f
    vValues = _mm_mul_ps(vValues,rMem2);
    M.r[3] = vValues;
    return M;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
+
+/****************************************************************************
+ *
+ * XMMATRIX operators and methods
+ *
+ ****************************************************************************/
+
+//------------------------------------------------------------------------------
+
+inline XMMATRIX::XMMATRIX
+(
+ float m00, float m01, float m02, float m03,
+ float m10, float m11, float m12, float m13,
+ float m20, float m21, float m22, float m23,
+ float m30, float m31, float m32, float m33
+)
+{
+ r[0] = XMVectorSet(m00, m01, m02, m03);
+ r[1] = XMVectorSet(m10, m11, m12, m13);
+ r[2] = XMVectorSet(m20, m21, m22, m23);
+ r[3] = XMVectorSet(m30, m31, m32, m33);
+}
+
+//------------------------------------------------------------------------------
+_Use_decl_annotations_
+inline XMMATRIX::XMMATRIX
+(
+ const float* pArray
+)
+{
+ assert( pArray != NULL );
+ r[0] = XMLoadFloat4((const XMFLOAT4*)pArray);
+ r[1] = XMLoadFloat4((const XMFLOAT4*)(pArray + 4));
+ r[2] = XMLoadFloat4((const XMFLOAT4*)(pArray + 8));
+ r[3] = XMLoadFloat4((const XMFLOAT4*)(pArray + 12));
+}
+
+//------------------------------------------------------------------------------
+
+inline XMMATRIX XMMATRIX::operator- () const
+{
+ XMMATRIX R;
+ R.r[0] = XMVectorNegate( r[0] );
+ R.r[1] = XMVectorNegate( r[1] );
+ R.r[2] = XMVectorNegate( r[2] );
+ R.r[3] = XMVectorNegate( r[3] );
+ return R;
+}
+
+//------------------------------------------------------------------------------
+
+inline XMMATRIX& XMMATRIX::operator+= (CXMMATRIX M)
+{
+ r[0] = XMVectorAdd( r[0], M.r[0] );
+ r[1] = XMVectorAdd( r[1], M.r[1] );
+ r[2] = XMVectorAdd( r[2], M.r[2] );
+ r[3] = XMVectorAdd( r[3], M.r[3] );
+ return *this;
+}
+
+//------------------------------------------------------------------------------
+
+inline XMMATRIX& XMMATRIX::operator-= (CXMMATRIX M)
+{
+ r[0] = XMVectorSubtract( r[0], M.r[0] );
+ r[1] = XMVectorSubtract( r[1], M.r[1] );
+ r[2] = XMVectorSubtract( r[2], M.r[2] );
+ r[3] = XMVectorSubtract( r[3], M.r[3] );
+ return *this;
+}
+
+//------------------------------------------------------------------------------
+
+inline XMMATRIX& XMMATRIX::operator*=(CXMMATRIX M)
+{
+ *this = XMMatrixMultiply( *this, M );
+ return *this;
+}
+
+//------------------------------------------------------------------------------
+
+inline XMMATRIX& XMMATRIX::operator*= (float S)
+{
+ r[0] = XMVectorScale( r[0], S );
+ r[1] = XMVectorScale( r[1], S );
+ r[2] = XMVectorScale( r[2], S );
+ r[3] = XMVectorScale( r[3], S );
+ return *this;
+}
+
+//------------------------------------------------------------------------------
+
+inline XMMATRIX& XMMATRIX::operator/= (float S)
+{
+ assert( S != 0.0f );
+ float t = 1.0f / S;
+ r[0] = XMVectorScale( r[0], t );
+ r[1] = XMVectorScale( r[1], t );
+ r[2] = XMVectorScale( r[2], t );
+ r[3] = XMVectorScale( r[3], t );
+ return *this;
+}
+
+//------------------------------------------------------------------------------
+
+inline XMMATRIX XMMATRIX::operator+ (CXMMATRIX M) const
+{
+ XMMATRIX R;
+ R.r[0] = XMVectorAdd( r[0], M.r[0] );
+ R.r[1] = XMVectorAdd( r[1], M.r[1] );
+ R.r[2] = XMVectorAdd( r[2], M.r[2] );
+ R.r[3] = XMVectorAdd( r[3], M.r[3] );
+ return R;
+}
+
+//------------------------------------------------------------------------------
+
+inline XMMATRIX XMMATRIX::operator- (CXMMATRIX M) const
+{
+ XMMATRIX R;
+ R.r[0] = XMVectorSubtract( r[0], M.r[0] );
+ R.r[1] = XMVectorSubtract( r[1], M.r[1] );
+ R.r[2] = XMVectorSubtract( r[2], M.r[2] );
+ R.r[3] = XMVectorSubtract( r[3], M.r[3] );
+ return R;
+}
+
+//------------------------------------------------------------------------------
+
+inline XMMATRIX XMMATRIX::operator*(CXMMATRIX M) const
+{
+ return XMMatrixMultiply(*this, M);
+}
+
+//------------------------------------------------------------------------------
+
+inline XMMATRIX XMMATRIX::operator* (float S) const
+{
+ XMMATRIX R;
+ R.r[0] = XMVectorScale( r[0], S );
+ R.r[1] = XMVectorScale( r[1], S );
+ R.r[2] = XMVectorScale( r[2], S );
+ R.r[3] = XMVectorScale( r[3], S );
+ return R;
+}
+
+//------------------------------------------------------------------------------
+
+inline XMMATRIX XMMATRIX::operator/ (float S) const
+{
+ assert( S != 0.0f );
+ XMMATRIX R;
+ float t = 1.0f / S;
+ R.r[0] = XMVectorScale( r[0], t );
+ R.r[1] = XMVectorScale( r[1], t );
+ R.r[2] = XMVectorScale( r[2], t );
+ R.r[3] = XMVectorScale( r[3], t );
+ return R;
+}
+
+//------------------------------------------------------------------------------
+
+inline XMMATRIX operator*
+(
+ float S,
+ CXMMATRIX M
+)
+{
+ XMMATRIX R;
+ R.r[0] = XMVectorScale( M.r[0], S );
+ R.r[1] = XMVectorScale( M.r[1], S );
+ R.r[2] = XMVectorScale( M.r[2], S );
+ R.r[3] = XMVectorScale( M.r[3], S );
+ return R;
+}
+
+/****************************************************************************
+ *
+ * XMFLOAT3X3 operators
+ *
+ ****************************************************************************/
+
+//------------------------------------------------------------------------------
+
+inline XMFLOAT3X3::XMFLOAT3X3
+(
+ float m00, float m01, float m02,
+ float m10, float m11, float m12,
+ float m20, float m21, float m22
+)
+{
+ m[0][0] = m00;
+ m[0][1] = m01;
+ m[0][2] = m02;
+
+ m[1][0] = m10;
+ m[1][1] = m11;
+ m[1][2] = m12;
+
+ m[2][0] = m20;
+ m[2][1] = m21;
+ m[2][2] = m22;
+}
+
+//------------------------------------------------------------------------------
+_Use_decl_annotations_
+inline XMFLOAT3X3::XMFLOAT3X3
+(
+ const float* pArray
+)
+{
+ assert( pArray != NULL );
+ for (size_t Row = 0; Row < 3; Row++)
+ {
+ for (size_t Column = 0; Column < 3; Column++)
+ {
+ m[Row][Column] = pArray[Row * 3 + Column];
+ }
+ }
+}
+
+//------------------------------------------------------------------------------
+
+inline XMFLOAT3X3& XMFLOAT3X3::operator=
+(
+ const XMFLOAT3X3& Float3x3
+)
+{
+ _11 = Float3x3._11;
+ _12 = Float3x3._12;
+ _13 = Float3x3._13;
+ _21 = Float3x3._21;
+ _22 = Float3x3._22;
+ _23 = Float3x3._23;
+ _31 = Float3x3._31;
+ _32 = Float3x3._32;
+ _33 = Float3x3._33;
+
+ return *this;
+}
+
+/****************************************************************************
+ *
+ * XMFLOAT4X3 operators
+ *
+ ****************************************************************************/
+
+//------------------------------------------------------------------------------
+
+inline XMFLOAT4X3::XMFLOAT4X3
+(
+ float m00, float m01, float m02,
+ float m10, float m11, float m12,
+ float m20, float m21, float m22,
+ float m30, float m31, float m32
+)
+{
+ m[0][0] = m00;
+ m[0][1] = m01;
+ m[0][2] = m02;
+
+ m[1][0] = m10;
+ m[1][1] = m11;
+ m[1][2] = m12;
+
+ m[2][0] = m20;
+ m[2][1] = m21;
+ m[2][2] = m22;
+
+ m[3][0] = m30;
+ m[3][1] = m31;
+ m[3][2] = m32;
+}
+
+//------------------------------------------------------------------------------
+_Use_decl_annotations_
+inline XMFLOAT4X3::XMFLOAT4X3
+(
+ const float* pArray
+)
+{
+ assert( pArray != NULL );
+
+ m[0][0] = pArray[0];
+ m[0][1] = pArray[1];
+ m[0][2] = pArray[2];
+
+ m[1][0] = pArray[3];
+ m[1][1] = pArray[4];
+ m[1][2] = pArray[5];
+
+ m[2][0] = pArray[6];
+ m[2][1] = pArray[7];
+ m[2][2] = pArray[8];
+
+ m[3][0] = pArray[9];
+ m[3][1] = pArray[10];
+ m[3][2] = pArray[11];
+}
+
+//------------------------------------------------------------------------------
+
// Assignment: copies the twelve floats as three adjacent 4-float spans
// using unaligned vector load/store pairs.  The spans start at _11, _22,
// and _33, so [_11.._21], [_22.._32], [_33.._43] together cover the whole
// 4x3 matrix in just three XMVECTOR moves.
inline XMFLOAT4X3& XMFLOAT4X3::operator=
(
    const XMFLOAT4X3& Float4x3
)
{
    XMVECTOR V1 = XMLoadFloat4((const XMFLOAT4*)&Float4x3._11);
    XMVECTOR V2 = XMLoadFloat4((const XMFLOAT4*)&Float4x3._22);
    XMVECTOR V3 = XMLoadFloat4((const XMFLOAT4*)&Float4x3._33);

    XMStoreFloat4((XMFLOAT4*)&_11, V1);
    XMStoreFloat4((XMFLOAT4*)&_22, V2);
    XMStoreFloat4((XMFLOAT4*)&_33, V3);

    return *this;
}
+
+//------------------------------------------------------------------------------
+
// Aligned-variant assignment: same three-span copy as XMFLOAT4X3, but with
// aligned load/store.  _11, _22 and _33 sit at byte offsets 0, 16 and 32 of
// the 16-byte-aligned XMFLOAT4X3A, so each span is 16-byte aligned.
inline XMFLOAT4X3A& XMFLOAT4X3A::operator=
(
    const XMFLOAT4X3A& Float4x3
)
{
    XMVECTOR V1 = XMLoadFloat4A((const XMFLOAT4A*)&Float4x3._11);
    XMVECTOR V2 = XMLoadFloat4A((const XMFLOAT4A*)&Float4x3._22);
    XMVECTOR V3 = XMLoadFloat4A((const XMFLOAT4A*)&Float4x3._33);

    XMStoreFloat4A((XMFLOAT4A*)&_11, V1);
    XMStoreFloat4A((XMFLOAT4A*)&_22, V2);
    XMStoreFloat4A((XMFLOAT4A*)&_33, V3);

    return *this;
}
+
+/****************************************************************************
+ *
+ * XMFLOAT4X4 operators
+ *
+ ****************************************************************************/
+
+//------------------------------------------------------------------------------
+
+inline XMFLOAT4X4::XMFLOAT4X4
+(
+ float m00, float m01, float m02, float m03,
+ float m10, float m11, float m12, float m13,
+ float m20, float m21, float m22, float m23,
+ float m30, float m31, float m32, float m33
+)
+{
+ m[0][0] = m00;
+ m[0][1] = m01;
+ m[0][2] = m02;
+ m[0][3] = m03;
+
+ m[1][0] = m10;
+ m[1][1] = m11;
+ m[1][2] = m12;
+ m[1][3] = m13;
+
+ m[2][0] = m20;
+ m[2][1] = m21;
+ m[2][2] = m22;
+ m[2][3] = m23;
+
+ m[3][0] = m30;
+ m[3][1] = m31;
+ m[3][2] = m32;
+ m[3][3] = m33;
+}
+
+//------------------------------------------------------------------------------
+_Use_decl_annotations_
+inline XMFLOAT4X4::XMFLOAT4X4
+(
+ const float* pArray
+)
+{
+ assert( pArray != NULL );
+
+ m[0][0] = pArray[0];
+ m[0][1] = pArray[1];
+ m[0][2] = pArray[2];
+ m[0][3] = pArray[3];
+
+ m[1][0] = pArray[4];
+ m[1][1] = pArray[5];
+ m[1][2] = pArray[6];
+ m[1][3] = pArray[7];
+
+ m[2][0] = pArray[8];
+ m[2][1] = pArray[9];
+ m[2][2] = pArray[10];
+ m[2][3] = pArray[11];
+
+ m[3][0] = pArray[12];
+ m[3][1] = pArray[13];
+ m[3][2] = pArray[14];
+ m[3][3] = pArray[15];
+}
+
+//------------------------------------------------------------------------------
+
+inline XMFLOAT4X4& XMFLOAT4X4::operator=
+(
+ const XMFLOAT4X4& Float4x4
+)
+{
+ XMVECTOR V1 = XMLoadFloat4((const XMFLOAT4*)&Float4x4._11);
+ XMVECTOR V2 = XMLoadFloat4((const XMFLOAT4*)&Float4x4._21);
+ XMVECTOR V3 = XMLoadFloat4((const XMFLOAT4*)&Float4x4._31);
+ XMVECTOR V4 = XMLoadFloat4((const XMFLOAT4*)&Float4x4._41);
+
+ XMStoreFloat4((XMFLOAT4*)&_11, V1);
+ XMStoreFloat4((XMFLOAT4*)&_21, V2);
+ XMStoreFloat4((XMFLOAT4*)&_31, V3);
+ XMStoreFloat4((XMFLOAT4*)&_41, V4);
+
+ return *this;
+}
+
+//------------------------------------------------------------------------------
+
+inline XMFLOAT4X4A& XMFLOAT4X4A::operator=
+(
+ const XMFLOAT4X4A& Float4x4
+)
+{
+ XMVECTOR V1 = XMLoadFloat4A((const XMFLOAT4A*)&Float4x4._11);
+ XMVECTOR V2 = XMLoadFloat4A((const XMFLOAT4A*)&Float4x4._21);
+ XMVECTOR V3 = XMLoadFloat4A((const XMFLOAT4A*)&Float4x4._31);
+ XMVECTOR V4 = XMLoadFloat4A((const XMFLOAT4A*)&Float4x4._41);
+
+ XMStoreFloat4A((XMFLOAT4A*)&_11, V1);
+ XMStoreFloat4A((XMFLOAT4A*)&_21, V2);
+ XMStoreFloat4A((XMFLOAT4A*)&_31, V3);
+ XMStoreFloat4A((XMFLOAT4A*)&_41, V4);
+
+ return *this;
+}
+
diff --git a/Minecraft.Client/PS3/PS3Extras/DirectX/DirectXMathMisc.inl b/Minecraft.Client/PS3/PS3Extras/DirectX/DirectXMathMisc.inl
new file mode 100644
index 00000000..f3461e6c
--- /dev/null
+++ b/Minecraft.Client/PS3/PS3Extras/DirectX/DirectXMathMisc.inl
@@ -0,0 +1,2501 @@
+//-------------------------------------------------------------------------------------
+// DirectXMathMisc.inl -- SIMD C++ Math library
+//
+// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
+// ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
+// PARTICULAR PURPOSE.
+//
+// Copyright (c) Microsoft Corporation. All rights reserved.
+//-------------------------------------------------------------------------------------
+
+#ifdef _MSC_VER
+#pragma once
+#endif
+
+/****************************************************************************
+ *
+ * Quaternion
+ *
+ ****************************************************************************/
+
+//------------------------------------------------------------------------------
+// Comparison operations
+//------------------------------------------------------------------------------
+
+//------------------------------------------------------------------------------
+
+inline bool XMQuaternionEqual
+(
+ FXMVECTOR Q1,
+ FXMVECTOR Q2
+)
+{
+ return XMVector4Equal(Q1, Q2);
+}
+
+//------------------------------------------------------------------------------
+
+inline bool XMQuaternionNotEqual
+(
+ FXMVECTOR Q1,
+ FXMVECTOR Q2
+)
+{
+ return XMVector4NotEqual(Q1, Q2);
+}
+
+//------------------------------------------------------------------------------
+
+inline bool XMQuaternionIsNaN
+(
+ FXMVECTOR Q
+)
+{
+ return XMVector4IsNaN(Q);
+}
+
+//------------------------------------------------------------------------------
+
+inline bool XMQuaternionIsInfinite
+(
+ FXMVECTOR Q
+)
+{
+ return XMVector4IsInfinite(Q);
+}
+
+//------------------------------------------------------------------------------
+
// True when Q is exactly the identity quaternion (0, 0, 0, 1),
// i.e. equal to g_XMIdentityR3.
inline bool XMQuaternionIsIdentity
(
    FXMVECTOR Q
)
{
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
    return XMVector4Equal(Q, g_XMIdentityR3.v);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
+//------------------------------------------------------------------------------
+// Computation operations
+//------------------------------------------------------------------------------
+
+//------------------------------------------------------------------------------
+
+inline XMVECTOR XMQuaternionDot
+(
+ FXMVECTOR Q1,
+ FXMVECTOR Q2
+)
+{
+ return XMVector4Dot(Q1, Q2);
+}
+
+//------------------------------------------------------------------------------
+
// Hamilton product in DirectXMath argument order: returns Q2*Q1, i.e. the
// rotation Q1 followed by the rotation Q2.
inline XMVECTOR XMQuaternionMultiply
(
    FXMVECTOR Q1,
    FXMVECTOR Q2
)
{
    // Returns the product Q2*Q1 (which is the concatenation of a rotation Q1 followed by the rotation Q2)

    // [ (Q2.w * Q1.x) + (Q2.x * Q1.w) + (Q2.y * Q1.z) - (Q2.z * Q1.y),
    //   (Q2.w * Q1.y) - (Q2.x * Q1.z) + (Q2.y * Q1.w) + (Q2.z * Q1.x),
    //   (Q2.w * Q1.z) + (Q2.x * Q1.y) - (Q2.y * Q1.x) + (Q2.z * Q1.w),
    //   (Q2.w * Q1.w) - (Q2.x * Q1.x) - (Q2.y * Q1.y) - (Q2.z * Q1.z) ]

#if defined(_XM_NO_INTRINSICS_)
    // Direct scalar evaluation of the four formulas above.
    XMVECTOR Result = {
        (Q2.vector4_f32[3] * Q1.vector4_f32[0]) + (Q2.vector4_f32[0] * Q1.vector4_f32[3]) + (Q2.vector4_f32[1] * Q1.vector4_f32[2]) - (Q2.vector4_f32[2] * Q1.vector4_f32[1]),
        (Q2.vector4_f32[3] * Q1.vector4_f32[1]) - (Q2.vector4_f32[0] * Q1.vector4_f32[2]) + (Q2.vector4_f32[1] * Q1.vector4_f32[3]) + (Q2.vector4_f32[2] * Q1.vector4_f32[0]),
        (Q2.vector4_f32[3] * Q1.vector4_f32[2]) + (Q2.vector4_f32[0] * Q1.vector4_f32[1]) - (Q2.vector4_f32[1] * Q1.vector4_f32[0]) + (Q2.vector4_f32[2] * Q1.vector4_f32[3]),
        (Q2.vector4_f32[3] * Q1.vector4_f32[3]) - (Q2.vector4_f32[0] * Q1.vector4_f32[0]) - (Q2.vector4_f32[1] * Q1.vector4_f32[1]) - (Q2.vector4_f32[2] * Q1.vector4_f32[2]) };
    return Result;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    // Sign masks applied to the shuffled Q1 terms of each partial product.
    static const XMVECTORF32 ControlWZYX = { 1.0f,-1.0f, 1.0f,-1.0f};
    static const XMVECTORF32 ControlZWXY = { 1.0f, 1.0f,-1.0f,-1.0f};
    static const XMVECTORF32 ControlYXWZ = {-1.0f, 1.0f, 1.0f,-1.0f};

    __n64 Q2L = vget_low_f32(Q2);
    __n64 Q2H = vget_high_f32(Q2);

    // Broadcast each component of Q2 across a full register.
    __n128 Q2X = vdupq_lane_f32( Q2L, 0 );
    __n128 Q2Y = vdupq_lane_f32( Q2L, 1 );
    __n128 Q2Z = vdupq_lane_f32( Q2H, 0 );
    __n128 vResult = vdupq_lane_f32( Q2H, 1 );
    vResult = vmulq_f32(vResult,Q1);

    // Mul by Q1WZYX
    __n128 vTemp = vrev64q_u32(Q1);
    vTemp = vcombine_f32( vget_high_f32(vTemp), vget_low_f32(vTemp) );
    Q2X = vmulq_f32(Q2X,vTemp);
    vResult = vmlaq_f32( vResult, Q2X, ControlWZYX );

    // Mul by Q1ZWXY
    vTemp = vrev64q_u32(vTemp);
    Q2Y = vmulq_f32(Q2Y,vTemp);
    vResult = vmlaq_f32(vResult, Q2Y, ControlZWXY);

    // Mul by Q1YXWZ
    vTemp = vrev64q_u32(vTemp);
    vTemp = vcombine_f32(vget_high_f32(vTemp), vget_low_f32(vTemp));
    Q2Z = vmulq_f32(Q2Z,vTemp);
    vResult = vmlaq_f32(vResult, Q2Z, ControlYXWZ);
    return vResult;
#elif defined(_XM_SSE_INTRINSICS_)
    // Sign masks applied to the shuffled Q1 terms of each partial product.
    static const XMVECTORF32 ControlWZYX = { 1.0f,-1.0f, 1.0f,-1.0f};
    static const XMVECTORF32 ControlZWXY = { 1.0f, 1.0f,-1.0f,-1.0f};
    static const XMVECTORF32 ControlYXWZ = {-1.0f, 1.0f, 1.0f,-1.0f};
    // Copy to SSE registers and use as few as possible for x86
    XMVECTOR Q2X = Q2;
    XMVECTOR Q2Y = Q2;
    XMVECTOR Q2Z = Q2;
    XMVECTOR vResult = Q2;
    // Splat with one instruction
    vResult = XM_PERMUTE_PS(vResult,_MM_SHUFFLE(3,3,3,3));
    Q2X = XM_PERMUTE_PS(Q2X,_MM_SHUFFLE(0,0,0,0));
    Q2Y = XM_PERMUTE_PS(Q2Y,_MM_SHUFFLE(1,1,1,1));
    Q2Z = XM_PERMUTE_PS(Q2Z,_MM_SHUFFLE(2,2,2,2));
    // Retire Q1 and perform Q1*Q2W
    vResult = _mm_mul_ps(vResult,Q1);
    XMVECTOR Q1Shuffle = Q1;
    // Shuffle the copies of Q1
    Q1Shuffle = XM_PERMUTE_PS(Q1Shuffle,_MM_SHUFFLE(0,1,2,3));
    // Mul by Q1WZYX
    Q2X = _mm_mul_ps(Q2X,Q1Shuffle);
    Q1Shuffle = XM_PERMUTE_PS(Q1Shuffle,_MM_SHUFFLE(2,3,0,1));
    // Flip the signs on y and z
    Q2X = _mm_mul_ps(Q2X,ControlWZYX);
    // Mul by Q1ZWXY
    Q2Y = _mm_mul_ps(Q2Y,Q1Shuffle);
    Q1Shuffle = XM_PERMUTE_PS(Q1Shuffle,_MM_SHUFFLE(0,1,2,3));
    // Flip the signs on z and w
    Q2Y = _mm_mul_ps(Q2Y,ControlZWXY);
    // Mul by Q1YXWZ
    Q2Z = _mm_mul_ps(Q2Z,Q1Shuffle);
    vResult = _mm_add_ps(vResult,Q2X);
    // Flip the signs on x and w
    Q2Z = _mm_mul_ps(Q2Z,ControlYXWZ);
    Q2Y = _mm_add_ps(Q2Y,Q2Z);
    vResult = _mm_add_ps(vResult,Q2Y);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
+//------------------------------------------------------------------------------
+
+inline XMVECTOR XMQuaternionLengthSq
+(
+ FXMVECTOR Q
+)
+{
+ return XMVector4LengthSq(Q);
+}
+
+//------------------------------------------------------------------------------
+
+inline XMVECTOR XMQuaternionReciprocalLength
+(
+ FXMVECTOR Q
+)
+{
+ return XMVector4ReciprocalLength(Q);
+}
+
+//------------------------------------------------------------------------------
+
+inline XMVECTOR XMQuaternionLength
+(
+ FXMVECTOR Q
+)
+{
+ return XMVector4Length(Q);
+}
+
+//------------------------------------------------------------------------------
+
+inline XMVECTOR XMQuaternionNormalizeEst
+(
+ FXMVECTOR Q
+)
+{
+ return XMVector4NormalizeEst(Q);
+}
+
+//------------------------------------------------------------------------------
+
+inline XMVECTOR XMQuaternionNormalize
+(
+ FXMVECTOR Q
+)
+{
+ return XMVector4Normalize(Q);
+}
+
+//------------------------------------------------------------------------------
+
// Conjugate: negates the vector part (x, y, z) and preserves the scalar
// part (w).  For a unit quaternion this is also its inverse.
inline XMVECTOR XMQuaternionConjugate
(
    FXMVECTOR Q
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR Result = {
        -Q.vector4_f32[0],
        -Q.vector4_f32[1],
        -Q.vector4_f32[2],
        Q.vector4_f32[3]
    };
    return Result;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    // Multiply by (-1,-1,-1,1) to flip only the first three lanes.
    static const XMVECTORF32 NegativeOne3 = {-1.0f,-1.0f,-1.0f,1.0f};
    return vmulq_f32(Q, NegativeOne3.v );
#elif defined(_XM_SSE_INTRINSICS_)
    // Multiply by (-1,-1,-1,1) to flip only the first three lanes.
    static const XMVECTORF32 NegativeOne3 = {-1.0f,-1.0f,-1.0f,1.0f};
    return _mm_mul_ps(Q,NegativeOne3);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
+//------------------------------------------------------------------------------
+
// Inverse: conjugate(Q) / |Q|^2.  Returns the zero vector when the squared
// length is at or below g_XMEpsilon (near-zero quaternion), avoiding a
// divide-by-(near-)zero result.
inline XMVECTOR XMQuaternionInverse
(
    FXMVECTOR Q
)
{
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)

    const XMVECTOR Zero = XMVectorZero();

    XMVECTOR L = XMVector4LengthSq(Q);
    XMVECTOR Conjugate = XMQuaternionConjugate(Q);

    // Per-lane mask: true where |Q|^2 <= epsilon.
    XMVECTOR Control = XMVectorLessOrEqual(L, g_XMEpsilon.v);

    XMVECTOR Result = XMVectorDivide(Conjugate, L);

    // Replace the (possibly infinite/NaN) quotient with zero for tiny Q.
    Result = XMVectorSelect(Result, Zero, Control);

    return Result;

#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
+//------------------------------------------------------------------------------
+
// Natural log of a unit quaternion: scales the vector part by
// theta/sin(theta) where theta = acos(w), zeroing the w component.
// When |w| is outside (-1+eps, 1-eps) the scale is skipped (Q0 is
// returned), avoiding the degenerate sin(theta) ~ 0 division.
inline XMVECTOR XMQuaternionLn
(
    FXMVECTOR Q
)
{
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)

    static const XMVECTORF32 OneMinusEpsilon = {1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f};

    XMVECTOR QW = XMVectorSplatW(Q);
    // Q0 = (x, y, z, 0): the vector part with w zeroed via the select mask.
    XMVECTOR Q0 = XMVectorSelect(g_XMSelect1110.v, Q, g_XMSelect1110.v);

    // Mask: true where w is within (-(1-eps), 1-eps).
    XMVECTOR ControlW = XMVectorInBounds(QW, OneMinusEpsilon.v);

    XMVECTOR Theta = XMVectorACos(QW);
    XMVECTOR SinTheta = XMVectorSin(Theta);

    XMVECTOR S = XMVectorDivide(Theta,SinTheta);

    XMVECTOR Result = XMVectorMultiply(Q0, S);
    Result = XMVectorSelect(Q0, Result, ControlW);

    return Result;

#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
+//------------------------------------------------------------------------------
+
// Exponential of a pure quaternion (w ignored): with theta = |(x,y,z)|,
// returns (sin(theta)/theta * (x,y,z), cos(theta)).  For theta near zero
// the sin(theta)/theta scale is skipped to avoid 0/0, and cos(theta) is
// merged into the w lane via the select mask.
inline XMVECTOR XMQuaternionExp
(
    FXMVECTOR Q
)
{
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)

    XMVECTOR Theta = XMVector3Length(Q);

    XMVECTOR SinTheta, CosTheta;
    XMVectorSinCos(&SinTheta, &CosTheta, Theta);

    XMVECTOR S = XMVectorDivide(SinTheta, Theta);

    XMVECTOR Result = XMVectorMultiply(Q, S);

    // For theta ~ 0, fall back to Q itself (sin(theta)/theta -> 1).
    const XMVECTOR Zero = XMVectorZero();
    XMVECTOR Control = XMVectorNearEqual(Theta, Zero, g_XMEpsilon.v);
    Result = XMVectorSelect(Result, Q, Control);

    // Take x, y, z from Result and w from CosTheta.
    Result = XMVectorSelect(CosTheta, Result, g_XMSelect1110.v);

    return Result;

#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
+//------------------------------------------------------------------------------
+
+inline XMVECTOR XMQuaternionSlerp
+(
+ FXMVECTOR Q0,
+ FXMVECTOR Q1,
+ float t
+)
+{
+ XMVECTOR T = XMVectorReplicate(t);
+ return XMQuaternionSlerpV(Q0, Q1, T);
+}
+
+//------------------------------------------------------------------------------
+
// Spherical linear interpolation with the parameter replicated across all
// lanes of T.  Q1 is sign-flipped when the dot product is negative so the
// shorter arc is taken; when the quaternions are nearly parallel
// (cos(omega) >= 1 - eps) the sin-ratio weights are replaced by the plain
// linear weights (1-t, t) to avoid dividing by sin(omega) ~ 0.
inline XMVECTOR XMQuaternionSlerpV
(
    FXMVECTOR Q0,
    FXMVECTOR Q1,
    FXMVECTOR T
)
{
    assert((XMVectorGetY(T) == XMVectorGetX(T)) && (XMVectorGetZ(T) == XMVectorGetX(T)) && (XMVectorGetW(T) == XMVectorGetX(T)));

    // Result = Q0 * sin((1.0 - t) * Omega) / sin(Omega) + Q1 * sin(t * Omega) / sin(Omega)

#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)

    const XMVECTORF32 OneMinusEpsilon = {1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f};

    XMVECTOR CosOmega = XMQuaternionDot(Q0, Q1);

    // Sign = -1 where the dot is negative (take the shorter arc).
    const XMVECTOR Zero = XMVectorZero();
    XMVECTOR Control = XMVectorLess(CosOmega, Zero);
    XMVECTOR Sign = XMVectorSelect(g_XMOne.v, g_XMNegativeOne.v, Control);

    CosOmega = XMVectorMultiply(CosOmega, Sign);

    // Control = true where a real slerp (not lerp) is needed.
    Control = XMVectorLess(CosOmega, OneMinusEpsilon);

    XMVECTOR SinOmega = XMVectorNegativeMultiplySubtract(CosOmega, CosOmega, g_XMOne.v);
    SinOmega = XMVectorSqrt(SinOmega);

    XMVECTOR Omega = XMVectorATan2(SinOmega, CosOmega);

    // Build V01 = (1-t, t, 0, 0) via shifts and a sign flip on lane x.
    XMVECTOR SignMask = XMVectorSplatSignMask();
    XMVECTOR V01 = XMVectorShiftLeft(T, Zero, 2);
    SignMask = XMVectorShiftLeft(SignMask, Zero, 3);
    V01 = XMVectorXorInt(V01, SignMask);
    V01 = XMVectorAdd(g_XMIdentityR0.v, V01);

    XMVECTOR InvSinOmega = XMVectorReciprocal(SinOmega);

    // S0 = (sin((1-t)*omega), sin(t*omega)) / sin(omega)
    XMVECTOR S0 = XMVectorMultiply(V01, Omega);
    S0 = XMVectorSin(S0);
    S0 = XMVectorMultiply(S0, InvSinOmega);

    // Fall back to the linear weights where the angle is tiny.
    S0 = XMVectorSelect(V01, S0, Control);

    XMVECTOR S1 = XMVectorSplatY(S0);
    S0 = XMVectorSplatX(S0);

    S1 = XMVectorMultiply(S1, Sign);

    XMVECTOR Result = XMVectorMultiply(Q0, S0);
    Result = XMVectorMultiplyAdd(Q1, S1, Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    static const XMVECTORF32 OneMinusEpsilon = {1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f};
    static const XMVECTORI32 SignMask2 = {0x80000000,0x00000000,0x00000000,0x00000000};
    static const XMVECTORI32 MaskXY = {0xFFFFFFFF,0xFFFFFFFF,0x00000000,0x00000000};

    XMVECTOR CosOmega = XMQuaternionDot(Q0, Q1);

    // Sign = -1 where the dot is negative (take the shorter arc).
    const XMVECTOR Zero = XMVectorZero();
    XMVECTOR Control = XMVectorLess(CosOmega, Zero);
    XMVECTOR Sign = XMVectorSelect(g_XMOne, g_XMNegativeOne, Control);

    CosOmega = _mm_mul_ps(CosOmega, Sign);

    // Control = true where a real slerp (not lerp) is needed.
    Control = XMVectorLess(CosOmega, OneMinusEpsilon);

    XMVECTOR SinOmega = _mm_mul_ps(CosOmega,CosOmega);
    SinOmega = _mm_sub_ps(g_XMOne,SinOmega);
    SinOmega = _mm_sqrt_ps(SinOmega);

    XMVECTOR Omega = XMVectorATan2(SinOmega, CosOmega);

    // Build V01 = (1-t, t, 0, 0): shuffle t into lane x, negate it, add 1.
    XMVECTOR V01 = XM_PERMUTE_PS(T,_MM_SHUFFLE(2,3,0,1));
    V01 = _mm_and_ps(V01,MaskXY);
    V01 = _mm_xor_ps(V01,SignMask2);
    V01 = _mm_add_ps(g_XMIdentityR0, V01);

    // S0 = (sin((1-t)*omega), sin(t*omega)) / sin(omega)
    XMVECTOR S0 = _mm_mul_ps(V01, Omega);
    S0 = XMVectorSin(S0);
    S0 = _mm_div_ps(S0, SinOmega);

    // Fall back to the linear weights where the angle is tiny.
    S0 = XMVectorSelect(V01, S0, Control);

    XMVECTOR S1 = XMVectorSplatY(S0);
    S0 = XMVectorSplatX(S0);

    S1 = _mm_mul_ps(S1, Sign);
    XMVECTOR Result = _mm_mul_ps(Q0, S0);
    S1 = _mm_mul_ps(S1, Q1);
    Result = _mm_add_ps(Result,S1);
    return Result;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
+//------------------------------------------------------------------------------
+
+inline XMVECTOR XMQuaternionSquad
+(
+ FXMVECTOR Q0,
+ FXMVECTOR Q1,
+ FXMVECTOR Q2,
+ GXMVECTOR Q3,
+ float t
+)
+{
+ XMVECTOR T = XMVectorReplicate(t);
+ return XMQuaternionSquadV(Q0, Q1, Q2, Q3, T);
+}
+
+//------------------------------------------------------------------------------
+
// Spherical quadrangle interpolation (squad) with the parameter replicated
// across all lanes of T: slerps Q0->Q3 and Q1->Q2 by t, then slerps between
// those results by 2t(1-t).
inline XMVECTOR XMQuaternionSquadV
(
    FXMVECTOR Q0,
    FXMVECTOR Q1,
    FXMVECTOR Q2,
    GXMVECTOR Q3,
    CXMVECTOR T
)
{
    assert( (XMVectorGetY(T) == XMVectorGetX(T)) && (XMVectorGetZ(T) == XMVectorGetX(T)) && (XMVectorGetW(T) == XMVectorGetX(T)) );

    XMVECTOR TP = T;
    const XMVECTOR Two = XMVectorSplatConstant(2, 0);

    XMVECTOR Q03 = XMQuaternionSlerpV(Q0, Q3, T);
    XMVECTOR Q12 = XMQuaternionSlerpV(Q1, Q2, T);

    // TP = 2 * (t - t*t) = 2t(1-t)
    TP = XMVectorNegativeMultiplySubtract(TP, TP, TP);
    TP = XMVectorMultiply(TP, Two);

    XMVECTOR Result = XMQuaternionSlerpV(Q03, Q12, TP);

    return Result;
}
+
+//------------------------------------------------------------------------------
_Use_decl_annotations_
// Computes the inner control points (A, B) and the possibly sign-flipped
// third quaternion (C) for a subsequent XMQuaternionSquad over the keys
// Q0..Q3.  Each neighbor is negated when that puts it on the same
// hemisphere as Q1/Q2 (|sum|^2 < |difference|^2), so the interpolation
// takes the shorter arc.
inline void XMQuaternionSquadSetup
(
    XMVECTOR* pA,
    XMVECTOR* pB,
    XMVECTOR* pC,
    FXMVECTOR Q0,
    FXMVECTOR Q1,
    FXMVECTOR Q2,
    GXMVECTOR Q3
)
{
    assert(pA);
    assert(pB);
    assert(pC);

    // Flip Q2 toward Q1's hemisphere if needed.
    XMVECTOR LS12 = XMQuaternionLengthSq(XMVectorAdd(Q1, Q2));
    XMVECTOR LD12 = XMQuaternionLengthSq(XMVectorSubtract(Q1, Q2));
    XMVECTOR SQ2 = XMVectorNegate(Q2);

    XMVECTOR Control1 = XMVectorLess(LS12, LD12);
    SQ2 = XMVectorSelect(Q2, SQ2, Control1);

    // Flip Q0 toward Q1, and Q3 toward the (possibly flipped) Q2.
    XMVECTOR LS01 = XMQuaternionLengthSq(XMVectorAdd(Q0, Q1));
    XMVECTOR LD01 = XMQuaternionLengthSq(XMVectorSubtract(Q0, Q1));
    XMVECTOR SQ0 = XMVectorNegate(Q0);

    XMVECTOR LS23 = XMQuaternionLengthSq(XMVectorAdd(SQ2, Q3));
    XMVECTOR LD23 = XMQuaternionLengthSq(XMVectorSubtract(SQ2, Q3));
    XMVECTOR SQ3 = XMVectorNegate(Q3);

    XMVECTOR Control0 = XMVectorLess(LS01, LD01);
    XMVECTOR Control2 = XMVectorLess(LS23, LD23);

    SQ0 = XMVectorSelect(Q0, SQ0, Control0);
    SQ3 = XMVectorSelect(Q3, SQ3, Control2);

    // Control points: A = Q1 * exp(-(ln(Q1^-1 SQ0) + ln(Q1^-1 SQ2)) / 4),
    //                 B = SQ2 * exp(-(ln(SQ2^-1 Q1) + ln(SQ2^-1 SQ3)) / 4).
    XMVECTOR InvQ1 = XMQuaternionInverse(Q1);
    XMVECTOR InvQ2 = XMQuaternionInverse(SQ2);

    XMVECTOR LnQ0 = XMQuaternionLn(XMQuaternionMultiply(InvQ1, SQ0));
    XMVECTOR LnQ2 = XMQuaternionLn(XMQuaternionMultiply(InvQ1, SQ2));
    XMVECTOR LnQ1 = XMQuaternionLn(XMQuaternionMultiply(InvQ2, Q1));
    XMVECTOR LnQ3 = XMQuaternionLn(XMQuaternionMultiply(InvQ2, SQ3));

    const XMVECTOR NegativeOneQuarter = XMVectorSplatConstant(-1, 2);

    XMVECTOR ExpQ02 = XMVectorMultiply(XMVectorAdd(LnQ0, LnQ2), NegativeOneQuarter);
    XMVECTOR ExpQ13 = XMVectorMultiply(XMVectorAdd(LnQ1, LnQ3), NegativeOneQuarter);
    ExpQ02 = XMQuaternionExp(ExpQ02);
    ExpQ13 = XMQuaternionExp(ExpQ13);

    *pA = XMQuaternionMultiply(Q1, ExpQ02);
    *pB = XMQuaternionMultiply(SQ2, ExpQ13);
    *pC = SQ2;
}
+
+//------------------------------------------------------------------------------
+
+inline XMVECTOR XMQuaternionBaryCentric
+(
+ FXMVECTOR Q0,
+ FXMVECTOR Q1,
+ FXMVECTOR Q2,
+ float f,
+ float g
+)
+{
+ float s = f + g;
+
+ XMVECTOR Result;
+ if ((s < 0.00001f) && (s > -0.00001f))
+ {
+ Result = Q0;
+ }
+ else
+ {
+ XMVECTOR Q01 = XMQuaternionSlerp(Q0, Q1, s);
+ XMVECTOR Q02 = XMQuaternionSlerp(Q0, Q2, s);
+
+ Result = XMQuaternionSlerp(Q01, Q02, g / s);
+ }
+
+ return Result;
+}
+
+//------------------------------------------------------------------------------
+
+// Vector-argument form of XMQuaternionBaryCentric. F and G must be
+// replicated (all four lanes equal), which the asserts below enforce.
+inline XMVECTOR XMQuaternionBaryCentricV
+(
+    FXMVECTOR Q0,
+    FXMVECTOR Q1,
+    FXMVECTOR Q2,
+    GXMVECTOR F,
+    CXMVECTOR G
+)
+{
+    assert( (XMVectorGetY(F) == XMVectorGetX(F)) && (XMVectorGetZ(F) == XMVectorGetX(F)) && (XMVectorGetW(F) == XMVectorGetX(F)) );
+    assert( (XMVectorGetY(G) == XMVectorGetX(G)) && (XMVectorGetZ(G) == XMVectorGetX(G)) && (XMVectorGetW(G) == XMVectorGetX(G)) );
+
+    const XMVECTOR Epsilon = XMVectorSplatConstant(1, 16);
+
+    XMVECTOR S = XMVectorAdd(F, G);
+
+    // Degenerate case: combined weight within epsilon of zero -> Q0.
+    if (XMVector4InBounds(S, Epsilon))
+    {
+        return Q0;
+    }
+
+    // Slerp along each edge by the combined weight, then blend by G/S.
+    XMVECTOR Edge01 = XMQuaternionSlerpV(Q0, Q1, S);
+    XMVECTOR Edge02 = XMQuaternionSlerpV(Q0, Q2, S);
+    XMVECTOR GS = XMVectorMultiply(G, XMVectorReciprocal(S));
+    return XMQuaternionSlerpV(Edge01, Edge02, GS);
+}
+
+//------------------------------------------------------------------------------
+// Transformation operations
+//------------------------------------------------------------------------------
+
+//------------------------------------------------------------------------------
+
+// Returns the identity quaternion (0, 0, 0, 1) — no rotation.
+inline XMVECTOR XMQuaternionIdentity()
+{
+#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
+    // g_XMIdentityR3 is row 3 of the identity matrix: (0, 0, 0, 1).
+    return g_XMIdentityR3.v;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Builds a rotation quaternion from pitch, yaw and roll angles (radians).
+// Scalar convenience wrapper over the vector form.
+inline XMVECTOR XMQuaternionRotationRollPitchYaw
+(
+    float Pitch,
+    float Yaw,
+    float Roll
+)
+{
+    // Pack the scalars as <Pitch, Yaw, Roll, 0> and defer to the vector form.
+    return XMQuaternionRotationRollPitchYawFromVector(XMVectorSet(Pitch, Yaw, Roll, 0.0f));
+}
+
+//------------------------------------------------------------------------------
+
+// Builds a rotation quaternion from a vector of Euler angles <Pitch, Yaw,
+// Roll, 0> in radians. Computes the half-angle sines/cosines once, then
+// combines the per-axis quaternion products via sign-folded permutes rather
+// than three explicit quaternion multiplies.
+inline XMVECTOR XMQuaternionRotationRollPitchYawFromVector
+(
+    FXMVECTOR Angles // <Pitch, Yaw, Roll, 0>
+)
+{
+#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
+
+    static const XMVECTORF32 Sign = {1.0f, -1.0f, -1.0f, 1.0f};
+
+    XMVECTOR HalfAngles = XMVectorMultiply(Angles, g_XMOneHalf.v);
+
+    XMVECTOR SinAngles, CosAngles;
+    XMVectorSinCos(&SinAngles, &CosAngles, HalfAngles);
+
+    // P0/Y0/R0: one sin() lane surrounded by cos() lanes; P1/Y1/R1 the inverse.
+    XMVECTOR P0 = XMVectorPermute<XM_PERMUTE_0X, XM_PERMUTE_1X, XM_PERMUTE_1X, XM_PERMUTE_1X>(SinAngles, CosAngles);
+    XMVECTOR Y0 = XMVectorPermute<XM_PERMUTE_1Y, XM_PERMUTE_0Y, XM_PERMUTE_1Y, XM_PERMUTE_1Y>(SinAngles, CosAngles);
+    XMVECTOR R0 = XMVectorPermute<XM_PERMUTE_1Z, XM_PERMUTE_1Z, XM_PERMUTE_0Z, XM_PERMUTE_1Z>(SinAngles, CosAngles);
+    XMVECTOR P1 = XMVectorPermute<XM_PERMUTE_0X, XM_PERMUTE_1X, XM_PERMUTE_1X, XM_PERMUTE_1X>(CosAngles, SinAngles);
+    XMVECTOR Y1 = XMVectorPermute<XM_PERMUTE_1Y, XM_PERMUTE_0Y, XM_PERMUTE_1Y, XM_PERMUTE_1Y>(CosAngles, SinAngles);
+    XMVECTOR R1 = XMVectorPermute<XM_PERMUTE_1Z, XM_PERMUTE_1Z, XM_PERMUTE_0Z, XM_PERMUTE_1Z>(CosAngles, SinAngles);
+
+    // Q = P0*Y0*R0 + Sign * P1*Y1*R1, expanded lane-wise.
+    XMVECTOR Q1 = XMVectorMultiply(P1, Sign.v);
+    XMVECTOR Q0 = XMVectorMultiply(P0, Y0);
+    Q1 = XMVectorMultiply(Q1, Y1);
+    Q0 = XMVectorMultiply(Q0, R0);
+    XMVECTOR Q = XMVectorMultiplyAdd(Q1, R1, Q0);
+
+    return Q;
+
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Builds a rotation quaternion about an axis that is assumed to already be
+// unit length: Q = (NormalAxis.xyz * sin(Angle/2), cos(Angle/2)).
+// Use XMQuaternionRotationAxis for an unnormalized axis.
+inline XMVECTOR XMQuaternionRotationNormal
+(
+    FXMVECTOR NormalAxis,
+    float Angle
+)
+{
+#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
+
+    // Force w to 1 so the (sin,sin,sin,cos) scale yields cos in w.
+    XMVECTOR N = XMVectorSelect(g_XMOne.v, NormalAxis, g_XMSelect1110.v);
+
+    float SinV, CosV;
+    XMScalarSinCos(&SinV, &CosV, 0.5f * Angle);
+
+    XMVECTOR Scale = XMVectorSet( SinV, SinV, SinV, CosV );
+    return XMVectorMultiply(N, Scale);
+#elif defined(_XM_SSE_INTRINSICS_)
+    // N = (axis.xyz, 1)
+    XMVECTOR N = _mm_and_ps(NormalAxis,g_XMMask3);
+    N = _mm_or_ps(N,g_XMIdentityR3);
+    // Half angle splatted to all lanes for the vector sin/cos.
+    XMVECTOR Scale = _mm_set_ps1(0.5f * Angle);
+    XMVECTOR vSine;
+    XMVECTOR vCosine;
+    XMVectorSinCos(&vSine,&vCosine,Scale);
+    // Merge (sin, sin, sin, cos) into one scale vector.
+    Scale = _mm_and_ps(vSine,g_XMMask3);
+    vCosine = _mm_and_ps(vCosine,g_XMMaskW);
+    Scale = _mm_or_ps(Scale,vCosine);
+    N = _mm_mul_ps(N,Scale);
+    return N;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Builds a rotation quaternion about an arbitrary (possibly unnormalized)
+// axis. The axis must be nonzero and finite.
+inline XMVECTOR XMQuaternionRotationAxis
+(
+    FXMVECTOR Axis,
+    float Angle
+)
+{
+    assert(!XMVector3Equal(Axis, XMVectorZero()));
+    assert(!XMVector3IsInfinite(Axis));
+
+#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
+    // Normalize first, then defer to the unit-axis routine.
+    return XMQuaternionRotationNormal(XMVector3Normalize(Axis), Angle);
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Computes a rotation quaternion from the rotation matrix M (upper 3x3).
+// All paths branch on diagonal sums/differences to pick the quaternion
+// component of largest magnitude, then derive the other three from
+// off-diagonal terms — this avoids dividing by a small number.
+inline XMVECTOR XMQuaternionRotationMatrix
+(
+    CXMMATRIX M
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+
+    // Scalar path: two-level branch on the diagonal picks the dominant
+    // component; each leaf computes 4*c^2 for that component c, takes
+    // 0.5/sqrt, and scales the matching off-diagonal sums/differences.
+    XMVECTORF32 q;
+    float r22 = M.m[2][2];
+    if (r22 <= 0.f)  // x^2 + y^2 >= z^2 + w^2
+    {
+        float dif10 = M.m[1][1] - M.m[0][0];
+        float omr22 = 1.f - r22;
+        if (dif10 <= 0.f)  // x^2 >= y^2
+        {
+            float fourXSqr = omr22 - dif10;
+            float inv4x = 0.5f / sqrtf(fourXSqr);
+            q.f[0] = fourXSqr*inv4x;
+            q.f[1] = (M.m[0][1] + M.m[1][0])*inv4x;
+            q.f[2] = (M.m[0][2] + M.m[2][0])*inv4x;
+            q.f[3] = (M.m[1][2] - M.m[2][1])*inv4x;
+        }
+        else  // y^2 >= x^2
+        {
+            float fourYSqr = omr22 + dif10;
+            float inv4y = 0.5f / sqrtf(fourYSqr);
+            q.f[0] = (M.m[0][1] + M.m[1][0])*inv4y;
+            q.f[1] = fourYSqr*inv4y;
+            q.f[2] = (M.m[1][2] + M.m[2][1])*inv4y;
+            q.f[3] = (M.m[2][0] - M.m[0][2])*inv4y;
+        }
+    }
+    else  // z^2 + w^2 >= x^2 + y^2
+    {
+        float sum10 = M.m[1][1] + M.m[0][0];
+        float opr22 = 1.f + r22;
+        if (sum10 <= 0.f)  // z^2 >= w^2
+        {
+            float fourZSqr = opr22 - sum10;
+            float inv4z = 0.5f / sqrtf(fourZSqr);
+            q.f[0] = (M.m[0][2] + M.m[2][0])*inv4z;
+            q.f[1] = (M.m[1][2] + M.m[2][1])*inv4z;
+            q.f[2] = fourZSqr*inv4z;
+            q.f[3] = (M.m[0][1] - M.m[1][0])*inv4z;
+        }
+        else  // w^2 >= z^2
+        {
+            float fourWSqr = opr22 + sum10;
+            float inv4w = 0.5f / sqrtf(fourWSqr);
+            q.f[0] = (M.m[1][2] - M.m[2][1])*inv4w;
+            q.f[1] = (M.m[2][0] - M.m[0][2])*inv4w;
+            q.f[2] = (M.m[0][1] - M.m[1][0])*inv4w;
+            q.f[3] = fourWSqr*inv4w;
+        }
+    }
+    return q.v;
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+    // SIMD path: build all four rows of the scaled tensor product q*q^T,
+    // select the row with the largest diagonal entry branchlessly, and
+    // normalize it.
+    static const XMVECTORF32 XMPMMP = {+1.0f, -1.0f, -1.0f, +1.0f};
+    static const XMVECTORF32 XMMPMP = {-1.0f, +1.0f, -1.0f, +1.0f};
+    static const XMVECTORF32 XMMMPP = {-1.0f, -1.0f, +1.0f, +1.0f};
+    static const XMVECTORU32 Select0110 = { XM_SELECT_0, XM_SELECT_1, XM_SELECT_1, XM_SELECT_0 };
+    static const XMVECTORU32 Select0010 = { XM_SELECT_0, XM_SELECT_0, XM_SELECT_1, XM_SELECT_0 };
+
+    XMVECTOR r0 = M.r[0];
+    XMVECTOR r1 = M.r[1];
+    XMVECTOR r2 = M.r[2];
+
+    XMVECTOR r00 = vdupq_lane_f32(vget_low_f32(r0), 0);
+    XMVECTOR r11 = vdupq_lane_f32(vget_low_f32(r1), 1);
+    XMVECTOR r22 = vdupq_lane_f32(vget_high_f32(r2), 0);
+
+    // x^2 >= y^2 equivalent to r11 - r00 <= 0
+    XMVECTOR r11mr00 = vsubq_f32(r11, r00);
+    XMVECTOR x2gey2 = vcleq_f32(r11mr00, g_XMZero);
+
+    // z^2 >= w^2 equivalent to r11 + r00 <= 0
+    XMVECTOR r11pr00 = vaddq_f32(r11, r00);
+    XMVECTOR z2gew2 = vcleq_f32(r11pr00, g_XMZero);
+
+    // x^2 + y^2 >= z^2 + w^2 equivalent to r22 <= 0
+    XMVECTOR x2py2gez2pw2 = vcleq_f32(r22, g_XMZero);
+
+    // (4*x^2, 4*y^2, 4*z^2, 4*w^2)
+    XMVECTOR t0 = vmulq_f32( XMPMMP, r00 );
+    XMVECTOR x2y2z2w2 = vmlaq_f32( t0, XMMPMP, r11 );
+    x2y2z2w2 = vmlaq_f32( x2y2z2w2, XMMMPP, r22 );
+    x2y2z2w2 = vaddq_f32( x2y2z2w2, g_XMOne );
+
+    // (r01, r02, r12, r11)
+    t0 = vextq_f32(r0, r0, 1);
+    XMVECTOR t1 = vextq_f32(r1, r1, 1);
+    t0 = vcombine_f32( vget_low_f32(t0), vrev64_f32( vget_low_f32( t1 ) ) );
+
+    // (r10, r20, r21, r10)
+    t1 = vextq_f32(r2, r2, 3);
+    XMVECTOR r10 = vdupq_lane_f32( vget_low_f32(r1), 0 );
+    t1 = vbslq_f32( Select0110, t1, r10 );
+
+    // (4*x*y, 4*x*z, 4*y*z, unused)
+    XMVECTOR xyxzyz = vaddq_f32(t0, t1);
+
+    // (r21, r20, r10, r10)
+    t0 = vcombine_f32( vrev64_f32( vget_low_f32(r2) ), vget_low_f32(r10) );
+
+    // (r12, r02, r01, r12)
+    XMVECTOR t2 = vcombine_f32( vrev64_f32( vget_high_f32(r0) ), vrev64_f32( vget_low_f32(r0) ) );
+    XMVECTOR t3 = vdupq_lane_f32( vget_high_f32(r1), 0 );
+    t1 = vbslq_f32( Select0110, t2, t3 );
+
+    // (4*x*w, 4*y*w, 4*z*w, unused)
+    XMVECTOR xwywzw = vsubq_f32(t0, t1);
+    xwywzw = vmulq_f32(XMMPMP, xwywzw);
+
+    // (4*x*x, 4*x*y, 4*x*z, 4*x*w)
+    t0 = vextq_f32( xyxzyz, xyxzyz, 3 );
+    t1 = vbslq_f32( Select0110, t0, x2y2z2w2 );
+    t2 = vdupq_lane_f32( vget_low_f32(xwywzw), 0 );
+    XMVECTOR tensor0 = vbslq_f32( g_XMSelect1110, t1, t2 );
+
+    // (4*y*x, 4*y*y, 4*y*z, 4*y*w)
+    t0 = vbslq_f32( g_XMSelect1011, xyxzyz, x2y2z2w2 );
+    t1 = vdupq_lane_f32( vget_low_f32(xwywzw), 1 );
+    XMVECTOR tensor1 = vbslq_f32( g_XMSelect1110, t0, t1 );
+
+    // (4*z*x, 4*z*y, 4*z*z, 4*z*w)
+    t0 = vextq_f32(xyxzyz, xyxzyz, 1);
+    t1 = vcombine_f32( vget_low_f32(t0), vrev64_f32( vget_high_f32(xwywzw) ) );
+    XMVECTOR tensor2 = vbslq_f32( Select0010, x2y2z2w2, t1 );
+
+    // (4*w*x, 4*w*y, 4*w*z, 4*w*w)
+    XMVECTOR tensor3 = vbslq_f32( g_XMSelect1110, xwywzw, x2y2z2w2 );
+
+    // Select the row of the tensor-product matrix that has the largest
+    // magnitude.
+    t0 = vbslq_f32( x2gey2, tensor0, tensor1 );
+    t1 = vbslq_f32( z2gew2, tensor2, tensor3 );
+    t2 = vbslq_f32( x2py2gez2pw2, t0, t1 );
+
+    // Normalize the row. No division by zero is possible because the
+    // quaternion is unit-length (and the row is a nonzero multiple of
+    // the quaternion).
+    t0 = XMVector4Length(t2);
+    return XMVectorDivide(t2, t0);
+#elif defined(_XM_SSE_INTRINSICS_)
+    // SSE path: same branchless tensor-row-selection strategy as NEON.
+    static const XMVECTORF32 XMPMMP = {+1.0f, -1.0f, -1.0f, +1.0f};
+    static const XMVECTORF32 XMMPMP = {-1.0f, +1.0f, -1.0f, +1.0f};
+    static const XMVECTORF32 XMMMPP = {-1.0f, -1.0f, +1.0f, +1.0f};
+
+    XMVECTOR r0 = M.r[0];  // (r00, r01, r02, 0)
+    XMVECTOR r1 = M.r[1];  // (r10, r11, r12, 0)
+    XMVECTOR r2 = M.r[2];  // (r20, r21, r22, 0)
+
+    // (r00, r00, r00, r00)
+    XMVECTOR r00 = XM_PERMUTE_PS(r0, _MM_SHUFFLE(0,0,0,0));
+    // (r11, r11, r11, r11)
+    XMVECTOR r11 = XM_PERMUTE_PS(r1, _MM_SHUFFLE(1,1,1,1));
+    // (r22, r22, r22, r22)
+    XMVECTOR r22 = XM_PERMUTE_PS(r2, _MM_SHUFFLE(2,2,2,2));
+
+    // x^2 >= y^2 equivalent to r11 - r00 <= 0
+    // (r11 - r00, r11 - r00, r11 - r00, r11 - r00)
+    XMVECTOR r11mr00 = _mm_sub_ps(r11, r00);
+    XMVECTOR x2gey2 = _mm_cmple_ps(r11mr00, g_XMZero);
+
+    // z^2 >= w^2 equivalent to r11 + r00 <= 0
+    // (r11 + r00, r11 + r00, r11 + r00, r11 + r00)
+    XMVECTOR r11pr00 = _mm_add_ps(r11, r00);
+    XMVECTOR z2gew2 = _mm_cmple_ps(r11pr00, g_XMZero);
+
+    // x^2 + y^2 >= z^2 + w^2 equivalent to r22 <= 0
+    XMVECTOR x2py2gez2pw2 = _mm_cmple_ps(r22, g_XMZero);
+
+    // (+r00, -r00, -r00, +r00)
+    XMVECTOR t0 = _mm_mul_ps(XMPMMP, r00);
+
+    // (-r11, +r11, -r11, +r11)
+    XMVECTOR t1 = _mm_mul_ps(XMMPMP, r11);
+
+    // (-r22, -r22, +r22, +r22)
+    XMVECTOR t2 = _mm_mul_ps(XMMMPP, r22);
+
+    // (4*x^2, 4*y^2, 4*z^2, 4*w^2)
+    XMVECTOR x2y2z2w2 = _mm_add_ps(t0, t1);
+    x2y2z2w2 = _mm_add_ps(t2, x2y2z2w2);
+    x2y2z2w2 = _mm_add_ps(x2y2z2w2, g_XMOne);
+
+    // (r01, r02, r12, r11)
+    t0 = _mm_shuffle_ps(r0, r1, _MM_SHUFFLE(1,2,2,1));
+    // (r10, r10, r20, r21)
+    t1 = _mm_shuffle_ps(r1, r2, _MM_SHUFFLE(1,0,0,0));
+    // (r10, r20, r21, r10)
+    t1 = XM_PERMUTE_PS(t1, _MM_SHUFFLE(1,3,2,0));
+    // (4*x*y, 4*x*z, 4*y*z, unused)
+    XMVECTOR xyxzyz = _mm_add_ps(t0, t1);
+
+    // (r21, r20, r10, r10)
+    t0 = _mm_shuffle_ps(r2, r1, _MM_SHUFFLE(0,0,0,1));
+    // (r12, r12, r02, r01)
+    t1 = _mm_shuffle_ps(r1, r0, _MM_SHUFFLE(1,2,2,2));
+    // (r12, r02, r01, r12)
+    t1 = XM_PERMUTE_PS(t1, _MM_SHUFFLE(1,3,2,0));
+    // (4*x*w, 4*y*w, 4*z*w, unused)
+    XMVECTOR xwywzw = _mm_sub_ps(t0, t1);
+    xwywzw = _mm_mul_ps(XMMPMP, xwywzw);
+
+    // (4*x^2, 4*y^2, 4*x*y, unused)
+    t0 = _mm_shuffle_ps(x2y2z2w2, xyxzyz, _MM_SHUFFLE(0,0,1,0));
+    // (4*z^2, 4*w^2, 4*z*w, unused)
+    t1 = _mm_shuffle_ps(x2y2z2w2, xwywzw, _MM_SHUFFLE(0,2,3,2));
+    // (4*x*z, 4*y*z, 4*x*w, 4*y*w)
+    t2 = _mm_shuffle_ps(xyxzyz, xwywzw, _MM_SHUFFLE(1,0,2,1));
+
+    // (4*x*x, 4*x*y, 4*x*z, 4*x*w)
+    XMVECTOR tensor0 = _mm_shuffle_ps(t0, t2, _MM_SHUFFLE(2,0,2,0));
+    // (4*y*x, 4*y*y, 4*y*z, 4*y*w)
+    XMVECTOR tensor1 = _mm_shuffle_ps(t0, t2, _MM_SHUFFLE(3,1,1,2));
+    // (4*z*x, 4*z*y, 4*z*z, 4*z*w)
+    XMVECTOR tensor2 = _mm_shuffle_ps(t2, t1, _MM_SHUFFLE(2,0,1,0));
+    // (4*w*x, 4*w*y, 4*w*z, 4*w*w)
+    XMVECTOR tensor3 = _mm_shuffle_ps(t2, t1, _MM_SHUFFLE(1,2,3,2));
+
+    // Select the row of the tensor-product matrix that has the largest
+    // magnitude.
+    t0 = _mm_and_ps(x2gey2, tensor0);
+    t1 = _mm_andnot_ps(x2gey2, tensor1);
+    t0 = _mm_or_ps(t0, t1);
+    t1 = _mm_and_ps(z2gew2, tensor2);
+    t2 = _mm_andnot_ps(z2gew2, tensor3);
+    t1 = _mm_or_ps(t1, t2);
+    t0 = _mm_and_ps(x2py2gez2pw2, t0);
+    t1 = _mm_andnot_ps(x2py2gez2pw2, t1);
+    t2 = _mm_or_ps(t0, t1);
+
+    // Normalize the row. No division by zero is possible because the
+    // quaternion is unit-length (and the row is a nonzero multiple of
+    // the quaternion).
+    t0 = XMVector4Length(t2);
+    return _mm_div_ps(t2, t0);
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+// Conversion operations
+//------------------------------------------------------------------------------
+
+//------------------------------------------------------------------------------
+// Extracts the rotation axis and angle from quaternion Q.
+// *pAxis receives Q itself: its xyz part points along the rotation axis but
+// is NOT normalized (and w is left in place) — callers needing a unit axis
+// must normalize it themselves.
+_Use_decl_annotations_
+inline void XMQuaternionToAxisAngle
+(
+    XMVECTOR* pAxis,
+    float* pAngle,
+    FXMVECTOR Q
+)
+{
+    assert(pAxis);
+    assert(pAngle);
+
+    *pAxis = Q;
+
+    // For a unit quaternion w = cos(angle/2), so angle = 2*acos(w).
+    *pAngle = 2.0f * XMScalarACos(XMVectorGetW(Q));
+}
+
+/****************************************************************************
+ *
+ * Plane
+ *
+ ****************************************************************************/
+
+//------------------------------------------------------------------------------
+// Comparison operations
+//------------------------------------------------------------------------------
+
+//------------------------------------------------------------------------------
+
+// Tests whether two planes are exactly equal, comparing all four
+// coefficients (A, B, C, D).
+inline bool XMPlaneEqual( FXMVECTOR P1, FXMVECTOR P2 )
+{
+    return XMVector4Equal( P1, P2 );
+}
+
+//------------------------------------------------------------------------------
+
+// Tests whether two planes are nearly equal within Epsilon. Both planes are
+// normalized first, so differently-scaled representations of the same plane
+// compare as close.
+inline bool XMPlaneNearEqual( FXMVECTOR P1, FXMVECTOR P2, FXMVECTOR Epsilon )
+{
+    return XMVector4NearEqual( XMPlaneNormalize(P1), XMPlaneNormalize(P2), Epsilon );
+}
+
+//------------------------------------------------------------------------------
+
+// Tests whether any of the four plane coefficients differ.
+inline bool XMPlaneNotEqual( FXMVECTOR P1, FXMVECTOR P2 )
+{
+    return XMVector4NotEqual( P1, P2 );
+}
+
+//------------------------------------------------------------------------------
+
+// Tests whether any plane coefficient is NaN.
+inline bool XMPlaneIsNaN( FXMVECTOR P )
+{
+    return XMVector4IsNaN( P );
+}
+
+//------------------------------------------------------------------------------
+
+// Tests whether any plane coefficient is +/- infinity.
+inline bool XMPlaneIsInfinite( FXMVECTOR P )
+{
+    return XMVector4IsInfinite( P );
+}
+
+//------------------------------------------------------------------------------
+// Computation operations
+//------------------------------------------------------------------------------
+
+//------------------------------------------------------------------------------
+
+// Full 4D dot product of a plane and a 4-vector, replicated to all lanes.
+inline XMVECTOR XMPlaneDot( FXMVECTOR P, FXMVECTOR V )
+{
+    return XMVector4Dot( P, V );
+}
+
+//------------------------------------------------------------------------------
+
+// Signed distance of point V from plane P (times |normal|):
+// Result = P[0] * V[0] + P[1] * V[1] + P[2] * V[2] + P[3]
+inline XMVECTOR XMPlaneDotCoord
+(
+    FXMVECTOR P,
+    FXMVECTOR V
+)
+{
+#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
+    // Force V.w to 1 so the 4D dot picks up the plane's D term.
+    XMVECTOR V3 = XMVectorSelect(g_XMOne.v, V, g_XMSelect1110.v);
+    return XMVector4Dot(P, V3);
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// 3D dot of the plane's normal (A, B, C) with vector V; D is ignored.
+inline XMVECTOR XMPlaneDotNormal( FXMVECTOR P, FXMVECTOR V )
+{
+    return XMVector3Dot( P, V );
+}
+
+//------------------------------------------------------------------------------
+// XMPlaneNormalizeEst uses a reciprocal estimate and
+// returns QNaN on zero and infinite vectors.
+
+// Normalizes the plane using a fast reciprocal-sqrt estimate of the normal's
+// length; all four coefficients are scaled by the same factor.
+inline XMVECTOR XMPlaneNormalizeEst
+(
+    FXMVECTOR P
+)
+{
+#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
+
+    XMVECTOR Result = XMVector3ReciprocalLengthEst(P);
+    return XMVectorMultiply(P, Result);
+
+#elif defined(_XM_SSE_INTRINSICS_)
+    // Perform the dot product (x,y,z only)
+    XMVECTOR vDot = _mm_mul_ps(P,P);
+    // x=Dot.y, y=Dot.z
+    XMVECTOR vTemp = XM_PERMUTE_PS(vDot,_MM_SHUFFLE(2,1,2,1));
+    // Result.x = x+y
+    vDot = _mm_add_ss(vDot,vTemp);
+    // x=Dot.z
+    vTemp = XM_PERMUTE_PS(vTemp,_MM_SHUFFLE(1,1,1,1));
+    // Result.x = (x+y)+z
+    vDot = _mm_add_ss(vDot,vTemp);
+    // Splat x
+    vDot = XM_PERMUTE_PS(vDot,_MM_SHUFFLE(0,0,0,0));
+    // Estimate the reciprocal square root of the length
+    vDot = _mm_rsqrt_ps(vDot);
+    // Scale the plane by the reciprocal length estimate
+    vDot = _mm_mul_ps(vDot,P);
+    return vDot;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Normalizes the plane so its normal (A, B, C) is unit length; D is scaled
+// by the same factor, so the represented plane is unchanged. A zero-length
+// normal yields a zero result rather than a divide-by-zero.
+inline XMVECTOR XMPlaneNormalize
+(
+    FXMVECTOR P
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+    // NOTE: this is the length of the normal, not the squared length —
+    // the previous local name (fLengthSq) was misleading.
+    float fLength = sqrtf((P.vector4_f32[0]*P.vector4_f32[0])+(P.vector4_f32[1]*P.vector4_f32[1])+(P.vector4_f32[2]*P.vector4_f32[2]));
+    // Prevent divide by zero
+    if (fLength) {
+        fLength = 1.0f/fLength;
+    }
+    {
+        XMVECTOR vResult = {
+            P.vector4_f32[0]*fLength,
+            P.vector4_f32[1]*fLength,
+            P.vector4_f32[2]*fLength,
+            P.vector4_f32[3]*fLength
+        };
+        return vResult;
+    }
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+    XMVECTOR vLength = XMVector3ReciprocalLength(P);
+    return XMVectorMultiply( P, vLength );
+#elif defined(_XM_SSE_INTRINSICS_)
+    // Perform the dot product on x,y and z only
+    XMVECTOR vLengthSq = _mm_mul_ps(P,P);
+    XMVECTOR vTemp = XM_PERMUTE_PS(vLengthSq,_MM_SHUFFLE(2,1,2,1));
+    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
+    vTemp = XM_PERMUTE_PS(vTemp,_MM_SHUFFLE(1,1,1,1));
+    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
+    vLengthSq = XM_PERMUTE_PS(vLengthSq,_MM_SHUFFLE(0,0,0,0));
+    // Prepare for the division
+    XMVECTOR vResult = _mm_sqrt_ps(vLengthSq);
+    // Failsafe on zero (Or epsilon) length planes
+    // If the length is infinity, set the elements to zero
+    vLengthSq = _mm_cmpneq_ps(vLengthSq,g_XMInfinity);
+    // Reciprocal mul to perform the normalization
+    vResult = _mm_div_ps(P,vResult);
+    // Any that are infinity, set to zero
+    vResult = _mm_and_ps(vResult,vLengthSq);
+    return vResult;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Computes the intersection of plane P with the infinite line through
+// LinePoint1 and LinePoint2. Returns QNaN when the line is (near) parallel
+// to the plane (no unique intersection).
+inline XMVECTOR XMPlaneIntersectLine
+(
+    FXMVECTOR P,
+    FXMVECTOR LinePoint1,
+    FXMVECTOR LinePoint2
+)
+{
+#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
+
+    // D = N . P1 - N . P2: change in plane-normal projection along the line.
+    XMVECTOR V1 = XMVector3Dot(P, LinePoint1);
+    XMVECTOR V2 = XMVector3Dot(P, LinePoint2);
+    XMVECTOR D = XMVectorSubtract(V1, V2);
+
+    // Parametric t of the intersection along P1 -> P2.
+    XMVECTOR VT = XMPlaneDotCoord(P, LinePoint1);
+    VT = XMVectorDivide(VT, D);
+
+    XMVECTOR Point = XMVectorSubtract(LinePoint2, LinePoint1);
+    Point = XMVectorMultiplyAdd(Point, VT, LinePoint1);
+
+    // Parallel line: D ~ 0 -> no intersection, report QNaN.
+    const XMVECTOR Zero = XMVectorZero();
+    XMVECTOR Control = XMVectorNearEqual(D, Zero, g_XMEpsilon.v);
+
+    return XMVectorSelect(Point, g_XMQNaN.v, Control);
+
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+// Computes the line of intersection of planes P1 and P2, returned as two
+// points on that line. Both outputs are QNaN when the planes are (near)
+// parallel and no unique line exists.
+_Use_decl_annotations_
+inline void XMPlaneIntersectPlane
+(
+    XMVECTOR* pLinePoint1,
+    XMVECTOR* pLinePoint2,
+    FXMVECTOR P1,
+    FXMVECTOR P2
+)
+{
+    assert(pLinePoint1);
+    assert(pLinePoint2);
+#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
+
+    // Direction of the intersection line (cross of the two normals).
+    XMVECTOR V1 = XMVector3Cross(P2, P1);
+
+    // ~0 when the planes are parallel.
+    XMVECTOR LengthSq = XMVector3LengthSq(V1);
+
+    // Build a point satisfying both plane equations from the D terms.
+    XMVECTOR V2 = XMVector3Cross(P2, V1);
+
+    XMVECTOR P1W = XMVectorSplatW(P1);
+    XMVECTOR Point = XMVectorMultiply(V2, P1W);
+
+    XMVECTOR V3 = XMVector3Cross(V1, P1);
+
+    XMVECTOR P2W = XMVectorSplatW(P2);
+    Point = XMVectorMultiplyAdd(V3, P2W, Point);
+
+    XMVECTOR LinePoint1 = XMVectorDivide(Point, LengthSq);
+
+    // Second point: step along the line direction.
+    XMVECTOR LinePoint2 = XMVectorAdd(LinePoint1, V1);
+
+    // Parallel planes -> QNaN outputs.
+    XMVECTOR Control = XMVectorLessOrEqual(LengthSq, g_XMEpsilon.v);
+    *pLinePoint1 = XMVectorSelect(LinePoint1,g_XMQNaN.v, Control);
+    *pLinePoint2 = XMVectorSelect(LinePoint2,g_XMQNaN.v, Control);
+
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Transforms plane P by matrix M (a plane transforms as a row 4-vector).
+// NOTE(review): for a plane to remain valid, M is typically the
+// inverse-transpose of the point transform — confirm with caller usage.
+inline XMVECTOR XMPlaneTransform
+(
+    FXMVECTOR P,
+    CXMMATRIX M
+)
+{
+#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
+    // Broadcast each plane coefficient across a full vector.
+    XMVECTOR vW = XMVectorSplatW(P);
+    XMVECTOR vZ = XMVectorSplatZ(P);
+    XMVECTOR vY = XMVectorSplatY(P);
+    XMVECTOR vX = XMVectorSplatX(P);
+
+    // Accumulate P.w*M[3] + P.z*M[2] + P.y*M[1] + P.x*M[0] in this order.
+    XMVECTOR vResult = XMVectorMultiply(vW, M.r[3]);
+    vResult = XMVectorMultiplyAdd(vZ, M.r[2], vResult);
+    vResult = XMVectorMultiplyAdd(vY, M.r[1], vResult);
+    vResult = XMVectorMultiplyAdd(vX, M.r[0], vResult);
+    return vResult;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+// Transforms a stream of PlaneCount planes by M, reading every InputStride
+// bytes and writing every OutputStride bytes. Returns pOutputStream.
+_Use_decl_annotations_
+inline XMFLOAT4* XMPlaneTransformStream
+(
+    XMFLOAT4* pOutputStream,
+    size_t OutputStride,
+    const XMFLOAT4* pInputStream,
+    size_t InputStride,
+    size_t PlaneCount,
+    CXMMATRIX M
+)
+{
+    // A plane transforms exactly like a 4-vector here, so delegate wholesale.
+    return XMVector4TransformStream(pOutputStream, OutputStride,
+                                    pInputStream, InputStride,
+                                    PlaneCount, M);
+}
+
+//------------------------------------------------------------------------------
+// Conversion operations
+//------------------------------------------------------------------------------
+
+//------------------------------------------------------------------------------
+
+// Builds the plane through Point with the given normal:
+// (Nx, Ny, Nz, -dot(Point, Normal)).
+inline XMVECTOR XMPlaneFromPointNormal
+(
+    FXMVECTOR Point,
+    FXMVECTOR Normal
+)
+{
+    // D term goes in w; the normal occupies xyz.
+    XMVECTOR vD = XMVectorNegate(XMVector3Dot(Point, Normal));
+    return XMVectorSelect(vD, Normal, g_XMSelect1110.v);
+}
+
+//------------------------------------------------------------------------------
+
+// Builds the plane containing the three given points. The normal is the
+// normalized cross product of two triangle edges.
+inline XMVECTOR XMPlaneFromPoints
+(
+    FXMVECTOR Point1,
+    FXMVECTOR Point2,
+    FXMVECTOR Point3
+)
+{
+#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
+    // Two edge vectors, both rooted at Point1.
+    XMVECTOR vEdge1 = XMVectorSubtract(Point1, Point2);
+    XMVECTOR vEdge2 = XMVectorSubtract(Point1, Point3);
+
+    // Unit normal of the triangle's plane.
+    XMVECTOR vNormal = XMVector3Normalize(XMVector3Cross(vEdge1, vEdge2));
+
+    // D = -dot(N, Point1) places the plane through the points.
+    XMVECTOR vD = XMVectorNegate(XMPlaneDotNormal(vNormal, Point1));
+
+    return XMVectorSelect(vD, vNormal, g_XMSelect1110.v);
+
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+/****************************************************************************
+ *
+ * Color
+ *
+ ****************************************************************************/
+
+//------------------------------------------------------------------------------
+// Comparison operations
+//------------------------------------------------------------------------------
+
+//------------------------------------------------------------------------------
+
+// Tests whether two colors are exactly equal in all four channels.
+inline bool XMColorEqual( FXMVECTOR C1, FXMVECTOR C2 )
+{
+    return XMVector4Equal( C1, C2 );
+}
+
+//------------------------------------------------------------------------------
+
+// Tests whether any channel of the two colors differs.
+inline bool XMColorNotEqual( FXMVECTOR C1, FXMVECTOR C2 )
+{
+    return XMVector4NotEqual( C1, C2 );
+}
+
+//------------------------------------------------------------------------------
+
+// Tests whether every channel of C1 is strictly greater than C2's.
+inline bool XMColorGreater( FXMVECTOR C1, FXMVECTOR C2 )
+{
+    return XMVector4Greater( C1, C2 );
+}
+
+//------------------------------------------------------------------------------
+
+// Tests whether every channel of C1 is greater than or equal to C2's.
+inline bool XMColorGreaterOrEqual( FXMVECTOR C1, FXMVECTOR C2 )
+{
+    return XMVector4GreaterOrEqual( C1, C2 );
+}
+
+//------------------------------------------------------------------------------
+
+// Tests whether every channel of C1 is strictly less than C2's.
+inline bool XMColorLess( FXMVECTOR C1, FXMVECTOR C2 )
+{
+    return XMVector4Less( C1, C2 );
+}
+
+//------------------------------------------------------------------------------
+
+// Tests whether every channel of C1 is less than or equal to C2's.
+inline bool XMColorLessOrEqual( FXMVECTOR C1, FXMVECTOR C2 )
+{
+    return XMVector4LessOrEqual( C1, C2 );
+}
+
+//------------------------------------------------------------------------------
+
+// Tests whether any channel of the color is NaN.
+inline bool XMColorIsNaN( FXMVECTOR C )
+{
+    return XMVector4IsNaN( C );
+}
+
+//------------------------------------------------------------------------------
+
+// Tests whether any channel of the color is +/- infinity.
+inline bool XMColorIsInfinite( FXMVECTOR C )
+{
+    return XMVector4IsInfinite( C );
+}
+
+//------------------------------------------------------------------------------
+// Computation operations
+//------------------------------------------------------------------------------
+
+//------------------------------------------------------------------------------
+
+// Returns the complement of a color: (1-R, 1-G, 1-B) with alpha unchanged.
+inline XMVECTOR XMColorNegative
+(
+    FXMVECTOR vColor
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+    XMVECTORF32 vResult = {
+        1.0f - vColor.vector4_f32[0],
+        1.0f - vColor.vector4_f32[1],
+        1.0f - vColor.vector4_f32[2],
+        vColor.vector4_f32[3]
+    };
+    return vResult.v;
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+    // Sign-flip x,y,z via XOR, then add (1,1,1,0): gives 1-c for rgb, w kept.
+    XMVECTOR vTemp = veorq_u32(vColor,g_XMNegate3);
+    return vaddq_f32(vTemp,g_XMOne3);
+#elif defined(_XM_SSE_INTRINSICS_)
+    // Negate only x,y and z.
+    XMVECTOR vTemp = _mm_xor_ps(vColor,g_XMNegate3);
+    // Add 1,1,1,0 to -x,-y,-z,w
+    return _mm_add_ps(vTemp,g_XMOne3);
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Modulates (component-wise multiplies) two colors, including alpha.
+inline XMVECTOR XMColorModulate( FXMVECTOR C1, FXMVECTOR C2 )
+{
+    return XMVectorMultiply( C1, C2 );
+}
+
+//------------------------------------------------------------------------------
+
+// Adjusts the saturation of a color. The luminance is computed with the
+// (BT.709-style) weights 0.2125/0.7154/0.0721; each RGB channel is then
+// interpolated between that luminance (fSaturation = 0 gives greyscale) and
+// the original color (fSaturation = 1). Values > 1 over-saturate. Alpha is
+// passed through unchanged.
+inline XMVECTOR XMColorAdjustSaturation
+(
+    FXMVECTOR vColor,
+    float fSaturation
+)
+{
+    // Luminance = 0.2125f * C[0] + 0.7154f * C[1] + 0.0721f * C[2];
+    // Result = (C - Luminance) * Saturation + Luminance;
+
+#if defined(_XM_NO_INTRINSICS_)
+    const XMVECTORF32 gvLuminance = {0.2125f, 0.7154f, 0.0721f, 0.0f};
+
+    float fLuminance = (vColor.vector4_f32[0]*gvLuminance.f[0])+(vColor.vector4_f32[1]*gvLuminance.f[1])+(vColor.vector4_f32[2]*gvLuminance.f[2]);
+    XMVECTORF32 vResult = {
+        ((vColor.vector4_f32[0] - fLuminance)*fSaturation)+fLuminance,
+        ((vColor.vector4_f32[1] - fLuminance)*fSaturation)+fLuminance,
+        ((vColor.vector4_f32[2] - fLuminance)*fSaturation)+fLuminance,
+        vColor.vector4_f32[3]};
+    return vResult.v;
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+    static const XMVECTORF32 gvLuminance = {0.2125f, 0.7154f, 0.0721f, 0.0f};
+    XMVECTOR vLuminance = XMVector3Dot( vColor, gvLuminance );
+    XMVECTOR vResult = vsubq_f32(vColor, vLuminance);
+    XMVECTOR vSaturation = vdupq_n_f32(fSaturation);
+    vResult = vmlaq_f32( vLuminance, vResult, vSaturation );
+    // Keep the source alpha in w.
+    return vbslq_f32( g_XMSelect1110, vResult, vColor );
+#elif defined(_XM_SSE_INTRINSICS_)
+    static const XMVECTORF32 gvLuminance = {0.2125f, 0.7154f, 0.0721f, 0.0f};
+    XMVECTOR vLuminance = XMVector3Dot( vColor, gvLuminance );
+// Splat fSaturation
+    XMVECTOR vSaturation = _mm_set_ps1(fSaturation);
+// vResult = ((vColor-vLuminance)*vSaturation)+vLuminance;
+    XMVECTOR vResult = _mm_sub_ps(vColor,vLuminance);
+    vResult = _mm_mul_ps(vResult,vSaturation);
+    vResult = _mm_add_ps(vResult,vLuminance);
+// Retain w from the source color
+    vLuminance = _mm_shuffle_ps(vResult,vColor,_MM_SHUFFLE(3,2,2,2));   // x = vResult.z,y = vResult.z,z = vColor.z,w=vColor.w
+    vResult = _mm_shuffle_ps(vResult,vLuminance,_MM_SHUFFLE(3,0,1,0));  // x = vResult.x,y = vResult.y,z = vResult.z,w=vColor.w
+    return vResult;
+#else // _XM_VMX128_INTRINSICS_
+    // Consistency fix: siblings use an empty #else fallback here, not
+    // "#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)".
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Adjusts the contrast of a color: each RGB channel is pulled toward or
+// pushed away from middle grey (0.5). fContrast = 0 gives uniform grey,
+// 1 leaves the color unchanged, > 1 increases contrast. Alpha is untouched.
+inline XMVECTOR XMColorAdjustContrast
+(
+    FXMVECTOR vColor,
+    float fContrast
+)
+{
+    // Result = (vColor - 0.5f) * fContrast + 0.5f;
+
+#if defined(_XM_NO_INTRINSICS_)
+    XMVECTORF32 vResult = {
+        ((vColor.vector4_f32[0]-0.5f) * fContrast) + 0.5f,
+        ((vColor.vector4_f32[1]-0.5f) * fContrast) + 0.5f,
+        ((vColor.vector4_f32[2]-0.5f) * fContrast) + 0.5f,
+        vColor.vector4_f32[3]        // Leave W untouched
+    };
+    return vResult.v;
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+    XMVECTOR vResult = vsubq_f32(vColor, g_XMOneHalf.v);
+    XMVECTOR vContrast = vdupq_n_f32(fContrast);
+    vResult = vmlaq_f32( g_XMOneHalf.v, vResult, vContrast );
+    // Keep the source alpha in w.
+    return vbslq_f32( g_XMSelect1110, vResult, vColor );
+#elif defined(_XM_SSE_INTRINSICS_)
+    XMVECTOR vScale = _mm_set_ps1(fContrast);           // Splat the scale
+    XMVECTOR vResult = _mm_sub_ps(vColor,g_XMOneHalf);  // Subtract 0.5f from the source (Saving source)
+    vResult = _mm_mul_ps(vResult,vScale);               // Mul by scale
+    vResult = _mm_add_ps(vResult,g_XMOneHalf);          // Add 0.5f
+// Retain w from the source color
+    vScale = _mm_shuffle_ps(vResult,vColor,_MM_SHUFFLE(3,2,2,2));   // x = vResult.z,y = vResult.z,z = vColor.z,w=vColor.w
+    vResult = _mm_shuffle_ps(vResult,vScale,_MM_SHUFFLE(3,0,1,0));  // x = vResult.x,y = vResult.y,z = vResult.z,w=vColor.w
+    return vResult;
+#else // _XM_VMX128_INTRINSICS_
+    // Consistency fix: siblings use an empty #else fallback here, not
+    // "#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)".
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Converts an RGB color to HSL. Result is <H, S, L, A> with H, S, L in
+// [0, 1] (hue is the angle divided by 360 degrees); alpha is copied through.
+inline XMVECTOR XMColorRGBToHSL( FXMVECTOR rgb )
+{
+    XMVECTOR r = XMVectorSplatX( rgb );
+    XMVECTOR g = XMVectorSplatY( rgb );
+    XMVECTOR b = XMVectorSplatZ( rgb );
+
+    XMVECTOR min = XMVectorMin( r, XMVectorMin( g, b ) );
+    XMVECTOR max = XMVectorMax( r, XMVectorMax( g, b ) );
+
+    // Lightness is the midpoint of the channel extremes.
+    XMVECTOR l = XMVectorMultiply( XMVectorAdd( min, max ), g_XMOneHalf );
+
+    // Chroma: zero means a grey (achromatic) color.
+    XMVECTOR d = XMVectorSubtract( max, min );
+
+    // <-, -, L, A> with the source alpha in w.
+    XMVECTOR la = XMVectorSelect( rgb, l, g_XMSelect1110 );
+
+    if ( XMVector3Less( d, g_XMEpsilon ) )
+    {
+        // Achromatic, assume H and S of 0
+        return XMVectorSelect( la, g_XMZero, g_XMSelect1100 );
+    }
+    else
+    {
+        XMVECTOR s, h;
+
+        XMVECTOR d2 = XMVectorAdd( min, max );
+
+        if ( XMVector3Greater( l, g_XMOneHalf ) )
+        {
+            // d / (2-max-min)
+            s = XMVectorDivide( d, XMVectorSubtract( g_XMTwo, d2 ) );
+        }
+        else
+        {
+            // d / (max+min)
+            s = XMVectorDivide( d, d2 );
+        }
+
+        // Hue sector depends on which channel is the maximum.
+        if ( XMVector3Equal( r, max ) )
+        {
+            // Red is max
+            h = XMVectorDivide( XMVectorSubtract( g, b ), d );
+        }
+        else if ( XMVector3Equal( g, max ) )
+        {
+            // Green is max
+            h = XMVectorDivide( XMVectorSubtract( b, r ), d );
+            h = XMVectorAdd( h, g_XMTwo );
+        }
+        else
+        {
+            // Blue is max
+            h = XMVectorDivide( XMVectorSubtract( r, g ), d );
+            h = XMVectorAdd( h, g_XMFour );
+        }
+
+        // Scale the sector value (0..6) into [0, 1), wrapping negatives.
+        h = XMVectorDivide( h, g_XMSix );
+
+        if ( XMVector3Less( h, g_XMZero ) )
+            h = XMVectorAdd( h, g_XMOne );
+
+        // Assemble <H, -, L, A> then merge S into y.
+        XMVECTOR lha = XMVectorSelect( la, h, g_XMSelect1100 );
+        return XMVectorSelect( s, lha, g_XMSelect1011 );
+    }
+}
+
+//------------------------------------------------------------------------------
+
+namespace Internal
+{
+
+// Helper for HSL -> RGB: converts one hue offset h into a channel value,
+// given the two interpolation anchors p and q (standard hue-to-RGB ramp).
+// h is wrapped into [0, 1] before the piecewise evaluation.
+inline XMVECTOR XMColorHue2Clr( FXMVECTOR p, FXMVECTOR q, FXMVECTOR h )
+{
+    static const XMVECTORF32 oneSixth  = { 1.0f/6.0f, 1.0f/6.0f, 1.0f/6.0f, 1.0f/6.0f };
+    static const XMVECTORF32 twoThirds = { 2.0f/3.0f, 2.0f/3.0f, 2.0f/3.0f, 2.0f/3.0f };
+
+    XMVECTOR t = h;
+
+    // Wrap the hue into [0, 1].
+    if ( XMVector3Less( t, g_XMZero ) )
+        t = XMVectorAdd( t, g_XMOne );
+
+    if ( XMVector3Greater( t, g_XMOne ) )
+        t = XMVectorSubtract( t, g_XMOne );
+
+    if ( XMVector3Less( t, oneSixth ) )
+    {
+        // p + (q - p) * 6 * t
+        XMVECTOR t1 = XMVectorSubtract( q, p );
+        XMVECTOR t2 = XMVectorMultiply( g_XMSix, t );
+        return XMVectorMultiplyAdd( t1, t2, p );
+    }
+
+    if ( XMVector3Less( t, g_XMOneHalf ) )
+        return q;
+
+    if ( XMVector3Less( t, twoThirds ) )
+    {
+        // p + (q - p) * 6 * (2/3 - t)
+        XMVECTOR t1 = XMVectorSubtract( q, p );
+        XMVECTOR t2 = XMVectorMultiply( g_XMSix, XMVectorSubtract( twoThirds, t ) );
+        return XMVectorMultiplyAdd( t1, t2, p );
+    }
+
+    return p;
+}
+
+}; // namespace Internal
+
+// Converts an HSL color <H, S, L, A> (all in [0, 1]) to RGB; alpha is
+// copied through unchanged.
+inline XMVECTOR XMColorHSLToRGB( FXMVECTOR hsl )
+{
+    static const XMVECTORF32 oneThird = { 1.0f/3.0f, 1.0f/3.0f, 1.0f/3.0f, 1.0f/3.0f };
+
+    XMVECTOR s = XMVectorSplatY( hsl );
+    XMVECTOR l = XMVectorSplatZ( hsl );
+
+    if ( XMVector3NearEqual( s, g_XMZero, g_XMEpsilon ) )
+    {
+        // Achromatic: all channels equal the lightness.
+        return XMVectorSelect( hsl, l, g_XMSelect1110 );
+    }
+    else
+    {
+        XMVECTOR h = XMVectorSplatX( hsl );
+
+        // q/p are the upper/lower interpolation anchors for the hue ramp.
+        XMVECTOR q;
+        if ( XMVector3Less( l, g_XMOneHalf ) )
+        {
+            q = XMVectorMultiply( l, XMVectorAdd ( g_XMOne, s ) );
+        }
+        else
+        {
+            q = XMVectorSubtract( XMVectorAdd( l, s ), XMVectorMultiply( l, s ) );
+        }
+
+        XMVECTOR p = XMVectorSubtract( XMVectorMultiply( g_XMTwo, l ), q );
+
+        // Each channel's hue is offset by a third of a turn.
+        XMVECTOR r = DirectX::Internal::XMColorHue2Clr( p, q, XMVectorAdd( h, oneThird ) );
+        XMVECTOR g = DirectX::Internal::XMColorHue2Clr( p, q, h );
+        XMVECTOR b = DirectX::Internal::XMColorHue2Clr( p, q, XMVectorSubtract( h, oneThird ) );
+
+        // Assemble <R, G, B, A> from the per-channel results.
+        XMVECTOR rg = XMVectorSelect( g, r, g_XMSelect1000 );
+        XMVECTOR ba = XMVectorSelect( hsl, b, g_XMSelect1110 );
+
+        return XMVectorSelect( ba, rg, g_XMSelect1100 );
+    }
+}
+
+//------------------------------------------------------------------------------
+
// Convert an RGB color (x = r, y = g, z = b) to hue/saturation/value.
// Result lanes: x = h, y = s, z = v; the input w is passed through unchanged.
inline XMVECTOR XMColorRGBToHSV( FXMVECTOR rgb )
{
    XMVECTOR r = XMVectorSplatX( rgb );
    XMVECTOR g = XMVectorSplatY( rgb );
    XMVECTOR b = XMVectorSplatZ( rgb );

    // Value is the largest channel; chroma d = max - min.
    XMVECTOR min = XMVectorMin( r, XMVectorMin( g, b ) );
    XMVECTOR v = XMVectorMax( r, XMVectorMax( g, b ) );

    XMVECTOR d = XMVectorSubtract( v, min );

    // Saturation = chroma / value; forced to 0 near black to avoid divide-by-zero.
    XMVECTOR s = ( XMVector3NearEqual( v, g_XMZero, g_XMEpsilon ) ) ? g_XMZero : XMVectorDivide( d, v );

    if ( XMVector3Less( d, g_XMEpsilon ) )
    {
        // Achromatic, assume H of 0
        XMVECTOR hv = XMVectorSelect( v, g_XMZero, g_XMSelect1000 );
        XMVECTOR hva = XMVectorSelect( rgb, hv, g_XMSelect1110 );
        return XMVectorSelect( s, hva, g_XMSelect1011 );
    }
    else
    {
        XMVECTOR h;

        if ( XMVector3Equal( r, v ) )
        {
            // Red is max
            h = XMVectorDivide( XMVectorSubtract( g, b ), d );

            if ( XMVector3Less( g, b ) )
                h = XMVectorAdd( h, g_XMSix );
        }
        else if ( XMVector3Equal( g, v ) )
        {
            // Green is max
            h = XMVectorDivide( XMVectorSubtract( b, r ), d );
            h = XMVectorAdd( h, g_XMTwo );
        }
        else
        {
            // Blue is max
            h = XMVectorDivide( XMVectorSubtract( r, g ), d );
            h = XMVectorAdd( h, g_XMFour );
        }

        // Normalize hue from sextants [0,6) down to [0,1).
        h = XMVectorDivide( h, g_XMSix );

        // Pack (h, ?, v, w) then fill the y lane from the saturation vector.
        XMVECTOR hv = XMVectorSelect( v, h, g_XMSelect1000 );
        XMVECTOR hva = XMVectorSelect( rgb, hv, g_XMSelect1110 );
        return XMVectorSelect( s, hva, g_XMSelect1011 );
    }
}
+
+//------------------------------------------------------------------------------
+
// Convert a hue/saturation/value color (x = h, y = s, z = v) to RGB.
// Uses the standard sextant decomposition of the hue circle; the input w
// channel is passed through unchanged.
inline XMVECTOR XMColorHSVToRGB( FXMVECTOR hsv )
{
    XMVECTOR h = XMVectorSplatX( hsv );
    XMVECTOR s = XMVectorSplatY( hsv );
    XMVECTOR v = XMVectorSplatZ( hsv );

    // Scale hue to [0,6): integer part selects the sextant, fraction interpolates.
    XMVECTOR h6 = XMVectorMultiply( h, g_XMSix );

    XMVECTOR i = XMVectorFloor( h6 );
    XMVECTOR f = XMVectorSubtract( h6, i );

    // p = v* (1-s)
    XMVECTOR p = XMVectorMultiply( v, XMVectorSubtract( g_XMOne, s ) );

    // q = v*(1-f*s)
    XMVECTOR q = XMVectorMultiply( v, XMVectorSubtract( g_XMOne, XMVectorMultiply( f, s ) ) );

    // t = v*(1 - (1-f)*s)
    XMVECTOR t = XMVectorMultiply( v, XMVectorSubtract( g_XMOne, XMVectorMultiply( XMVectorSubtract( g_XMOne, f ), s ) ) );

    // Sextant index in [0,6), wrapped via floating-point modulo.
    int ii = static_cast<int>( XMVectorGetX( XMVectorMod( i, g_XMSix ) ) );

    XMVECTOR _rgb;

    // Each case permutes (v, p, q, t) into the r/g/b lanes with two selects.
    switch (ii)
    {
    case 0: // rgb = vtp
        {
            XMVECTOR vt = XMVectorSelect( t, v, g_XMSelect1000 );
            _rgb = XMVectorSelect( p, vt, g_XMSelect1100 );
        }
        break;
    case 1: // rgb = qvp
        {
            XMVECTOR qv = XMVectorSelect( v, q, g_XMSelect1000 );
            _rgb = XMVectorSelect( p, qv, g_XMSelect1100 );
        }
        break;
    case 2: // rgb = pvt
        {
            XMVECTOR pv = XMVectorSelect( v, p, g_XMSelect1000 );
            _rgb = XMVectorSelect( t, pv, g_XMSelect1100 );
        }
        break;
    case 3: // rgb = pqv
        {
            XMVECTOR pq = XMVectorSelect( q, p, g_XMSelect1000 );
            _rgb = XMVectorSelect( v, pq, g_XMSelect1100 );
        }
        break;
    case 4: // rgb = tpv
        {
            XMVECTOR tp = XMVectorSelect( p, t, g_XMSelect1000 );
            _rgb = XMVectorSelect( v, tp, g_XMSelect1100 );
        }
        break;
    default: // rgb = vpq
        {
            XMVECTOR vp = XMVectorSelect( p, v, g_XMSelect1000 );
            _rgb = XMVectorSelect( q, vp, g_XMSelect1100 );
        }
        break;
    }

    // Keep the caller's original w channel.
    return XMVectorSelect( hsv, _rgb, g_XMSelect1110 );
}
+
+//------------------------------------------------------------------------------
+
+inline XMVECTOR XMColorRGBToYUV( FXMVECTOR rgb )
+{
+ static const XMVECTORF32 Scale0 = { 0.299f, -0.147f, 0.615f, 0.0f };
+ static const XMVECTORF32 Scale1 = { 0.587f, -0.289f, -0.515f, 0.0f };
+ static const XMVECTORF32 Scale2 = { 0.114f, 0.436f, -0.100f, 0.0f };
+
+ XMMATRIX M( Scale0, Scale1, Scale2, g_XMZero );
+ XMVECTOR clr = XMVector3Transform( rgb, M );
+
+ return XMVectorSelect( rgb, clr, g_XMSelect1110 );
+}
+
+//------------------------------------------------------------------------------
+
+inline XMVECTOR XMColorYUVToRGB( FXMVECTOR yuv )
+{
+ static const XMVECTORF32 Scale1 = { 0.0f, -0.395f, 2.032f, 0.0f };
+ static const XMVECTORF32 Scale2 = { 1.140f, -0.581f, 0.0f, 0.0f };
+
+ XMMATRIX M( g_XMOne, Scale1, Scale2, g_XMZero );
+ XMVECTOR clr = XMVector3Transform( yuv, M );
+
+ return XMVectorSelect( yuv, clr, g_XMSelect1110 );
+}
+
+//------------------------------------------------------------------------------
+
+inline XMVECTOR XMColorRGBToYUV_HD( FXMVECTOR rgb )
+{
+ static const XMVECTORF32 Scale0 = { 0.2126f, -0.0997f, 0.6150f, 0.0f };
+ static const XMVECTORF32 Scale1 = { 0.7152f, -0.3354f, -0.5586f, 0.0f };
+ static const XMVECTORF32 Scale2 = { 0.0722f, 0.4351f, -0.0564f, 0.0f };
+
+ XMMATRIX M( Scale0, Scale1, Scale2, g_XMZero );
+ XMVECTOR clr = XMVector3Transform( rgb, M );
+
+ return XMVectorSelect( rgb, clr, g_XMSelect1110 );
+}
+
+//------------------------------------------------------------------------------
+
+inline XMVECTOR XMColorYUVToRGB_HD( FXMVECTOR yuv )
+{
+ static const XMVECTORF32 Scale1 = { 0.0f, -0.2153f, 2.1324f, 0.0f };
+ static const XMVECTORF32 Scale2 = { 1.2803f, -0.3806f, 0.0f, 0.0f };
+
+ XMMATRIX M( g_XMOne, Scale1, Scale2, g_XMZero );
+ XMVECTOR clr = XMVector3Transform( yuv, M );
+
+ return XMVectorSelect( yuv, clr, g_XMSelect1110 );
+}
+
+//------------------------------------------------------------------------------
+
+inline XMVECTOR XMColorRGBToXYZ( FXMVECTOR rgb )
+{
+ static const XMVECTORF32 Scale0 = { 0.4887180f, 0.1762044f, 0.0000000f, 0.0f };
+ static const XMVECTORF32 Scale1 = { 0.3106803f, 0.8129847f, 0.0102048f, 0.0f };
+ static const XMVECTORF32 Scale2 = { 0.2006017f, 0.0108109f, 0.9897952f, 0.0f };
+ static const XMVECTORF32 Scale = { 1.f/0.17697f, 1.f/0.17697f, 1.f/0.17697f, 0.0f };
+
+ XMMATRIX M( Scale0, Scale1, Scale2, g_XMZero );
+ XMVECTOR clr = XMVectorMultiply( XMVector3Transform( rgb, M ), Scale );
+
+ return XMVectorSelect( rgb, clr, g_XMSelect1110 );
+}
+
+inline XMVECTOR XMColorXYZToRGB( FXMVECTOR xyz )
+{
+ static const XMVECTORF32 Scale0 = { 2.3706743f, -0.5138850f, 0.0052982f, 0.0f };
+ static const XMVECTORF32 Scale1 = { -0.9000405f, 1.4253036f, -0.0146949f, 0.0f };
+ static const XMVECTORF32 Scale2 = { -0.4706338f, 0.0885814f, 1.0093968f, 0.0f };
+ static const XMVECTORF32 Scale = { 0.17697f, 0.17697f, 0.17697f, 0.0f };
+
+ XMMATRIX M( Scale0, Scale1, Scale2, g_XMZero );
+ XMVECTOR clr = XMVector3Transform( XMVectorMultiply( xyz, Scale ), M );
+
+ return XMVectorSelect( xyz, clr, g_XMSelect1110 );
+}
+
+//------------------------------------------------------------------------------
+
+inline XMVECTOR XMColorXYZToSRGB( FXMVECTOR xyz )
+{
+ static const XMVECTORF32 Scale0 = { 3.2406f, -0.9689f, 0.0557f, 0.0f };
+ static const XMVECTORF32 Scale1 = { -1.5372f, 1.8758f, -0.2040f, 0.0f };
+ static const XMVECTORF32 Scale2 = { -0.4986f, 0.0415f, 1.0570f, 0.0f };
+ static const XMVECTORF32 Cutoff = { 0.0031308f, 0.0031308f, 0.0031308f, 0.0f };
+ static const XMVECTORF32 Exp = { 1.0f/2.4f, 1.0f/2.4f, 1.0f/2.4f, 1.0f };
+
+ XMMATRIX M( Scale0, Scale1, Scale2, g_XMZero );
+ XMVECTOR lclr = XMVector3Transform( xyz, M );
+
+ XMVECTOR sel = XMVectorGreater( lclr, Cutoff );
+
+ // clr = 12.92 * lclr for lclr <= 0.0031308f
+ XMVECTOR smallC = XMVectorMultiply( lclr, g_XMsrgbScale );
+
+ // clr = (1+a)*pow(lclr, 1/2.4) - a for lclr > 0.0031308 (where a = 0.055)
+ XMVECTOR largeC = XMVectorSubtract( XMVectorMultiply( g_XMsrgbA1, XMVectorPow( lclr, Exp ) ), g_XMsrgbA );
+
+ XMVECTOR clr = XMVectorSelect( smallC, largeC, sel );
+
+ return XMVectorSelect( xyz, clr, g_XMSelect1110 );
+}
+
+//------------------------------------------------------------------------------
+
+inline XMVECTOR XMColorSRGBToXYZ( FXMVECTOR srgb )
+{
+ static const XMVECTORF32 Scale0 = { 0.4124f, 0.2126f, 0.0193f, 0.0f };
+ static const XMVECTORF32 Scale1 = { 0.3576f, 0.7152f, 0.1192f, 0.0f };
+ static const XMVECTORF32 Scale2 = { 0.1805f, 0.0722f, 0.9505f, 0.0f };
+ static const XMVECTORF32 Cutoff = { 0.04045f, 0.04045f, 0.04045f, 0.0f };
+ static const XMVECTORF32 Exp = { 2.4f, 2.4f, 2.4f, 1.0f };
+
+ XMVECTOR sel = XMVectorGreater( srgb, Cutoff );
+
+ // lclr = clr / 12.92
+ XMVECTOR smallC = XMVectorDivide( srgb, g_XMsrgbScale );
+
+ // lclr = pow( (clr + a) / (1+a), 2.4 )
+ XMVECTOR largeC = XMVectorPow( XMVectorDivide( XMVectorAdd( srgb, g_XMsrgbA ), g_XMsrgbA1 ), Exp );
+
+ XMVECTOR lclr = XMVectorSelect( smallC, largeC, sel );
+
+ XMMATRIX M( Scale0, Scale1, Scale2, g_XMZero );
+ XMVECTOR clr = XMVector3Transform( lclr, M );
+
+ return XMVectorSelect( srgb, clr, g_XMSelect1110 );
+}
+
+/****************************************************************************
+ *
+ * Miscellaneous
+ *
+ ****************************************************************************/
+
+//------------------------------------------------------------------------------
+
// Runtime check that the host CPU supports the instruction set this library
// was compiled for. Returns true when support is confirmed or no special
// instructions are required; may return a false negative when windows.h /
// IsProcessorFeaturePresent is unavailable at compile time.
inline bool XMVerifyCPUSupport()
{
#if defined(_XM_SSE_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
#if defined(_M_AMD64)
    // The X64 processor model requires SSE2 support
    return true;
#elif defined(PF_XMMI_INSTRUCTIONS_AVAILABLE)
    // Note that on Windows 2000 or older, SSE2 detection is not supported so this will always fail
    // Detecting SSE2 on older versions of Windows would require using cpuid directly
    return ( IsProcessorFeaturePresent( PF_XMMI_INSTRUCTIONS_AVAILABLE ) != 0 && IsProcessorFeaturePresent( PF_XMMI64_INSTRUCTIONS_AVAILABLE ) != 0 );
#else
    // If windows.h is not included, we return false (likely a false negative)
    return false;
#endif
#elif defined(_XM_ARM_NEON_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
#ifdef PF_ARM_NEON_INSTRUCTIONS_AVAILABLE
    return ( IsProcessorFeaturePresent( PF_ARM_NEON_INSTRUCTIONS_AVAILABLE ) != 0 );
#else
    // If windows.h is not included, we return false (likely a false negative)
    return false;
#endif
#else
    // No-intrinsics (scalar) build runs on any CPU.
    return true;
#endif
}
+
+//------------------------------------------------------------------------------
+
// Compute a per-component Fresnel reflectance term from the cosine of the
// incident angle and the refraction index, using the closed form documented
// below. The result is saturated to [0,1].
inline XMVECTOR XMFresnelTerm
(
    FXMVECTOR CosIncidentAngle,
    FXMVECTOR RefractionIndex
)
{
    // Infinite inputs would propagate through the squares/divides below.
    assert(!XMVector4IsInfinite(CosIncidentAngle));

    // Result = 0.5f * (g - c)^2 / (g + c)^2 * ((c * (g + c) - 1)^2 / (c * (g - c) + 1)^2 + 1) where
    // c = CosIncidentAngle
    // g = sqrt(c^2 + RefractionIndex^2 - 1)

#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)

    // g = sqrt(|c^2 + RefractionIndex^2 - 1|); the abs guards the sqrt when
    // the argument dips below zero.
    XMVECTOR G = XMVectorMultiplyAdd(RefractionIndex, RefractionIndex, g_XMNegativeOne.v);
    G = XMVectorMultiplyAdd(CosIncidentAngle, CosIncidentAngle, G);
    G = XMVectorAbs(G);
    G = XMVectorSqrt(G);

    XMVECTOR S = XMVectorAdd(G, CosIncidentAngle);
    XMVECTOR D = XMVectorSubtract(G, CosIncidentAngle);

    // V0 = 0.5 * (g - c)^2 / (g + c)^2
    XMVECTOR V0 = XMVectorMultiply(D, D);
    XMVECTOR V1 = XMVectorMultiply(S, S);
    V1 = XMVectorReciprocal(V1);
    V0 = XMVectorMultiply(g_XMOneHalf.v, V0);
    V0 = XMVectorMultiply(V0, V1);

    // V2 = (c*(g + c) - 1)^2 / (c*(g - c) + 1)^2 + 1
    XMVECTOR V2 = XMVectorMultiplyAdd(CosIncidentAngle, S, g_XMNegativeOne.v);
    XMVECTOR V3 = XMVectorMultiplyAdd(CosIncidentAngle, D, g_XMOne.v);
    V2 = XMVectorMultiply(V2, V2);
    V3 = XMVectorMultiply(V3, V3);
    V3 = XMVectorReciprocal(V3);
    V2 = XMVectorMultiplyAdd(V2, V3, g_XMOne.v);

    XMVECTOR Result = XMVectorMultiply(V0, V2);

    Result = XMVectorSaturate(Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // G = sqrt(abs((RefractionIndex^2-1) + CosIncidentAngle^2))
    XMVECTOR G = _mm_mul_ps(RefractionIndex,RefractionIndex);
    XMVECTOR vTemp = _mm_mul_ps(CosIncidentAngle,CosIncidentAngle);
    G = _mm_sub_ps(G,g_XMOne);
    vTemp = _mm_add_ps(vTemp,G);
    // max((0-vTemp),vTemp) == abs(vTemp)
    // The abs is needed to deal with refraction and cosine being zero
    G = _mm_setzero_ps();
    G = _mm_sub_ps(G,vTemp);
    G = _mm_max_ps(G,vTemp);
    // Last operation, the sqrt()
    G = _mm_sqrt_ps(G);

    // Calc G-C and G+C
    XMVECTOR GAddC = _mm_add_ps(G,CosIncidentAngle);
    XMVECTOR GSubC = _mm_sub_ps(G,CosIncidentAngle);
    // Perform the term (0.5f *(g - c)^2) / (g + c)^2
    XMVECTOR vResult = _mm_mul_ps(GSubC,GSubC);
    vTemp = _mm_mul_ps(GAddC,GAddC);
    vResult = _mm_mul_ps(vResult,g_XMOneHalf);
    vResult = _mm_div_ps(vResult,vTemp);
    // Perform the term ((c * (g + c) - 1)^2 / (c * (g - c) + 1)^2 + 1)
    GAddC = _mm_mul_ps(GAddC,CosIncidentAngle);
    GSubC = _mm_mul_ps(GSubC,CosIncidentAngle);
    GAddC = _mm_sub_ps(GAddC,g_XMOne);
    GSubC = _mm_add_ps(GSubC,g_XMOne);
    GAddC = _mm_mul_ps(GAddC,GAddC);
    GSubC = _mm_mul_ps(GSubC,GSubC);
    GAddC = _mm_div_ps(GAddC,GSubC);
    GAddC = _mm_add_ps(GAddC,g_XMOne);
    // Multiply the two term parts
    vResult = _mm_mul_ps(vResult,GAddC);
    // Clamp to 0.0 - 1.0f
    vResult = _mm_max_ps(vResult,g_XMZero);
    vResult = _mm_min_ps(vResult,g_XMOne);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
    // NOTE(review): no VMX128 implementation here — this branch falls off the
    // end of the function without returning a value.
#endif // _XM_VMX128_INTRINSICS_
}
+
+//------------------------------------------------------------------------------
+
// Test whether two scalars are equal to within the given tolerance:
// |S1 - S2| <= Epsilon.
inline bool XMScalarNearEqual
(
    float S1,
    float S2,
    float Epsilon
)
{
    return (fabsf(S1 - S2) <= Epsilon);
}
+
+//------------------------------------------------------------------------------
+// Modulo the range of the given angle such that -XM_PI <= Angle < XM_PI
+inline float XMScalarModAngle
+(
+ float Angle
+)
+{
+ // Note: The modulo is performed with unsigned math only to work
+ // around a precision error on numbers that are close to PI
+
+ // Normalize the range from 0.0f to XM_2PI
+ Angle = Angle + XM_PI;
+ // Perform the modulo, unsigned
+ float fTemp = fabsf(Angle);
+ fTemp = fTemp - (XM_2PI * (float)((int32_t)(fTemp/XM_2PI)));
+ // Restore the number to the range of -XM_PI to XM_PI-epsilon
+ fTemp = fTemp - XM_PI;
+ // If the modulo'd value was negative, restore negation
+ if (Angle<0.0f) {
+ fTemp = -fTemp;
+ }
+ return fTemp;
+}
+
+//------------------------------------------------------------------------------
+
+inline float XMScalarSin
+(
+ float Value
+)
+{
+ // Map Value to y in [-pi,pi], x = 2*pi*quotient + remainder.
+ float quotient = XM_1DIV2PI*Value;
+ if (Value >= 0.0f)
+ {
+ quotient = (float)((int)(quotient + 0.5f));
+ }
+ else
+ {
+ quotient = (float)((int)(quotient - 0.5f));
+ }
+ float y = Value - XM_2PI*quotient;
+
+ // Map y to [-pi/2,pi/2] with sin(y) = sin(Value).
+ if (y > XM_PIDIV2)
+ {
+ y = XM_PI - y;
+ }
+ else if (y < -XM_PIDIV2)
+ {
+ y = -XM_PI - y;
+ }
+
+ // 11-degree minimax approximation
+ float y2 = y * y;
+ return ( ( ( ( (-2.3889859e-08f * y2 + 2.7525562e-06f) * y2 - 0.00019840874f ) * y2 + 0.0083333310f ) * y2 - 0.16666667f ) * y2 + 1.0f ) * y;
+}
+
+//------------------------------------------------------------------------------
+
+inline float XMScalarSinEst
+(
+ float Value
+)
+{
+ // Map Value to y in [-pi,pi], x = 2*pi*quotient + remainder.
+ float quotient = XM_1DIV2PI*Value;
+ if (Value >= 0.0f)
+ {
+ quotient = (float)((int)(quotient + 0.5f));
+ }
+ else
+ {
+ quotient = (float)((int)(quotient - 0.5f));
+ }
+ float y = Value - XM_2PI*quotient;
+
+ // Map y to [-pi/2,pi/2] with sin(y) = sin(Value).
+ if (y > XM_PIDIV2)
+ {
+ y = XM_PI - y;
+ }
+ else if (y < -XM_PIDIV2)
+ {
+ y = -XM_PI - y;
+ }
+
+ // 7-degree minimax approximation
+ float y2 = y * y;
+ return ( ( ( -0.00018524670f * y2 + 0.0083139502f ) * y2 - 0.16665852f ) * y2 + 1.0f ) * y;
+}
+
+//------------------------------------------------------------------------------
+
+inline float XMScalarCos
+(
+ float Value
+)
+{
+ // Map Value to y in [-pi,pi], x = 2*pi*quotient + remainder.
+ float quotient = XM_1DIV2PI*Value;
+ if (Value >= 0.0f)
+ {
+ quotient = (float)((int)(quotient + 0.5f));
+ }
+ else
+ {
+ quotient = (float)((int)(quotient - 0.5f));
+ }
+ float y = Value - XM_2PI*quotient;
+
+ // Map y to [-pi/2,pi/2] with cos(y) = sign*cos(x).
+ float sign;
+ if (y > XM_PIDIV2)
+ {
+ y = XM_PI - y;
+ sign = -1.0f;
+ }
+ else if (y < -XM_PIDIV2)
+ {
+ y = -XM_PI - y;
+ sign = -1.0f;
+ }
+ else
+ {
+ sign = +1.0f;
+ }
+
+ // 10-degree minimax approximation
+ float y2 = y*y;
+ float p = ( ( ( ( -2.6051615e-07f * y2 + 2.4760495e-05f ) * y2 - 0.0013888378f ) * y2 + 0.041666638f ) * y2 - 0.5f ) * y2 + 1.0f;
+ return sign*p;
+}
+
+//------------------------------------------------------------------------------
+
+inline float XMScalarCosEst
+(
+ float Value
+)
+{
+ // Map Value to y in [-pi,pi], x = 2*pi*quotient + remainder.
+ float quotient = XM_1DIV2PI*Value;
+ if (Value >= 0.0f)
+ {
+ quotient = (float)((int)(quotient + 0.5f));
+ }
+ else
+ {
+ quotient = (float)((int)(quotient - 0.5f));
+ }
+ float y = Value - XM_2PI*quotient;
+
+ // Map y to [-pi/2,pi/2] with cos(y) = sign*cos(x).
+ float sign;
+ if (y > XM_PIDIV2)
+ {
+ y = XM_PI - y;
+ sign = -1.0f;
+ }
+ else if (y < -XM_PIDIV2)
+ {
+ y = -XM_PI - y;
+ sign = -1.0f;
+ }
+ else
+ {
+ sign = +1.0f;
+ }
+
+ // 6-degree minimax approximation
+ float y2 = y * y;
+ float p = ( ( -0.0012712436f * y2 + 0.041493919f ) * y2 - 0.49992746f ) * y2 + 1.0f;
+ return sign*p;
+}
+
+//------------------------------------------------------------------------------
+
+_Use_decl_annotations_
+inline void XMScalarSinCos
+(
+ float* pSin,
+ float* pCos,
+ float Value
+)
+{
+ assert(pSin);
+ assert(pCos);
+
+ // Map Value to y in [-pi,pi], x = 2*pi*quotient + remainder.
+ float quotient = XM_1DIV2PI*Value;
+ if (Value >= 0.0f)
+ {
+ quotient = (float)((int)(quotient + 0.5f));
+ }
+ else
+ {
+ quotient = (float)((int)(quotient - 0.5f));
+ }
+ float y = Value - XM_2PI*quotient;
+
+ // Map y to [-pi/2,pi/2] with sin(y) = sin(Value).
+ float sign;
+ if (y > XM_PIDIV2)
+ {
+ y = XM_PI - y;
+ sign = -1.0f;
+ }
+ else if (y < -XM_PIDIV2)
+ {
+ y = -XM_PI - y;
+ sign = -1.0f;
+ }
+ else
+ {
+ sign = +1.0f;
+ }
+
+ float y2 = y * y;
+
+ // 11-degree minimax approximation
+ *pSin = ( ( ( ( (-2.3889859e-08f * y2 + 2.7525562e-06f) * y2 - 0.00019840874f ) * y2 + 0.0083333310f ) * y2 - 0.16666667f ) * y2 + 1.0f ) * y;
+
+ // 10-degree minimax approximation
+ float p = ( ( ( ( -2.6051615e-07f * y2 + 2.4760495e-05f ) * y2 - 0.0013888378f ) * y2 + 0.041666638f ) * y2 - 0.5f ) * y2 + 1.0f;
+ *pCos = sign*p;
+}
+
+//------------------------------------------------------------------------------
+
+_Use_decl_annotations_
+inline void XMScalarSinCosEst
+(
+ float* pSin,
+ float* pCos,
+ float Value
+)
+{
+ assert(pSin);
+ assert(pCos);
+
+ // Map Value to y in [-pi,pi], x = 2*pi*quotient + remainder.
+ float quotient = XM_1DIV2PI*Value;
+ if (Value >= 0.0f)
+ {
+ quotient = (float)((int)(quotient + 0.5f));
+ }
+ else
+ {
+ quotient = (float)((int)(quotient - 0.5f));
+ }
+ float y = Value - XM_2PI*quotient;
+
+ // Map y to [-pi/2,pi/2] with sin(y) = sin(Value).
+ float sign;
+ if (y > XM_PIDIV2)
+ {
+ y = XM_PI - y;
+ sign = -1.0f;
+ }
+ else if (y < -XM_PIDIV2)
+ {
+ y = -XM_PI - y;
+ sign = -1.0f;
+ }
+ else
+ {
+ sign = +1.0f;
+ }
+
+ float y2 = y * y;
+
+ // 7-degree minimax approximation
+ *pSin = ( ( ( -0.00018524670f * y2 + 0.0083139502f ) * y2 - 0.16665852f ) * y2 + 1.0f ) * y;
+
+ // 6-degree minimax approximation
+ float p = ( ( -0.0012712436f * y2 + 0.041493919f ) * y2 - 0.49992746f ) * y2 + 1.0f;
+ *pCos = sign*p;
+}
+
+//------------------------------------------------------------------------------
+
+inline float XMScalarASin
+(
+ float Value
+)
+{
+ // Clamp input to [-1,1].
+ bool nonnegative = (Value >= 0.0f);
+ float x = fabsf(Value);
+ float omx = 1.0f - x;
+ if (omx < 0.0f)
+ {
+ omx = 0.0f;
+ }
+ float root = sqrt(omx);
+
+ // 7-degree minimax approximation
+ float result = ( ( ( ( ( ( -0.0012624911f * x + 0.0066700901f ) * x - 0.0170881256f ) * x + 0.0308918810f ) * x - 0.0501743046f ) * x + 0.0889789874f ) * x - 0.2145988016f ) * x + 1.5707963050f;
+ result *= root; // acos(|x|)
+
+ // acos(x) = pi - acos(-x) when x < 0, asin(x) = pi/2 - acos(x)
+ return (nonnegative ? XM_PIDIV2 - result : result - XM_PIDIV2);
+}
+
+//------------------------------------------------------------------------------
+
+inline float XMScalarASinEst
+(
+ float Value
+)
+{
+ // Clamp input to [-1,1].
+ bool nonnegative = (Value >= 0.0f);
+ float x = fabsf(Value);
+ float omx = 1.0f - x;
+ if (omx < 0.0f)
+ {
+ omx = 0.0f;
+ }
+ float root = sqrt(omx);
+
+ // 3-degree minimax approximation
+ float result = ((-0.0187293f*x+0.0742610f)*x-0.2121144f)*x+1.5707288f;
+ result *= root; // acos(|x|)
+
+ // acos(x) = pi - acos(-x) when x < 0, asin(x) = pi/2 - acos(x)
+ return (nonnegative ? XM_PIDIV2 - result : result - XM_PIDIV2);
+}
+
+//------------------------------------------------------------------------------
+
+inline float XMScalarACos
+(
+ float Value
+)
+{
+ // Clamp input to [-1,1].
+ bool nonnegative = (Value >= 0.0f);
+ float x = fabsf(Value);
+ float omx = 1.0f - x;
+ if (omx < 0.0f)
+ {
+ omx = 0.0f;
+ }
+ float root = sqrtf(omx);
+
+ // 7-degree minimax approximation
+ float result = ( ( ( ( ( ( -0.0012624911f * x + 0.0066700901f ) * x - 0.0170881256f ) * x + 0.0308918810f ) * x - 0.0501743046f ) * x + 0.0889789874f ) * x - 0.2145988016f ) * x + 1.5707963050f;
+ result *= root;
+
+ // acos(x) = pi - acos(-x) when x < 0
+ return (nonnegative ? result : XM_PI - result);
+}
+
+//------------------------------------------------------------------------------
+
+inline float XMScalarACosEst
+(
+ float Value
+)
+{
+ // Clamp input to [-1,1].
+ bool nonnegative = (Value >= 0.0f);
+ float x = fabsf(Value);
+ float omx = 1.0f - x;
+ if (omx < 0.0f)
+ {
+ omx = 0.0f;
+ }
+ float root = sqrtf(omx);
+
+ // 3-degree minimax approximation
+ float result = ( ( -0.0187293f * x + 0.0742610f ) * x - 0.2121144f ) * x + 1.5707288f;
+ result *= root;
+
+ // acos(x) = pi - acos(-x) when x < 0
+ return (nonnegative ? result : XM_PI - result);
+}
+
diff --git a/Minecraft.Client/PS3/PS3Extras/DirectX/DirectXMathVector.inl b/Minecraft.Client/PS3/PS3Extras/DirectX/DirectXMathVector.inl
new file mode 100644
index 00000000..39e24055
--- /dev/null
+++ b/Minecraft.Client/PS3/PS3Extras/DirectX/DirectXMathVector.inl
@@ -0,0 +1,10596 @@
+//-------------------------------------------------------------------------------------
+// DirectXMathVector.inl -- SIMD C++ Math library
+//
+// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
+// ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
+// PARTICULAR PURPOSE.
+//
+// Copyright (c) Microsoft Corporation. All rights reserved.
+//-------------------------------------------------------------------------------------
+
#ifdef _MSC_VER
#pragma once
#endif

#if defined(_XM_NO_INTRINSICS_)
// Bit-pattern float classification for the no-intrinsics path:
// NaN = all-ones exponent with a non-zero mantissa; Inf = all-ones exponent
// with a zero mantissa (sign ignored).
#define XMISNAN(x) ((*(uint32_t*)&(x) & 0x7F800000) == 0x7F800000 && (*(uint32_t*)&(x) & 0x7FFFFF) != 0)
#define XMISINF(x) ((*(uint32_t*)&(x) & 0x7FFFFFFF) == 0x7F800000)
#endif
+
+/****************************************************************************
+ *
+ * General Vector
+ *
+ ****************************************************************************/
+
+//------------------------------------------------------------------------------
+// Assignment operations
+//------------------------------------------------------------------------------
+
+//------------------------------------------------------------------------------
+// Return a vector with all elements equaling zero
// Return the zero vector (0, 0, 0, 0).
inline XMVECTOR XMVectorZero()
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR vResult = {0.0f,0.0f,0.0f,0.0f};
    return vResult;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    return vdupq_n_u32(0);
#elif defined(_XM_SSE_INTRINSICS_)
    return _mm_setzero_ps();
#else // _XM_VMX128_INTRINSICS_
    // NOTE(review): no VMX128 implementation in this port.
#endif // _XM_VMX128_INTRINSICS_
}
+
+//------------------------------------------------------------------------------
+// Initialize a vector with four floating point values
// Build a vector from four floats, in (x, y, z, w) lane order.
inline XMVECTOR XMVectorSet
(
    float x,
    float y,
    float z,
    float w
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTORF32 vResult = {x,y,z,w};
    return vResult.v;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    // Pack the raw float bit patterns into two 64-bit halves, then combine.
    __n64 V0 = vcreate_f32(((uint64_t)*(const uint32_t *)&x) | ((uint64_t)(*(const uint32_t *)&y) << 32));
    __n64 V1 = vcreate_f32(((uint64_t)*(const uint32_t *)&z) | ((uint64_t)(*(const uint32_t *)&w) << 32));
    return vcombine_f32(V0, V1);
#elif defined(_XM_SSE_INTRINSICS_)
    // _mm_set_ps takes its arguments highest lane first, hence (w, z, y, x).
    return _mm_set_ps( w, z, y, x );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
+//------------------------------------------------------------------------------
+// Initialize a vector with four integer values
// Build a vector from four 32-bit integers, in (x, y, z, w) lane order.
// The integer bit patterns are stored as-is (no int-to-float conversion).
inline XMVECTOR XMVectorSetInt
(
    uint32_t x,
    uint32_t y,
    uint32_t z,
    uint32_t w
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTORU32 vResult = {x,y,z,w};
    return vResult.v;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    __n64 V0 = vcreate_u32(((uint64_t)x) | ((uint64_t)y << 32));
    __n64 V1 = vcreate_u32(((uint64_t)z) | ((uint64_t)w << 32));
    return vcombine_u32(V0, V1);
#elif defined(_XM_SSE_INTRINSICS_)
    // Reinterpret the integer register as a float vector (no conversion).
    __m128i V = _mm_set_epi32( w, z, y, x );
    return reinterpret_cast<__m128 *>(&V)[0];
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
+//------------------------------------------------------------------------------
+// Initialize a vector with a replicated floating point value
// Broadcast a single float into all four lanes.
inline XMVECTOR XMVectorReplicate
(
    float Value
)
{
#if defined(_XM_NO_INTRINSICS_) || defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
    XMVECTORF32 vResult = {Value,Value,Value,Value};
    return vResult.v;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    return vdupq_n_f32( Value );
#elif defined(_XM_SSE_INTRINSICS_)
    return _mm_set_ps1( Value );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
+//------------------------------------------------------------------------------
+// Initialize a vector with a replicated floating point value passed by pointer
_Use_decl_annotations_
// Broadcast the float at *pValue into all four lanes.
// pValue must be a valid pointer; no 16-byte alignment is required.
inline XMVECTOR XMVectorReplicatePtr
(
    const float *pValue
)
{
#if defined(_XM_NO_INTRINSICS_) || defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
    float Value = pValue[0];
    XMVECTORF32 vResult = {Value,Value,Value,Value};
    return vResult.v;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    return vld1q_dup_f32( pValue );
#elif defined(_XM_SSE_INTRINSICS_)
    return _mm_load_ps1( pValue );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
+//------------------------------------------------------------------------------
+// Initialize a vector with a replicated integer value
// Broadcast a 32-bit integer bit pattern into all four lanes (no conversion).
inline XMVECTOR XMVectorReplicateInt
(
    uint32_t Value
)
{
#if defined(_XM_NO_INTRINSICS_) || defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
    XMVECTORU32 vResult = {Value,Value,Value,Value};
    return vResult.v;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    return vdupq_n_u32( Value );
#elif defined(_XM_SSE_INTRINSICS_)
    // Broadcast as an integer, then reinterpret the register as floats.
    __m128i vTemp = _mm_set1_epi32( Value );
    return _mm_castsi128_ps(vTemp);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
+//------------------------------------------------------------------------------
+// Initialize a vector with a replicated integer value passed by pointer
_Use_decl_annotations_
// Broadcast the 32-bit integer at *pValue into all four lanes (no conversion).
// pValue must be a valid pointer; no 16-byte alignment is required.
inline XMVECTOR XMVectorReplicateIntPtr
(
    const uint32_t *pValue
)
{
#if defined(_XM_NO_INTRINSICS_) || defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
    uint32_t Value = pValue[0];
    XMVECTORU32 vResult = {Value,Value,Value,Value};
    return vResult.v;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    return vld1q_dup_u32(pValue);
#elif defined(_XM_SSE_INTRINSICS_)
    // Load the raw 32 bits as a float and broadcast; the bit pattern is kept.
    return _mm_load_ps1(reinterpret_cast<const float *>(pValue));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
+//------------------------------------------------------------------------------
+// Initialize a vector with all bits set (true mask)
// Return a vector with every bit set (the "true" comparison mask).
inline XMVECTOR XMVectorTrueInt()
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTORU32 vResult = {0xFFFFFFFFU,0xFFFFFFFFU,0xFFFFFFFFU,0xFFFFFFFFU};
    return vResult.v;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    return vdupq_n_s32(-1);
#elif defined(_XM_SSE_INTRINSICS_)
    // -1 in two's complement is all bits set in each 32-bit lane.
    __m128i V = _mm_set1_epi32(-1);
    return reinterpret_cast<__m128 *>(&V)[0];
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
+//------------------------------------------------------------------------------
+// Initialize a vector with all bits clear (false mask)
// Return a vector with every bit clear (the "false" comparison mask).
// All-bits-zero is also floating-point (0,0,0,0), so the zero vector is reused.
inline XMVECTOR XMVectorFalseInt()
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR vResult = {0.0f,0.0f,0.0f,0.0f};
    return vResult;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    return vdupq_n_u32(0);
#elif defined(_XM_SSE_INTRINSICS_)
    return _mm_setzero_ps();
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
+//------------------------------------------------------------------------------
+// Replicate the x component of the vector
// Broadcast the x lane of V into all four lanes.
inline XMVECTOR XMVectorSplatX
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR vResult;
    vResult.vector4_f32[0] =
    vResult.vector4_f32[1] =
    vResult.vector4_f32[2] =
    vResult.vector4_f32[3] = V.vector4_f32[0];
    return vResult;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    // x lives in lane 0 of the low 64-bit half.
    return vdupq_lane_f32( vget_low_f32( V ), 0 );
#elif defined(_XM_SSE_INTRINSICS_)
    return XM_PERMUTE_PS( V, _MM_SHUFFLE(0, 0, 0, 0) );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
+//------------------------------------------------------------------------------
+// Replicate the y component of the vector
// Broadcast the y lane of V into all four lanes.
inline XMVECTOR XMVectorSplatY
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR vResult;
    vResult.vector4_f32[0] =
    vResult.vector4_f32[1] =
    vResult.vector4_f32[2] =
    vResult.vector4_f32[3] = V.vector4_f32[1];
    return vResult;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    // y lives in lane 1 of the low 64-bit half.
    return vdupq_lane_f32( vget_low_f32( V ), 1 );
#elif defined(_XM_SSE_INTRINSICS_)
    return XM_PERMUTE_PS( V, _MM_SHUFFLE(1, 1, 1, 1) );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
+//------------------------------------------------------------------------------
+// Replicate the z component of the vector
// Broadcast the z lane of V into all four lanes.
inline XMVECTOR XMVectorSplatZ
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR vResult;
    vResult.vector4_f32[0] =
    vResult.vector4_f32[1] =
    vResult.vector4_f32[2] =
    vResult.vector4_f32[3] = V.vector4_f32[2];
    return vResult;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    // z lives in lane 0 of the high 64-bit half.
    return vdupq_lane_f32( vget_high_f32( V ), 0 );
#elif defined(_XM_SSE_INTRINSICS_)
    return XM_PERMUTE_PS( V, _MM_SHUFFLE(2, 2, 2, 2) );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
+//------------------------------------------------------------------------------
+// Replicate the w component of the vector
// Broadcast the w lane of V into all four lanes.
inline XMVECTOR XMVectorSplatW
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR vResult;
    vResult.vector4_f32[0] =
    vResult.vector4_f32[1] =
    vResult.vector4_f32[2] =
    vResult.vector4_f32[3] = V.vector4_f32[3];
    return vResult;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    // w lives in lane 1 of the high 64-bit half.
    return vdupq_lane_f32( vget_high_f32( V ), 1 );
#elif defined(_XM_SSE_INTRINSICS_)
    return XM_PERMUTE_PS( V, _MM_SHUFFLE(3, 3, 3, 3) );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
+//------------------------------------------------------------------------------
+// Return a vector of 1.0f,1.0f,1.0f,1.0f
// Return the vector (1, 1, 1, 1).
inline XMVECTOR XMVectorSplatOne()
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR vResult;
    vResult.vector4_f32[0] =
    vResult.vector4_f32[1] =
    vResult.vector4_f32[2] =
    vResult.vector4_f32[3] = 1.0f;
    return vResult;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    return vdupq_n_f32(1.0f);
#elif defined(_XM_SSE_INTRINSICS_)
    // SSE path reuses the global constant rather than rebuilding it.
    return g_XMOne;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
//------------------------------------------------------------------------------
// Return a vector of INF,INF,INF,INF
// 0x7F800000 is the IEEE-754 single-precision bit pattern for +infinity.
inline XMVECTOR XMVectorSplatInfinity()
{
#if defined(_XM_NO_INTRINSICS_)
    // Written through the integer alias so the exact bit pattern is produced.
    XMVECTOR vResult;
    vResult.vector4_u32[0] =
    vResult.vector4_u32[1] =
    vResult.vector4_u32[2] =
    vResult.vector4_u32[3] = 0x7F800000;
    return vResult;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    // NOTE(review): returns a uint32x4_t where the caller expects a float
    // vector; this relies on MSVC ARM's interchangeable __n128 type — a
    // vreinterpret would be required under GCC/Clang NEON. Confirm toolchain.
    return vdupq_n_u32(0x7F800000);
#elif defined(_XM_SSE_INTRINSICS_)
    return g_XMInfinity;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
//------------------------------------------------------------------------------
// Return a vector of Q_NAN,Q_NAN,Q_NAN,Q_NAN
// 0x7FC00000 is the canonical IEEE-754 single-precision quiet NaN pattern.
inline XMVECTOR XMVectorSplatQNaN()
{
#if defined(_XM_NO_INTRINSICS_)
    // Written through the integer alias so the exact bit pattern is produced.
    XMVECTOR vResult;
    vResult.vector4_u32[0] =
    vResult.vector4_u32[1] =
    vResult.vector4_u32[2] =
    vResult.vector4_u32[3] = 0x7FC00000;
    return vResult;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    // NOTE(review): uint32x4_t returned as XMVECTOR; relies on MSVC ARM's
    // interchangeable __n128 — verify if building with GCC/Clang NEON.
    return vdupq_n_u32(0x7FC00000);
#elif defined(_XM_SSE_INTRINSICS_)
    return g_XMQNaN;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
//------------------------------------------------------------------------------
// Return a vector of 1.192092896e-7f,1.192092896e-7f,1.192092896e-7f,1.192092896e-7f
// 0x34000000 is the bit pattern of 2^-23, the single-precision machine epsilon.
inline XMVECTOR XMVectorSplatEpsilon()
{
#if defined(_XM_NO_INTRINSICS_)
    // Written through the integer alias so the exact bit pattern is produced.
    XMVECTOR vResult;
    vResult.vector4_u32[0] =
    vResult.vector4_u32[1] =
    vResult.vector4_u32[2] =
    vResult.vector4_u32[3] = 0x34000000;
    return vResult;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    // NOTE(review): uint32x4_t returned as XMVECTOR; relies on MSVC ARM's
    // interchangeable __n128 — verify if building with GCC/Clang NEON.
    return vdupq_n_u32(0x34000000);
#elif defined(_XM_SSE_INTRINSICS_)
    return g_XMEpsilon;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
+//------------------------------------------------------------------------------
+// Return a vector of -0.0f (0x80000000),-0.0f,-0.0f,-0.0f
+inline XMVECTOR XMVectorSplatSignMask()
+{
+#if defined(_XM_NO_INTRINSICS_)
+ XMVECTOR vResult;
+ vResult.vector4_u32[0] =
+ vResult.vector4_u32[1] =
+ vResult.vector4_u32[2] =
+ vResult.vector4_u32[3] = 0x80000000U;
+ return vResult;
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ return vdupq_n_u32(0x80000000U);
+#elif defined(_XM_SSE_INTRINSICS_)
+ __m128i V = _mm_set1_epi32( 0x80000000 );
+ return reinterpret_cast<__m128*>(&V)[0];
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
//------------------------------------------------------------------------------
// Return a floating point value via an index. This is not a recommended
// function to use due to performance loss.
// i must be 0-3; checked only in debug builds via assert.
inline float XMVectorGetByIndex(FXMVECTOR V, size_t i)
{
    assert( i < 4 );
    _Analysis_assume_( i < 4 );
#if defined(_XM_NO_INTRINSICS_)
    return V.vector4_f32[i];
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    // Element access through the MSVC ARM __n128 union member.
    return V.n128_f32[i];
#elif defined(_XM_SSE_INTRINSICS_)
    // Element access through the MSVC __m128 union member (forces a spill).
    return V.m128_f32[i];
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
//------------------------------------------------------------------------------
// Return the X component in an FPU register.
inline float XMVectorGetX(FXMVECTOR V)
{
#if defined(_XM_NO_INTRINSICS_)
    return V.vector4_f32[0];
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    return vgetq_lane_f32(V, 0);
#elif defined(_XM_SSE_INTRINSICS_)
    // The low lane can be extracted directly, no shuffle needed.
    return _mm_cvtss_f32(V);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

// Return the Y component in an FPU register.
inline float XMVectorGetY(FXMVECTOR V)
{
#if defined(_XM_NO_INTRINSICS_)
    return V.vector4_f32[1];
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    return vgetq_lane_f32(V, 1);
#elif defined(_XM_SSE_INTRINSICS_)
    // Move lane 1 to lane 0, then extract the low lane.
    XMVECTOR vTemp = XM_PERMUTE_PS(V,_MM_SHUFFLE(1,1,1,1));
    return _mm_cvtss_f32(vTemp);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

// Return the Z component in an FPU register.
inline float XMVectorGetZ(FXMVECTOR V)
{
#if defined(_XM_NO_INTRINSICS_)
    return V.vector4_f32[2];
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    return vgetq_lane_f32(V, 2);
#elif defined(_XM_SSE_INTRINSICS_)
    // Move lane 2 to lane 0, then extract the low lane.
    XMVECTOR vTemp = XM_PERMUTE_PS(V,_MM_SHUFFLE(2,2,2,2));
    return _mm_cvtss_f32(vTemp);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

// Return the W component in an FPU register.
inline float XMVectorGetW(FXMVECTOR V)
{
#if defined(_XM_NO_INTRINSICS_)
    return V.vector4_f32[3];
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    return vgetq_lane_f32(V, 3);
#elif defined(_XM_SSE_INTRINSICS_)
    // Move lane 3 to lane 0, then extract the low lane.
    XMVECTOR vTemp = XM_PERMUTE_PS(V,_MM_SHUFFLE(3,3,3,3));
    return _mm_cvtss_f32(vTemp);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
//------------------------------------------------------------------------------

// Store a component indexed by i into a 32 bit float location in memory.
// f must be non-null and i must be 0-3; checked only in debug builds.
_Use_decl_annotations_
inline void XMVectorGetByIndexPtr(float *f, FXMVECTOR V, size_t i)
{
    assert( f != NULL );
    assert( i < 4 );
    _Analysis_assume_( i < 4 );
#if defined(_XM_NO_INTRINSICS_)
    *f = V.vector4_f32[i];
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    *f = V.n128_f32[i];
#elif defined(_XM_SSE_INTRINSICS_)
    // MSVC __m128 union member access; variable index forces a spill.
    *f = V.m128_f32[i];
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
//------------------------------------------------------------------------------

// Store the X component into a 32 bit float location in memory.
_Use_decl_annotations_
inline void XMVectorGetXPtr(float *x, FXMVECTOR V)
{
    assert( x != NULL);
#if defined(_XM_NO_INTRINSICS_)
    *x = V.vector4_f32[0];
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    vst1q_lane_f32(x,V,0);
#elif defined(_XM_SSE_INTRINSICS_)
    // Low lane goes straight to memory.
    _mm_store_ss(x,V);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

// Store the Y component into a 32 bit float location in memory.
_Use_decl_annotations_
inline void XMVectorGetYPtr(float *y, FXMVECTOR V)
{
    assert( y != NULL );
#if defined(_XM_NO_INTRINSICS_)
    *y = V.vector4_f32[1];
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    vst1q_lane_f32(y,V,1);
#elif defined(_XM_SSE_INTRINSICS_)
    // Broadcast lane 1 to lane 0, then store the low lane.
    XMVECTOR vResult = XM_PERMUTE_PS(V,_MM_SHUFFLE(1,1,1,1));
    _mm_store_ss(y,vResult);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

// Store the Z component into a 32 bit float location in memory.
_Use_decl_annotations_
inline void XMVectorGetZPtr(float *z, FXMVECTOR V)
{
    assert( z != NULL );
#if defined(_XM_NO_INTRINSICS_)
    *z = V.vector4_f32[2];
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    vst1q_lane_f32(z,V,2);
#elif defined(_XM_SSE_INTRINSICS_)
    // Broadcast lane 2 to lane 0, then store the low lane.
    XMVECTOR vResult = XM_PERMUTE_PS(V,_MM_SHUFFLE(2,2,2,2));
    _mm_store_ss(z,vResult);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

// Store the W component into a 32 bit float location in memory.
_Use_decl_annotations_
inline void XMVectorGetWPtr(float *w, FXMVECTOR V)
{
    assert( w != NULL );
#if defined(_XM_NO_INTRINSICS_)
    *w = V.vector4_f32[3];
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    vst1q_lane_f32(w,V,3);
#elif defined(_XM_SSE_INTRINSICS_)
    // Broadcast lane 3 to lane 0, then store the low lane.
    XMVECTOR vResult = XM_PERMUTE_PS(V,_MM_SHUFFLE(3,3,3,3));
    _mm_store_ss(w,vResult);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
//------------------------------------------------------------------------------

// Return an integer value via an index. This is not a recommended
// function to use due to performance loss.
// Reads the raw 32-bit lane contents (no float->int conversion).
inline uint32_t XMVectorGetIntByIndex(FXMVECTOR V, size_t i)
{
    assert( i < 4 );
    _Analysis_assume_( i < 4 );
#if defined(_XM_NO_INTRINSICS_)
    return V.vector4_u32[i];
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    return V.n128_u32[i];
#elif defined(_XM_SSE_INTRINSICS_)
    // MSVC __m128 union member access; variable index forces a spill.
    return V.m128_u32[i];
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
//------------------------------------------------------------------------------

// Return the X component in an integer register.
// Returns the raw 32-bit lane contents (bit copy, no conversion).
inline uint32_t XMVectorGetIntX(FXMVECTOR V)
{
#if defined(_XM_NO_INTRINSICS_)
    return V.vector4_u32[0];
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    return vgetq_lane_u32(V, 0);
#elif defined(_XM_SSE_INTRINSICS_)
    // Reinterpret as integer vector, then move the low lane to a GPR.
    return static_cast<uint32_t>(_mm_cvtsi128_si32(_mm_castps_si128(V)));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

// Return the Y component in an integer register.
inline uint32_t XMVectorGetIntY(FXMVECTOR V)
{
#if defined(_XM_NO_INTRINSICS_)
    return V.vector4_u32[1];
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    return vgetq_lane_u32(V, 1);
#elif defined(_XM_SSE_INTRINSICS_)
    // Shuffle lane 1 into lane 0, then move it to a GPR.
    __m128i vResulti = _mm_shuffle_epi32(_mm_castps_si128(V),_MM_SHUFFLE(1,1,1,1));
    return static_cast<uint32_t>(_mm_cvtsi128_si32(vResulti));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

// Return the Z component in an integer register.
inline uint32_t XMVectorGetIntZ(FXMVECTOR V)
{
#if defined(_XM_NO_INTRINSICS_)
    return V.vector4_u32[2];
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    return vgetq_lane_u32(V, 2);
#elif defined(_XM_SSE_INTRINSICS_)
    // Shuffle lane 2 into lane 0, then move it to a GPR.
    __m128i vResulti = _mm_shuffle_epi32(_mm_castps_si128(V),_MM_SHUFFLE(2,2,2,2));
    return static_cast<uint32_t>(_mm_cvtsi128_si32(vResulti));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

// Return the W component in an integer register.
inline uint32_t XMVectorGetIntW(FXMVECTOR V)
{
#if defined(_XM_NO_INTRINSICS_)
    return V.vector4_u32[3];
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    return vgetq_lane_u32(V, 3);
#elif defined(_XM_SSE_INTRINSICS_)
    // Shuffle lane 3 into lane 0, then move it to a GPR.
    __m128i vResulti = _mm_shuffle_epi32(_mm_castps_si128(V),_MM_SHUFFLE(3,3,3,3));
    return static_cast<uint32_t>(_mm_cvtsi128_si32(vResulti));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
//------------------------------------------------------------------------------

// Store a component indexed by i into a 32 bit integer location in memory.
// x must be non-null and i must be 0-3; checked only in debug builds.
_Use_decl_annotations_
inline void XMVectorGetIntByIndexPtr(uint32_t *x, FXMVECTOR V, size_t i)
{
    assert( x != NULL );
    assert( i < 4 );
    _Analysis_assume_( i < 4 );
#if defined(_XM_NO_INTRINSICS_)
    *x = V.vector4_u32[i];
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    *x = V.n128_u32[i];
#elif defined(_XM_SSE_INTRINSICS_)
    // MSVC __m128 union member access; variable index forces a spill.
    *x = V.m128_u32[i];
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
//------------------------------------------------------------------------------

// Store the X component into a 32 bit integer location in memory.
// The lane's raw bits are copied; the float->float* store intrinsic is used
// purely as a 32-bit move, no numeric conversion occurs.
_Use_decl_annotations_
inline void XMVectorGetIntXPtr(uint32_t *x, FXMVECTOR V)
{
    assert( x != NULL );
#if defined(_XM_NO_INTRINSICS_)
    *x = V.vector4_u32[0];
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    vst1q_lane_u32(x,V,0);
#elif defined(_XM_SSE_INTRINSICS_)
    _mm_store_ss(reinterpret_cast<float *>(x),V);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

// Store the Y component into a 32 bit integer location in memory.
_Use_decl_annotations_
inline void XMVectorGetIntYPtr(uint32_t *y, FXMVECTOR V)
{
    assert( y != NULL );
#if defined(_XM_NO_INTRINSICS_)
    *y = V.vector4_u32[1];
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    vst1q_lane_u32(y,V,1);
#elif defined(_XM_SSE_INTRINSICS_)
    // Broadcast lane 1 to lane 0, then store the low 32 bits.
    XMVECTOR vResult = XM_PERMUTE_PS(V,_MM_SHUFFLE(1,1,1,1));
    _mm_store_ss(reinterpret_cast<float *>(y),vResult);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
// Store the Z component into a 32 bit integer location in memory.
_Use_decl_annotations_
inline void XMVectorGetIntZPtr(uint32_t *z, FXMVECTOR V)
{
    assert( z != NULL );
#if defined(_XM_NO_INTRINSICS_)
    *z = V.vector4_u32[2];
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    vst1q_lane_u32(z,V,2);
#elif defined(_XM_SSE_INTRINSICS_)
    // Broadcast lane 2 to lane 0, then store the low 32 bits (bit copy only).
    XMVECTOR vResult = XM_PERMUTE_PS(V,_MM_SHUFFLE(2,2,2,2));
    _mm_store_ss(reinterpret_cast<float *>(z),vResult);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
// Store the W component into a 32 bit integer location in memory.
_Use_decl_annotations_
inline void XMVectorGetIntWPtr(uint32_t *w, FXMVECTOR V)
{
    assert( w != NULL );
#if defined(_XM_NO_INTRINSICS_)
    *w = V.vector4_u32[3];
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    vst1q_lane_u32(w,V,3);
#elif defined(_XM_SSE_INTRINSICS_)
    // Broadcast lane 3 to lane 0, then store the low 32 bits (bit copy only).
    XMVECTOR vResult = XM_PERMUTE_PS(V,_MM_SHUFFLE(3,3,3,3));
    _mm_store_ss(reinterpret_cast<float *>(w),vResult);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
//------------------------------------------------------------------------------

// Set a single indexed floating point component
// Returns a copy of V with lane i replaced by f; V itself is not modified.
inline XMVECTOR XMVectorSetByIndex(FXMVECTOR V, float f, size_t i)
{
    assert( i < 4 );
    _Analysis_assume_( i < 4 );
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    U = V;
    U.vector4_f32[i] = f;
    return U;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    // Variable lane index: spill to memory, patch, reload.
    XMVECTOR U = V;
    U.n128_f32[i] = f;
    return U;
#elif defined(_XM_SSE_INTRINSICS_)
    // Variable lane index: spill to memory, patch, reload.
    XMVECTOR U = V;
    U.m128_f32[i] = f;
    return U;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
//------------------------------------------------------------------------------

// Sets the X component of a vector to a passed floating point value
// Returns the modified copy; the input vector is unchanged.
inline XMVECTOR XMVectorSetX(FXMVECTOR V, float x)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    U.vector4_f32[0] = x;
    U.vector4_f32[1] = V.vector4_f32[1];
    U.vector4_f32[2] = V.vector4_f32[2];
    U.vector4_f32[3] = V.vector4_f32[3];
    return U;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    return vsetq_lane_f32(x,V,0);
#elif defined(_XM_SSE_INTRINSICS_)
    // movss replaces only the low lane of V with x.
    XMVECTOR vResult = _mm_set_ss(x);
    vResult = _mm_move_ss(V,vResult);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

// Sets the Y component of a vector to a passed floating point value
// SSE has no "move into lane 1", so the lane is rotated to position 0,
// replaced via movss, and rotated back (the same shuffle is its own inverse).
inline XMVECTOR XMVectorSetY(FXMVECTOR V, float y)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    U.vector4_f32[0] = V.vector4_f32[0];
    U.vector4_f32[1] = y;
    U.vector4_f32[2] = V.vector4_f32[2];
    U.vector4_f32[3] = V.vector4_f32[3];
    return U;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    return vsetq_lane_f32(y,V,1);
#elif defined(_XM_SSE_INTRINSICS_)
    // Swap y and x
    XMVECTOR vResult = XM_PERMUTE_PS(V,_MM_SHUFFLE(3,2,0,1));
    // Convert input to vector
    XMVECTOR vTemp = _mm_set_ss(y);
    // Replace the x component
    vResult = _mm_move_ss(vResult,vTemp);
    // Swap y and x again
    vResult = XM_PERMUTE_PS(vResult,_MM_SHUFFLE(3,2,0,1));
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

// Sets the Z component of a vector to a passed floating point value
inline XMVECTOR XMVectorSetZ(FXMVECTOR V, float z)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    U.vector4_f32[0] = V.vector4_f32[0];
    U.vector4_f32[1] = V.vector4_f32[1];
    U.vector4_f32[2] = z;
    U.vector4_f32[3] = V.vector4_f32[3];
    return U;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    return vsetq_lane_f32(z,V,2);
#elif defined(_XM_SSE_INTRINSICS_)
    // Swap z and x
    XMVECTOR vResult = XM_PERMUTE_PS(V,_MM_SHUFFLE(3,0,1,2));
    // Convert input to vector
    XMVECTOR vTemp = _mm_set_ss(z);
    // Replace the x component
    vResult = _mm_move_ss(vResult,vTemp);
    // Swap z and x again
    vResult = XM_PERMUTE_PS(vResult,_MM_SHUFFLE(3,0,1,2));
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

// Sets the W component of a vector to a passed floating point value
inline XMVECTOR XMVectorSetW(FXMVECTOR V, float w)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    U.vector4_f32[0] = V.vector4_f32[0];
    U.vector4_f32[1] = V.vector4_f32[1];
    U.vector4_f32[2] = V.vector4_f32[2];
    U.vector4_f32[3] = w;
    return U;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    return vsetq_lane_f32(w,V,3);
#elif defined(_XM_SSE_INTRINSICS_)
    // Swap w and x
    XMVECTOR vResult = XM_PERMUTE_PS(V,_MM_SHUFFLE(0,2,1,3));
    // Convert input to vector
    XMVECTOR vTemp = _mm_set_ss(w);
    // Replace the x component
    vResult = _mm_move_ss(vResult,vTemp);
    // Swap w and x again
    vResult = XM_PERMUTE_PS(vResult,_MM_SHUFFLE(0,2,1,3));
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
//------------------------------------------------------------------------------

// Sets a component of a vector to a floating point value passed by pointer
// Returns a copy of V with lane i replaced by *f; V itself is not modified.
_Use_decl_annotations_
inline XMVECTOR XMVectorSetByIndexPtr(FXMVECTOR V, const float *f, size_t i)
{
    assert( f != NULL );
    assert( i < 4 );
    _Analysis_assume_( i < 4 );
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    U = V;
    U.vector4_f32[i] = *f;
    return U;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    // Variable lane index: spill to memory, patch, reload.
    XMVECTOR U = V;
    U.n128_f32[i] = *f;
    return U;
#elif defined(_XM_SSE_INTRINSICS_)
    // Variable lane index: spill to memory, patch, reload.
    XMVECTOR U = V;
    U.m128_f32[i] = *f;
    return U;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
//------------------------------------------------------------------------------

// Sets the X component of a vector to a floating point value passed by pointer
// Returns the modified copy; the input vector is unchanged.
_Use_decl_annotations_
inline XMVECTOR XMVectorSetXPtr(FXMVECTOR V, const float *x)
{
    assert( x != NULL );
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    U.vector4_f32[0] = *x;
    U.vector4_f32[1] = V.vector4_f32[1];
    U.vector4_f32[2] = V.vector4_f32[2];
    U.vector4_f32[3] = V.vector4_f32[3];
    return U;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    return vld1q_lane_f32(x,V,0);
#elif defined(_XM_SSE_INTRINSICS_)
    // Load *x into lane 0 of a fresh register, then merge it into V via movss.
    XMVECTOR vResult = _mm_load_ss(x);
    vResult = _mm_move_ss(V,vResult);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

// Sets the Y component of a vector to a floating point value passed by pointer
// SSE: rotate lane 1 to position 0, movss, rotate back (shuffle is self-inverse).
_Use_decl_annotations_
inline XMVECTOR XMVectorSetYPtr(FXMVECTOR V, const float *y)
{
    assert( y != NULL );
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    U.vector4_f32[0] = V.vector4_f32[0];
    U.vector4_f32[1] = *y;
    U.vector4_f32[2] = V.vector4_f32[2];
    U.vector4_f32[3] = V.vector4_f32[3];
    return U;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    return vld1q_lane_f32(y,V,1);
#elif defined(_XM_SSE_INTRINSICS_)
    // Swap y and x
    XMVECTOR vResult = XM_PERMUTE_PS(V,_MM_SHUFFLE(3,2,0,1));
    // Convert input to vector
    XMVECTOR vTemp = _mm_load_ss(y);
    // Replace the x component
    vResult = _mm_move_ss(vResult,vTemp);
    // Swap y and x again
    vResult = XM_PERMUTE_PS(vResult,_MM_SHUFFLE(3,2,0,1));
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

// Sets the Z component of a vector to a floating point value passed by pointer
_Use_decl_annotations_
inline XMVECTOR XMVectorSetZPtr(FXMVECTOR V, const float *z)
{
    assert( z != NULL );
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    U.vector4_f32[0] = V.vector4_f32[0];
    U.vector4_f32[1] = V.vector4_f32[1];
    U.vector4_f32[2] = *z;
    U.vector4_f32[3] = V.vector4_f32[3];
    return U;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    return vld1q_lane_f32(z,V,2);
#elif defined(_XM_SSE_INTRINSICS_)
    // Swap z and x
    XMVECTOR vResult = XM_PERMUTE_PS(V,_MM_SHUFFLE(3,0,1,2));
    // Convert input to vector
    XMVECTOR vTemp = _mm_load_ss(z);
    // Replace the x component
    vResult = _mm_move_ss(vResult,vTemp);
    // Swap z and x again
    vResult = XM_PERMUTE_PS(vResult,_MM_SHUFFLE(3,0,1,2));
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

// Sets the W component of a vector to a floating point value passed by pointer
_Use_decl_annotations_
inline XMVECTOR XMVectorSetWPtr(FXMVECTOR V, const float *w)
{
    assert( w != NULL );
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    U.vector4_f32[0] = V.vector4_f32[0];
    U.vector4_f32[1] = V.vector4_f32[1];
    U.vector4_f32[2] = V.vector4_f32[2];
    U.vector4_f32[3] = *w;
    return U;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    return vld1q_lane_f32(w,V,3);
#elif defined(_XM_SSE_INTRINSICS_)
    // Swap w and x
    XMVECTOR vResult = XM_PERMUTE_PS(V,_MM_SHUFFLE(0,2,1,3));
    // Convert input to vector
    XMVECTOR vTemp = _mm_load_ss(w);
    // Replace the x component
    vResult = _mm_move_ss(vResult,vTemp);
    // Swap w and x again
    vResult = XM_PERMUTE_PS(vResult,_MM_SHUFFLE(0,2,1,3));
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
//------------------------------------------------------------------------------

// Sets a component of a vector to an integer passed by value
// The raw 32-bit value is written into lane i (bit copy, no conversion).
inline XMVECTOR XMVectorSetIntByIndex(FXMVECTOR V, uint32_t x, size_t i)
{
    assert( i < 4 );
    _Analysis_assume_( i < 4 );
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    U = V;
    U.vector4_u32[i] = x;
    return U;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    // Spill through the XMVECTORU32 union to patch a runtime-indexed lane.
    XMVECTORU32 tmp;
    tmp.v = V;
    tmp.u[i] = x;
    return tmp;
#elif defined(_XM_SSE_INTRINSICS_)
    // Spill through the XMVECTORU32 union to patch a runtime-indexed lane.
    XMVECTORU32 tmp;
    tmp.v = V;
    tmp.u[i] = x;
    return tmp;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
//------------------------------------------------------------------------------

// Sets the X component of a vector to an integer passed by value
// The raw 32-bit value is written into the lane (bit copy, no conversion).
inline XMVECTOR XMVectorSetIntX(FXMVECTOR V, uint32_t x)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    U.vector4_u32[0] = x;
    U.vector4_u32[1] = V.vector4_u32[1];
    U.vector4_u32[2] = V.vector4_u32[2];
    U.vector4_u32[3] = V.vector4_u32[3];
    return U;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    return vsetq_lane_u32(x,V,0);
#elif defined(_XM_SSE_INTRINSICS_)
    // Move x into lane 0 of an integer register, then merge via movss.
    __m128i vTemp = _mm_cvtsi32_si128(x);
    XMVECTOR vResult = _mm_move_ss(V,_mm_castsi128_ps(vTemp));
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

// Sets the Y component of a vector to an integer passed by value
// SSE: rotate lane 1 to position 0, movss, rotate back (shuffle is self-inverse).
inline XMVECTOR XMVectorSetIntY(FXMVECTOR V, uint32_t y)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    U.vector4_u32[0] = V.vector4_u32[0];
    U.vector4_u32[1] = y;
    U.vector4_u32[2] = V.vector4_u32[2];
    U.vector4_u32[3] = V.vector4_u32[3];
    return U;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    return vsetq_lane_u32(y,V,1);
#elif defined(_XM_SSE_INTRINSICS_)
    // Swap y and x
    XMVECTOR vResult = XM_PERMUTE_PS(V,_MM_SHUFFLE(3,2,0,1));
    // Convert input to vector
    __m128i vTemp = _mm_cvtsi32_si128(y);
    // Replace the x component
    vResult = _mm_move_ss(vResult,_mm_castsi128_ps(vTemp));
    // Swap y and x again
    vResult = XM_PERMUTE_PS(vResult,_MM_SHUFFLE(3,2,0,1));
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

// Sets the Z component of a vector to an integer passed by value
inline XMVECTOR XMVectorSetIntZ(FXMVECTOR V, uint32_t z)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    U.vector4_u32[0] = V.vector4_u32[0];
    U.vector4_u32[1] = V.vector4_u32[1];
    U.vector4_u32[2] = z;
    U.vector4_u32[3] = V.vector4_u32[3];
    return U;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    return vsetq_lane_u32(z,V,2);
#elif defined(_XM_SSE_INTRINSICS_)
    // Swap z and x
    XMVECTOR vResult = XM_PERMUTE_PS(V,_MM_SHUFFLE(3,0,1,2));
    // Convert input to vector
    __m128i vTemp = _mm_cvtsi32_si128(z);
    // Replace the x component
    vResult = _mm_move_ss(vResult,_mm_castsi128_ps(vTemp));
    // Swap z and x again
    vResult = XM_PERMUTE_PS(vResult,_MM_SHUFFLE(3,0,1,2));
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

// Sets the W component of a vector to an integer passed by value
inline XMVECTOR XMVectorSetIntW(FXMVECTOR V, uint32_t w)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    U.vector4_u32[0] = V.vector4_u32[0];
    U.vector4_u32[1] = V.vector4_u32[1];
    U.vector4_u32[2] = V.vector4_u32[2];
    U.vector4_u32[3] = w;
    return U;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    return vsetq_lane_u32(w,V,3);
#elif defined(_XM_SSE_INTRINSICS_)
    // Swap w and x
    XMVECTOR vResult = XM_PERMUTE_PS(V,_MM_SHUFFLE(0,2,1,3));
    // Convert input to vector
    __m128i vTemp = _mm_cvtsi32_si128(w);
    // Replace the x component
    vResult = _mm_move_ss(vResult,_mm_castsi128_ps(vTemp));
    // Swap w and x again
    vResult = XM_PERMUTE_PS(vResult,_MM_SHUFFLE(0,2,1,3));
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
//------------------------------------------------------------------------------

// Sets a component of a vector to an integer value passed by pointer
// The raw 32-bit value is written into lane i (bit copy, no conversion).
_Use_decl_annotations_
inline XMVECTOR XMVectorSetIntByIndexPtr(FXMVECTOR V, const uint32_t *x, size_t i)
{
    assert( x != NULL );
    assert( i < 4 );
    _Analysis_assume_( i < 4 );
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    U = V;
    U.vector4_u32[i] = *x;
    return U;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    // Spill through the XMVECTORU32 union to patch a runtime-indexed lane.
    XMVECTORU32 tmp;
    tmp.v = V;
    tmp.u[i] = *x;
    return tmp;
#elif defined(_XM_SSE_INTRINSICS_)
    // Spill through the XMVECTORU32 union to patch a runtime-indexed lane.
    XMVECTORU32 tmp;
    tmp.v = V;
    tmp.u[i] = *x;
    return tmp;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
//------------------------------------------------------------------------------

// Sets the X component of a vector to an integer value passed by pointer
// The 32-bit value is loaded as raw bits; the float load intrinsic is used
// purely as a 32-bit move, no numeric conversion occurs.
_Use_decl_annotations_
inline XMVECTOR XMVectorSetIntXPtr(FXMVECTOR V, const uint32_t *x)
{
    assert( x != NULL );
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    U.vector4_u32[0] = *x;
    U.vector4_u32[1] = V.vector4_u32[1];
    U.vector4_u32[2] = V.vector4_u32[2];
    U.vector4_u32[3] = V.vector4_u32[3];
    return U;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    return vld1q_lane_u32(x,V,0);
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_load_ss(reinterpret_cast<const float *>(x));
    XMVECTOR vResult = _mm_move_ss(V,vTemp);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

// Sets the Y component of a vector to an integer value passed by pointer
// SSE: rotate lane 1 to position 0, movss, rotate back (shuffle is self-inverse).
_Use_decl_annotations_
inline XMVECTOR XMVectorSetIntYPtr(FXMVECTOR V, const uint32_t *y)
{
    assert( y != NULL );
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    U.vector4_u32[0] = V.vector4_u32[0];
    U.vector4_u32[1] = *y;
    U.vector4_u32[2] = V.vector4_u32[2];
    U.vector4_u32[3] = V.vector4_u32[3];
    return U;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    return vld1q_lane_u32(y,V,1);
#elif defined(_XM_SSE_INTRINSICS_)
    // Swap y and x
    XMVECTOR vResult = XM_PERMUTE_PS(V,_MM_SHUFFLE(3,2,0,1));
    // Convert input to vector
    XMVECTOR vTemp = _mm_load_ss(reinterpret_cast<const float *>(y));
    // Replace the x component
    vResult = _mm_move_ss(vResult,vTemp);
    // Swap y and x again
    vResult = XM_PERMUTE_PS(vResult,_MM_SHUFFLE(3,2,0,1));
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

// Sets the Z component of a vector to an integer value passed by pointer
_Use_decl_annotations_
inline XMVECTOR XMVectorSetIntZPtr(FXMVECTOR V, const uint32_t *z)
{
    assert( z != NULL );
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    U.vector4_u32[0] = V.vector4_u32[0];
    U.vector4_u32[1] = V.vector4_u32[1];
    U.vector4_u32[2] = *z;
    U.vector4_u32[3] = V.vector4_u32[3];
    return U;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    return vld1q_lane_u32(z,V,2);
#elif defined(_XM_SSE_INTRINSICS_)
    // Swap z and x
    XMVECTOR vResult = XM_PERMUTE_PS(V,_MM_SHUFFLE(3,0,1,2));
    // Convert input to vector
    XMVECTOR vTemp = _mm_load_ss(reinterpret_cast<const float *>(z));
    // Replace the x component
    vResult = _mm_move_ss(vResult,vTemp);
    // Swap z and x again
    vResult = XM_PERMUTE_PS(vResult,_MM_SHUFFLE(3,0,1,2));
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

// Sets the W component of a vector to an integer value passed by pointer
_Use_decl_annotations_
inline XMVECTOR XMVectorSetIntWPtr(FXMVECTOR V, const uint32_t *w)
{
    assert( w != NULL );
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    U.vector4_u32[0] = V.vector4_u32[0];
    U.vector4_u32[1] = V.vector4_u32[1];
    U.vector4_u32[2] = V.vector4_u32[2];
    U.vector4_u32[3] = *w;
    return U;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    return vld1q_lane_u32(w,V,3);
#elif defined(_XM_SSE_INTRINSICS_)
    // Swap w and x
    XMVECTOR vResult = XM_PERMUTE_PS(V,_MM_SHUFFLE(0,2,1,3));
    // Convert input to vector
    XMVECTOR vTemp = _mm_load_ss(reinterpret_cast<const float *>(w));
    // Replace the x component
    vResult = _mm_move_ss(vResult,vTemp);
    // Swap w and x again
    vResult = XM_PERMUTE_PS(vResult,_MM_SHUFFLE(0,2,1,3));
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
//------------------------------------------------------------------------------

// Rearrange the components of V using runtime lane indices E0..E3 (each 0-3);
// result lane k receives V's component Ek.
// Note the branch layout: the VMX128 case is an empty #elif, so the generic
// pointer-based path in the final #else serves BOTH the no-intrinsics build
// and the SSE build (there is no shuffle-with-runtime-control in classic SSE).
inline XMVECTOR XMVectorSwizzle
(
    FXMVECTOR V,
    uint32_t E0,
    uint32_t E1,
    uint32_t E2,
    uint32_t E3
)
{
    assert( (E0 < 4) && (E1 < 4) && (E2 < 4) && (E3 < 4) );
    _Analysis_assume_( (E0 < 4) && (E1 < 4) && (E2 < 4) && (E3 < 4) );
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result = { V.vector4_f32[E0],
                        V.vector4_f32[E1],
                        V.vector4_f32[E2],
                        V.vector4_f32[E3] };
    return Result;

#elif defined(_XM_ARM_NEON_INTRINSICS_)
    // Each entry is the 4 byte offsets of one 32-bit lane, used as a VTBL
    // control word so a single table lookup performs the swizzle.
    static const uint32_t ControlElement[ 4 ] =
    {
#ifdef _XM_LITTLEENDIAN_
        0x03020100, // XM_SWIZZLE_X
        0x07060504, // XM_SWIZZLE_Y
        0x0B0A0908, // XM_SWIZZLE_Z
        0x0F0E0D0C, // XM_SWIZZLE_W
#else
        0x00010203, // XM_SWIZZLE_X
        0x04050607, // XM_SWIZZLE_Y
        0x08090A0B, // XM_SWIZZLE_Z
        0x0C0D0E0F, // XM_SWIZZLE_W
#endif
    };

    // The 128-bit source is presented to VTBL as two 64-bit table halves.
    int8x8x2_t tbl;
    tbl.val[0] = vget_low_f32(V);
    tbl.val[1] = vget_high_f32(V);

    __n64 idx = vcreate_u32( ((uint64_t)ControlElement[E0]) | (((uint64_t)ControlElement[E1]) << 32) );
    const __n64 rL = vtbl2_u8( tbl, idx );

    idx = vcreate_u32( ((uint64_t)ControlElement[E2]) | (((uint64_t)ControlElement[E3]) << 32) );
    const __n64 rH = vtbl2_u8( tbl, idx );

    return vcombine_f32( rL, rH );
#elif defined(_XM_VMX128_INTRINSICS_)
#else
    // Generic path (also used by SSE): copy the lanes as raw 32-bit words.
    // NOTE(review): relies on pointer-casting an XMVECTOR to uint32_t* —
    // type punning that the MSVC toolchains this targets tolerate.
    const uint32_t *aPtr = (const uint32_t* )(&V);

    XMVECTOR Result;
    uint32_t *pWork = (uint32_t*)(&Result);

    pWork[0] = aPtr[E0];
    pWork[1] = aPtr[E1];
    pWork[2] = aPtr[E2];
    pWork[3] = aPtr[E3];

    return Result;
#endif
}
+
//------------------------------------------------------------------------------
// Build a vector by selecting components from two source vectors using runtime
// indices 0-7: 0-3 select x,y,z,w of V1; 4-7 select x,y,z,w of V2.
// As with XMVectorSwizzle, the VMX128 case is an empty #elif, so the generic
// pointer-based path in the final #else serves both no-intrinsics and SSE.
inline XMVECTOR XMVectorPermute
(
    FXMVECTOR V1,
    FXMVECTOR V2,
    uint32_t PermuteX,
    uint32_t PermuteY,
    uint32_t PermuteZ,
    uint32_t PermuteW
)
{
    assert( PermuteX <= 7 && PermuteY <= 7 && PermuteZ <= 7 && PermuteW <= 7 );
    _Analysis_assume_( PermuteX <= 7 && PermuteY <= 7 && PermuteZ <= 7 && PermuteW <= 7 );

#if defined(_XM_ARM_NEON_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
    // Byte offsets of each selectable 32-bit lane across the combined
    // 32-byte (V1,V2) table, used as VTBL control words.
    static const uint32_t ControlElement[ 8 ] =
    {
#ifdef _XM_LITTLEENDIAN_
        0x03020100, // XM_PERMUTE_0X
        0x07060504, // XM_PERMUTE_0Y
        0x0B0A0908, // XM_PERMUTE_0Z
        0x0F0E0D0C, // XM_PERMUTE_0W
        0x13121110, // XM_PERMUTE_1X
        0x17161514, // XM_PERMUTE_1Y
        0x1B1A1918, // XM_PERMUTE_1Z
        0x1F1E1D1C, // XM_PERMUTE_1W
#else
        0x00010203, // XM_PERMUTE_0X
        0x04050607, // XM_PERMUTE_0Y
        0x08090A0B, // XM_PERMUTE_0Z
        0x0C0D0E0F, // XM_PERMUTE_0W
        0x10111213, // XM_PERMUTE_1X
        0x14151617, // XM_PERMUTE_1Y
        0x18191A1B, // XM_PERMUTE_1Z
        0x1C1D1E1F, // XM_PERMUTE_1W
#endif
    };

    // Both sources form one four-entry 64-bit VTBL table.
    int8x8x4_t tbl;
    tbl.val[0] = vget_low_f32(V1);
    tbl.val[1] = vget_high_f32(V1);
    tbl.val[2] = vget_low_f32(V2);
    tbl.val[3] = vget_high_f32(V2);

    __n64 idx = vcreate_u32( ((uint64_t)ControlElement[PermuteX]) | (((uint64_t)ControlElement[PermuteY]) << 32) );
    const __n64 rL = vtbl4_u8( tbl, idx );

    idx = vcreate_u32( ((uint64_t)ControlElement[PermuteZ]) | (((uint64_t)ControlElement[PermuteW]) << 32) );
    const __n64 rH = vtbl4_u8( tbl, idx );

    return vcombine_f32( rL, rH );
#elif defined(_XM_VMX128_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
#else
    // Generic path (also used by SSE): low 2 bits of each index pick the
    // lane, bit 2 picks the source vector; lanes are copied as raw words.
    // NOTE(review): relies on pointer-casting XMVECTORs to uint32_t* —
    // type punning that the MSVC toolchains this targets tolerate.
    const uint32_t *aPtr[2];
    aPtr[0] = (const uint32_t* )(&V1);
    aPtr[1] = (const uint32_t* )(&V2);

    XMVECTOR Result;
    uint32_t *pWork = (uint32_t*)(&Result);

    const uint32_t i0 = PermuteX & 3;
    const uint32_t vi0 = PermuteX >> 2;
    pWork[0] = aPtr[vi0][i0];

    const uint32_t i1 = PermuteY & 3;
    const uint32_t vi1 = PermuteY >> 2;
    pWork[1] = aPtr[vi1][i1];

    const uint32_t i2 = PermuteZ & 3;
    const uint32_t vi2 = PermuteZ >> 2;
    pWork[2] = aPtr[vi2][i2];

    const uint32_t i3 = PermuteW & 3;
    const uint32_t vi3 = PermuteW >> 2;
    pWork[3] = aPtr[vi3][i3];

    return Result;
#endif
}
+
+//------------------------------------------------------------------------------
+// Define a control vector to be used in XMVectorSelect
+// operations. The four integers specified in XMVectorSelectControl
+// serve as indices to select between components in two vectors.
+// The first index controls selection for the first component of
+// the vectors involved in a select operation, the second index
+// controls selection for the second component etc. A value of
+// zero for an index causes the corresponding component from the first
+// vector to be selected whereas a one causes the component from the
+// second vector to be selected instead.
+
+// Build a per-component select mask for use with XMVectorSelect.
+// Each index must be 0 or 1: 0 selects the component from the first vector,
+// 1 selects the component from the second vector.
+inline XMVECTOR XMVectorSelectControl
+(
+ uint32_t VectorIndex0,
+ uint32_t VectorIndex1,
+ uint32_t VectorIndex2,
+ uint32_t VectorIndex3
+)
+{
+#if defined(_XM_SSE_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
+ // x=Index0,y=Index1,z=Index2,w=Index3
+ __m128i vTemp = _mm_set_epi32(VectorIndex3,VectorIndex2,VectorIndex1,VectorIndex0);
+ // Any non-zero entries become 0xFFFFFFFF else 0
+ vTemp = _mm_cmpgt_epi32(vTemp,g_XMZero);
+ return reinterpret_cast<__m128 *>(&vTemp)[0];
+#elif defined(_XM_ARM_NEON_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
+ // Pack the four indices into one 128-bit register, two per 64-bit half
+ __n64 V0 = vcreate_s32(((uint64_t)VectorIndex0) | ((uint64_t)VectorIndex1 << 32));
+ __n64 V1 = vcreate_s32(((uint64_t)VectorIndex2) | ((uint64_t)VectorIndex3 << 32));
+ __n128 vTemp = vcombine_s32(V0, V1);
+ // Any non-zero entries become 0xFFFFFFFF else 0
+ return vcgtq_s32(vTemp,g_XMZero);
+#else
+ // Scalar path: map each 0/1 index to the all-zeros / all-ones mask constant.
+ XMVECTOR ControlVector;
+ const uint32_t ControlElement[] =
+ {
+ XM_SELECT_0,
+ XM_SELECT_1
+ };
+
+ assert(VectorIndex0 < 2);
+ assert(VectorIndex1 < 2);
+ assert(VectorIndex2 < 2);
+ assert(VectorIndex3 < 2);
+ _Analysis_assume_(VectorIndex0 < 2);
+ _Analysis_assume_(VectorIndex1 < 2);
+ _Analysis_assume_(VectorIndex2 < 2);
+ _Analysis_assume_(VectorIndex3 < 2);
+
+ ControlVector.vector4_u32[0] = ControlElement[VectorIndex0];
+ ControlVector.vector4_u32[1] = ControlElement[VectorIndex1];
+ ControlVector.vector4_u32[2] = ControlElement[VectorIndex2];
+ ControlVector.vector4_u32[3] = ControlElement[VectorIndex3];
+
+ return ControlVector;
+
+#endif
+}
+
+//------------------------------------------------------------------------------
+
+// Bitwise component select: for every bit, take it from V2 where the
+// corresponding Control bit is set, otherwise take it from V1. Control is
+// normally produced by XMVectorSelectControl or a comparison function.
+inline XMVECTOR XMVectorSelect
+(
+ FXMVECTOR V1,
+ FXMVECTOR V2,
+ FXMVECTOR Control
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+
+ XMVECTOR Result;
+ for (uint32_t i = 0; i < 4; ++i)
+ {
+ // (V1 & ~C) | (V2 & C) merges the two sources bit-by-bit
+ Result.vector4_u32[i] = (V1.vector4_u32[i] & ~Control.vector4_u32[i]) | (V2.vector4_u32[i] & Control.vector4_u32[i]);
+ }
+ return Result;
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ return vbslq_f32( Control, V2, V1 );
+#elif defined(_XM_SSE_INTRINSICS_)
+ XMVECTOR vKeep = _mm_andnot_ps(Control,V1);
+ XMVECTOR vTake = _mm_and_ps(V2,Control);
+ return _mm_or_ps(vKeep,vTake);
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Interleave the low halves of two vectors: { V1.x, V2.x, V1.y, V2.y }.
+// Operates on the raw 32-bit lanes, so it works for float or integer data.
+inline XMVECTOR XMVectorMergeXY
+(
+ FXMVECTOR V1,
+ FXMVECTOR V2
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+
+ XMVECTOR Result;
+ for (uint32_t i = 0; i < 2; ++i)
+ {
+ Result.vector4_u32[i * 2] = V1.vector4_u32[i];
+ Result.vector4_u32[i * 2 + 1] = V2.vector4_u32[i];
+ }
+ return Result;
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ return vzipq_f32( V1, V2 ).val[0];
+#elif defined(_XM_SSE_INTRINSICS_)
+ return _mm_unpacklo_ps( V1, V2 );
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Interleave the high halves of two vectors: { V1.z, V2.z, V1.w, V2.w }.
+// Operates on the raw 32-bit lanes, so it works for float or integer data.
+inline XMVECTOR XMVectorMergeZW
+(
+ FXMVECTOR V1,
+ FXMVECTOR V2
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+
+ XMVECTOR Result;
+ for (uint32_t i = 0; i < 2; ++i)
+ {
+ Result.vector4_u32[i * 2] = V1.vector4_u32[i + 2];
+ Result.vector4_u32[i * 2 + 1] = V2.vector4_u32[i + 2];
+ }
+ return Result;
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ return vzipq_f32( V1, V2 ).val[1];
+#elif defined(_XM_SSE_INTRINSICS_)
+ return _mm_unpackhi_ps( V1, V2 );
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Shift the 8-component concatenation (V1,V2) left by Elements positions and
+// return the first four components of the result. Elements must be 0-3.
+inline XMVECTOR XMVectorShiftLeft(FXMVECTOR V1, FXMVECTOR V2, uint32_t Elements)
+{
+ assert( Elements < 4 );
+ _Analysis_assume_( Elements < 4 );
+ // Permute indices 0-3 select from V1, 4-7 from V2; a run of four
+ // consecutive indices starting at Elements implements the shift.
+ const uint32_t base = Elements;
+ return XMVectorPermute(V1, V2, base, base + 1, base + 2, base + 3);
+}
+
+//------------------------------------------------------------------------------
+
+// Rotate the components of V left by Elements positions (x moves toward w,
+// wrapping around). Elements must be 0-3.
+inline XMVECTOR XMVectorRotateLeft(FXMVECTOR V, uint32_t Elements)
+{
+ assert( Elements < 4 );
+ _Analysis_assume_( Elements < 4 );
+ // Swizzle indices wrap modulo 4 so components rotate instead of shifting out.
+ const uint32_t e0 = Elements & 3;
+ return XMVectorSwizzle( V, e0, (e0 + 1) & 3, (e0 + 2) & 3, (e0 + 3) & 3 );
+}
+
+//------------------------------------------------------------------------------
+
+// Rotate the components of V right by Elements positions (w moves toward x,
+// wrapping around). Elements must be 0-3.
+inline XMVECTOR XMVectorRotateRight(FXMVECTOR V, uint32_t Elements)
+{
+ assert( Elements < 4 );
+ _Analysis_assume_( Elements < 4 );
+ // Rotating right by N is the same as rotating left by (4 - N) mod 4.
+ const uint32_t e0 = (4 - Elements) & 3;
+ return XMVectorSwizzle( V, e0, (e0 + 1) & 3, (e0 + 2) & 3, (e0 + 3) & 3 );
+}
+
+//------------------------------------------------------------------------------
+
+// Rotate VS left by VSLeftRotateElements, then per-component select the
+// rotated source (select flag 1) or the destination VD (select flag 0).
+// Only the low bit of each SelectN flag is honored.
+inline XMVECTOR XMVectorInsert(FXMVECTOR VD, FXMVECTOR VS, uint32_t VSLeftRotateElements,
+ uint32_t Select0, uint32_t Select1, uint32_t Select2, uint32_t Select3)
+{
+ XMVECTOR Rotated = XMVectorRotateLeft(VS, VSLeftRotateElements);
+ XMVECTOR Control = XMVectorSelectControl(Select0 & 1, Select1 & 1, Select2 & 1, Select3 & 1);
+ return XMVectorSelect( VD, Rotated, Control );
+}
+
+//------------------------------------------------------------------------------
+// Comparison operations
+//------------------------------------------------------------------------------
+
+//------------------------------------------------------------------------------
+
+// Component-wise float equality: returns 0xFFFFFFFF in each lane where
+// V1 == V2, zero otherwise.
+inline XMVECTOR XMVectorEqual
+(
+ FXMVECTOR V1,
+ FXMVECTOR V2
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+
+ XMVECTOR Control;
+ for (uint32_t i = 0; i < 4; ++i)
+ {
+ Control.vector4_u32[i] = (V1.vector4_f32[i] == V2.vector4_f32[i]) ? 0xFFFFFFFF : 0;
+ }
+ return Control;
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ return vceqq_f32( V1, V2 );
+#elif defined(_XM_SSE_INTRINSICS_)
+ return _mm_cmpeq_ps( V1, V2 );
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Component-wise float equality with a CR6-style summary written to pCR:
+// XM_CRMASK_CR6TRUE when every component is equal, XM_CRMASK_CR6FALSE when
+// no component is equal, 0 otherwise. Returns the per-component mask.
+_Use_decl_annotations_
+inline XMVECTOR XMVectorEqualR
+(
+ uint32_t* pCR,
+ FXMVECTOR V1,
+ FXMVECTOR V2
+)
+{
+ assert( pCR != NULL );
+#if defined(_XM_NO_INTRINSICS_)
+ uint32_t ux = (V1.vector4_f32[0] == V2.vector4_f32[0]) ? 0xFFFFFFFFU : 0;
+ uint32_t uy = (V1.vector4_f32[1] == V2.vector4_f32[1]) ? 0xFFFFFFFFU : 0;
+ uint32_t uz = (V1.vector4_f32[2] == V2.vector4_f32[2]) ? 0xFFFFFFFFU : 0;
+ uint32_t uw = (V1.vector4_f32[3] == V2.vector4_f32[3]) ? 0xFFFFFFFFU : 0;
+ uint32_t CR = 0;
+ if (ux&uy&uz&uw)
+ {
+ // All elements are equal
+ CR = XM_CRMASK_CR6TRUE;
+ }
+ else if (!(ux|uy|uz|uw))
+ {
+ // No elements are equal
+ CR = XM_CRMASK_CR6FALSE;
+ }
+ *pCR = CR;
+
+ XMVECTOR Control;
+ Control.vector4_u32[0] = ux;
+ Control.vector4_u32[1] = uy;
+ Control.vector4_u32[2] = uz;
+ Control.vector4_u32[3] = uw;
+ return Control;
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ __n128 vResult = vceqq_f32( V1, V2 );
+ // Byte-zip sequence condenses the 128-bit lane mask into one 32-bit word:
+ // r is all-ones only if every lane is all-ones, zero only if all are zero.
+ int8x8x2_t vTemp = vzip_u8(vget_low_u8(vResult), vget_high_u8(vResult));
+ vTemp = vzip_u16(vTemp.val[0], vTemp.val[1]);
+ uint32_t r = vget_lane_u32(vTemp.val[1], 1);
+ uint32_t CR = 0;
+ if ( r == 0xFFFFFFFFU )
+ {
+ // All elements are equal
+ CR = XM_CRMASK_CR6TRUE;
+ }
+ else if ( !r )
+ {
+ // All elements are not equal
+ CR = XM_CRMASK_CR6FALSE;
+ }
+ *pCR = CR;
+ return vResult;
+#elif defined(_XM_SSE_INTRINSICS_)
+ XMVECTOR vTemp = _mm_cmpeq_ps(V1,V2);
+ uint32_t CR = 0;
+ // movemask gathers one sign bit per lane: 0xf means all lanes matched
+ int iTest = _mm_movemask_ps(vTemp);
+ if (iTest==0xf)
+ {
+ CR = XM_CRMASK_CR6TRUE;
+ }
+ else if (!iTest)
+ {
+ // No elements are equal
+ CR = XM_CRMASK_CR6FALSE;
+ }
+ *pCR = CR;
+ return vTemp;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+// Treat the components of the vectors as unsigned integers and
+// compare individual bits between the two. This is useful for
+// comparing control vectors and result vectors returned from
+// other comparison operations.
+
+// Component-wise bit-pattern equality: treats each lane as a 32-bit unsigned
+// integer and returns 0xFFFFFFFF where the bits match, zero otherwise.
+// Useful for comparing control/mask vectors.
+inline XMVECTOR XMVectorEqualInt
+(
+ FXMVECTOR V1,
+ FXMVECTOR V2
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+
+ XMVECTOR Control;
+ for (uint32_t i = 0; i < 4; ++i)
+ {
+ Control.vector4_u32[i] = (V1.vector4_u32[i] == V2.vector4_u32[i]) ? 0xFFFFFFFF : 0;
+ }
+ return Control;
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ return vceqq_u32( V1, V2 );
+#elif defined(_XM_SSE_INTRINSICS_)
+ __m128i vMask = _mm_cmpeq_epi32( _mm_castps_si128(V1),_mm_castps_si128(V2) );
+ return reinterpret_cast<__m128 *>(&vMask)[0];
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Component-wise bit-pattern equality with a CR6-style summary written to
+// pCR: XM_CRMASK_CR6TRUE when all lanes match, XM_CRMASK_CR6FALSE when none
+// do, 0 otherwise. Returns the per-component mask.
+_Use_decl_annotations_
+inline XMVECTOR XMVectorEqualIntR
+(
+ uint32_t* pCR,
+ FXMVECTOR V1,
+ FXMVECTOR V2
+)
+{
+ assert( pCR != NULL );
+#if defined(_XM_NO_INTRINSICS_)
+
+ XMVECTOR Control = XMVectorEqualInt(V1, V2);
+
+ *pCR = 0;
+ if (XMVector4EqualInt(Control, XMVectorTrueInt()))
+ {
+ // All elements are equal
+ *pCR |= XM_CRMASK_CR6TRUE;
+ }
+ else if (XMVector4EqualInt(Control, XMVectorFalseInt()))
+ {
+ // All elements are not equal
+ *pCR |= XM_CRMASK_CR6FALSE;
+ }
+ return Control;
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ __n128 vResult = vceqq_u32( V1, V2 );
+ // Byte-zip sequence condenses the 128-bit lane mask into one 32-bit word:
+ // r is all-ones only if every lane is all-ones, zero only if all are zero.
+ int8x8x2_t vTemp = vzip_u8(vget_low_u8(vResult), vget_high_u8(vResult));
+ vTemp = vzip_u16(vTemp.val[0], vTemp.val[1]);
+ uint32_t r = vget_lane_u32(vTemp.val[1], 1);
+ uint32_t CR = 0;
+ if ( r == 0xFFFFFFFFU )
+ {
+ // All elements are equal
+ CR = XM_CRMASK_CR6TRUE;
+ }
+ else if ( !r )
+ {
+ // All elements are not equal
+ CR = XM_CRMASK_CR6FALSE;
+ }
+ *pCR = CR;
+ return vResult;
+#elif defined(_XM_SSE_INTRINSICS_)
+ __m128i V = _mm_cmpeq_epi32( _mm_castps_si128(V1),_mm_castps_si128(V2) );
+ // movemask gathers one sign bit per lane: 0x0F means all lanes matched
+ int iTemp = _mm_movemask_ps(reinterpret_cast<const __m128*>(&V)[0]);
+ uint32_t CR = 0;
+ if (iTemp==0x0F)
+ {
+ CR = XM_CRMASK_CR6TRUE;
+ }
+ else if (!iTemp)
+ {
+ CR = XM_CRMASK_CR6FALSE;
+ }
+ *pCR = CR;
+ return reinterpret_cast<__m128 *>(&V)[0];
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Component-wise tolerance compare: returns 0xFFFFFFFF in each lane where
+// |V1 - V2| <= Epsilon, zero otherwise. Epsilon is a per-component bound.
+inline XMVECTOR XMVectorNearEqual
+(
+ FXMVECTOR V1,
+ FXMVECTOR V2,
+ FXMVECTOR Epsilon
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+
+ float fDeltax = V1.vector4_f32[0]-V2.vector4_f32[0];
+ float fDeltay = V1.vector4_f32[1]-V2.vector4_f32[1];
+ float fDeltaz = V1.vector4_f32[2]-V2.vector4_f32[2];
+ float fDeltaw = V1.vector4_f32[3]-V2.vector4_f32[3];
+
+ fDeltax = fabsf(fDeltax);
+ fDeltay = fabsf(fDeltay);
+ fDeltaz = fabsf(fDeltaz);
+ fDeltaw = fabsf(fDeltaw);
+
+ XMVECTOR Control;
+ Control.vector4_u32[0] = (fDeltax <= Epsilon.vector4_f32[0]) ? 0xFFFFFFFFU : 0;
+ Control.vector4_u32[1] = (fDeltay <= Epsilon.vector4_f32[1]) ? 0xFFFFFFFFU : 0;
+ Control.vector4_u32[2] = (fDeltaz <= Epsilon.vector4_f32[2]) ? 0xFFFFFFFFU : 0;
+ Control.vector4_u32[3] = (fDeltaw <= Epsilon.vector4_f32[3]) ? 0xFFFFFFFFU : 0;
+ return Control;
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ XMVECTOR vDelta = vsubq_f32(V1,V2);
+ // vacleq compares absolute values: |delta| <= |Epsilon| in one instruction
+ return vacleq_f32( vDelta, Epsilon );
+#elif defined(_XM_SSE_INTRINSICS_)
+ // Get the difference
+ XMVECTOR vDelta = _mm_sub_ps(V1,V2);
+ // Get the absolute value of the difference via max(-delta, delta)
+ XMVECTOR vTemp = _mm_setzero_ps();
+ vTemp = _mm_sub_ps(vTemp,vDelta);
+ vTemp = _mm_max_ps(vTemp,vDelta);
+ vTemp = _mm_cmple_ps(vTemp,Epsilon);
+ return vTemp;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Component-wise float inequality: returns 0xFFFFFFFF in each lane where
+// V1 != V2, zero otherwise.
+inline XMVECTOR XMVectorNotEqual
+(
+ FXMVECTOR V1,
+ FXMVECTOR V2
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+
+ XMVECTOR Control;
+ for (uint32_t i = 0; i < 4; ++i)
+ {
+ Control.vector4_u32[i] = (V1.vector4_f32[i] != V2.vector4_f32[i]) ? 0xFFFFFFFF : 0;
+ }
+ return Control;
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ // NEON has no float not-equal, so invert the equality mask
+ return vmvnq_u32(vceqq_f32(V1, V2));
+#elif defined(_XM_SSE_INTRINSICS_)
+ return _mm_cmpneq_ps( V1, V2 );
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Component-wise bit-pattern inequality: treats each lane as a 32-bit
+// unsigned integer and returns 0xFFFFFFFF where the bits differ.
+inline XMVECTOR XMVectorNotEqualInt
+(
+ FXMVECTOR V1,
+ FXMVECTOR V2
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+
+ XMVECTOR Control;
+ for (uint32_t i = 0; i < 4; ++i)
+ {
+ Control.vector4_u32[i] = (V1.vector4_u32[i] != V2.vector4_u32[i]) ? 0xFFFFFFFFU : 0;
+ }
+ return Control;
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ return vmvnq_u32(vceqq_u32(V1, V2));
+#elif defined(_XM_SSE_INTRINSICS_)
+ // Integer compare-equal, then flip every bit to get not-equal
+ __m128i vEq = _mm_cmpeq_epi32( _mm_castps_si128(V1),_mm_castps_si128(V2) );
+ return _mm_xor_ps(reinterpret_cast<__m128 *>(&vEq)[0],g_XMNegOneMask);
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Component-wise float greater-than: returns 0xFFFFFFFF in each lane where
+// V1 > V2, zero otherwise.
+inline XMVECTOR XMVectorGreater
+(
+ FXMVECTOR V1,
+ FXMVECTOR V2
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+
+ XMVECTOR Control;
+ for (uint32_t i = 0; i < 4; ++i)
+ {
+ Control.vector4_u32[i] = (V1.vector4_f32[i] > V2.vector4_f32[i]) ? 0xFFFFFFFF : 0;
+ }
+ return Control;
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ return vcgtq_f32( V1, V2 );
+#elif defined(_XM_SSE_INTRINSICS_)
+ return _mm_cmpgt_ps( V1, V2 );
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Component-wise float greater-than with a CR6-style summary written to pCR:
+// XM_CRMASK_CR6TRUE when every component of V1 is greater, XM_CRMASK_CR6FALSE
+// when no component is greater, 0 otherwise. Returns the per-component mask.
+_Use_decl_annotations_
+inline XMVECTOR XMVectorGreaterR
+(
+ uint32_t* pCR,
+ FXMVECTOR V1,
+ FXMVECTOR V2
+)
+{
+ assert( pCR != NULL );
+#if defined(_XM_NO_INTRINSICS_)
+
+ uint32_t ux = (V1.vector4_f32[0] > V2.vector4_f32[0]) ? 0xFFFFFFFFU : 0;
+ uint32_t uy = (V1.vector4_f32[1] > V2.vector4_f32[1]) ? 0xFFFFFFFFU : 0;
+ uint32_t uz = (V1.vector4_f32[2] > V2.vector4_f32[2]) ? 0xFFFFFFFFU : 0;
+ uint32_t uw = (V1.vector4_f32[3] > V2.vector4_f32[3]) ? 0xFFFFFFFFU : 0;
+ uint32_t CR = 0;
+ if (ux&uy&uz&uw)
+ {
+ // All elements are greater
+ CR = XM_CRMASK_CR6TRUE;
+ }
+ else if (!(ux|uy|uz|uw))
+ {
+ // All elements are not greater
+ CR = XM_CRMASK_CR6FALSE;
+ }
+ *pCR = CR;
+
+ XMVECTOR Control;
+ Control.vector4_u32[0] = ux;
+ Control.vector4_u32[1] = uy;
+ Control.vector4_u32[2] = uz;
+ Control.vector4_u32[3] = uw;
+ return Control;
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ __n128 vResult = vcgtq_f32( V1, V2 );
+ // Byte-zip sequence condenses the 128-bit lane mask into one 32-bit word:
+ // r is all-ones only if every lane is all-ones, zero only if all are zero.
+ int8x8x2_t vTemp = vzip_u8(vget_low_u8(vResult), vget_high_u8(vResult));
+ vTemp = vzip_u16(vTemp.val[0], vTemp.val[1]);
+ uint32_t r = vget_lane_u32(vTemp.val[1], 1);
+ uint32_t CR = 0;
+ if ( r == 0xFFFFFFFFU )
+ {
+ // All elements are greater
+ CR = XM_CRMASK_CR6TRUE;
+ }
+ else if ( !r )
+ {
+ // All elements are not greater
+ CR = XM_CRMASK_CR6FALSE;
+ }
+ *pCR = CR;
+ return vResult;
+#elif defined(_XM_SSE_INTRINSICS_)
+ XMVECTOR vTemp = _mm_cmpgt_ps(V1,V2);
+ uint32_t CR = 0;
+ // movemask gathers one sign bit per lane: 0xf means all lanes passed
+ int iTest = _mm_movemask_ps(vTemp);
+ if (iTest==0xf)
+ {
+ CR = XM_CRMASK_CR6TRUE;
+ }
+ else if (!iTest)
+ {
+ // All elements are not greater
+ CR = XM_CRMASK_CR6FALSE;
+ }
+ *pCR = CR;
+ return vTemp;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Component-wise float greater-or-equal: returns 0xFFFFFFFF in each lane
+// where V1 >= V2, zero otherwise.
+inline XMVECTOR XMVectorGreaterOrEqual
+(
+ FXMVECTOR V1,
+ FXMVECTOR V2
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+
+ XMVECTOR Control;
+ for (uint32_t i = 0; i < 4; ++i)
+ {
+ Control.vector4_u32[i] = (V1.vector4_f32[i] >= V2.vector4_f32[i]) ? 0xFFFFFFFF : 0;
+ }
+ return Control;
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ return vcgeq_f32( V1, V2 );
+#elif defined(_XM_SSE_INTRINSICS_)
+ return _mm_cmpge_ps( V1, V2 );
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Component-wise float greater-or-equal with a CR6-style summary written to
+// pCR: XM_CRMASK_CR6TRUE when every component of V1 is >= V2,
+// XM_CRMASK_CR6FALSE when none is, 0 otherwise. Returns the per-lane mask.
+_Use_decl_annotations_
+inline XMVECTOR XMVectorGreaterOrEqualR
+(
+ uint32_t* pCR,
+ FXMVECTOR V1,
+ FXMVECTOR V2
+)
+{
+ assert( pCR != NULL );
+#if defined(_XM_NO_INTRINSICS_)
+
+ uint32_t ux = (V1.vector4_f32[0] >= V2.vector4_f32[0]) ? 0xFFFFFFFFU : 0;
+ uint32_t uy = (V1.vector4_f32[1] >= V2.vector4_f32[1]) ? 0xFFFFFFFFU : 0;
+ uint32_t uz = (V1.vector4_f32[2] >= V2.vector4_f32[2]) ? 0xFFFFFFFFU : 0;
+ uint32_t uw = (V1.vector4_f32[3] >= V2.vector4_f32[3]) ? 0xFFFFFFFFU : 0;
+ uint32_t CR = 0;
+ if (ux&uy&uz&uw)
+ {
+ // All elements are greater or equal
+ CR = XM_CRMASK_CR6TRUE;
+ }
+ else if (!(ux|uy|uz|uw))
+ {
+ // No elements are greater or equal
+ CR = XM_CRMASK_CR6FALSE;
+ }
+ *pCR = CR;
+
+ XMVECTOR Control;
+ Control.vector4_u32[0] = ux;
+ Control.vector4_u32[1] = uy;
+ Control.vector4_u32[2] = uz;
+ Control.vector4_u32[3] = uw;
+ return Control;
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ __n128 vResult = vcgeq_f32( V1, V2 );
+ // Byte-zip sequence condenses the 128-bit lane mask into one 32-bit word:
+ // r is all-ones only if every lane is all-ones, zero only if all are zero.
+ int8x8x2_t vTemp = vzip_u8(vget_low_u8(vResult), vget_high_u8(vResult));
+ vTemp = vzip_u16(vTemp.val[0], vTemp.val[1]);
+ uint32_t r = vget_lane_u32(vTemp.val[1], 1);
+ uint32_t CR = 0;
+ if ( r == 0xFFFFFFFFU )
+ {
+ // All elements are greater or equal
+ CR = XM_CRMASK_CR6TRUE;
+ }
+ else if ( !r )
+ {
+ // All elements are not greater or equal
+ CR = XM_CRMASK_CR6FALSE;
+ }
+ *pCR = CR;
+ return vResult;
+#elif defined(_XM_SSE_INTRINSICS_)
+ XMVECTOR vTemp = _mm_cmpge_ps(V1,V2);
+ uint32_t CR = 0;
+ // movemask gathers one sign bit per lane: 0xf means all lanes passed
+ int iTest = _mm_movemask_ps(vTemp);
+ if (iTest==0xf)
+ {
+ CR = XM_CRMASK_CR6TRUE;
+ }
+ else if (!iTest)
+ {
+ // No elements are greater or equal
+ CR = XM_CRMASK_CR6FALSE;
+ }
+ *pCR = CR;
+ return vTemp;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Component-wise float less-than: returns 0xFFFFFFFF in each lane where
+// V1 < V2, zero otherwise.
+inline XMVECTOR XMVectorLess
+(
+ FXMVECTOR V1,
+ FXMVECTOR V2
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+
+ XMVECTOR Control;
+ for (uint32_t i = 0; i < 4; ++i)
+ {
+ Control.vector4_u32[i] = (V1.vector4_f32[i] < V2.vector4_f32[i]) ? 0xFFFFFFFF : 0;
+ }
+ return Control;
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ return vcltq_f32( V1, V2 );
+#elif defined(_XM_SSE_INTRINSICS_)
+ return _mm_cmplt_ps( V1, V2 );
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Component-wise float less-or-equal: returns 0xFFFFFFFF in each lane where
+// V1 <= V2, zero otherwise.
+inline XMVECTOR XMVectorLessOrEqual
+(
+ FXMVECTOR V1,
+ FXMVECTOR V2
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+
+ XMVECTOR Control;
+ for (uint32_t i = 0; i < 4; ++i)
+ {
+ Control.vector4_u32[i] = (V1.vector4_f32[i] <= V2.vector4_f32[i]) ? 0xFFFFFFFF : 0;
+ }
+ return Control;
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ return vcleq_f32( V1, V2 );
+#elif defined(_XM_SSE_INTRINSICS_)
+ return _mm_cmple_ps( V1, V2 );
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Component-wise range test: returns 0xFFFFFFFF in each lane where
+// -Bounds <= V <= Bounds, zero otherwise. Bounds is expected non-negative
+// per component (a negative bound makes the interval empty for that lane).
+inline XMVECTOR XMVectorInBounds
+(
+ FXMVECTOR V,
+ FXMVECTOR Bounds
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+
+ XMVECTOR Control;
+ Control.vector4_u32[0] = (V.vector4_f32[0] <= Bounds.vector4_f32[0] && V.vector4_f32[0] >= -Bounds.vector4_f32[0]) ? 0xFFFFFFFF : 0;
+ Control.vector4_u32[1] = (V.vector4_f32[1] <= Bounds.vector4_f32[1] && V.vector4_f32[1] >= -Bounds.vector4_f32[1]) ? 0xFFFFFFFF : 0;
+ Control.vector4_u32[2] = (V.vector4_f32[2] <= Bounds.vector4_f32[2] && V.vector4_f32[2] >= -Bounds.vector4_f32[2]) ? 0xFFFFFFFF : 0;
+ Control.vector4_u32[3] = (V.vector4_f32[3] <= Bounds.vector4_f32[3] && V.vector4_f32[3] >= -Bounds.vector4_f32[3]) ? 0xFFFFFFFF : 0;
+ return Control;
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ // Test if less than or equal
+ XMVECTOR vTemp1 = vcleq_f32(V,Bounds);
+ // Negate the bounds
+ XMVECTOR vTemp2 = vnegq_f32(Bounds);
+ // Test if greater or equal (Reversed)
+ vTemp2 = vcleq_f32(vTemp2,V);
+ // Blend answers
+ vTemp1 = vandq_u32(vTemp1,vTemp2);
+ return vTemp1;
+#elif defined(_XM_SSE_INTRINSICS_)
+ // Test if less than or equal
+ XMVECTOR vTemp1 = _mm_cmple_ps(V,Bounds);
+ // Negate the bounds
+ XMVECTOR vTemp2 = _mm_mul_ps(Bounds,g_XMNegativeOne);
+ // Test if greater or equal (Reversed)
+ vTemp2 = _mm_cmple_ps(vTemp2,V);
+ // Blend answers
+ vTemp1 = _mm_and_ps(vTemp1,vTemp2);
+ return vTemp1;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Component-wise range test (-Bounds <= V <= Bounds) that also writes a
+// summary to pCR: XM_CRMASK_CR6BOUNDS when every component is in bounds,
+// 0 otherwise. Returns the per-component mask.
+_Use_decl_annotations_
+inline XMVECTOR XMVectorInBoundsR
+(
+ uint32_t* pCR,
+ FXMVECTOR V,
+ FXMVECTOR Bounds
+)
+{
+ assert( pCR != NULL );
+#if defined(_XM_NO_INTRINSICS_)
+
+ uint32_t ux = (V.vector4_f32[0] <= Bounds.vector4_f32[0] && V.vector4_f32[0] >= -Bounds.vector4_f32[0]) ? 0xFFFFFFFFU : 0;
+ uint32_t uy = (V.vector4_f32[1] <= Bounds.vector4_f32[1] && V.vector4_f32[1] >= -Bounds.vector4_f32[1]) ? 0xFFFFFFFFU : 0;
+ uint32_t uz = (V.vector4_f32[2] <= Bounds.vector4_f32[2] && V.vector4_f32[2] >= -Bounds.vector4_f32[2]) ? 0xFFFFFFFFU : 0;
+ uint32_t uw = (V.vector4_f32[3] <= Bounds.vector4_f32[3] && V.vector4_f32[3] >= -Bounds.vector4_f32[3]) ? 0xFFFFFFFFU : 0;
+
+ uint32_t CR = 0;
+ if (ux&uy&uz&uw)
+ {
+ // All elements are in bounds
+ CR = XM_CRMASK_CR6BOUNDS;
+ }
+ *pCR = CR;
+
+ XMVECTOR Control;
+ Control.vector4_u32[0] = ux;
+ Control.vector4_u32[1] = uy;
+ Control.vector4_u32[2] = uz;
+ Control.vector4_u32[3] = uw;
+ return Control;
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ // Test if less than or equal
+ XMVECTOR vTemp1 = vcleq_f32(V,Bounds);
+ // Negate the bounds
+ XMVECTOR vTemp2 = vnegq_f32(Bounds);
+ // Test if greater or equal (Reversed)
+ vTemp2 = vcleq_f32(vTemp2,V);
+ // Blend answers
+ vTemp1 = vandq_u32(vTemp1,vTemp2);
+ // Byte-zip sequence condenses the 128-bit lane mask into one 32-bit word:
+ // r is all-ones only if every lane is all-ones.
+ int8x8x2_t vTemp = vzip_u8(vget_low_u8(vTemp1), vget_high_u8(vTemp1));
+ vTemp = vzip_u16(vTemp.val[0], vTemp.val[1]);
+ uint32_t r = vget_lane_u32(vTemp.val[1], 1);
+ uint32_t CR = 0;
+ if ( r == 0xFFFFFFFFU )
+ {
+ // All elements are in bounds
+ CR = XM_CRMASK_CR6BOUNDS;
+ }
+ *pCR = CR;
+ return vTemp1;
+#elif defined(_XM_SSE_INTRINSICS_)
+ // Test if less than or equal
+ XMVECTOR vTemp1 = _mm_cmple_ps(V,Bounds);
+ // Negate the bounds
+ XMVECTOR vTemp2 = _mm_mul_ps(Bounds,g_XMNegativeOne);
+ // Test if greater or equal (Reversed)
+ vTemp2 = _mm_cmple_ps(vTemp2,V);
+ // Blend answers
+ vTemp1 = _mm_and_ps(vTemp1,vTemp2);
+
+ uint32_t CR = 0;
+ if (_mm_movemask_ps(vTemp1)==0xf) {
+ // All elements are in bounds
+ CR = XM_CRMASK_CR6BOUNDS;
+ }
+ *pCR = CR;
+ return vTemp1;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Component-wise NaN test: returns 0xFFFFFFFF in each lane holding a NaN,
+// zero otherwise.
+inline XMVECTOR XMVectorIsNaN
+(
+ FXMVECTOR V
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+
+ XMVECTOR Control;
+ for (uint32_t i = 0; i < 4; ++i)
+ {
+ Control.vector4_u32[i] = XMISNAN(V.vector4_f32[i]) ? 0xFFFFFFFFU : 0;
+ }
+ return Control;
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ // NaN is the only value not equal to itself, so invert V == V
+ __n128 vEqualsSelf = vceqq_f32( V, V );
+ return vmvnq_u32( vEqualsSelf );
+#elif defined(_XM_SSE_INTRINSICS_)
+ // NaN is the only value not equal to itself
+ return _mm_cmpneq_ps(V,V);
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Component-wise infinity test: returns 0xFFFFFFFF in each lane holding
+// +INF or -INF, zero otherwise.
+inline XMVECTOR XMVectorIsInfinite
+(
+ FXMVECTOR V
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+
+ XMVECTOR Control;
+ for (uint32_t i = 0; i < 4; ++i)
+ {
+ Control.vector4_u32[i] = XMISINF(V.vector4_f32[i]) ? 0xFFFFFFFFU : 0;
+ }
+ return Control;
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ // Clear the sign bit, then compare against +infinity: catches +/-INF
+ __n128 vAbs = vandq_u32(V,g_XMAbsMask);
+ return vceqq_f32(vAbs,g_XMInfinity);
+#elif defined(_XM_SSE_INTRINSICS_)
+ // Clear the sign bit, then compare against +infinity: catches +/-INF
+ __m128 vAbs = _mm_and_ps(V,g_XMAbsMask);
+ return _mm_cmpeq_ps(vAbs,g_XMInfinity);
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+// Rounding and clamping operations
+//------------------------------------------------------------------------------
+
+//------------------------------------------------------------------------------
+
+// Component-wise minimum of two vectors.
+inline XMVECTOR XMVectorMin
+(
+ FXMVECTOR V1,
+ FXMVECTOR V2
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+
+ XMVECTOR Result;
+ for (uint32_t i = 0; i < 4; ++i)
+ {
+ const float a = V1.vector4_f32[i];
+ const float b = V2.vector4_f32[i];
+ Result.vector4_f32[i] = (a < b) ? a : b;
+ }
+ return Result;
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ return vminq_f32( V1, V2 );
+#elif defined(_XM_SSE_INTRINSICS_)
+ return _mm_min_ps( V1, V2 );
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Component-wise maximum of two vectors.
+inline XMVECTOR XMVectorMax
+(
+ FXMVECTOR V1,
+ FXMVECTOR V2
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+
+ XMVECTOR Result;
+ for (uint32_t i = 0; i < 4; ++i)
+ {
+ const float a = V1.vector4_f32[i];
+ const float b = V2.vector4_f32[i];
+ Result.vector4_f32[i] = (a > b) ? a : b;
+ }
+ return Result;
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ return vmaxq_f32( V1, V2 );
+#elif defined(_XM_SSE_INTRINSICS_)
+ return _mm_max_ps( V1, V2 );
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Round each component to the nearest integer value (returned as a float).
+// NOTE(review): the scalar and NEON paths bias by +/-0.5 then truncate
+// (half rounds away from zero), while the SSE path uses _mm_cvtps_epi32,
+// which follows the current MXCSR rounding mode (round-to-nearest-even by
+// default) — exact .5 inputs can differ across paths; confirm acceptable.
+inline XMVECTOR XMVectorRound
+(
+ FXMVECTOR V
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+
+ const XMVECTOR Zero = XMVectorZero();
+ const XMVECTOR BiasPos = XMVectorReplicate(0.5f);
+ const XMVECTOR BiasNeg = XMVectorReplicate(-0.5f);
+
+ // Pick +0.5 for non-negative lanes, -0.5 for negative lanes, then truncate
+ XMVECTOR Bias = XMVectorLess(V, Zero);
+ Bias = XMVectorSelect(BiasPos, BiasNeg, Bias);
+ XMVECTOR Result = XMVectorAdd(V, Bias);
+ Result = XMVectorTruncate(Result);
+
+ return Result;
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ // Lanes with |V| < 8388608 have a fractional part and get rounded;
+ // larger magnitudes (and NAN/INF) keep their original value.
+ __n128 vTest = vabsq_f32( V );
+ vTest = vcltq_f32( vTest, g_XMNoFraction );
+
+ __n128 Bias = vcltq_f32( V, vdupq_n_u32(0) );
+
+ __n128 BiasPos = vdupq_n_f32( 0.5f );
+ __n128 BiasNeg = vdupq_n_f32( -0.5f );
+ Bias = vbslq_f32( Bias, BiasNeg, BiasPos );
+ __n128 V0 = vaddq_f32( V, Bias );
+ __n128 vInt = vcvtq_s32_f32( V0 );
+ __n128 vResult = vcvtq_f32_s32( vInt );
+
+ // All numbers less than 8388608 will use the round to int
+ // All others, use the ORIGINAL value
+ return vbslq_f32( vTest, vResult, V );
+#elif defined(_XM_SSE_INTRINSICS_)
+ // To handle NAN, INF and numbers greater than 8388608, use masking
+ // Get the abs value
+ __m128i vTest = _mm_and_si128(_mm_castps_si128(V),g_XMAbsMask);
+ // Test for greater than 8388608 (All floats with NO fractionals, NAN and INF
+ vTest = _mm_cmplt_epi32(vTest,g_XMNoFraction);
+ // Convert to int and back to float for rounding
+ __m128i vInt = _mm_cvtps_epi32(V);
+ // Convert back to floats
+ XMVECTOR vResult = _mm_cvtepi32_ps(vInt);
+ // All numbers less than 8388608 will use the round to int
+ vResult = _mm_and_ps(vResult,reinterpret_cast<const XMVECTOR *>(&vTest)[0]);
+ // All others, use the ORIGINAL value
+ vTest = _mm_andnot_si128(vTest,_mm_castps_si128(V));
+ vResult = _mm_or_ps(vResult,reinterpret_cast<const XMVECTOR *>(&vTest)[0]);
+ return vResult;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Round each component toward zero (truncate the fractional part).
+// Magnitudes >= 8388608 (2^23) already have no fraction and pass through
+// unchanged; in the scalar path a NaN lane is replaced by the quiet NaN
+// bit pattern 0x7FC00000.
+inline XMVECTOR XMVectorTruncate
+(
+ FXMVECTOR V
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+ XMVECTOR Result;
+ uint32_t i;
+
+ // Avoid C4701
+ Result.vector4_f32[0] = 0.0f;
+
+ for (i = 0; i < 4; i++)
+ {
+ if (XMISNAN(V.vector4_f32[i]))
+ {
+ // Normalize any NaN to the standard quiet NaN pattern
+ Result.vector4_u32[i] = 0x7FC00000;
+ }
+ else if (fabsf(V.vector4_f32[i]) < 8388608.0f)
+ {
+ // Safe to truncate via int conversion (fits in int32 range)
+ Result.vector4_f32[i] = (float)((int32_t)V.vector4_f32[i]);
+ }
+ else
+ {
+ // Too large to have a fraction; keep the original value
+ Result.vector4_f32[i] = V.vector4_f32[i];
+ }
+ }
+ return Result;
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ // Lanes with |V| < 8388608 get the int-converted value; larger
+ // magnitudes (and NAN/INF) keep their original value.
+ __n128 vTest = vabsq_f32( V );
+ vTest = vcltq_f32( vTest, g_XMNoFraction );
+
+ __n128 vInt = vcvtq_s32_f32( V );
+ __n128 vResult = vcvtq_f32_s32( vInt );
+
+ // All numbers less than 8388608 will use the round to int
+ // All others, use the ORIGINAL value
+ return vbslq_f32( vTest, vResult, V );
+#elif defined(_XM_SSE_INTRINSICS_)
+ // To handle NAN, INF and numbers greater than 8388608, use masking
+ // Get the abs value
+ __m128i vTest = _mm_and_si128(_mm_castps_si128(V),g_XMAbsMask);
+ // Test for greater than 8388608 (All floats with NO fractionals, NAN and INF
+ vTest = _mm_cmplt_epi32(vTest,g_XMNoFraction);
+ // Convert to int and back to float for rounding with truncation
+ __m128i vInt = _mm_cvttps_epi32(V);
+ // Convert back to floats
+ XMVECTOR vResult = _mm_cvtepi32_ps(vInt);
+ // All numbers less than 8388608 will use the round to int
+ vResult = _mm_and_ps(vResult,reinterpret_cast<const XMVECTOR *>(&vTest)[0]);
+ // All others, use the ORIGINAL value
+ vTest = _mm_andnot_si128(vTest,_mm_castps_si128(V));
+ vResult = _mm_or_ps(vResult,reinterpret_cast<const XMVECTOR *>(&vTest)[0]);
+ return vResult;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Compute the floor (largest integer <= x) of each component of V.
+// SIMD paths bias the value by just under one half and then round-to-nearest;
+// lanes with |x| >= 2^23, NAN and INF are passed through unchanged.
+inline XMVECTOR XMVectorFloor
+(
+ FXMVECTOR V
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+
+ XMVECTOR vResult = {
+ floorf(V.vector4_f32[0]),
+ floorf(V.vector4_f32[1]),
+ floorf(V.vector4_f32[2]),
+ floorf(V.vector4_f32[3])
+ };
+ return vResult;
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ // 0x3EFFFFA0 is just below 0.5f; subtract then round-to-nearest
+ __n128 V0 = vsubq_f32( V, vdupq_n_u32(0x3EFFFFA0) );
+ return XMVectorRound(V0);
+#elif defined(_XM_SSE_INTRINSICS_)
+ // To handle NAN, INF and numbers greater than 8388608, use masking
+ // Get the abs value
+ __m128i vTest = _mm_and_si128(_mm_castps_si128(V),g_XMAbsMask);
+ // Test for greater than 8388608 (all floats with no fractional bits, NAN and INF)
+ vTest = _mm_cmplt_epi32(vTest,g_XMNoFraction);
+ // Convert to int and back to float for rounding
+ XMVECTOR vResult = _mm_sub_ps(V,g_XMOneHalfMinusEpsilon);
+ __m128i vInt = _mm_cvtps_epi32(vResult);
+ // Convert back to floats
+ vResult = _mm_cvtepi32_ps(vInt);
+ // All numbers less than 8388608 will use the round to int
+ vResult = _mm_and_ps(vResult,reinterpret_cast<const XMVECTOR *>(&vTest)[0]);
+ // All others, use the ORIGINAL value
+ vTest = _mm_andnot_si128(vTest,_mm_castps_si128(V));
+ vResult = _mm_or_ps(vResult,reinterpret_cast<const XMVECTOR *>(&vTest)[0]);
+ return vResult;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Compute the ceiling (smallest integer >= x) of each component of V.
+// SIMD paths bias the value by just under one half and then round-to-nearest;
+// lanes with |x| >= 2^23, NAN and INF are passed through unchanged.
+inline XMVECTOR XMVectorCeiling
+(
+ FXMVECTOR V
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+ XMVECTOR vResult = {
+ ceilf(V.vector4_f32[0]),
+ ceilf(V.vector4_f32[1]),
+ ceilf(V.vector4_f32[2]),
+ ceilf(V.vector4_f32[3])
+ };
+ return vResult;
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ // 0x3EFFFFA0 is just below 0.5f; add then round-to-nearest
+ __n128 V0 = vaddq_f32( V, vdupq_n_u32(0x3EFFFFA0) );
+ return XMVectorRound(V0);
+#elif defined(_XM_SSE_INTRINSICS_)
+ // To handle NAN, INF and numbers greater than 8388608, use masking
+ // Get the abs value
+ __m128i vTest = _mm_and_si128(_mm_castps_si128(V),g_XMAbsMask);
+ // Test for greater than 8388608 (all floats with no fractional bits, NAN and INF)
+ vTest = _mm_cmplt_epi32(vTest,g_XMNoFraction);
+ // Convert to int and back to float for rounding
+ XMVECTOR vResult = _mm_add_ps(V,g_XMOneHalfMinusEpsilon);
+ __m128i vInt = _mm_cvtps_epi32(vResult);
+ // Convert back to floats
+ vResult = _mm_cvtepi32_ps(vInt);
+ // All numbers less than 8388608 will use the round to int
+ vResult = _mm_and_ps(vResult,reinterpret_cast<const XMVECTOR *>(&vTest)[0]);
+ // All others, use the ORIGINAL value
+ vTest = _mm_andnot_si128(vTest,_mm_castps_si128(V));
+ vResult = _mm_or_ps(vResult,reinterpret_cast<const XMVECTOR *>(&vTest)[0]);
+ return vResult;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Clamp each component of V to the range [Min, Max].
+// Asserts (debug only) that Min <= Max in every component.
+inline XMVECTOR XMVectorClamp
+(
+ FXMVECTOR V,
+ FXMVECTOR Min,
+ FXMVECTOR Max
+)
+{
+ assert(XMVector4LessOrEqual(Min, Max));
+
+#if defined(_XM_NO_INTRINSICS_)
+
+ // max() first, then min(): result is pinned into [Min, Max]
+ XMVECTOR Result;
+ Result = XMVectorMax(Min, V);
+ Result = XMVectorMin(Max, Result);
+ return Result;
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ XMVECTOR vResult;
+ vResult = vmaxq_f32(Min,V);
+ vResult = vminq_f32(vResult,Max);
+ return vResult;
+#elif defined(_XM_SSE_INTRINSICS_)
+ XMVECTOR vResult;
+ vResult = _mm_max_ps(Min,V);
+ vResult = _mm_min_ps(vResult,Max);
+ return vResult;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Saturate each component of V: clamp to the range [0.0f, 1.0f].
+inline XMVECTOR XMVectorSaturate
+(
+ FXMVECTOR V
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+
+ const XMVECTOR Zero = XMVectorZero();
+
+ return XMVectorClamp(V, Zero, g_XMOne.v);
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ // Set <0 to 0
+ XMVECTOR vResult = vmaxq_f32(V, vdupq_n_u32(0) );
+ // Set>1 to 1
+ return vminq_f32(vResult, vdupq_n_f32(1.0f) );
+#elif defined(_XM_SSE_INTRINSICS_)
+ // Set <0 to 0
+ XMVECTOR vResult = _mm_max_ps(V,g_XMZero);
+ // Set>1 to 1
+ return _mm_min_ps(vResult,g_XMOne);
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+// Bitwise logical operations
+//------------------------------------------------------------------------------
+
+// Bitwise AND of the two vectors, treating each component as a 32-bit integer:
+// Result[i] = V1[i] & V2[i].
+inline XMVECTOR XMVectorAndInt
+(
+ FXMVECTOR V1,
+ FXMVECTOR V2
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+
+ XMVECTOR Result;
+ Result.vector4_u32[0] = V1.vector4_u32[0] & V2.vector4_u32[0];
+ Result.vector4_u32[1] = V1.vector4_u32[1] & V2.vector4_u32[1];
+ Result.vector4_u32[2] = V1.vector4_u32[2] & V2.vector4_u32[2];
+ Result.vector4_u32[3] = V1.vector4_u32[3] & V2.vector4_u32[3];
+ return Result;
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ return vandq_u32(V1,V2);
+#elif defined(_XM_SSE_INTRINSICS_)
+ return _mm_and_ps(V1,V2);
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Bitwise AND-with-complement, treating components as 32-bit integers:
+// Result[i] = V1[i] & ~V2[i].
+// Note _mm_andnot_si128 negates its FIRST operand, hence V2 is passed first.
+inline XMVECTOR XMVectorAndCInt
+(
+ FXMVECTOR V1,
+ FXMVECTOR V2
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+
+ XMVECTOR Result;
+ Result.vector4_u32[0] = V1.vector4_u32[0] & ~V2.vector4_u32[0];
+ Result.vector4_u32[1] = V1.vector4_u32[1] & ~V2.vector4_u32[1];
+ Result.vector4_u32[2] = V1.vector4_u32[2] & ~V2.vector4_u32[2];
+ Result.vector4_u32[3] = V1.vector4_u32[3] & ~V2.vector4_u32[3];
+ return Result;
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ return vbicq_u32(V1,V2);
+#elif defined(_XM_SSE_INTRINSICS_)
+ __m128i V = _mm_andnot_si128( _mm_castps_si128(V2), _mm_castps_si128(V1) );
+ return reinterpret_cast<__m128 *>(&V)[0];
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Bitwise OR of the two vectors, treating each component as a 32-bit integer:
+// Result[i] = V1[i] | V2[i].
+inline XMVECTOR XMVectorOrInt
+(
+ FXMVECTOR V1,
+ FXMVECTOR V2
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+
+ XMVECTOR Result;
+ Result.vector4_u32[0] = V1.vector4_u32[0] | V2.vector4_u32[0];
+ Result.vector4_u32[1] = V1.vector4_u32[1] | V2.vector4_u32[1];
+ Result.vector4_u32[2] = V1.vector4_u32[2] | V2.vector4_u32[2];
+ Result.vector4_u32[3] = V1.vector4_u32[3] | V2.vector4_u32[3];
+ return Result;
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ return vorrq_u32(V1,V2);
+#elif defined(_XM_SSE_INTRINSICS_)
+ __m128i V = _mm_or_si128( _mm_castps_si128(V1), _mm_castps_si128(V2) );
+ return reinterpret_cast<__m128 *>(&V)[0];
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Bitwise NOR of the two vectors, treating each component as a 32-bit integer:
+// Result[i] = ~(V1[i] | V2[i]).
+// SIMD paths synthesize NOT via andnot against an all-ones mask.
+inline XMVECTOR XMVectorNorInt
+(
+ FXMVECTOR V1,
+ FXMVECTOR V2
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+
+ XMVECTOR Result;
+ Result.vector4_u32[0] = ~(V1.vector4_u32[0] | V2.vector4_u32[0]);
+ Result.vector4_u32[1] = ~(V1.vector4_u32[1] | V2.vector4_u32[1]);
+ Result.vector4_u32[2] = ~(V1.vector4_u32[2] | V2.vector4_u32[2]);
+ Result.vector4_u32[3] = ~(V1.vector4_u32[3] | V2.vector4_u32[3]);
+ return Result;
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ __n128 Result = vorrq_u32(V1,V2);
+ return vbicq_u32(g_XMNegOneMask, Result);
+#elif defined(_XM_SSE_INTRINSICS_)
+ __m128i Result;
+ Result = _mm_or_si128( _mm_castps_si128(V1), _mm_castps_si128(V2) );
+ Result = _mm_andnot_si128( Result,g_XMNegOneMask);
+ return reinterpret_cast<__m128 *>(&Result)[0];
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Bitwise XOR of the two vectors, treating each component as a 32-bit integer:
+// Result[i] = V1[i] ^ V2[i].
+inline XMVECTOR XMVectorXorInt
+(
+ FXMVECTOR V1,
+ FXMVECTOR V2
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+
+ XMVECTOR Result;
+ Result.vector4_u32[0] = V1.vector4_u32[0] ^ V2.vector4_u32[0];
+ Result.vector4_u32[1] = V1.vector4_u32[1] ^ V2.vector4_u32[1];
+ Result.vector4_u32[2] = V1.vector4_u32[2] ^ V2.vector4_u32[2];
+ Result.vector4_u32[3] = V1.vector4_u32[3] ^ V2.vector4_u32[3];
+ return Result;
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ return veorq_u32(V1,V2);
+#elif defined(_XM_SSE_INTRINSICS_)
+ __m128i V = _mm_xor_si128( _mm_castps_si128(V1), _mm_castps_si128(V2) );
+ return reinterpret_cast<__m128 *>(&V)[0];
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+// Computation operations
+//------------------------------------------------------------------------------
+
+//------------------------------------------------------------------------------
+
+// Negate each component of V: Result[i] = -V[i].
+// SSE path computes 0 - V (no dedicated negate instruction).
+inline XMVECTOR XMVectorNegate
+(
+ FXMVECTOR V
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+
+ XMVECTOR Result;
+ Result.vector4_f32[0] = -V.vector4_f32[0];
+ Result.vector4_f32[1] = -V.vector4_f32[1];
+ Result.vector4_f32[2] = -V.vector4_f32[2];
+ Result.vector4_f32[3] = -V.vector4_f32[3];
+ return Result;
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ return vnegq_f32(V);
+#elif defined(_XM_SSE_INTRINSICS_)
+ XMVECTOR Z;
+
+ Z = _mm_setzero_ps();
+
+ return _mm_sub_ps( Z, V );
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Component-wise addition: Result[i] = V1[i] + V2[i].
+inline XMVECTOR XMVectorAdd
+(
+ FXMVECTOR V1,
+ FXMVECTOR V2
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+
+ XMVECTOR Result;
+ Result.vector4_f32[0] = V1.vector4_f32[0] + V2.vector4_f32[0];
+ Result.vector4_f32[1] = V1.vector4_f32[1] + V2.vector4_f32[1];
+ Result.vector4_f32[2] = V1.vector4_f32[2] + V2.vector4_f32[2];
+ Result.vector4_f32[3] = V1.vector4_f32[3] + V2.vector4_f32[3];
+ return Result;
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ return vaddq_f32( V1, V2 );
+#elif defined(_XM_SSE_INTRINSICS_)
+ return _mm_add_ps( V1, V2 );
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Add two vectors of angles (radians) and wrap the sums back into [-Pi, Pi).
+// Precondition (per the original comments): -Pi <= V1 < Pi and
+// -2Pi <= V2 <= 2Pi, so at most one 2Pi correction per lane is needed.
+inline XMVECTOR XMVectorAddAngles
+(
+ FXMVECTOR V1,
+ FXMVECTOR V2
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+
+ const XMVECTOR Zero = XMVectorZero();
+
+ // Add the given angles together. If the range of V1 is such
+ // that -Pi <= V1 < Pi and the range of V2 is such that
+ // -2Pi <= V2 <= 2Pi, then the range of the resulting angle
+ // will be -Pi <= Result < Pi.
+ XMVECTOR Result = XMVectorAdd(V1, V2);
+
+ XMVECTOR Mask = XMVectorLess(Result, g_XMNegativePi.v);
+ XMVECTOR Offset = XMVectorSelect(Zero, g_XMTwoPi.v, Mask);
+
+ Mask = XMVectorGreaterOrEqual(Result, g_XMPi.v);
+ Offset = XMVectorSelect(Offset, g_XMNegativeTwoPi.v, Mask);
+
+ Result = XMVectorAdd(Result, Offset);
+
+ return Result;
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ // Adjust the angles
+ __n128 vResult = vaddq_f32(V1,V2);
+ // Less than Pi?
+ __n128 vOffset = vcltq_f32(vResult,g_XMNegativePi);
+ vOffset = vandq_u32(vOffset,g_XMTwoPi);
+ // Add 2Pi to all entries less than -Pi
+ vResult = vaddq_f32(vResult,vOffset);
+ // Greater than or equal to Pi?
+ vOffset = vcgeq_f32(vResult,g_XMPi);
+ vOffset = vandq_u32(vOffset,g_XMTwoPi);
+ // Sub 2Pi to all entries greater than Pi
+ vResult = vsubq_f32(vResult,vOffset);
+ return vResult;
+#elif defined(_XM_SSE_INTRINSICS_)
+ // Adjust the angles
+ XMVECTOR vResult = _mm_add_ps(V1,V2);
+ // Less than Pi?
+ XMVECTOR vOffset = _mm_cmplt_ps(vResult,g_XMNegativePi);
+ vOffset = _mm_and_ps(vOffset,g_XMTwoPi);
+ // Add 2Pi to all entries less than -Pi
+ vResult = _mm_add_ps(vResult,vOffset);
+ // Greater than or equal to Pi?
+ vOffset = _mm_cmpge_ps(vResult,g_XMPi);
+ vOffset = _mm_and_ps(vOffset,g_XMTwoPi);
+ // Sub 2Pi to all entries greater than Pi
+ vResult = _mm_sub_ps(vResult,vOffset);
+ return vResult;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Component-wise subtraction: Result[i] = V1[i] - V2[i].
+inline XMVECTOR XMVectorSubtract
+(
+ FXMVECTOR V1,
+ FXMVECTOR V2
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+
+ XMVECTOR Result;
+ Result.vector4_f32[0] = V1.vector4_f32[0] - V2.vector4_f32[0];
+ Result.vector4_f32[1] = V1.vector4_f32[1] - V2.vector4_f32[1];
+ Result.vector4_f32[2] = V1.vector4_f32[2] - V2.vector4_f32[2];
+ Result.vector4_f32[3] = V1.vector4_f32[3] - V2.vector4_f32[3];
+ return Result;
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ return vsubq_f32( V1, V2 );
+#elif defined(_XM_SSE_INTRINSICS_)
+ return _mm_sub_ps( V1, V2 );
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Subtract two vectors of angles (radians) and wrap the differences back into
+// [-Pi, Pi). Precondition (per the original comments): -Pi <= V1 < Pi and
+// -2Pi <= V2 <= 2Pi, so at most one 2Pi correction per lane is needed.
+inline XMVECTOR XMVectorSubtractAngles
+(
+ FXMVECTOR V1,
+ FXMVECTOR V2
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+
+ const XMVECTOR Zero = XMVectorZero();
+
+ // Subtract the given angles. If the range of V1 is such
+ // that -Pi <= V1 < Pi and the range of V2 is such that
+ // -2Pi <= V2 <= 2Pi, then the range of the resulting angle
+ // will be -Pi <= Result < Pi.
+ XMVECTOR Result = XMVectorSubtract(V1, V2);
+
+ XMVECTOR Mask = XMVectorLess(Result, g_XMNegativePi.v);
+ XMVECTOR Offset = XMVectorSelect(Zero, g_XMTwoPi.v, Mask);
+
+ Mask = XMVectorGreaterOrEqual(Result, g_XMPi.v);
+ Offset = XMVectorSelect(Offset, g_XMNegativeTwoPi.v, Mask);
+
+ Result = XMVectorAdd(Result, Offset);
+
+ return Result;
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ // Adjust the angles
+ __n128 vResult = vsubq_f32(V1,V2);
+ // Less than Pi?
+ __n128 vOffset = vcltq_f32(vResult,g_XMNegativePi);
+ vOffset = vandq_u32(vOffset,g_XMTwoPi);
+ // Add 2Pi to all entries less than -Pi
+ vResult = vaddq_f32(vResult,vOffset);
+ // Greater than or equal to Pi?
+ vOffset = vcgeq_f32(vResult,g_XMPi);
+ vOffset = vandq_u32(vOffset,g_XMTwoPi);
+ // Sub 2Pi to all entries greater than Pi
+ vResult = vsubq_f32(vResult,vOffset);
+ return vResult;
+#elif defined(_XM_SSE_INTRINSICS_)
+ // Adjust the angles
+ XMVECTOR vResult = _mm_sub_ps(V1,V2);
+ // Less than Pi?
+ XMVECTOR vOffset = _mm_cmplt_ps(vResult,g_XMNegativePi);
+ vOffset = _mm_and_ps(vOffset,g_XMTwoPi);
+ // Add 2Pi to all entries less than -Pi
+ vResult = _mm_add_ps(vResult,vOffset);
+ // Greater than or equal to Pi?
+ vOffset = _mm_cmpge_ps(vResult,g_XMPi);
+ vOffset = _mm_and_ps(vOffset,g_XMTwoPi);
+ // Sub 2Pi to all entries greater than Pi
+ vResult = _mm_sub_ps(vResult,vOffset);
+ return vResult;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Component-wise multiplication: Result[i] = V1[i] * V2[i].
+inline XMVECTOR XMVectorMultiply
+(
+ FXMVECTOR V1,
+ FXMVECTOR V2
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+ XMVECTOR Result = {
+ V1.vector4_f32[0] * V2.vector4_f32[0],
+ V1.vector4_f32[1] * V2.vector4_f32[1],
+ V1.vector4_f32[2] * V2.vector4_f32[2],
+ V1.vector4_f32[3] * V2.vector4_f32[3]
+ };
+ return Result;
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ return vmulq_f32( V1, V2 );
+#elif defined(_XM_SSE_INTRINSICS_)
+ return _mm_mul_ps( V1, V2 );
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Component-wise multiply-add: Result[i] = V1[i] * V2[i] + V3[i].
+// NEON uses a fused vmla; SSE uses a separate multiply then add.
+inline XMVECTOR XMVectorMultiplyAdd
+(
+ FXMVECTOR V1,
+ FXMVECTOR V2,
+ FXMVECTOR V3
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+ XMVECTOR vResult = {
+ (V1.vector4_f32[0] * V2.vector4_f32[0]) + V3.vector4_f32[0],
+ (V1.vector4_f32[1] * V2.vector4_f32[1]) + V3.vector4_f32[1],
+ (V1.vector4_f32[2] * V2.vector4_f32[2]) + V3.vector4_f32[2],
+ (V1.vector4_f32[3] * V2.vector4_f32[3]) + V3.vector4_f32[3]
+ };
+ return vResult;
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ return vmlaq_f32( V3, V1, V2 );
+#elif defined(_XM_SSE_INTRINSICS_)
+ XMVECTOR vResult = _mm_mul_ps( V1, V2 );
+ return _mm_add_ps(vResult, V3 );
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Component-wise division: Result[i] = V1[i] / V2[i].
+// NEON has no divide instruction, so it multiplies by a reciprocal estimate
+// refined with two Newton-Raphson steps (approximate, not IEEE-exact).
+inline XMVECTOR XMVectorDivide
+(
+ FXMVECTOR V1,
+ FXMVECTOR V2
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+ XMVECTOR Result;
+ Result.vector4_f32[0] = V1.vector4_f32[0] / V2.vector4_f32[0];
+ Result.vector4_f32[1] = V1.vector4_f32[1] / V2.vector4_f32[1];
+ Result.vector4_f32[2] = V1.vector4_f32[2] / V2.vector4_f32[2];
+ Result.vector4_f32[3] = V1.vector4_f32[3] / V2.vector4_f32[3];
+ return Result;
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ // 2 iterations of Newton-Raphson refinement of reciprocal
+ __n128 Reciprocal = vrecpeq_f32(V2);
+ __n128 S = vrecpsq_f32( Reciprocal, V2 );
+ Reciprocal = vmulq_f32( S, Reciprocal );
+ S = vrecpsq_f32( Reciprocal, V2 );
+ Reciprocal = vmulq_f32( S, Reciprocal );
+ return vmulq_f32( V1, Reciprocal );
+#elif defined(_XM_SSE_INTRINSICS_)
+ return _mm_div_ps( V1, V2 );
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Component-wise negative multiply-subtract: Result[i] = V3[i] - V1[i] * V2[i].
+inline XMVECTOR XMVectorNegativeMultiplySubtract
+(
+ FXMVECTOR V1,
+ FXMVECTOR V2,
+ FXMVECTOR V3
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+
+ XMVECTOR vResult = {
+ V3.vector4_f32[0] - (V1.vector4_f32[0] * V2.vector4_f32[0]),
+ V3.vector4_f32[1] - (V1.vector4_f32[1] * V2.vector4_f32[1]),
+ V3.vector4_f32[2] - (V1.vector4_f32[2] * V2.vector4_f32[2]),
+ V3.vector4_f32[3] - (V1.vector4_f32[3] * V2.vector4_f32[3])
+ };
+ return vResult;
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ return vmlsq_f32( V3, V1, V2 );
+#elif defined(_XM_SSE_INTRINSICS_)
+ XMVECTOR R = _mm_mul_ps( V1, V2 );
+ return _mm_sub_ps( V3, R );
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Multiply each component of V by the scalar ScaleFactor:
+// Result[i] = V[i] * ScaleFactor.
+inline XMVECTOR XMVectorScale
+(
+ FXMVECTOR V,
+ float ScaleFactor
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+ XMVECTOR vResult = {
+ V.vector4_f32[0] * ScaleFactor,
+ V.vector4_f32[1] * ScaleFactor,
+ V.vector4_f32[2] * ScaleFactor,
+ V.vector4_f32[3] * ScaleFactor
+ };
+ return vResult;
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ return vmulq_n_f32( V, ScaleFactor );
+#elif defined(_XM_SSE_INTRINSICS_)
+ // Splat the scalar across all four lanes, then multiply
+ XMVECTOR vResult = _mm_set_ps1(ScaleFactor);
+ return _mm_mul_ps(vResult,V);
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Estimate the reciprocal of each component of V: Result[i] ~= 1 / V[i].
+// The scalar path is exact; NEON/SSE use low-precision hardware estimates
+// (roughly 8-12 bits of mantissa). Use XMVectorReciprocal for full precision.
+inline XMVECTOR XMVectorReciprocalEst
+(
+ FXMVECTOR V
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+ XMVECTOR Result;
+ Result.vector4_f32[0] = 1.f / V.vector4_f32[0];
+ Result.vector4_f32[1] = 1.f / V.vector4_f32[1];
+ Result.vector4_f32[2] = 1.f / V.vector4_f32[2];
+ Result.vector4_f32[3] = 1.f / V.vector4_f32[3];
+ return Result;
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ return vrecpeq_f32(V);
+#elif defined(_XM_SSE_INTRINSICS_)
+ return _mm_rcp_ps(V);
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Compute the reciprocal of each component of V: Result[i] = 1 / V[i].
+// NEON refines the hardware estimate with two Newton-Raphson iterations;
+// SSE performs a true divide.
+inline XMVECTOR XMVectorReciprocal
+(
+ FXMVECTOR V
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+ XMVECTOR Result;
+ Result.vector4_f32[0] = 1.f / V.vector4_f32[0];
+ Result.vector4_f32[1] = 1.f / V.vector4_f32[1];
+ Result.vector4_f32[2] = 1.f / V.vector4_f32[2];
+ Result.vector4_f32[3] = 1.f / V.vector4_f32[3];
+ return Result;
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ // 2 iterations of Newton-Raphson refinement
+ __n128 Reciprocal = vrecpeq_f32(V);
+ __n128 S = vrecpsq_f32( Reciprocal, V );
+ Reciprocal = vmulq_f32( S, Reciprocal );
+ S = vrecpsq_f32( Reciprocal, V );
+ return vmulq_f32( S, Reciprocal );
+#elif defined(_XM_SSE_INTRINSICS_)
+ return _mm_div_ps(g_XMOne,V);
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+// Return an estimated square root
+//------------------------------------------------------------------------------
+// Return an estimated square root of each component of V.
+// NEON computes sqrt(x) as x * rsqrt(x) with one refinement step; the final
+// select preserves V for the 0 and +INF lanes, where x*rsqrt(x) would yield
+// NaN (0*INF / INF*0) instead of the correct 0 / INF.
+inline XMVECTOR XMVectorSqrtEst
+(
+ FXMVECTOR V
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+ XMVECTOR Result;
+ Result.vector4_f32[0] = sqrtf( V.vector4_f32[0] );
+ Result.vector4_f32[1] = sqrtf( V.vector4_f32[1] );
+ Result.vector4_f32[2] = sqrtf( V.vector4_f32[2] );
+ Result.vector4_f32[3] = sqrtf( V.vector4_f32[3] );
+ return Result;
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ // 1 iteration of Newton-Raphson refinement of sqrt
+ __n128 S0 = vrsqrteq_f32(V);
+ __n128 P0 = vmulq_f32( V, S0 );
+ __n128 R0 = vrsqrtsq_f32( P0, S0 );
+ __n128 S1 = vmulq_f32( S0, R0 );
+
+ XMVECTOR VEqualsInfinity = XMVectorEqualInt(V, g_XMInfinity.v);
+ XMVECTOR VEqualsZero = XMVectorEqual(V, vdupq_n_f32(0) );
+ __n128 Result = vmulq_f32( V, S1 );
+ XMVECTOR Select = XMVectorEqualInt(VEqualsInfinity, VEqualsZero);
+ return XMVectorSelect(V, Result, Select);
+#elif defined(_XM_SSE_INTRINSICS_)
+ return _mm_sqrt_ps(V);
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Compute the square root of each component of V.
+// NEON computes sqrt(x) as x * rsqrt(x) with three refinement steps; the
+// final select preserves V for the 0 and +INF lanes, where x*rsqrt(x) would
+// yield NaN instead of the correct 0 / INF. SSE has a native sqrt.
+inline XMVECTOR XMVectorSqrt
+(
+ FXMVECTOR V
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+ XMVECTOR Result;
+ Result.vector4_f32[0] = sqrtf( V.vector4_f32[0] );
+ Result.vector4_f32[1] = sqrtf( V.vector4_f32[1] );
+ Result.vector4_f32[2] = sqrtf( V.vector4_f32[2] );
+ Result.vector4_f32[3] = sqrtf( V.vector4_f32[3] );
+ return Result;
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ // 3 iterations of Newton-Raphson refinement of sqrt
+ __n128 S0 = vrsqrteq_f32(V);
+ __n128 P0 = vmulq_f32( V, S0 );
+ __n128 R0 = vrsqrtsq_f32( P0, S0 );
+ __n128 S1 = vmulq_f32( S0, R0 );
+ __n128 P1 = vmulq_f32( V, S1 );
+ __n128 R1 = vrsqrtsq_f32( P1, S1 );
+ __n128 S2 = vmulq_f32( S1, R1 );
+ __n128 P2 = vmulq_f32( V, S2 );
+ __n128 R2 = vrsqrtsq_f32( P2, S2 );
+ __n128 S3 = vmulq_f32( S2, R2 );
+
+ XMVECTOR VEqualsInfinity = XMVectorEqualInt(V, g_XMInfinity.v);
+ XMVECTOR VEqualsZero = XMVectorEqual(V, vdupq_n_f32(0) );
+ __n128 Result = vmulq_f32( V, S3 );
+ XMVECTOR Select = XMVectorEqualInt(VEqualsInfinity, VEqualsZero);
+ return XMVectorSelect(V, Result, Select);
+#elif defined(_XM_SSE_INTRINSICS_)
+ return _mm_sqrt_ps(V);
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Estimate the reciprocal square root of each component: Result[i] ~= 1/sqrt(V[i]).
+// The scalar path is exact; NEON/SSE use low-precision hardware estimates.
+// Use XMVectorReciprocalSqrt for full precision.
+inline XMVECTOR XMVectorReciprocalSqrtEst
+(
+ FXMVECTOR V
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+ XMVECTOR Result;
+ Result.vector4_f32[0] = 1.f / sqrtf( V.vector4_f32[0] );
+ Result.vector4_f32[1] = 1.f / sqrtf( V.vector4_f32[1] );
+ Result.vector4_f32[2] = 1.f / sqrtf( V.vector4_f32[2] );
+ Result.vector4_f32[3] = 1.f / sqrtf( V.vector4_f32[3] );
+ return Result;
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ return vrsqrteq_f32(V);
+#elif defined(_XM_SSE_INTRINSICS_)
+ return _mm_rsqrt_ps(V);
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Compute the reciprocal square root of each component: Result[i] = 1/sqrt(V[i]).
+// NEON refines the hardware rsqrt estimate with two Newton-Raphson iterations;
+// SSE computes sqrt then divides (full precision).
+inline XMVECTOR XMVectorReciprocalSqrt
+(
+ FXMVECTOR V
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+ XMVECTOR Result;
+ Result.vector4_f32[0] = 1.f / sqrtf( V.vector4_f32[0] );
+ Result.vector4_f32[1] = 1.f / sqrtf( V.vector4_f32[1] );
+ Result.vector4_f32[2] = 1.f / sqrtf( V.vector4_f32[2] );
+ Result.vector4_f32[3] = 1.f / sqrtf( V.vector4_f32[3] );
+ return Result;
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ // 2 iterations of Newton-Raphson refinement of reciprocal sqrt
+ __n128 S0 = vrsqrteq_f32(V);
+
+ __n128 P0 = vmulq_f32( V, S0 );
+ __n128 R0 = vrsqrtsq_f32( P0, S0 );
+
+ __n128 S1 = vmulq_f32( S0, R0 );
+ __n128 P1 = vmulq_f32( V, S1 );
+ __n128 R1 = vrsqrtsq_f32( P1, S1 );
+
+ return vmulq_f32( S1, R1 );
+#elif defined(_XM_SSE_INTRINSICS_)
+ XMVECTOR vResult = _mm_sqrt_ps(V);
+ vResult = _mm_div_ps(g_XMOne,vResult);
+ return vResult;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+
+//------------------------------------------------------------------------------
+
+// Compute the BASE-2 exponential of each component: Result[i] = 2^V[i].
+// (Note: base 2, not e.) All paths evaluate powf per lane; the SSE path
+// spills to an aligned array because there is no vector pow instruction.
+inline XMVECTOR XMVectorExp
+(
+ FXMVECTOR V
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+
+ XMVECTOR Result;
+ Result.vector4_f32[0] = powf(2.0f, V.vector4_f32[0]);
+ Result.vector4_f32[1] = powf(2.0f, V.vector4_f32[1]);
+ Result.vector4_f32[2] = powf(2.0f, V.vector4_f32[2]);
+ Result.vector4_f32[3] = powf(2.0f, V.vector4_f32[3]);
+ return Result;
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ XMVECTORF32 vResult = {
+ powf(2.0f,vgetq_lane_f32(V, 0)),
+ powf(2.0f,vgetq_lane_f32(V, 1)),
+ powf(2.0f,vgetq_lane_f32(V, 2)),
+ powf(2.0f,vgetq_lane_f32(V, 3))
+ };
+ return vResult;
+#elif defined(_XM_SSE_INTRINSICS_)
+ __declspec(align(16)) float a[4];
+ _mm_store_ps( a, V );
+ XMVECTOR vResult = _mm_setr_ps(
+ powf(2.0f,a[0]),
+ powf(2.0f,a[1]),
+ powf(2.0f,a[2]),
+ powf(2.0f,a[3]));
+ return vResult;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+
+//------------------------------------------------------------------------------
+
+// Compute the BASE-2 logarithm of each component: Result[i] = log2(V[i]).
+// (Note: base 2, not natural log.) Implemented as ln(x) * (1/ln(2)).
+inline XMVECTOR XMVectorLog
+(
+ FXMVECTOR V
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+
+ const float fScale = 1.4426950f; // (1.0f / logf(2.0f));
+
+ XMVECTOR Result;
+ Result.vector4_f32[0] = logf(V.vector4_f32[0])*fScale;
+ Result.vector4_f32[1] = logf(V.vector4_f32[1])*fScale;
+ Result.vector4_f32[2] = logf(V.vector4_f32[2])*fScale;
+ Result.vector4_f32[3] = logf(V.vector4_f32[3])*fScale;
+ return Result;
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ XMVECTOR vScale = vdupq_n_f32(1.0f / logf(2.0f));
+ XMVECTORF32 vResult = {
+ logf(vgetq_lane_f32(V, 0)),
+ logf(vgetq_lane_f32(V, 1)),
+ logf(vgetq_lane_f32(V, 2)),
+ logf(vgetq_lane_f32(V, 3))
+ };
+ return vmulq_f32( vResult, vScale );
+#elif defined(_XM_SSE_INTRINSICS_)
+ __declspec(align(16)) float a[4];
+ _mm_store_ps( a, V );
+ XMVECTOR vScale = _mm_set_ps1(1.0f / logf(2.0f));
+ XMVECTOR vResult = _mm_setr_ps(
+ logf(a[0]),
+ logf(a[1]),
+ logf(a[2]),
+ logf(a[3]));
+ vResult = _mm_mul_ps(vResult,vScale);
+ return vResult;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+
+//------------------------------------------------------------------------------
+
+// Component-wise power: Result[i] = V1[i] ^ V2[i] (powf semantics per lane).
+// No vector pow instruction exists, so all paths call powf per component.
+inline XMVECTOR XMVectorPow
+(
+ FXMVECTOR V1,
+ FXMVECTOR V2
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+
+ XMVECTOR Result;
+ Result.vector4_f32[0] = powf(V1.vector4_f32[0], V2.vector4_f32[0]);
+ Result.vector4_f32[1] = powf(V1.vector4_f32[1], V2.vector4_f32[1]);
+ Result.vector4_f32[2] = powf(V1.vector4_f32[2], V2.vector4_f32[2]);
+ Result.vector4_f32[3] = powf(V1.vector4_f32[3], V2.vector4_f32[3]);
+ return Result;
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ XMVECTORF32 vResult = {
+ powf(vgetq_lane_f32(V1, 0), vgetq_lane_f32(V2, 0)),
+ powf(vgetq_lane_f32(V1, 1), vgetq_lane_f32(V2, 1)),
+ powf(vgetq_lane_f32(V1, 2), vgetq_lane_f32(V2, 2)),
+ powf(vgetq_lane_f32(V1, 3), vgetq_lane_f32(V2, 3))
+ };
+ return vResult;
+#elif defined(_XM_SSE_INTRINSICS_)
+ __declspec(align(16)) float a[4];
+ __declspec(align(16)) float b[4];
+ _mm_store_ps( a, V1 );
+ _mm_store_ps( b, V2 );
+ XMVECTOR vResult = _mm_setr_ps(
+ powf(a[0],b[0]),
+ powf(a[1],b[1]),
+ powf(a[2],b[2]),
+ powf(a[3],b[3]));
+ return vResult;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Compute the absolute value of each component: Result[i] = |V[i]|.
+// SSE has no abs instruction; max(-x, x) yields |x| per lane.
+inline XMVECTOR XMVectorAbs
+(
+ FXMVECTOR V
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+ XMVECTOR vResult = {
+ fabsf(V.vector4_f32[0]),
+ fabsf(V.vector4_f32[1]),
+ fabsf(V.vector4_f32[2]),
+ fabsf(V.vector4_f32[3])
+ };
+ return vResult;
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ return vabsq_f32( V );
+#elif defined(_XM_SSE_INTRINSICS_)
+ // |x| = max(0 - x, x)
+ XMVECTOR vResult = _mm_setzero_ps();
+ vResult = _mm_sub_ps(vResult,V);
+ vResult = _mm_max_ps(vResult,V);
+ return vResult;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Component-wise floating-point remainder with a truncated quotient:
+// Result[i] = V1[i] - V2[i] * trunc(V1[i] / V2[i]).
+// The result therefore has the sign of V1 (fmod-style, not modulo-style).
+inline XMVECTOR XMVectorMod
+(
+ FXMVECTOR V1,
+ FXMVECTOR V2
+)
+{
+ // V1 % V2 = V1 - V2 * truncate(V1 / V2)
+
+#if defined(_XM_NO_INTRINSICS_)
+
+ XMVECTOR Quotient = XMVectorDivide(V1, V2);
+ Quotient = XMVectorTruncate(Quotient);
+ XMVECTOR Result = XMVectorNegativeMultiplySubtract(V2, Quotient, V1);
+ return Result;
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ XMVECTOR vResult = XMVectorDivide(V1, V2);
+ vResult = XMVectorTruncate(vResult);
+ return vmlsq_f32( V1, vResult, V2 );
+#elif defined(_XM_SSE_INTRINSICS_)
+ XMVECTOR vResult = _mm_div_ps(V1, V2);
+ vResult = XMVectorTruncate(vResult);
+ vResult = _mm_mul_ps(vResult,V2);
+ vResult = _mm_sub_ps(V1,vResult);
+ return vResult;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Reduce each angle (radians) into the range [-XM_PI, XM_PI):
+// Result = Angles - 2Pi * round(Angles / 2Pi).
+inline XMVECTOR XMVectorModAngles
+(
+ FXMVECTOR Angles
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+
+ XMVECTOR V;
+ XMVECTOR Result;
+
+ // Modulo the range of the given angles such that -XM_PI <= Angles < XM_PI
+ V = XMVectorMultiply(Angles, g_XMReciprocalTwoPi.v);
+ V = XMVectorRound(V);
+ Result = XMVectorNegativeMultiplySubtract(g_XMTwoPi.v, V, Angles);
+ return Result;
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ // Modulo the range of the given angles such that -XM_PI <= Angles < XM_PI
+ XMVECTOR vResult = vmulq_f32(Angles,g_XMReciprocalTwoPi);
+ // Use the inline function due to complexity for rounding
+ vResult = XMVectorRound(vResult);
+ return vmlsq_f32( Angles, vResult, g_XMTwoPi );
+#elif defined(_XM_SSE_INTRINSICS_)
+ // Modulo the range of the given angles such that -XM_PI <= Angles < XM_PI
+ XMVECTOR vResult = _mm_mul_ps(Angles,g_XMReciprocalTwoPi);
+ // Use the inline function due to complexity for rounding
+ vResult = XMVectorRound(vResult);
+ vResult = _mm_mul_ps(vResult,g_XMTwoPi);
+ vResult = _mm_sub_ps(Angles,vResult);
+ return vResult;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Compute the sine of each component of V (angles in radians).
+// SIMD paths range-reduce to [-Pi, Pi], fold into [-Pi/2, Pi/2] using the
+// identity sin(Pi - x) = sin(x), then evaluate an 11-degree minimax
+// polynomial in x^2 (Horner form, coefficients from g_XMSinCoefficients0/1).
+inline XMVECTOR XMVectorSin
+(
+ FXMVECTOR V
+)
+{
+ // 11-degree minimax approximation
+
+#if defined(_XM_NO_INTRINSICS_)
+ XMVECTOR Result;
+ Result.vector4_f32[0] = XMScalarSin( V.vector4_f32[0] );
+ Result.vector4_f32[1] = XMScalarSin( V.vector4_f32[1] );
+ Result.vector4_f32[2] = XMScalarSin( V.vector4_f32[2] );
+ Result.vector4_f32[3] = XMScalarSin( V.vector4_f32[3] );
+ return Result;
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ // Force the value within the bounds of pi
+ XMVECTOR x = XMVectorModAngles(V);
+
+ // Map in [-pi/2,pi/2] with sin(y) = sin(x).
+ __n128 sign = vandq_u32(x, g_XMNegativeZero);
+ __n128 c = vorrq_u32(g_XMPi, sign); // pi when x >= 0, -pi when x < 0
+ __n128 absx = vabsq_f32( x );
+ __n128 rflx = vsubq_f32(c, x);
+ __n128 comp = vcleq_f32(absx, g_XMHalfPi);
+ x = vbslq_f32( comp, x, rflx );
+
+ __n128 x2 = vmulq_f32(x, x);
+
+ // Compute polynomial approximation
+ const XMVECTOR SC1 = g_XMSinCoefficients1;
+ XMVECTOR Result = vdupq_lane_f32(vget_low_f32(SC1), 0);
+
+ const XMVECTOR SC0 = g_XMSinCoefficients0;
+ XMVECTOR vConstants = vdupq_lane_f32(vget_high_f32(SC0), 1);
+ Result = vmlaq_f32(vConstants, Result, x2);
+
+ vConstants = vdupq_lane_f32(vget_high_f32(SC0), 0);
+ Result = vmlaq_f32(vConstants, Result, x2);
+
+ vConstants = vdupq_lane_f32(vget_low_f32(SC0), 1);
+ Result = vmlaq_f32(vConstants, Result, x2);
+
+ vConstants = vdupq_lane_f32(vget_low_f32(SC0), 0);
+ Result = vmlaq_f32(vConstants, Result, x2);
+
+ Result = vmlaq_f32(g_XMOne, Result, x2);
+ Result = vmulq_f32(Result, x);
+ return Result;
+#elif defined(_XM_SSE_INTRINSICS_)
+ // Force the value within the bounds of pi
+ XMVECTOR x = XMVectorModAngles(V);
+
+ // Map in [-pi/2,pi/2] with sin(y) = sin(x).
+ __m128 sign = _mm_and_ps(x, g_XMNegativeZero);
+ __m128 c = _mm_or_ps(g_XMPi, sign); // pi when x >= 0, -pi when x < 0
+ __m128 absx = _mm_andnot_ps(sign, x); // |x|
+ __m128 rflx = _mm_sub_ps(c, x);
+ __m128 comp = _mm_cmple_ps(absx, g_XMHalfPi);
+ __m128 select0 = _mm_and_ps(comp, x);
+ __m128 select1 = _mm_andnot_ps(comp, rflx);
+ x = _mm_or_ps(select0, select1);
+
+ __m128 x2 = _mm_mul_ps(x, x);
+
+ // Compute polynomial approximation (Horner's method in x^2)
+ const XMVECTOR SC1 = g_XMSinCoefficients1;
+ XMVECTOR vConstants = XM_PERMUTE_PS( SC1, _MM_SHUFFLE(0, 0, 0, 0) );
+ __m128 Result = _mm_mul_ps(vConstants, x2);
+
+ const XMVECTOR SC0 = g_XMSinCoefficients0;
+ vConstants = XM_PERMUTE_PS( SC0, _MM_SHUFFLE(3, 3, 3, 3) );
+ Result = _mm_add_ps(Result, vConstants);
+ Result = _mm_mul_ps(Result, x2);
+
+ vConstants = XM_PERMUTE_PS( SC0, _MM_SHUFFLE(2, 2, 2, 2) );
+ Result = _mm_add_ps(Result, vConstants);
+ Result = _mm_mul_ps(Result, x2);
+
+ vConstants = XM_PERMUTE_PS( SC0, _MM_SHUFFLE(1, 1, 1, 1) );
+ Result = _mm_add_ps(Result, vConstants);
+ Result = _mm_mul_ps(Result, x2);
+
+ vConstants = XM_PERMUTE_PS( SC0, _MM_SHUFFLE(0, 0, 0, 0) );
+ Result = _mm_add_ps(Result, vConstants);
+ Result = _mm_mul_ps(Result, x2);
+ Result = _mm_add_ps(Result, g_XMOne);
+ Result = _mm_mul_ps(Result, x);
+ return Result;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
// Computes the per-component cosine of V (angles in radians).
// Strategy: reduce the angle to [-pi, pi], reflect into [-pi/2, pi/2]
// using cos(pi - x) = -cos(x) (the sign flip is carried in 'sign'),
// then evaluate a 10-degree even minimax polynomial in x^2.
inline XMVECTOR XMVectorCos
(
    FXMVECTOR V
)
{
    // 10-degree minimax approximation

#if defined(_XM_NO_INTRINSICS_)
    // Scalar fallback: evaluate one lane at a time.
    XMVECTOR Result;
    Result.vector4_f32[0] = XMScalarCos( V.vector4_f32[0] );
    Result.vector4_f32[1] = XMScalarCos( V.vector4_f32[1] );
    Result.vector4_f32[2] = XMScalarCos( V.vector4_f32[2] );
    Result.vector4_f32[3] = XMScalarCos( V.vector4_f32[3] );
    return Result;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    // Map V to x in [-pi,pi].
    XMVECTOR x = XMVectorModAngles(V);

    // Map in [-pi/2,pi/2] with cos(y) = sign*cos(x).
    __n128 sign = vandq_u32(x, g_XMNegativeZero);
    __n128 c = vorrq_u32(g_XMPi, sign); // pi when x >= 0, -pi when x < 0
    __n128 absx = vabsq_f32( x );
    __n128 rflx = vsubq_f32(c, x);
    __n128 comp = vcleq_f32(absx, g_XMHalfPi);
    x = vbslq_f32( comp, x, rflx );
    // +1 for unreflected lanes, -1 for reflected lanes (cos flips sign).
    sign = vbslq_f32( comp, g_XMOne, g_XMNegativeOne );

    __n128 x2 = vmulq_f32(x, x);

    // Compute polynomial approximation
    // Horner evaluation in x^2 (cosine uses only even powers).
    const XMVECTOR CC1 = g_XMCosCoefficients1;
    XMVECTOR Result = vdupq_lane_f32(vget_low_f32(CC1), 0);

    const XMVECTOR CC0 = g_XMCosCoefficients0;
    XMVECTOR vConstants = vdupq_lane_f32(vget_high_f32(CC0), 1);
    Result = vmlaq_f32(vConstants, Result, x2);

    vConstants = vdupq_lane_f32(vget_high_f32(CC0), 0);
    Result = vmlaq_f32(vConstants, Result, x2);

    vConstants = vdupq_lane_f32(vget_low_f32(CC0), 1);
    Result = vmlaq_f32(vConstants, Result, x2);

    vConstants = vdupq_lane_f32(vget_low_f32(CC0), 0);
    Result = vmlaq_f32(vConstants, Result, x2);

    Result = vmlaq_f32(g_XMOne, Result, x2);
    Result = vmulq_f32(Result, sign);
    return Result;
#elif defined(_XM_SSE_INTRINSICS_)
    // Map V to x in [-pi,pi].
    XMVECTOR x = XMVectorModAngles(V);

    // Map in [-pi/2,pi/2] with cos(y) = sign*cos(x).
    XMVECTOR sign = _mm_and_ps(x, g_XMNegativeZero);
    __m128 c = _mm_or_ps(g_XMPi, sign); // pi when x >= 0, -pi when x < 0
    __m128 absx = _mm_andnot_ps(sign, x); // |x|
    __m128 rflx = _mm_sub_ps(c, x);
    __m128 comp = _mm_cmple_ps(absx, g_XMHalfPi);
    // Branchless select of x vs. its reflection.
    __m128 select0 = _mm_and_ps(comp, x);
    __m128 select1 = _mm_andnot_ps(comp, rflx);
    x = _mm_or_ps(select0, select1);
    // +1 for unreflected lanes, -1 for reflected lanes.
    select0 = _mm_and_ps(comp, g_XMOne);
    select1 = _mm_andnot_ps(comp, g_XMNegativeOne);
    sign = _mm_or_ps(select0, select1);

    __m128 x2 = _mm_mul_ps(x, x);

    // Compute polynomial approximation
    // Horner evaluation in x^2, coefficients broadcast one lane at a time.
    const XMVECTOR CC1 = g_XMCosCoefficients1;
    XMVECTOR vConstants = XM_PERMUTE_PS( CC1, _MM_SHUFFLE(0, 0, 0, 0) );
    __m128 Result = _mm_mul_ps(vConstants, x2);

    const XMVECTOR CC0 = g_XMCosCoefficients0;
    vConstants = XM_PERMUTE_PS( CC0, _MM_SHUFFLE(3, 3, 3, 3) );
    Result = _mm_add_ps(Result, vConstants);
    Result = _mm_mul_ps(Result, x2);

    vConstants = XM_PERMUTE_PS( CC0, _MM_SHUFFLE(2, 2, 2, 2) );
    Result = _mm_add_ps(Result, vConstants);
    Result = _mm_mul_ps(Result, x2);

    vConstants = XM_PERMUTE_PS( CC0, _MM_SHUFFLE(1, 1, 1, 1) );
    Result = _mm_add_ps(Result, vConstants);
    Result = _mm_mul_ps(Result, x2);

    vConstants = XM_PERMUTE_PS( CC0, _MM_SHUFFLE(0, 0, 0, 0) );
    Result = _mm_add_ps(Result, vConstants);
    Result = _mm_mul_ps(Result, x2);
    Result = _mm_add_ps(Result, g_XMOne);
    Result = _mm_mul_ps(Result, sign);
    return Result;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
+//------------------------------------------------------------------------------
+
// Computes the per-component sine and cosine of V (angles in radians)
// in a single pass, sharing the angle reduction between the two.
// pSin / pCos receive the results; both pointers must be non-null.
// Sine uses an 11-degree and cosine a 10-degree minimax polynomial,
// both evaluated at the reduced angle x in [-pi/2, pi/2] with
// sin(y) = sin(x) and cos(y) = sign*cos(x).
_Use_decl_annotations_
inline void XMVectorSinCos
(
    XMVECTOR* pSin,
    XMVECTOR* pCos,
    FXMVECTOR V
)
{
    assert(pSin != NULL);
    assert(pCos != NULL);

    // 11/10-degree minimax approximation

#if defined(_XM_NO_INTRINSICS_)
    // Scalar fallback: evaluate one lane at a time.
    XMVECTOR Sin;
    XMVECTOR Cos;

    XMScalarSinCos(&Sin.vector4_f32[0], &Cos.vector4_f32[0], V.vector4_f32[0]);
    XMScalarSinCos(&Sin.vector4_f32[1], &Cos.vector4_f32[1], V.vector4_f32[1]);
    XMScalarSinCos(&Sin.vector4_f32[2], &Cos.vector4_f32[2], V.vector4_f32[2]);
    XMScalarSinCos(&Sin.vector4_f32[3], &Cos.vector4_f32[3], V.vector4_f32[3]);

    *pSin = Sin;
    *pCos = Cos;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    // Force the value within the bounds of pi
    XMVECTOR x = XMVectorModAngles(V);

    // Map in [-pi/2,pi/2] with cos(y) = sign*cos(x).
    __n128 sign = vandq_u32(x, g_XMNegativeZero);
    __n128 c = vorrq_u32(g_XMPi, sign); // pi when x >= 0, -pi when x < 0
    __n128 absx = vabsq_f32( x );
    __n128 rflx = vsubq_f32(c, x);
    __n128 comp = vcleq_f32(absx, g_XMHalfPi);
    x = vbslq_f32( comp, x, rflx );
    // +1 for unreflected lanes, -1 where the reflection flips cosine's sign.
    sign = vbslq_f32( comp, g_XMOne, g_XMNegativeOne );

    __n128 x2 = vmulq_f32(x, x);

    // Compute polynomial approximation for sine
    // (Horner in x^2, then multiply by x for the odd powers.)
    const XMVECTOR SC1 = g_XMSinCoefficients1;
    XMVECTOR Result = vdupq_lane_f32(vget_low_f32(SC1), 0);

    const XMVECTOR SC0 = g_XMSinCoefficients0;
    XMVECTOR vConstants = vdupq_lane_f32(vget_high_f32(SC0), 1);
    Result = vmlaq_f32(vConstants, Result, x2);

    vConstants = vdupq_lane_f32(vget_high_f32(SC0), 0);
    Result = vmlaq_f32(vConstants, Result, x2);

    vConstants = vdupq_lane_f32(vget_low_f32(SC0), 1);
    Result = vmlaq_f32(vConstants, Result, x2);

    vConstants = vdupq_lane_f32(vget_low_f32(SC0), 0);
    Result = vmlaq_f32(vConstants, Result, x2);

    Result = vmlaq_f32(g_XMOne, Result, x2);
    *pSin = vmulq_f32(Result, x);

    // Compute polynomial approximation for cosine
    // (Horner in x^2; even powers only, then apply the reflection sign.)
    const XMVECTOR CC1 = g_XMCosCoefficients1;
    Result = vdupq_lane_f32(vget_low_f32(CC1), 0);

    const XMVECTOR CC0 = g_XMCosCoefficients0;
    vConstants = vdupq_lane_f32(vget_high_f32(CC0), 1);
    Result = vmlaq_f32(vConstants, Result, x2);

    vConstants = vdupq_lane_f32(vget_high_f32(CC0), 0);
    Result = vmlaq_f32(vConstants, Result, x2);

    vConstants = vdupq_lane_f32(vget_low_f32(CC0), 1);
    Result = vmlaq_f32(vConstants, Result, x2);

    vConstants = vdupq_lane_f32(vget_low_f32(CC0), 0);
    Result = vmlaq_f32(vConstants, Result, x2);

    Result = vmlaq_f32(g_XMOne, Result, x2);
    *pCos = vmulq_f32(Result, sign);
#elif defined(_XM_SSE_INTRINSICS_)
    // Force the value within the bounds of pi
    XMVECTOR x = XMVectorModAngles(V);

    // Map in [-pi/2,pi/2] with sin(y) = sin(x), cos(y) = sign*cos(x).
    XMVECTOR sign = _mm_and_ps(x, g_XMNegativeZero);
    __m128 c = _mm_or_ps(g_XMPi, sign); // pi when x >= 0, -pi when x < 0
    __m128 absx = _mm_andnot_ps(sign, x); // |x|
    __m128 rflx = _mm_sub_ps(c, x);
    __m128 comp = _mm_cmple_ps(absx, g_XMHalfPi);
    // Branchless select of x vs. its reflection.
    __m128 select0 = _mm_and_ps(comp, x);
    __m128 select1 = _mm_andnot_ps(comp, rflx);
    x = _mm_or_ps(select0, select1);
    // +1 for unreflected lanes, -1 for reflected lanes (cosine only).
    select0 = _mm_and_ps(comp, g_XMOne);
    select1 = _mm_andnot_ps(comp, g_XMNegativeOne);
    sign = _mm_or_ps(select0, select1);

    __m128 x2 = _mm_mul_ps(x, x);

    // Compute polynomial approximation of sine
    const XMVECTOR SC1 = g_XMSinCoefficients1;
    XMVECTOR vConstants = XM_PERMUTE_PS( SC1, _MM_SHUFFLE(0, 0, 0, 0) );
    __m128 Result = _mm_mul_ps(vConstants, x2);

    const XMVECTOR SC0 = g_XMSinCoefficients0;
    vConstants = XM_PERMUTE_PS( SC0, _MM_SHUFFLE(3, 3, 3, 3) );
    Result = _mm_add_ps(Result, vConstants);
    Result = _mm_mul_ps(Result, x2);

    vConstants = XM_PERMUTE_PS( SC0, _MM_SHUFFLE(2, 2, 2, 2) );
    Result = _mm_add_ps(Result, vConstants);
    Result = _mm_mul_ps(Result, x2);

    vConstants = XM_PERMUTE_PS( SC0, _MM_SHUFFLE(1, 1, 1, 1) );
    Result = _mm_add_ps(Result, vConstants);
    Result = _mm_mul_ps(Result, x2);

    vConstants = XM_PERMUTE_PS( SC0, _MM_SHUFFLE(0, 0, 0, 0) );
    Result = _mm_add_ps(Result, vConstants);
    Result = _mm_mul_ps(Result, x2);
    Result = _mm_add_ps(Result, g_XMOne);
    Result = _mm_mul_ps(Result, x);
    *pSin = Result;

    // Compute polynomial approximation of cosine
    const XMVECTOR CC1 = g_XMCosCoefficients1;
    vConstants = XM_PERMUTE_PS( CC1, _MM_SHUFFLE(0, 0, 0, 0) );
    Result = _mm_mul_ps(vConstants, x2);

    const XMVECTOR CC0 = g_XMCosCoefficients0;
    vConstants = XM_PERMUTE_PS( CC0, _MM_SHUFFLE(3, 3, 3, 3) );
    Result = _mm_add_ps(Result, vConstants);
    Result = _mm_mul_ps(Result, x2);

    vConstants = XM_PERMUTE_PS( CC0, _MM_SHUFFLE(2, 2, 2, 2) );
    Result = _mm_add_ps(Result, vConstants);
    Result = _mm_mul_ps(Result, x2);

    vConstants = XM_PERMUTE_PS( CC0, _MM_SHUFFLE(1, 1, 1, 1) );
    Result = _mm_add_ps(Result, vConstants);
    Result = _mm_mul_ps(Result, x2);

    vConstants = XM_PERMUTE_PS( CC0, _MM_SHUFFLE(0, 0, 0, 0) );
    Result = _mm_add_ps(Result, vConstants);
    Result = _mm_mul_ps(Result, x2);
    Result = _mm_add_ps(Result, g_XMOne);
    Result = _mm_mul_ps(Result, sign);
    *pCos = Result;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
+//------------------------------------------------------------------------------
+
// Computes the per-component tangent of V (angles in radians).
// Range reduction follows Cody & Waite: the angle is reduced by the
// nearest multiple of pi/2, with pi/2 split into a high part (C0) and a
// low correction part (C1) so the subtraction loses less precision.
// The reduced angle is then fed into a rational approximation N/D.
inline XMVECTOR XMVectorTan
(
    FXMVECTOR V
)
{
    // Cody and Waite algorithm to compute tangent.

#if defined(_XM_NO_INTRINSICS_)
    // Scalar fallback: evaluate one lane at a time.
    XMVECTOR Result;
    Result.vector4_f32[0] = tanf( V.vector4_f32[0] );
    Result.vector4_f32[1] = tanf( V.vector4_f32[1] );
    Result.vector4_f32[2] = tanf( V.vector4_f32[2] );
    Result.vector4_f32[3] = tanf( V.vector4_f32[3] );
    return Result;
#elif defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)

    static const XMVECTORF32 TanCoefficients0 = {1.0f, -4.667168334e-1f, 2.566383229e-2f, -3.118153191e-4f};
    static const XMVECTORF32 TanCoefficients1 = {4.981943399e-7f, -1.333835001e-1f, 3.424887824e-3f, -1.786170734e-5f};
    // x: high part of pi/2, y: low part of pi/2 (Cody-Waite split),
    // z: epsilon for the near-zero test, w: 2/pi.
    static const XMVECTORF32 TanConstants = {1.570796371f, 6.077100628e-11f, 0.000244140625f, 0.63661977228f /*2 / Pi*/ };
    static const XMVECTORU32 Mask = {0x1, 0x1, 0x1, 0x1};

    XMVECTOR TwoDivPi = XMVectorSplatW(TanConstants.v);

    XMVECTOR Zero = XMVectorZero();

    XMVECTOR C0 = XMVectorSplatX(TanConstants.v);
    XMVECTOR C1 = XMVectorSplatY(TanConstants.v);
    XMVECTOR Epsilon = XMVectorSplatZ(TanConstants.v);

    // VA = round(V * 2/pi): the (signed) count of quarter-turns.
    XMVECTOR VA = XMVectorMultiply(V, TwoDivPi);

    VA = XMVectorRound(VA);

    // VC = V - VA*C0 - VA*C1: the angle reduced into (-pi/4, pi/4],
    // subtracted in two steps for extra precision.
    XMVECTOR VC = XMVectorNegativeMultiplySubtract(VA, C0, V);

    XMVECTOR VB = XMVectorAbs(VA);

    VC = XMVectorNegativeMultiplySubtract(VA, C1, VC);

    // Convert |VA| to integers so the quadrant parity can be tested below.
#if defined(_XM_ARM_NEON_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
    VB = vcvtq_u32_f32( VB );
#elif defined(_XM_SSE_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
    reinterpret_cast<__m128i *>(&VB)[0] = _mm_cvttps_epi32(VB);
#else
    for (size_t i = 0; i < 4; i++)
    {
        VB.vector4_u32[i] = (uint32_t)VB.vector4_f32[i];
    }
#endif

    XMVECTOR VC2 = XMVectorMultiply(VC, VC);

    XMVECTOR T7 = XMVectorSplatW(TanCoefficients1.v);
    XMVECTOR T6 = XMVectorSplatZ(TanCoefficients1.v);
    XMVECTOR T4 = XMVectorSplatX(TanCoefficients1.v);
    XMVECTOR T3 = XMVectorSplatW(TanCoefficients0.v);
    XMVECTOR T5 = XMVectorSplatY(TanCoefficients1.v);
    XMVECTOR T2 = XMVectorSplatZ(TanCoefficients0.v);
    XMVECTOR T1 = XMVectorSplatY(TanCoefficients0.v);
    XMVECTOR T0 = XMVectorSplatX(TanCoefficients0.v);

    // Even quarter-turn count -> tan(VC); odd -> -cot(VC) = -D/N.
    XMVECTOR VBIsEven = XMVectorAndInt(VB, Mask.v);
    VBIsEven = XMVectorEqualInt(VBIsEven, Zero);

    // Evaluate numerator N and denominator D of the rational approximation
    // (interleaved to overlap the dependency chains).
    XMVECTOR N = XMVectorMultiplyAdd(VC2, T7, T6);
    XMVECTOR D = XMVectorMultiplyAdd(VC2, T4, T3);
    N = XMVectorMultiplyAdd(VC2, N, T5);
    D = XMVectorMultiplyAdd(VC2, D, T2);
    N = XMVectorMultiply(VC2, N);
    D = XMVectorMultiplyAdd(VC2, D, T1);
    N = XMVectorMultiplyAdd(VC, N, VC);
    XMVECTOR VCNearZero = XMVectorInBounds(VC, Epsilon);
    D = XMVectorMultiplyAdd(VC2, D, T0);

    // For tiny reduced angles use N = VC, D = 1 (tan(x) ~= x).
    N = XMVectorSelect(N, VC, VCNearZero);
    D = XMVectorSelect(D, g_XMOne.v, VCNearZero);

    // R1 = N/D for even quadrants; R0 = D/(-N) = -cot for odd quadrants.
    XMVECTOR R0 = XMVectorNegate(N);
    XMVECTOR R1 = XMVectorDivide(N,D);
    R0 = XMVectorDivide(D,R0);

    XMVECTOR VIsZero = XMVectorEqual(V, Zero);

    XMVECTOR Result = XMVectorSelect(R0, R1, VBIsEven);

    // tan(0) is exactly 0.
    Result = XMVectorSelect(Result, Zero, VIsZero);

    return Result;

#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
+//------------------------------------------------------------------------------
+
+inline XMVECTOR XMVectorSinH
+(
+ FXMVECTOR V
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+ XMVECTOR Result;
+ Result.vector4_f32[0] = sinhf( V.vector4_f32[0] );
+ Result.vector4_f32[1] = sinhf( V.vector4_f32[1] );
+ Result.vector4_f32[2] = sinhf( V.vector4_f32[2] );
+ Result.vector4_f32[3] = sinhf( V.vector4_f32[3] );
+ return Result;
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ static const XMVECTORF32 Scale = {1.442695040888963f, 1.442695040888963f, 1.442695040888963f, 1.442695040888963f}; // 1.0f / ln(2.0f)
+
+ XMVECTOR V1 = vmlaq_f32( g_XMNegativeOne.v, V, Scale.v );
+ XMVECTOR V2 = vmlsq_f32( g_XMNegativeOne.v, V, Scale.v );
+ XMVECTOR E1 = XMVectorExp(V1);
+ XMVECTOR E2 = XMVectorExp(V2);
+
+ return vsubq_f32(E1, E2);
+#elif defined(_XM_SSE_INTRINSICS_)
+ static const XMVECTORF32 Scale = {1.442695040888963f, 1.442695040888963f, 1.442695040888963f, 1.442695040888963f}; // 1.0f / ln(2.0f)
+
+ XMVECTOR V1 = _mm_mul_ps(V, Scale);
+ V1 = _mm_add_ps(V1,g_XMNegativeOne);
+ XMVECTOR V2 = _mm_mul_ps(V, Scale);
+ V2 = _mm_sub_ps(g_XMNegativeOne,V2);
+ XMVECTOR E1 = XMVectorExp(V1);
+ XMVECTOR E2 = XMVectorExp(V2);
+
+ return _mm_sub_ps(E1, E2);
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+inline XMVECTOR XMVectorCosH
+(
+ FXMVECTOR V
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+ XMVECTOR Result;
+ Result.vector4_f32[0] = coshf( V.vector4_f32[0] );
+ Result.vector4_f32[1] = coshf( V.vector4_f32[1] );
+ Result.vector4_f32[2] = coshf( V.vector4_f32[2] );
+ Result.vector4_f32[3] = coshf( V.vector4_f32[3] );
+ return Result;
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ static const XMVECTORF32 Scale = {1.442695040888963f, 1.442695040888963f, 1.442695040888963f, 1.442695040888963f}; // 1.0f / ln(2.0f)
+
+ XMVECTOR V1 = vmlaq_f32(g_XMNegativeOne.v, V, Scale.v);
+ XMVECTOR V2 = vmlsq_f32(g_XMNegativeOne.v, V, Scale.v);
+ XMVECTOR E1 = XMVectorExp(V1);
+ XMVECTOR E2 = XMVectorExp(V2);
+ return vaddq_f32(E1, E2);
+#elif defined(_XM_SSE_INTRINSICS_)
+ static const XMVECTORF32 Scale = {1.442695040888963f, 1.442695040888963f, 1.442695040888963f, 1.442695040888963f}; // 1.0f / ln(2.0f)
+
+ XMVECTOR V1 = _mm_mul_ps(V,Scale.v);
+ V1 = _mm_add_ps(V1,g_XMNegativeOne.v);
+ XMVECTOR V2 = _mm_mul_ps(V, Scale.v);
+ V2 = _mm_sub_ps(g_XMNegativeOne.v,V2);
+ XMVECTOR E1 = XMVectorExp(V1);
+ XMVECTOR E2 = XMVectorExp(V2);
+ return _mm_add_ps(E1, E2);
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+inline XMVECTOR XMVectorTanH
+(
+ FXMVECTOR V
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+ XMVECTOR Result;
+ Result.vector4_f32[0] = tanhf( V.vector4_f32[0] );
+ Result.vector4_f32[1] = tanhf( V.vector4_f32[1] );
+ Result.vector4_f32[2] = tanhf( V.vector4_f32[2] );
+ Result.vector4_f32[3] = tanhf( V.vector4_f32[3] );
+ return Result;
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ static const XMVECTORF32 Scale = {2.8853900817779268f, 2.8853900817779268f, 2.8853900817779268f, 2.8853900817779268f}; // 2.0f / ln(2.0f)
+
+ XMVECTOR E = vmulq_f32(V, Scale.v);
+ E = XMVectorExp(E);
+ E = vmlaq_f32( g_XMOneHalf.v, E, g_XMOneHalf.v );
+ E = XMVectorReciprocal(E);
+ return vsubq_f32(g_XMOne.v, E);
+#elif defined(_XM_SSE_INTRINSICS_)
+ static const XMVECTORF32 Scale = {2.8853900817779268f, 2.8853900817779268f, 2.8853900817779268f, 2.8853900817779268f}; // 2.0f / ln(2.0f)
+
+ XMVECTOR E = _mm_mul_ps(V, Scale.v);
+ E = XMVectorExp(E);
+ E = _mm_mul_ps(E,g_XMOneHalf.v);
+ E = _mm_add_ps(E,g_XMOneHalf.v);
+ E = _mm_div_ps(g_XMOne.v,E);
+ return _mm_sub_ps(g_XMOne.v,E);
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
// Computes the per-component arcsine of V (inputs in [-1, 1], results
// in [-pi/2, pi/2]). A 7-degree minimax polynomial in |V| is scaled by
// sqrt(1 - |V|) to approximate acos(|V|); the result is then reflected
// for negative inputs and converted via asin(v) = pi/2 - acos(v).
inline XMVECTOR XMVectorASin
(
    FXMVECTOR V
)
{
    // 7-degree minimax approximation

#if defined(_XM_NO_INTRINSICS_)
    // Scalar fallback: evaluate one lane at a time.
    XMVECTOR Result;
    Result.vector4_f32[0] = XMScalarASin( V.vector4_f32[0] );
    Result.vector4_f32[1] = XMScalarASin( V.vector4_f32[1] );
    Result.vector4_f32[2] = XMScalarASin( V.vector4_f32[2] );
    Result.vector4_f32[3] = XMScalarASin( V.vector4_f32[3] );
    return Result;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    __n128 nonnegative = vcgeq_f32(V, g_XMZero);
    __n128 x = vabsq_f32(V);

    // Compute (1-|V|), clamp to zero to avoid sqrt of negative number.
    __n128 oneMValue = vsubq_f32(g_XMOne, x);
    __n128 clampOneMValue = vmaxq_f32(g_XMZero, oneMValue);
    __n128 root = XMVectorSqrt(clampOneMValue);

    // Compute polynomial approximation
    // Horner evaluation in |V|, coefficients broadcast one lane at a time.
    const XMVECTOR AC1 = g_XMArcCoefficients1;
    __n128 t0 = vdupq_lane_f32(vget_high_f32(AC1), 1);

    XMVECTOR vConstants = vdupq_lane_f32(vget_high_f32(AC1), 0);
    t0 = vmlaq_f32( vConstants, t0, x );

    vConstants = vdupq_lane_f32(vget_low_f32(AC1), 1);
    t0 = vmlaq_f32( vConstants, t0, x );

    vConstants = vdupq_lane_f32(vget_low_f32(AC1), 0);
    t0 = vmlaq_f32( vConstants, t0, x );

    const XMVECTOR AC0 = g_XMArcCoefficients0;
    vConstants = vdupq_lane_f32(vget_high_f32(AC0), 1);
    t0 = vmlaq_f32( vConstants, t0, x );

    vConstants = vdupq_lane_f32(vget_high_f32(AC0), 0);
    t0 = vmlaq_f32( vConstants, t0, x );

    vConstants = vdupq_lane_f32(vget_low_f32(AC0), 1);
    t0 = vmlaq_f32( vConstants, t0, x );

    vConstants = vdupq_lane_f32(vget_low_f32(AC0), 0);
    t0 = vmlaq_f32( vConstants, t0, x );
    t0 = vmulq_f32(t0, root);

    // t0 ~= acos(|V|); reflect for negative inputs, then asin = pi/2 - acos.
    __n128 t1 = vsubq_f32(g_XMPi, t0);
    t0 = vbslq_f32( nonnegative, t0, t1 );
    t0 = vsubq_f32(g_XMHalfPi, t0);
    return t0;
#elif defined(_XM_SSE_INTRINSICS_)
    __m128 nonnegative = _mm_cmpge_ps(V, g_XMZero);
    __m128 mvalue = _mm_sub_ps(g_XMZero, V);
    __m128 x = _mm_max_ps(V, mvalue); // |V|

    // Compute (1-|V|), clamp to zero to avoid sqrt of negative number.
    __m128 oneMValue = _mm_sub_ps(g_XMOne, x);
    __m128 clampOneMValue = _mm_max_ps(g_XMZero, oneMValue);
    __m128 root = _mm_sqrt_ps(clampOneMValue); // sqrt(1-|V|)

    // Compute polynomial approximation
    // Horner evaluation in |V|, coefficients broadcast one lane at a time.
    const XMVECTOR AC1 = g_XMArcCoefficients1;
    XMVECTOR vConstants = XM_PERMUTE_PS( AC1, _MM_SHUFFLE(3, 3, 3, 3) );
    __m128 t0 = _mm_mul_ps(vConstants, x);

    vConstants = XM_PERMUTE_PS( AC1, _MM_SHUFFLE(2, 2, 2, 2) );
    t0 = _mm_add_ps(t0, vConstants);
    t0 = _mm_mul_ps(t0, x);

    vConstants = XM_PERMUTE_PS( AC1, _MM_SHUFFLE(1, 1, 1, 1) );
    t0 = _mm_add_ps(t0, vConstants);
    t0 = _mm_mul_ps(t0, x);

    vConstants = XM_PERMUTE_PS( AC1, _MM_SHUFFLE(0, 0, 0, 0) );
    t0 = _mm_add_ps(t0, vConstants);
    t0 = _mm_mul_ps(t0, x);

    const XMVECTOR AC0 = g_XMArcCoefficients0;
    vConstants = XM_PERMUTE_PS( AC0, _MM_SHUFFLE(3, 3, 3, 3) );
    t0 = _mm_add_ps(t0, vConstants);
    t0 = _mm_mul_ps(t0, x);

    vConstants = XM_PERMUTE_PS( AC0,_MM_SHUFFLE(2, 2, 2, 2) );
    t0 = _mm_add_ps(t0, vConstants);
    t0 = _mm_mul_ps(t0, x);

    vConstants = XM_PERMUTE_PS( AC0, _MM_SHUFFLE(1, 1, 1, 1) );
    t0 = _mm_add_ps(t0, vConstants);
    t0 = _mm_mul_ps(t0, x);

    vConstants = XM_PERMUTE_PS( AC0, _MM_SHUFFLE(0, 0, 0, 0) );
    t0 = _mm_add_ps(t0, vConstants);
    t0 = _mm_mul_ps(t0, root);

    // t0 ~= acos(|V|); select t0 for V >= 0, pi - t0 otherwise, then
    // convert: asin(v) = pi/2 - acos(v).
    __m128 t1 = _mm_sub_ps(g_XMPi, t0);
    t0 = _mm_and_ps(nonnegative, t0);
    t1 = _mm_andnot_ps(nonnegative, t1);
    t0 = _mm_or_ps(t0, t1);
    t0 = _mm_sub_ps(g_XMHalfPi, t0);
    return t0;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
+//------------------------------------------------------------------------------
+
// Computes the per-component arccosine of V (inputs in [-1, 1], results
// in [0, pi]). Same core approximation as XMVectorASin: a 7-degree
// minimax polynomial in |V| scaled by sqrt(1 - |V|) yields acos(|V|),
// and negative inputs are reflected with acos(-v) = pi - acos(v).
inline XMVECTOR XMVectorACos
(
    FXMVECTOR V
)
{
    // 7-degree minimax approximation

#if defined(_XM_NO_INTRINSICS_)
    // Scalar fallback: evaluate one lane at a time.
    XMVECTOR Result;
    Result.vector4_f32[0] = XMScalarACos( V.vector4_f32[0] );
    Result.vector4_f32[1] = XMScalarACos( V.vector4_f32[1] );
    Result.vector4_f32[2] = XMScalarACos( V.vector4_f32[2] );
    Result.vector4_f32[3] = XMScalarACos( V.vector4_f32[3] );
    return Result;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    __n128 nonnegative = vcgeq_f32(V, g_XMZero);
    __n128 x = vabsq_f32(V);

    // Compute (1-|V|), clamp to zero to avoid sqrt of negative number.
    __n128 oneMValue = vsubq_f32(g_XMOne, x);
    __n128 clampOneMValue = vmaxq_f32(g_XMZero, oneMValue);
    __n128 root = XMVectorSqrt(clampOneMValue);

    // Compute polynomial approximation
    // Horner evaluation in |V|, coefficients broadcast one lane at a time.
    const XMVECTOR AC1 = g_XMArcCoefficients1;
    __n128 t0 = vdupq_lane_f32(vget_high_f32(AC1), 1);

    XMVECTOR vConstants = vdupq_lane_f32(vget_high_f32(AC1), 0);
    t0 = vmlaq_f32( vConstants, t0, x );

    vConstants = vdupq_lane_f32(vget_low_f32(AC1), 1);
    t0 = vmlaq_f32( vConstants, t0, x );

    vConstants = vdupq_lane_f32(vget_low_f32(AC1), 0);
    t0 = vmlaq_f32( vConstants, t0, x );

    const XMVECTOR AC0 = g_XMArcCoefficients0;
    vConstants = vdupq_lane_f32(vget_high_f32(AC0), 1);
    t0 = vmlaq_f32( vConstants, t0, x );

    vConstants = vdupq_lane_f32(vget_high_f32(AC0), 0);
    t0 = vmlaq_f32( vConstants, t0, x );

    vConstants = vdupq_lane_f32(vget_low_f32(AC0), 1);
    t0 = vmlaq_f32( vConstants, t0, x );

    vConstants = vdupq_lane_f32(vget_low_f32(AC0), 0);
    t0 = vmlaq_f32( vConstants, t0, x );
    t0 = vmulq_f32(t0, root);

    // t0 ~= acos(|V|); reflect for negative inputs: acos(-v) = pi - acos(v).
    __n128 t1 = vsubq_f32(g_XMPi, t0);
    t0 = vbslq_f32( nonnegative, t0, t1 );
    return t0;
#elif defined(_XM_SSE_INTRINSICS_)
    __m128 nonnegative = _mm_cmpge_ps(V, g_XMZero);
    __m128 mvalue = _mm_sub_ps(g_XMZero, V);
    __m128 x = _mm_max_ps(V, mvalue); // |V|

    // Compute (1-|V|), clamp to zero to avoid sqrt of negative number.
    __m128 oneMValue = _mm_sub_ps(g_XMOne, x);
    __m128 clampOneMValue = _mm_max_ps(g_XMZero, oneMValue);
    __m128 root = _mm_sqrt_ps(clampOneMValue); // sqrt(1-|V|)

    // Compute polynomial approximation
    // Horner evaluation in |V|, coefficients broadcast one lane at a time.
    const XMVECTOR AC1 = g_XMArcCoefficients1;
    XMVECTOR vConstants = XM_PERMUTE_PS( AC1, _MM_SHUFFLE(3, 3, 3, 3) );
    __m128 t0 = _mm_mul_ps(vConstants, x);

    vConstants = XM_PERMUTE_PS( AC1, _MM_SHUFFLE(2, 2, 2, 2) );
    t0 = _mm_add_ps(t0, vConstants);
    t0 = _mm_mul_ps(t0, x);

    vConstants = XM_PERMUTE_PS( AC1, _MM_SHUFFLE(1, 1, 1, 1) );
    t0 = _mm_add_ps(t0, vConstants);
    t0 = _mm_mul_ps(t0, x);

    vConstants = XM_PERMUTE_PS( AC1, _MM_SHUFFLE(0, 0, 0, 0) );
    t0 = _mm_add_ps(t0, vConstants);
    t0 = _mm_mul_ps(t0, x);

    const XMVECTOR AC0 = g_XMArcCoefficients0;
    vConstants = XM_PERMUTE_PS( AC0, _MM_SHUFFLE(3, 3, 3, 3) );
    t0 = _mm_add_ps(t0, vConstants);
    t0 = _mm_mul_ps(t0, x);

    vConstants = XM_PERMUTE_PS( AC0, _MM_SHUFFLE(2, 2, 2, 2) );
    t0 = _mm_add_ps(t0, vConstants);
    t0 = _mm_mul_ps(t0, x);

    vConstants = XM_PERMUTE_PS( AC0, _MM_SHUFFLE(1, 1, 1, 1) );
    t0 = _mm_add_ps(t0, vConstants);
    t0 = _mm_mul_ps(t0, x);

    vConstants = XM_PERMUTE_PS( AC0, _MM_SHUFFLE(0, 0, 0, 0) );
    t0 = _mm_add_ps(t0, vConstants);
    t0 = _mm_mul_ps(t0, root);

    // Select t0 for V >= 0, pi - t0 otherwise (branchless).
    __m128 t1 = _mm_sub_ps(g_XMPi, t0);
    t0 = _mm_and_ps(nonnegative, t0);
    t1 = _mm_andnot_ps(nonnegative, t1);
    t0 = _mm_or_ps(t0, t1);
    return t0;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
+//------------------------------------------------------------------------------
+
// Computes the per-component arctangent of V (results in (-pi/2, pi/2)).
// A 17-degree minimax polynomial handles |V| <= 1 directly; for |V| > 1
// the argument is inverted and the identity
//   atan(v) = sign(v)*pi/2 - atan(1/v)
// is applied. 'sign' doubles as a flag: 0 marks lanes where the direct
// polynomial result is used unchanged.
inline XMVECTOR XMVectorATan
(
    FXMVECTOR V
)
{
    // 17-degree minimax approximation

#if defined(_XM_NO_INTRINSICS_)
    // Scalar fallback: evaluate one lane at a time.
    XMVECTOR Result;
    Result.vector4_f32[0] = atanf( V.vector4_f32[0] );
    Result.vector4_f32[1] = atanf( V.vector4_f32[1] );
    Result.vector4_f32[2] = atanf( V.vector4_f32[2] );
    Result.vector4_f32[3] = atanf( V.vector4_f32[3] );
    return Result;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    __n128 absV = vabsq_f32(V);
    __n128 invV = XMVectorReciprocal(V);
    // sign = +1 where V > 1, -1 where V < -1, 0 where |V| <= 1.
    __n128 comp = vcgtq_f32(V, g_XMOne);
    __n128 sign = vbslq_f32(comp, g_XMOne, g_XMNegativeOne);
    comp = vcleq_f32(absV, g_XMOne);
    sign = vbslq_f32(comp, g_XMZero, sign);
    // x = V when |V| <= 1, else 1/V.
    __n128 x = vbslq_f32(comp, V, invV);

    __n128 x2 = vmulq_f32(x, x);

    // Compute polynomial approximation
    // Horner evaluation in x^2; the trailing multiply by x restores the
    // odd powers of the arctangent series.
    const XMVECTOR TC1 = g_XMATanCoefficients1;
    __n128 Result = vdupq_lane_f32(vget_high_f32(TC1), 1);

    XMVECTOR vConstants = vdupq_lane_f32(vget_high_f32(TC1), 0);
    Result = vmlaq_f32( vConstants, Result, x2 );

    vConstants = vdupq_lane_f32(vget_low_f32(TC1), 1);
    Result = vmlaq_f32( vConstants, Result, x2 );

    vConstants = vdupq_lane_f32(vget_low_f32(TC1), 0);
    Result = vmlaq_f32( vConstants, Result, x2 );

    const XMVECTOR TC0 = g_XMATanCoefficients0;
    vConstants = vdupq_lane_f32(vget_high_f32(TC0), 1);
    Result = vmlaq_f32( vConstants, Result, x2 );

    vConstants = vdupq_lane_f32(vget_high_f32(TC0), 0);
    Result = vmlaq_f32( vConstants, Result, x2 );

    vConstants = vdupq_lane_f32(vget_low_f32(TC0), 1);
    Result = vmlaq_f32( vConstants, Result, x2 );

    vConstants = vdupq_lane_f32(vget_low_f32(TC0), 0);
    Result = vmlaq_f32( vConstants, Result, x2 );

    Result = vmlaq_f32( g_XMOne, Result, x2 );
    Result = vmulq_f32( Result, x );

    // result1 = sign*pi/2 - atan(1/V), used for the |V| > 1 lanes.
    __n128 result1 = vmulq_f32(sign, g_XMHalfPi);
    result1 = vsubq_f32(result1, Result);

    comp = vceqq_f32(sign, g_XMZero);
    Result = vbslq_f32( comp, Result, result1 );
    return Result;
#elif defined(_XM_SSE_INTRINSICS_)
    __m128 absV = XMVectorAbs(V);
    __m128 invV = _mm_div_ps(g_XMOne, V);
    // sign = +1 where V > 1, -1 where V < -1, 0 where |V| <= 1.
    __m128 comp = _mm_cmpgt_ps(V, g_XMOne);
    __m128 select0 = _mm_and_ps(comp, g_XMOne);
    __m128 select1 = _mm_andnot_ps(comp, g_XMNegativeOne);
    __m128 sign = _mm_or_ps(select0, select1);
    comp = _mm_cmple_ps(absV, g_XMOne);
    select0 = _mm_and_ps(comp, g_XMZero);
    select1 = _mm_andnot_ps(comp, sign);
    sign = _mm_or_ps(select0, select1);
    // x = V when |V| <= 1, else 1/V.
    select0 = _mm_and_ps(comp, V);
    select1 = _mm_andnot_ps(comp, invV);
    __m128 x = _mm_or_ps(select0, select1);

    __m128 x2 = _mm_mul_ps(x, x);

    // Compute polynomial approximation
    // Horner evaluation in x^2, coefficients broadcast one lane at a time.
    const XMVECTOR TC1 = g_XMATanCoefficients1;
    XMVECTOR vConstants = XM_PERMUTE_PS( TC1, _MM_SHUFFLE(3, 3, 3, 3) );
    __m128 Result = _mm_mul_ps(vConstants, x2);

    vConstants = XM_PERMUTE_PS( TC1, _MM_SHUFFLE(2, 2, 2, 2) );
    Result = _mm_add_ps(Result, vConstants);
    Result = _mm_mul_ps(Result, x2);

    vConstants = XM_PERMUTE_PS( TC1, _MM_SHUFFLE(1, 1, 1, 1) );
    Result = _mm_add_ps(Result, vConstants);
    Result = _mm_mul_ps(Result, x2);

    vConstants = XM_PERMUTE_PS( TC1, _MM_SHUFFLE(0, 0, 0, 0) );
    Result = _mm_add_ps(Result, vConstants);
    Result = _mm_mul_ps(Result, x2);

    const XMVECTOR TC0 = g_XMATanCoefficients0;
    vConstants = XM_PERMUTE_PS( TC0, _MM_SHUFFLE(3, 3, 3, 3) );
    Result = _mm_add_ps(Result, vConstants);
    Result = _mm_mul_ps(Result, x2);

    vConstants = XM_PERMUTE_PS( TC0, _MM_SHUFFLE(2, 2, 2, 2) );
    Result = _mm_add_ps(Result, vConstants);
    Result = _mm_mul_ps(Result, x2);

    vConstants = XM_PERMUTE_PS( TC0, _MM_SHUFFLE(1, 1, 1, 1) );
    Result = _mm_add_ps(Result, vConstants);
    Result = _mm_mul_ps(Result, x2);

    vConstants = XM_PERMUTE_PS( TC0, _MM_SHUFFLE(0, 0, 0, 0) );
    Result = _mm_add_ps(Result, vConstants);
    Result = _mm_mul_ps(Result, x2);
    Result = _mm_add_ps(Result, g_XMOne);
    Result = _mm_mul_ps(Result, x);
    // result1 = sign*pi/2 - atan(1/V), used for the |V| > 1 lanes.
    __m128 result1 = _mm_mul_ps(sign, g_XMHalfPi);
    result1 = _mm_sub_ps(result1, Result);

    // Keep the direct result where sign == 0 (|V| <= 1).
    comp = _mm_cmpeq_ps(sign, g_XMZero);
    select0 = _mm_and_ps(comp, Result);
    select1 = _mm_andnot_ps(comp, result1);
    Result = _mm_or_ps(select0, select1);
    return Result;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
+//------------------------------------------------------------------------------
+
+inline XMVECTOR XMVectorATan2
+(
+ FXMVECTOR Y,
+ FXMVECTOR X
+)
+{
+ // Return the inverse tangent of Y / X in the range of -Pi to Pi with the following exceptions:
+
+ // Y == 0 and X is Negative -> Pi with the sign of Y
+ // y == 0 and x is positive -> 0 with the sign of y
+ // Y != 0 and X == 0 -> Pi / 2 with the sign of Y
+ // Y != 0 and X is Negative -> atan(y/x) + (PI with the sign of Y)
+ // X == -Infinity and Finite Y -> Pi with the sign of Y
+ // X == +Infinity and Finite Y -> 0 with the sign of Y
+ // Y == Infinity and X is Finite -> Pi / 2 with the sign of Y
+ // Y == Infinity and X == -Infinity -> 3Pi / 4 with the sign of Y
+ // Y == Infinity and X == +Infinity -> Pi / 4 with the sign of Y
+
+ static const XMVECTORF32 ATan2Constants = {XM_PI, XM_PIDIV2, XM_PIDIV4, XM_PI * 3.0f / 4.0f};
+
+ XMVECTOR Zero = XMVectorZero();
+ XMVECTOR ATanResultValid = XMVectorTrueInt();
+
+ XMVECTOR Pi = XMVectorSplatX(ATan2Constants);
+ XMVECTOR PiOverTwo = XMVectorSplatY(ATan2Constants);
+ XMVECTOR PiOverFour = XMVectorSplatZ(ATan2Constants);
+ XMVECTOR ThreePiOverFour = XMVectorSplatW(ATan2Constants);
+
+ XMVECTOR YEqualsZero = XMVectorEqual(Y, Zero);
+ XMVECTOR XEqualsZero = XMVectorEqual(X, Zero);
+ XMVECTOR XIsPositive = XMVectorAndInt(X, g_XMNegativeZero.v);
+ XIsPositive = XMVectorEqualInt(XIsPositive, Zero);
+ XMVECTOR YEqualsInfinity = XMVectorIsInfinite(Y);
+ XMVECTOR XEqualsInfinity = XMVectorIsInfinite(X);
+
+ XMVECTOR YSign = XMVectorAndInt(Y, g_XMNegativeZero.v);
+ Pi = XMVectorOrInt(Pi, YSign);
+ PiOverTwo = XMVectorOrInt(PiOverTwo, YSign);
+ PiOverFour = XMVectorOrInt(PiOverFour, YSign);
+ ThreePiOverFour = XMVectorOrInt(ThreePiOverFour, YSign);
+
+ XMVECTOR R1 = XMVectorSelect(Pi, YSign, XIsPositive);
+ XMVECTOR R2 = XMVectorSelect(ATanResultValid, PiOverTwo, XEqualsZero);
+ XMVECTOR R3 = XMVectorSelect(R2, R1, YEqualsZero);
+ XMVECTOR R4 = XMVectorSelect(ThreePiOverFour, PiOverFour, XIsPositive);
+ XMVECTOR R5 = XMVectorSelect(PiOverTwo, R4, XEqualsInfinity);
+ XMVECTOR Result = XMVectorSelect(R3, R5, YEqualsInfinity);
+ ATanResultValid = XMVectorEqualInt(Result, ATanResultValid);
+
+ XMVECTOR V = XMVectorDivide(Y, X);
+
+ XMVECTOR R0 = XMVectorATan(V);
+
+ R1 = XMVectorSelect( Pi, Zero, XIsPositive );
+ R2 = XMVectorAdd(R0, R1);
+
+ return XMVectorSelect(Result, R2, ATanResultValid);
+}
+
+//------------------------------------------------------------------------------
+
// Fast, lower-precision estimate of the per-component sine of V
// (angles in radians). Same reduction as XMVectorSin, but the
// polynomial is only 7-degree, trading accuracy for fewer operations.
inline XMVECTOR XMVectorSinEst
(
    FXMVECTOR V
)
{
    // 7-degree minimax approximation

#if defined(_XM_NO_INTRINSICS_)
    // Scalar fallback: evaluate one lane at a time.
    XMVECTOR Result;
    Result.vector4_f32[0] = XMScalarSinEst( V.vector4_f32[0] );
    Result.vector4_f32[1] = XMScalarSinEst( V.vector4_f32[1] );
    Result.vector4_f32[2] = XMScalarSinEst( V.vector4_f32[2] );
    Result.vector4_f32[3] = XMScalarSinEst( V.vector4_f32[3] );
    return Result;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    // Force the value within the bounds of pi
    XMVECTOR x = XMVectorModAngles(V);

    // Map in [-pi/2,pi/2] with sin(y) = sin(x).
    // Lanes with |x| > pi/2 are reflected: x -> (+/-pi) - x.
    __n128 sign = vandq_u32(x, g_XMNegativeZero);
    __n128 c = vorrq_u32(g_XMPi, sign); // pi when x >= 0, -pi when x < 0
    __n128 absx = vabsq_f32( x );
    __n128 rflx = vsubq_f32(c, x);
    __n128 comp = vcleq_f32(absx, g_XMHalfPi);
    x = vbslq_f32( comp, x, rflx );

    __n128 x2 = vmulq_f32(x, x);

    // Compute polynomial approximation
    // Shorter Horner chain than XMVectorSin: only the high coefficients.
    const XMVECTOR SEC = g_XMSinCoefficients1;
    XMVECTOR Result = vdupq_lane_f32(vget_high_f32(SEC), 1);

    XMVECTOR vConstants = vdupq_lane_f32(vget_high_f32(SEC), 0);
    Result = vmlaq_f32(vConstants, Result, x2);

    vConstants = vdupq_lane_f32(vget_low_f32(SEC), 1);
    Result = vmlaq_f32(vConstants, Result, x2);

    Result = vmlaq_f32(g_XMOne, Result, x2);
    Result = vmulq_f32(Result, x);
    return Result;
#elif defined(_XM_SSE_INTRINSICS_)
    // Force the value within the bounds of pi
    XMVECTOR x = XMVectorModAngles(V);

    // Map in [-pi/2,pi/2] with sin(y) = sin(x).
    __m128 sign = _mm_and_ps(x, g_XMNegativeZero);
    __m128 c = _mm_or_ps(g_XMPi, sign); // pi when x >= 0, -pi when x < 0
    __m128 absx = _mm_andnot_ps(sign, x); // |x|
    __m128 rflx = _mm_sub_ps(c, x);
    __m128 comp = _mm_cmple_ps(absx, g_XMHalfPi);
    // Branchless select of x vs. its reflection.
    __m128 select0 = _mm_and_ps(comp, x);
    __m128 select1 = _mm_andnot_ps(comp, rflx);
    x = _mm_or_ps(select0, select1);

    __m128 x2 = _mm_mul_ps(x, x);

    // Compute polynomial approximation
    // Shorter Horner chain than XMVectorSin: only the high coefficients.
    const XMVECTOR SEC = g_XMSinCoefficients1;
    XMVECTOR vConstants = XM_PERMUTE_PS( SEC, _MM_SHUFFLE(3, 3, 3, 3) );
    __m128 Result = _mm_mul_ps(vConstants, x2);

    vConstants = XM_PERMUTE_PS( SEC, _MM_SHUFFLE(2, 2, 2, 2) );
    Result = _mm_add_ps(Result, vConstants);
    Result = _mm_mul_ps(Result, x2);

    vConstants = XM_PERMUTE_PS( SEC, _MM_SHUFFLE(1, 1, 1, 1) );
    Result = _mm_add_ps(Result, vConstants);
    Result = _mm_mul_ps(Result, x2);

    Result = _mm_add_ps(Result, g_XMOne);
    Result = _mm_mul_ps(Result, x);
    return Result;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
+//------------------------------------------------------------------------------
+
+// Computes a fast per-component estimate of cos(V) using a 6-degree
+// minimax polynomial. Less precise than the full XMVectorCos; input
+// angles are first wrapped into [-pi,pi] by XMVectorModAngles.
+inline XMVECTOR XMVectorCosEst
+(
+    FXMVECTOR V
+)
+{
+    // 6-degree minimax approximation
+
+#if defined(_XM_NO_INTRINSICS_)
+    XMVECTOR Result;
+    Result.vector4_f32[0] = XMScalarCosEst( V.vector4_f32[0] );
+    Result.vector4_f32[1] = XMScalarCosEst( V.vector4_f32[1] );
+    Result.vector4_f32[2] = XMScalarCosEst( V.vector4_f32[2] );
+    Result.vector4_f32[3] = XMScalarCosEst( V.vector4_f32[3] );
+    return Result;
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+    // Map V to x in [-pi,pi].
+    XMVECTOR x = XMVectorModAngles(V);
+
+    // Map in [-pi/2,pi/2] with cos(y) = sign*cos(x).
+    __n128 sign = vandq_u32(x, g_XMNegativeZero);
+    __n128 c = vorrq_u32(g_XMPi, sign); // pi when x >= 0, -pi when x < 0
+    __n128 absx = vabsq_f32( x );
+    __n128 rflx = vsubq_f32(c, x);     // reflected angle c - x
+    __n128 comp = vcleq_f32(absx, g_XMHalfPi);
+    x = vbslq_f32( comp, x, rflx );
+    // Reflection flips the sign of the cosine, hence +1/-1 below.
+    sign = vbslq_f32( comp, g_XMOne, g_XMNegativeOne );
+
+    __n128 x2 = vmulq_f32(x, x);
+
+    // Compute polynomial approximation (multiply-add chain in x^2)
+    const XMVECTOR CEC = g_XMCosCoefficients1;
+    XMVECTOR Result = vdupq_lane_f32(vget_high_f32(CEC), 1);
+
+    XMVECTOR vConstants = vdupq_lane_f32(vget_high_f32(CEC), 0);
+    Result = vmlaq_f32(vConstants, Result, x2);
+
+    vConstants = vdupq_lane_f32(vget_low_f32(CEC), 1);
+    Result = vmlaq_f32(vConstants, Result, x2);
+
+    Result = vmlaq_f32(g_XMOne, Result, x2);
+    Result = vmulq_f32(Result, sign);
+    return Result;
+#elif defined(_XM_SSE_INTRINSICS_)
+    // Map V to x in [-pi,pi].
+    XMVECTOR x = XMVectorModAngles(V);
+
+    // Map in [-pi/2,pi/2] with cos(y) = sign*cos(x).
+    XMVECTOR sign = _mm_and_ps(x, g_XMNegativeZero);
+    __m128 c = _mm_or_ps(g_XMPi, sign); // pi when x >= 0, -pi when x < 0
+    __m128 absx = _mm_andnot_ps(sign, x); // |x|
+    __m128 rflx = _mm_sub_ps(c, x);       // reflected angle c - x
+    __m128 comp = _mm_cmple_ps(absx, g_XMHalfPi);
+    // Branchless select: keep x where |x| <= pi/2, otherwise use rflx.
+    __m128 select0 = _mm_and_ps(comp, x);
+    __m128 select1 = _mm_andnot_ps(comp, rflx);
+    x = _mm_or_ps(select0, select1);
+    // Reflection flips the sign of the cosine, hence +1/-1 here.
+    select0 = _mm_and_ps(comp, g_XMOne);
+    select1 = _mm_andnot_ps(comp, g_XMNegativeOne);
+    sign = _mm_or_ps(select0, select1);
+
+    __m128 x2 = _mm_mul_ps(x, x);
+
+    // Compute polynomial approximation (multiply-add chain in x^2)
+    const XMVECTOR CEC = g_XMCosCoefficients1;
+    XMVECTOR vConstants = XM_PERMUTE_PS( CEC, _MM_SHUFFLE(3, 3, 3, 3) );
+    __m128 Result = _mm_mul_ps(vConstants, x2);
+
+    vConstants = XM_PERMUTE_PS( CEC, _MM_SHUFFLE(2, 2, 2, 2) );
+    Result = _mm_add_ps(Result, vConstants);
+    Result = _mm_mul_ps(Result, x2);
+
+    vConstants = XM_PERMUTE_PS( CEC, _MM_SHUFFLE(1, 1, 1, 1) );
+    Result = _mm_add_ps(Result, vConstants);
+    Result = _mm_mul_ps(Result, x2);
+
+    Result = _mm_add_ps(Result, g_XMOne);
+    Result = _mm_mul_ps(Result, sign);
+    return Result;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Computes fast per-component estimates of both sin(V) and cos(V)
+// (7-degree / 6-degree minimax polynomials), sharing a single range
+// reduction of the angle into [-pi/2,pi/2].
+// pSin and pCos receive the results and must be non-null (asserted).
+_Use_decl_annotations_
+inline void XMVectorSinCosEst
+(
+    XMVECTOR* pSin,
+    XMVECTOR* pCos,
+    FXMVECTOR V
+)
+{
+    assert(pSin != NULL);
+    assert(pCos != NULL);
+
+    // 7/6-degree minimax approximation
+
+#if defined(_XM_NO_INTRINSICS_)
+    XMVECTOR Sin;
+    XMVECTOR Cos;
+
+    XMScalarSinCosEst(&Sin.vector4_f32[0], &Cos.vector4_f32[0], V.vector4_f32[0]);
+    XMScalarSinCosEst(&Sin.vector4_f32[1], &Cos.vector4_f32[1], V.vector4_f32[1]);
+    XMScalarSinCosEst(&Sin.vector4_f32[2], &Cos.vector4_f32[2], V.vector4_f32[2]);
+    XMScalarSinCosEst(&Sin.vector4_f32[3], &Cos.vector4_f32[3], V.vector4_f32[3]);
+
+    *pSin = Sin;
+    *pCos = Cos;
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+    // Force the value within the bounds of pi
+    XMVECTOR x = XMVectorModAngles(V);
+
+    // Map in [-pi/2,pi/2] with cos(y) = sign*cos(x).
+    __n128 sign = vandq_u32(x, g_XMNegativeZero);
+    __n128 c = vorrq_u32(g_XMPi, sign); // pi when x >= 0, -pi when x < 0
+    __n128 absx = vabsq_f32( x );
+    __n128 rflx = vsubq_f32(c, x);     // reflected angle c - x
+    __n128 comp = vcleq_f32(absx, g_XMHalfPi);
+    x = vbslq_f32( comp, x, rflx );
+    // Reflection flips the sign of the cosine only.
+    sign = vbslq_f32( comp, g_XMOne, g_XMNegativeOne );
+
+    __n128 x2 = vmulq_f32(x, x);
+
+    // Compute polynomial approximation for sine
+    const XMVECTOR SEC = g_XMSinCoefficients1;
+    XMVECTOR Result = vdupq_lane_f32(vget_high_f32(SEC), 1);
+
+    XMVECTOR vConstants = vdupq_lane_f32(vget_high_f32(SEC), 0);
+    Result = vmlaq_f32(vConstants, Result, x2);
+
+    vConstants = vdupq_lane_f32(vget_low_f32(SEC), 1);
+    Result = vmlaq_f32(vConstants, Result, x2);
+
+    Result = vmlaq_f32(g_XMOne, Result, x2);
+    *pSin = vmulq_f32(Result, x);
+
+    // Compute polynomial approximation for cosine
+    const XMVECTOR CEC = g_XMCosCoefficients1;
+    Result = vdupq_lane_f32(vget_high_f32(CEC), 1);
+
+    vConstants = vdupq_lane_f32(vget_high_f32(CEC), 0);
+    Result = vmlaq_f32(vConstants, Result, x2);
+
+    vConstants = vdupq_lane_f32(vget_low_f32(CEC), 1);
+    Result = vmlaq_f32(vConstants, Result, x2);
+
+    Result = vmlaq_f32(g_XMOne, Result, x2);
+    *pCos = vmulq_f32(Result, sign);
+#elif defined(_XM_SSE_INTRINSICS_)
+    // Force the value within the bounds of pi
+    XMVECTOR x = XMVectorModAngles(V);
+
+    // Map in [-pi/2,pi/2] with sin(y) = sin(x), cos(y) = sign*cos(x).
+    XMVECTOR sign = _mm_and_ps(x, g_XMNegativeZero);
+    __m128 c = _mm_or_ps(g_XMPi, sign); // pi when x >= 0, -pi when x < 0
+    __m128 absx = _mm_andnot_ps(sign, x); // |x|
+    __m128 rflx = _mm_sub_ps(c, x);       // reflected angle c - x
+    __m128 comp = _mm_cmple_ps(absx, g_XMHalfPi);
+    // Branchless select: keep x where |x| <= pi/2, otherwise use rflx.
+    __m128 select0 = _mm_and_ps(comp, x);
+    __m128 select1 = _mm_andnot_ps(comp, rflx);
+    x = _mm_or_ps(select0, select1);
+    // Reflection flips the sign of the cosine only.
+    select0 = _mm_and_ps(comp, g_XMOne);
+    select1 = _mm_andnot_ps(comp, g_XMNegativeOne);
+    sign = _mm_or_ps(select0, select1);
+
+    __m128 x2 = _mm_mul_ps(x, x);
+
+    // Compute polynomial approximation for sine
+    const XMVECTOR SEC = g_XMSinCoefficients1;
+    XMVECTOR vConstants = XM_PERMUTE_PS( SEC, _MM_SHUFFLE(3, 3, 3, 3) );
+    __m128 Result = _mm_mul_ps(vConstants, x2);
+
+    vConstants = XM_PERMUTE_PS( SEC, _MM_SHUFFLE(2, 2, 2, 2) );
+    Result = _mm_add_ps(Result, vConstants);
+    Result = _mm_mul_ps(Result, x2);
+
+    vConstants = XM_PERMUTE_PS( SEC, _MM_SHUFFLE(1, 1, 1, 1) );
+    Result = _mm_add_ps(Result, vConstants);
+    Result = _mm_mul_ps(Result, x2);
+
+    Result = _mm_add_ps(Result, g_XMOne);
+    Result = _mm_mul_ps(Result, x);
+    *pSin = Result;
+
+    // Compute polynomial approximation for cosine
+    const XMVECTOR CEC = g_XMCosCoefficients1;
+    vConstants = XM_PERMUTE_PS( CEC, _MM_SHUFFLE(3, 3, 3, 3) );
+    Result = _mm_mul_ps(vConstants, x2);
+
+    vConstants = XM_PERMUTE_PS( CEC, _MM_SHUFFLE(2, 2, 2, 2) );
+    Result = _mm_add_ps(Result, vConstants);
+    Result = _mm_mul_ps(Result, x2);
+
+    vConstants = XM_PERMUTE_PS( CEC, _MM_SHUFFLE(1, 1, 1, 1) );
+    Result = _mm_add_ps(Result, vConstants);
+    Result = _mm_mul_ps(Result, x2);
+
+    Result = _mm_add_ps(Result, g_XMOne);
+    Result = _mm_mul_ps(Result, sign);
+    *pCos = Result;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Estimates tan(V) per component as a ratio of two small polynomials,
+// after wrapping the angle via x = V - round(V/pi)*pi.
+// Coefficients come from g_XMTanEstCoefficients (w lane holds 1/pi).
+inline XMVECTOR XMVectorTanEst
+(
+    FXMVECTOR V
+)
+{
+    // Range reduction: x = V - round(V * (1/pi)) * pi
+    XMVECTOR invPi = XMVectorSplatW(g_XMTanEstCoefficients.v);
+    XMVECTOR x = XMVectorRound(XMVectorMultiply(V, invPi));
+    x = XMVectorNegativeMultiplySubtract(g_XMPi.v, x, V);
+
+    // Rational approximation coefficients.
+    XMVECTOR c0 = XMVectorSplatX(g_XMTanEstCoefficients.v);
+    XMVECTOR c1 = XMVectorSplatY(g_XMTanEstCoefficients.v);
+    XMVECTOR c2 = XMVectorSplatZ(g_XMTanEstCoefficients.v);
+
+    // Denominator: c2 - x^2.  Numerator: c0*x + c1*x^3.
+    XMVECTOR denom = XMVectorNegativeMultiplySubtract(x, x, c2);
+    XMVECTOR xSq   = XMVectorMultiply(x, x);
+    XMVECTOR xC0   = XMVectorMultiply(x, c0);
+    XMVECTOR xC1   = XMVectorMultiply(x, c1);
+
+    XMVECTOR recipDenom = XMVectorReciprocalEst(denom);
+    XMVECTOR numer      = XMVectorMultiplyAdd(xSq, xC1, xC0);
+
+    return XMVectorMultiply(numer, recipDenom);
+}
+
+
+//------------------------------------------------------------------------------
+
+// Estimates asin(V) per component with a 3-degree minimax polynomial,
+// computed as pi/2 minus the same polynomial*sqrt(1-|V|) form used by
+// XMVectorACosEst (note the final subtraction from g_XMHalfPi).
+inline XMVECTOR XMVectorASinEst
+(
+    FXMVECTOR V
+)
+{
+    // 3-degree minimax approximation
+
+#if defined(_XM_NO_INTRINSICS_)
+    XMVECTOR Result;
+    Result.vector4_f32[0] = XMScalarASinEst( V.vector4_f32[0] );
+    Result.vector4_f32[1] = XMScalarASinEst( V.vector4_f32[1] );
+    Result.vector4_f32[2] = XMScalarASinEst( V.vector4_f32[2] );
+    Result.vector4_f32[3] = XMScalarASinEst( V.vector4_f32[3] );
+    return Result;
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+    __n128 nonnegative = vcgeq_f32(V, g_XMZero);
+    __n128 x = vabsq_f32(V);
+
+    // Compute (1-|V|), clamp to zero to avoid sqrt of negative number.
+    __n128 oneMValue = vsubq_f32(g_XMOne, x);
+    __n128 clampOneMValue = vmaxq_f32(g_XMZero, oneMValue);
+    __n128 root = XMVectorSqrt(clampOneMValue);
+
+    // Compute polynomial approximation
+    const XMVECTOR AEC = g_XMArcEstCoefficients;
+    __n128 t0 = vdupq_lane_f32(vget_high_f32(AEC), 1);
+
+    XMVECTOR vConstants = vdupq_lane_f32(vget_high_f32(AEC), 0);
+    t0 = vmlaq_f32( vConstants, t0, x );
+
+    vConstants = vdupq_lane_f32(vget_low_f32(AEC), 1);
+    t0 = vmlaq_f32( vConstants, t0, x );
+
+    vConstants = vdupq_lane_f32(vget_low_f32(AEC), 0);
+    t0 = vmlaq_f32( vConstants, t0, x );
+    t0 = vmulq_f32(t0, root);
+
+    // For negative inputs use pi - t0, then convert acos-style value to asin.
+    __n128 t1 = vsubq_f32(g_XMPi, t0);
+    t0 = vbslq_f32( nonnegative, t0, t1 );
+    t0 = vsubq_f32(g_XMHalfPi, t0);
+    return t0;
+#elif defined(_XM_SSE_INTRINSICS_)
+    __m128 nonnegative = _mm_cmpge_ps(V, g_XMZero);
+    __m128 mvalue = _mm_sub_ps(g_XMZero, V);
+    __m128 x = _mm_max_ps(V, mvalue); // |V|
+
+    // Compute (1-|V|), clamp to zero to avoid sqrt of negative number.
+    __m128 oneMValue = _mm_sub_ps(g_XMOne, x);
+    __m128 clampOneMValue = _mm_max_ps(g_XMZero, oneMValue);
+    __m128 root = _mm_sqrt_ps(clampOneMValue); // sqrt(1-|V|)
+
+    // Compute polynomial approximation
+    const XMVECTOR AEC = g_XMArcEstCoefficients;
+    XMVECTOR vConstants = XM_PERMUTE_PS( AEC, _MM_SHUFFLE(3, 3, 3, 3) );
+    __m128 t0 = _mm_mul_ps(vConstants, x);
+
+    vConstants = XM_PERMUTE_PS( AEC, _MM_SHUFFLE(2, 2, 2, 2) );
+    t0 = _mm_add_ps(t0, vConstants);
+    t0 = _mm_mul_ps(t0, x);
+
+    vConstants = XM_PERMUTE_PS( AEC, _MM_SHUFFLE(1, 1, 1, 1) );
+    t0 = _mm_add_ps(t0, vConstants);
+    t0 = _mm_mul_ps(t0, x);
+
+    vConstants = XM_PERMUTE_PS( AEC, _MM_SHUFFLE(0, 0, 0, 0) );
+    t0 = _mm_add_ps(t0, vConstants);
+    t0 = _mm_mul_ps(t0, root);
+
+    // For negative inputs use pi - t0 (branchless select), then
+    // convert the acos-style value to asin via pi/2 - t0.
+    __m128 t1 = _mm_sub_ps(g_XMPi, t0);
+    t0 = _mm_and_ps(nonnegative, t0);
+    t1 = _mm_andnot_ps(nonnegative, t1);
+    t0 = _mm_or_ps(t0, t1);
+    t0 = _mm_sub_ps(g_XMHalfPi, t0);
+    return t0;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Estimates acos(V) per component with a 3-degree minimax polynomial
+// scaled by sqrt(1-|V|); negative inputs are mirrored via pi - result.
+inline XMVECTOR XMVectorACosEst
+(
+    FXMVECTOR V
+)
+{
+    // 3-degree minimax approximation
+
+#if defined(_XM_NO_INTRINSICS_)
+    XMVECTOR Result;
+    Result.vector4_f32[0] = XMScalarACosEst( V.vector4_f32[0] );
+    Result.vector4_f32[1] = XMScalarACosEst( V.vector4_f32[1] );
+    Result.vector4_f32[2] = XMScalarACosEst( V.vector4_f32[2] );
+    Result.vector4_f32[3] = XMScalarACosEst( V.vector4_f32[3] );
+    return Result;
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+    __n128 nonnegative = vcgeq_f32(V, g_XMZero);
+    __n128 x = vabsq_f32(V);
+
+    // Compute (1-|V|), clamp to zero to avoid sqrt of negative number.
+    __n128 oneMValue = vsubq_f32(g_XMOne, x);
+    __n128 clampOneMValue = vmaxq_f32(g_XMZero, oneMValue);
+    __n128 root = XMVectorSqrt(clampOneMValue);
+
+    // Compute polynomial approximation
+    const XMVECTOR AEC = g_XMArcEstCoefficients;
+    __n128 t0 = vdupq_lane_f32(vget_high_f32(AEC), 1);
+
+    XMVECTOR vConstants = vdupq_lane_f32(vget_high_f32(AEC), 0);
+    t0 = vmlaq_f32( vConstants, t0, x );
+
+    vConstants = vdupq_lane_f32(vget_low_f32(AEC), 1);
+    t0 = vmlaq_f32( vConstants, t0, x );
+
+    vConstants = vdupq_lane_f32(vget_low_f32(AEC), 0);
+    t0 = vmlaq_f32( vConstants, t0, x );
+    t0 = vmulq_f32(t0, root);
+
+    // For negative inputs the result is pi - t0.
+    __n128 t1 = vsubq_f32(g_XMPi, t0);
+    t0 = vbslq_f32( nonnegative, t0, t1 );
+    return t0;
+#elif defined(_XM_SSE_INTRINSICS_)
+    __m128 nonnegative = _mm_cmpge_ps(V, g_XMZero);
+    __m128 mvalue = _mm_sub_ps(g_XMZero, V);
+    __m128 x = _mm_max_ps(V, mvalue); // |V|
+
+    // Compute (1-|V|), clamp to zero to avoid sqrt of negative number.
+    __m128 oneMValue = _mm_sub_ps(g_XMOne, x);
+    __m128 clampOneMValue = _mm_max_ps(g_XMZero, oneMValue);
+    __m128 root = _mm_sqrt_ps(clampOneMValue); // sqrt(1-|V|)
+
+    // Compute polynomial approximation
+    const XMVECTOR AEC = g_XMArcEstCoefficients;
+    XMVECTOR vConstants = XM_PERMUTE_PS( AEC, _MM_SHUFFLE(3, 3, 3, 3) );
+    __m128 t0 = _mm_mul_ps(vConstants, x);
+
+    vConstants = XM_PERMUTE_PS( AEC, _MM_SHUFFLE(2, 2, 2, 2) );
+    t0 = _mm_add_ps(t0, vConstants);
+    t0 = _mm_mul_ps(t0, x);
+
+    vConstants = XM_PERMUTE_PS( AEC, _MM_SHUFFLE(1, 1, 1, 1) );
+    t0 = _mm_add_ps(t0, vConstants);
+    t0 = _mm_mul_ps(t0, x);
+
+    vConstants = XM_PERMUTE_PS( AEC, _MM_SHUFFLE(0, 0, 0, 0) );
+    t0 = _mm_add_ps(t0, vConstants);
+    t0 = _mm_mul_ps(t0, root);
+
+    // For negative inputs the result is pi - t0 (branchless select).
+    __m128 t1 = _mm_sub_ps(g_XMPi, t0);
+    t0 = _mm_and_ps(nonnegative, t0);
+    t1 = _mm_andnot_ps(nonnegative, t1);
+    t0 = _mm_or_ps(t0, t1);
+    return t0;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+namespace Internal
+{
+
+// Scalar helper: 9-degree minimax estimate of atan(Value).
+// For |Value| > 1 the argument is reduced through its reciprocal using
+// atan(x) = s*pi/2 - atan(1/x), where s is +1 for x > 1 and -1 otherwise.
+inline float XMScalarATanEst
+(
+    float Value
+)
+{
+    float t;
+    float quadrant;
+    if (fabsf(Value) <= 1.0f)
+    {
+        // Already inside the polynomial's domain.
+        t = Value;
+        quadrant = 0.0f;
+    }
+    else
+    {
+        // Reduce via the reciprocal; remember which half-line we came from.
+        t = 1.0f / Value;
+        quadrant = (Value > 1.0f) ? 1.0f : -1.0f;
+    }
+
+    // 9-degree minimax approximation (odd polynomial in t).
+    float t2 = t*t;
+    float poly = ((((0.0208351f*t2-0.085133f)*t2+0.180141f)*t2-0.3302995f)*t2+0.999866f)*t;
+
+    if (quadrant == 0.0f)
+    {
+        return poly;
+    }
+    return quadrant*XM_PIDIV2 - poly;
+}
+
+} // namespace Internal
+
+//------------------------------------------------------------------------------
+
+// Estimates atan(V) per component with a 9-degree minimax polynomial.
+// For |V| > 1 the argument is reduced through its reciprocal and the
+// result is folded back as sign*pi/2 - atan(1/V).
+inline XMVECTOR XMVectorATanEst
+(
+    FXMVECTOR V
+)
+{
+    // 9-degree minimax approximation
+
+#if defined(_XM_NO_INTRINSICS_)
+    XMVECTOR Result;
+    Result.vector4_f32[0] = Internal::XMScalarATanEst( V.vector4_f32[0] );
+    Result.vector4_f32[1] = Internal::XMScalarATanEst( V.vector4_f32[1] );
+    Result.vector4_f32[2] = Internal::XMScalarATanEst( V.vector4_f32[2] );
+    Result.vector4_f32[3] = Internal::XMScalarATanEst( V.vector4_f32[3] );
+    return Result;
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+    __n128 absV = vabsq_f32(V);
+    __n128 invV = XMVectorReciprocalEst(V);
+    // sign: 0 where |V| <= 1 (no reduction), +1 where V > 1, -1 otherwise.
+    __n128 comp = vcgtq_f32(V, g_XMOne);
+    __n128 sign = vbslq_f32(comp, g_XMOne, g_XMNegativeOne );
+    comp = vcleq_f32(absV, g_XMOne);
+    sign = vbslq_f32(comp, g_XMZero, sign );
+    // x is V in the reduced domain, or 1/V when reduction was applied.
+    __n128 x = vbslq_f32(comp, V, invV );
+
+    __n128 x2 = vmulq_f32(x, x);
+
+    // Compute polynomial approximation
+    const XMVECTOR AEC = g_XMATanEstCoefficients1;
+    __n128 Result = vdupq_lane_f32(vget_high_f32(AEC), 1);
+
+    XMVECTOR vConstants = vdupq_lane_f32(vget_high_f32(AEC), 0);
+    Result = vmlaq_f32( vConstants, Result, x2 );
+
+    vConstants = vdupq_lane_f32(vget_low_f32(AEC), 1);
+    Result = vmlaq_f32( vConstants, Result, x2 );
+
+    vConstants = vdupq_lane_f32(vget_low_f32( AEC), 0);
+    Result = vmlaq_f32( vConstants, Result, x2 );
+
+    // ATanEstCoefficients0 is already splatted
+    Result = vmlaq_f32( g_XMATanEstCoefficients0, Result, x2 );
+    Result = vmulq_f32( Result, x );
+
+    // Fold the reduced result back: sign*pi/2 - atan(1/V).
+    float32x4_t result1 = vmulq_f32(sign, g_XMHalfPi);
+    result1 = vsubq_f32(result1, Result);
+
+    comp = vceqq_f32(sign, g_XMZero);
+    Result = vbslq_f32( comp, Result, result1 );
+    return Result;
+#elif defined(_XM_SSE_INTRINSICS_)
+    __m128 absV = XMVectorAbs(V);
+    __m128 invV = _mm_div_ps(g_XMOne, V);
+    // sign: 0 where |V| <= 1 (no reduction), +1 where V > 1, -1 otherwise.
+    __m128 comp = _mm_cmpgt_ps(V, g_XMOne);
+    __m128 select0 = _mm_and_ps(comp, g_XMOne);
+    __m128 select1 = _mm_andnot_ps(comp, g_XMNegativeOne);
+    __m128 sign = _mm_or_ps(select0, select1);
+    comp = _mm_cmple_ps(absV, g_XMOne);
+    select0 = _mm_and_ps(comp, g_XMZero);
+    select1 = _mm_andnot_ps(comp, sign);
+    sign = _mm_or_ps(select0, select1);
+    // x is V in the reduced domain, or 1/V when reduction was applied.
+    select0 = _mm_and_ps(comp, V);
+    select1 = _mm_andnot_ps(comp, invV);
+    __m128 x = _mm_or_ps(select0, select1);
+
+    __m128 x2 = _mm_mul_ps(x, x);
+
+    // Compute polynomial approximation
+    const XMVECTOR AEC = g_XMATanEstCoefficients1;
+    XMVECTOR vConstants = XM_PERMUTE_PS( AEC, _MM_SHUFFLE(3, 3, 3, 3) );
+    __m128 Result = _mm_mul_ps(vConstants, x2);
+
+    vConstants = XM_PERMUTE_PS( AEC, _MM_SHUFFLE(2, 2, 2, 2) );
+    Result = _mm_add_ps(Result, vConstants);
+    Result = _mm_mul_ps(Result, x2);
+
+    vConstants = XM_PERMUTE_PS( AEC, _MM_SHUFFLE(1, 1, 1, 1) );
+    Result = _mm_add_ps(Result, vConstants);
+    Result = _mm_mul_ps(Result, x2);
+
+    vConstants = XM_PERMUTE_PS( AEC, _MM_SHUFFLE(0, 0, 0, 0) );
+    Result = _mm_add_ps(Result, vConstants);
+    Result = _mm_mul_ps(Result, x2);
+
+    // ATanEstCoefficients0 is already splatted
+    Result = _mm_add_ps(Result, g_XMATanEstCoefficients0);
+    Result = _mm_mul_ps(Result, x);
+    // Fold the reduced result back: sign*pi/2 - atan(1/V).
+    __m128 result1 = _mm_mul_ps(sign, g_XMHalfPi);
+    result1 = _mm_sub_ps(result1, Result);
+
+    comp = _mm_cmpeq_ps(sign, g_XMZero);
+    select0 = _mm_and_ps(comp, Result);
+    select1 = _mm_andnot_ps(comp, result1);
+    Result = _mm_or_ps(select0, select1);
+    return Result;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Estimated four-quadrant arctangent: atan2(Y, X) per component.
+// Special cases (zero or infinite operands) are resolved by selects;
+// finite inputs use XMVectorATanEst(Y * 1/X) plus a quadrant offset.
+inline XMVECTOR XMVectorATan2Est
+(
+    FXMVECTOR Y,
+    FXMVECTOR X
+)
+{
+    static const XMVECTORF32 ATan2Constants = {XM_PI, XM_PIDIV2, XM_PIDIV4, 2.3561944905f /* 3*pi/4 */};
+
+    const XMVECTOR zero = XMVectorZero();
+    XMVECTOR validMask = XMVectorTrueInt();
+
+    // Quadrant constants, each carrying the sign of Y.
+    XMVECTOR ySign = XMVectorAndInt(Y, g_XMNegativeZero.v);
+    XMVECTOR pi             = XMVectorOrInt(XMVectorSplatX(ATan2Constants), ySign);
+    XMVECTOR halfPi         = XMVectorOrInt(XMVectorSplatY(ATan2Constants), ySign);
+    XMVECTOR quarterPi      = XMVectorOrInt(XMVectorSplatZ(ATan2Constants), ySign);
+    XMVECTOR threeQuarterPi = XMVectorOrInt(XMVectorSplatW(ATan2Constants), ySign);
+
+    // Classify the operands.
+    XMVECTOR yIsZero = XMVectorEqual(Y, zero);
+    XMVECTOR xIsZero = XMVectorEqual(X, zero);
+    XMVECTOR xIsPositive = XMVectorAndInt(X, g_XMNegativeZero.v);
+    xIsPositive = XMVectorEqualInt(xIsPositive, zero);
+    XMVECTOR yIsInfinite = XMVectorIsInfinite(Y);
+    XMVECTOR xIsInfinite = XMVectorIsInfinite(X);
+
+    // Resolve the special cases; lanes left equal to the all-ones mask
+    // are the "normal" lanes that still need the real computation.
+    XMVECTOR r1 = XMVectorSelect(pi, ySign, xIsPositive);
+    XMVECTOR r2 = XMVectorSelect(validMask, halfPi, xIsZero);
+    XMVECTOR r3 = XMVectorSelect(r2, r1, yIsZero);
+    XMVECTOR r4 = XMVectorSelect(threeQuarterPi, quarterPi, xIsPositive);
+    XMVECTOR r5 = XMVectorSelect(halfPi, r4, xIsInfinite);
+    XMVECTOR specials = XMVectorSelect(r3, r5, yIsInfinite);
+    validMask = XMVectorEqualInt(specials, validMask);
+
+    // General case: atan(Y/X) via a reciprocal estimate, then add pi
+    // in the lanes where X is negative to select the proper quadrant.
+    XMVECTOR ratio = XMVectorMultiply(Y, XMVectorReciprocalEst(X));
+    XMVECTOR baseAtan = XMVectorATanEst(ratio);
+    XMVECTOR offset = XMVectorSelect(pi, zero, xIsPositive);
+    XMVECTOR general = XMVectorAdd(baseAtan, offset);
+
+    return XMVectorSelect(specials, general, validMask);
+}
+
+//------------------------------------------------------------------------------
+
+// Linear interpolation: returns V0 + t * (V1 - V0) per component.
+// t == 0 yields V0, t == 1 yields V1; values outside [0,1] extrapolate.
+inline XMVECTOR XMVectorLerp
+(
+    FXMVECTOR V0,
+    FXMVECTOR V1,
+    float    t
+)
+{
+    // V0 + t * (V1 - V0)
+
+#if defined(_XM_NO_INTRINSICS_)
+
+    XMVECTOR Scale = XMVectorReplicate(t);
+    XMVECTOR Length = XMVectorSubtract(V1, V0);
+    return XMVectorMultiplyAdd(Length, Scale, V0);
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+    XMVECTOR L = vsubq_f32( V1, V0 );
+    return vmlaq_n_f32( V0, L, t );
+#elif defined(_XM_SSE_INTRINSICS_)
+    XMVECTOR L = _mm_sub_ps( V1, V0 );
+    XMVECTOR S = _mm_set_ps1( t );
+    XMVECTOR Result = _mm_mul_ps( L, S );
+    return _mm_add_ps( Result, V0 );
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Linear interpolation with a per-component factor:
+// returns V0 + T * (V1 - V0) for each lane.
+inline XMVECTOR XMVectorLerpV
+(
+    FXMVECTOR V0,
+    FXMVECTOR V1,
+    FXMVECTOR T
+)
+{
+    // V0 + T * (V1 - V0)
+
+#if defined(_XM_NO_INTRINSICS_)
+
+    XMVECTOR Length = XMVectorSubtract(V1, V0);
+    return XMVectorMultiplyAdd(Length, T, V0);
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+    XMVECTOR L = vsubq_f32( V1, V0 );
+    return vmlaq_f32( V0, L, T );
+#elif defined(_XM_SSE_INTRINSICS_)
+    XMVECTOR Length = _mm_sub_ps( V1, V0 );
+    XMVECTOR Result = _mm_mul_ps( Length, T );
+    return _mm_add_ps( Result, V0 );
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Hermite spline interpolation between Position0 and Position1 with
+// tangents Tangent0/Tangent1, evaluated at scalar parameter t.
+inline XMVECTOR XMVectorHermite
+(
+    FXMVECTOR Position0,
+    FXMVECTOR Tangent0,
+    FXMVECTOR Position1,
+    GXMVECTOR Tangent1,
+    float    t
+)
+{
+    // Result = (2 * t^3 - 3 * t^2 + 1) * Position0 +
+    //          (t^3 - 2 * t^2 + t) * Tangent0 +
+    //          (-2 * t^3 + 3 * t^2) * Position1 +
+    //          (t^3 - t^2) * Tangent1
+
+#if defined(_XM_NO_INTRINSICS_)
+
+    float t2 = t * t;
+    float t3 = t * t2;
+
+    // The four Hermite basis functions, splatted across all lanes.
+    XMVECTOR P0 = XMVectorReplicate(2.0f * t3 - 3.0f * t2 + 1.0f);
+    XMVECTOR T0 = XMVectorReplicate(t3 - 2.0f * t2 + t);
+    XMVECTOR P1 = XMVectorReplicate(-2.0f * t3 + 3.0f * t2);
+    XMVECTOR T1 = XMVectorReplicate(t3 - t2);
+
+    XMVECTOR Result = XMVectorMultiply(P0, Position0);
+    Result = XMVectorMultiplyAdd(T0, Tangent0, Result);
+    Result = XMVectorMultiplyAdd(P1, Position1, Result);
+    Result = XMVectorMultiplyAdd(T1, Tangent1, Result);
+
+    return Result;
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+    float t2 = t * t;
+    float t3 = t * t2;
+
+    XMVECTOR P0 = vdupq_n_f32(2.0f * t3 - 3.0f * t2 + 1.0f);
+    XMVECTOR T0 = vdupq_n_f32(t3 - 2.0f * t2 + t);
+    XMVECTOR P1 = vdupq_n_f32(-2.0f * t3 + 3.0f * t2);
+    XMVECTOR T1 = vdupq_n_f32(t3 - t2);
+
+    XMVECTOR vResult = vmulq_f32(P0, Position0);
+    vResult = vmlaq_f32( vResult, T0, Tangent0 );
+    vResult = vmlaq_f32( vResult, P1, Position1 );
+    vResult = vmlaq_f32( vResult, T1, Tangent1 );
+    return vResult;
+#elif defined(_XM_SSE_INTRINSICS_)
+    float t2 = t * t;
+    float t3 = t * t2;
+
+    XMVECTOR P0 = _mm_set_ps1(2.0f * t3 - 3.0f * t2 + 1.0f);
+    XMVECTOR T0 = _mm_set_ps1(t3 - 2.0f * t2 + t);
+    XMVECTOR P1 = _mm_set_ps1(-2.0f * t3 + 3.0f * t2);
+    XMVECTOR T1 = _mm_set_ps1(t3 - t2);
+
+    XMVECTOR vResult = _mm_mul_ps(P0, Position0);
+    XMVECTOR vTemp = _mm_mul_ps(T0, Tangent0);
+    vResult = _mm_add_ps(vResult,vTemp);
+    vTemp = _mm_mul_ps(P1, Position1);
+    vResult = _mm_add_ps(vResult,vTemp);
+    vTemp = _mm_mul_ps(T1, Tangent1);
+    vResult = _mm_add_ps(vResult,vTemp);
+    return vResult;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Hermite spline interpolation with a per-component parameter T:
+// lane i uses T[i] to weight that lane of the positions/tangents.
+inline XMVECTOR XMVectorHermiteV
+(
+    FXMVECTOR Position0,
+    FXMVECTOR Tangent0,
+    FXMVECTOR Position1,
+    GXMVECTOR Tangent1,
+    CXMVECTOR T
+)
+{
+    // Result = (2 * t^3 - 3 * t^2 + 1) * Position0 +
+    //          (t^3 - 2 * t^2 + t) * Tangent0 +
+    //          (-2 * t^3 + 3 * t^2) * Position1 +
+    //          (t^3 - t^2) * Tangent1
+
+#if defined(_XM_NO_INTRINSICS_)
+
+    XMVECTOR T2 = XMVectorMultiply(T, T);
+    XMVECTOR T3 = XMVectorMultiply(T , T2);
+
+    // NOTE(review): each basis weight is built from a different lane of T
+    // (x for P0, y for T0, z for P1, w for T1) then replicated -- this
+    // mirrors the lane usage of the intrinsic paths below.
+    XMVECTOR P0 = XMVectorReplicate(2.0f * T3.vector4_f32[0] - 3.0f * T2.vector4_f32[0] + 1.0f);
+    XMVECTOR T0 = XMVectorReplicate(T3.vector4_f32[1] - 2.0f * T2.vector4_f32[1] + T.vector4_f32[1]);
+    XMVECTOR P1 = XMVectorReplicate(-2.0f * T3.vector4_f32[2] + 3.0f * T2.vector4_f32[2]);
+    XMVECTOR T1 = XMVectorReplicate(T3.vector4_f32[3] - T2.vector4_f32[3]);
+
+    XMVECTOR Result = XMVectorMultiply(P0, Position0);
+    Result = XMVectorMultiplyAdd(T0, Tangent0, Result);
+    Result = XMVectorMultiplyAdd(P1, Position1, Result);
+    Result = XMVectorMultiplyAdd(T1, Tangent1, Result);
+
+    return Result;
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+    // Per-lane coefficients of the four basis polynomials (t^2 and t^3 terms).
+    static const XMVECTORF32 CatMulT2 = {-3.0f,-2.0f,3.0f,-1.0f};
+    static const XMVECTORF32 CatMulT3 = {2.0f,1.0f,-2.0f,1.0f};
+
+    XMVECTOR T2 = vmulq_f32(T,T);
+    XMVECTOR T3 = vmulq_f32(T,T2);
+    // Mul by the constants against t^2
+    T2 = vmulq_f32(T2,CatMulT2);
+    // Mul by the constants against t^3
+    T3 = vmlaq_f32(T2, T3, CatMulT3 );
+    // T3 now has the pre-result.
+    // I need to add t.y only
+    T2 = vandq_u32(T,g_XMMaskY);
+    T3 = vaddq_f32(T3,T2);
+    // Add 1.0f to x
+    T3 = vaddq_f32(T3,g_XMIdentityR0);
+    // Now, I have the constants created
+    // Mul the x constant to Position0
+    XMVECTOR vResult = vdupq_lane_f32( vget_low_f32( T3 ), 0 ); // T3[0]
+    vResult = vmulq_f32(vResult,Position0);
+    // Mul the y constant to Tangent0
+    T2 = vdupq_lane_f32( vget_low_f32( T3 ), 1 ); // T3[1]
+    vResult = vmlaq_f32(vResult, T2, Tangent0 );
+    // Mul the z constant to Position1
+    T2 = vdupq_lane_f32( vget_high_f32( T3 ), 0 ); // T3[2]
+    vResult = vmlaq_f32(vResult, T2, Position1 );
+    // Mul the w constant to Tangent1
+    T3 = vdupq_lane_f32( vget_high_f32( T3 ), 1 ); // T3[3]
+    vResult = vmlaq_f32(vResult, T3, Tangent1 );
+    return vResult;
+#elif defined(_XM_SSE_INTRINSICS_)
+    // Per-lane coefficients of the four basis polynomials (t^2 and t^3 terms).
+    static const XMVECTORF32 CatMulT2 = {-3.0f,-2.0f,3.0f,-1.0f};
+    static const XMVECTORF32 CatMulT3 = {2.0f,1.0f,-2.0f,1.0f};
+
+    XMVECTOR T2 = _mm_mul_ps(T,T);
+    XMVECTOR T3 = _mm_mul_ps(T,T2);
+    // Mul by the constants against t^2
+    T2 = _mm_mul_ps(T2,CatMulT2);
+    // Mul by the constants against t^3
+    T3 = _mm_mul_ps(T3,CatMulT3);
+    // T3 now has the pre-result.
+    T3 = _mm_add_ps(T3,T2);
+    // I need to add t.y only
+    T2 = _mm_and_ps(T,g_XMMaskY);
+    T3 = _mm_add_ps(T3,T2);
+    // Add 1.0f to x
+    T3 = _mm_add_ps(T3,g_XMIdentityR0);
+    // Now, I have the constants created
+    // Mul the x constant to Position0
+    XMVECTOR vResult = XM_PERMUTE_PS(T3,_MM_SHUFFLE(0,0,0,0));
+    vResult = _mm_mul_ps(vResult,Position0);
+    // Mul the y constant to Tangent0
+    T2 = XM_PERMUTE_PS(T3,_MM_SHUFFLE(1,1,1,1));
+    T2 = _mm_mul_ps(T2,Tangent0);
+    vResult = _mm_add_ps(vResult,T2);
+    // Mul the z constant to Position1
+    T2 = XM_PERMUTE_PS(T3,_MM_SHUFFLE(2,2,2,2));
+    T2 = _mm_mul_ps(T2,Position1);
+    vResult = _mm_add_ps(vResult,T2);
+    // Mul the w constant to Tangent1
+    T3 = XM_PERMUTE_PS(T3,_MM_SHUFFLE(3,3,3,3));
+    T3 = _mm_mul_ps(T3,Tangent1);
+    vResult = _mm_add_ps(vResult,T3);
+    return vResult;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Catmull-Rom spline interpolation through four control points,
+// evaluated at scalar parameter t (t in [0,1] spans Position1..Position2).
+inline XMVECTOR XMVectorCatmullRom
+(
+    FXMVECTOR Position0,
+    FXMVECTOR Position1,
+    FXMVECTOR Position2,
+    GXMVECTOR Position3,
+    float    t
+)
+{
+    // Result = ((-t^3 + 2 * t^2 - t) * Position0 +
+    //           (3 * t^3 - 5 * t^2 + 2) * Position1 +
+    //           (-3 * t^3 + 4 * t^2 + t) * Position2 +
+    //           (t^3 - t^2) * Position3) * 0.5
+
+#if defined(_XM_NO_INTRINSICS_)
+
+    float t2 = t * t;
+    float t3 = t * t2;
+
+    // The four Catmull-Rom basis functions (0.5 factor folded in).
+    XMVECTOR P0 = XMVectorReplicate((-t3 + 2.0f * t2 - t) * 0.5f);
+    XMVECTOR P1 = XMVectorReplicate((3.0f * t3 - 5.0f * t2 + 2.0f) * 0.5f);
+    XMVECTOR P2 = XMVectorReplicate((-3.0f * t3 + 4.0f * t2 + t) * 0.5f);
+    XMVECTOR P3 = XMVectorReplicate((t3 - t2) * 0.5f);
+
+    XMVECTOR Result = XMVectorMultiply(P0, Position0);
+    Result = XMVectorMultiplyAdd(P1, Position1, Result);
+    Result = XMVectorMultiplyAdd(P2, Position2, Result);
+    Result = XMVectorMultiplyAdd(P3, Position3, Result);
+
+    return Result;
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+    float t2 = t * t;
+    float t3 = t * t2;
+
+    XMVECTOR P0 = vdupq_n_f32((-t3 + 2.0f * t2 - t) * 0.5f);
+    XMVECTOR P1 = vdupq_n_f32((3.0f * t3 - 5.0f * t2 + 2.0f) * 0.5f);
+    XMVECTOR P2 = vdupq_n_f32((-3.0f * t3 + 4.0f * t2 + t) * 0.5f);
+    XMVECTOR P3 = vdupq_n_f32((t3 - t2) * 0.5f);
+
+    P1 = vmulq_f32(P1, Position1);
+    P0 = vmlaq_f32(P1, P0, Position0);
+    P3 = vmulq_f32(P3, Position3);
+    P2 = vmlaq_f32(P3, P2, Position2);
+    P0 = vaddq_f32(P0,P2);
+    return P0;
+#elif defined(_XM_SSE_INTRINSICS_)
+    float t2 = t * t;
+    float t3 = t * t2;
+
+    XMVECTOR P0 = _mm_set_ps1((-t3 + 2.0f * t2 - t) * 0.5f);
+    XMVECTOR P1 = _mm_set_ps1((3.0f * t3 - 5.0f * t2 + 2.0f) * 0.5f);
+    XMVECTOR P2 = _mm_set_ps1((-3.0f * t3 + 4.0f * t2 + t) * 0.5f);
+    XMVECTOR P3 = _mm_set_ps1((t3 - t2) * 0.5f);
+
+    P0 = _mm_mul_ps(P0, Position0);
+    P1 = _mm_mul_ps(P1, Position1);
+    P2 = _mm_mul_ps(P2, Position2);
+    P3 = _mm_mul_ps(P3, Position3);
+    P0 = _mm_add_ps(P0,P1);
+    P2 = _mm_add_ps(P2,P3);
+    P0 = _mm_add_ps(P0,P2);
+    return P0;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Catmull-Rom spline interpolation with a per-component parameter T:
+// each lane is evaluated with its own t value from T.
+inline XMVECTOR XMVectorCatmullRomV
+(
+    FXMVECTOR Position0,
+    FXMVECTOR Position1,
+    FXMVECTOR Position2,
+    GXMVECTOR Position3,
+    CXMVECTOR T
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+    float fx = T.vector4_f32[0];
+    float fy = T.vector4_f32[1];
+    float fz = T.vector4_f32[2];
+    float fw = T.vector4_f32[3];
+    // Evaluate the full Catmull-Rom basis independently per lane.
+    XMVECTOR vResult = {
+        0.5f*((-fx*fx*fx+2*fx*fx-fx)*Position0.vector4_f32[0]+
+        (3*fx*fx*fx-5*fx*fx+2)*Position1.vector4_f32[0]+
+        (-3*fx*fx*fx+4*fx*fx+fx)*Position2.vector4_f32[0]+
+        (fx*fx*fx-fx*fx)*Position3.vector4_f32[0]),
+        0.5f*((-fy*fy*fy+2*fy*fy-fy)*Position0.vector4_f32[1]+
+        (3*fy*fy*fy-5*fy*fy+2)*Position1.vector4_f32[1]+
+        (-3*fy*fy*fy+4*fy*fy+fy)*Position2.vector4_f32[1]+
+        (fy*fy*fy-fy*fy)*Position3.vector4_f32[1]),
+        0.5f*((-fz*fz*fz+2*fz*fz-fz)*Position0.vector4_f32[2]+
+        (3*fz*fz*fz-5*fz*fz+2)*Position1.vector4_f32[2]+
+        (-3*fz*fz*fz+4*fz*fz+fz)*Position2.vector4_f32[2]+
+        (fz*fz*fz-fz*fz)*Position3.vector4_f32[2]),
+        0.5f*((-fw*fw*fw+2*fw*fw-fw)*Position0.vector4_f32[3]+
+        (3*fw*fw*fw-5*fw*fw+2)*Position1.vector4_f32[3]+
+        (-3*fw*fw*fw+4*fw*fw+fw)*Position2.vector4_f32[3]+
+        (fw*fw*fw-fw*fw)*Position3.vector4_f32[3])
+    };
+    return vResult;
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+    static const XMVECTORF32 Catmul2 = {2.0f,2.0f,2.0f,2.0f};
+    static const XMVECTORF32 Catmul3 = {3.0f,3.0f,3.0f,3.0f};
+    static const XMVECTORF32 Catmul4 = {4.0f,4.0f,4.0f,4.0f};
+    static const XMVECTORF32 Catmul5 = {5.0f,5.0f,5.0f,5.0f};
+    // Cache T^2 and T^3
+    XMVECTOR T2 = vmulq_f32(T,T);
+    XMVECTOR T3 = vmulq_f32(T,T2);
+    // Perform the Position0 term: (2*t^2 - t - t^3)
+    XMVECTOR vResult = vaddq_f32(T2,T2);
+    vResult = vsubq_f32(vResult,T);
+    vResult = vsubq_f32(vResult,T3);
+    vResult = vmulq_f32(vResult,Position0);
+    // Perform the Position1 term and add: (3*t^3 - 5*t^2 + 2)
+    XMVECTOR vTemp = vmulq_f32(T3,Catmul3);
+    vTemp = vmlsq_f32(vTemp, T2, Catmul5);
+    vTemp = vaddq_f32(vTemp,Catmul2);
+    vResult = vmlaq_f32(vResult, vTemp, Position1);
+    // Perform the Position2 term and add: (4*t^2 - 3*t^3 + t)
+    vTemp = vmulq_f32(T2,Catmul4);
+    vTemp = vmlsq_f32(vTemp, T3, Catmul3);
+    vTemp = vaddq_f32(vTemp,T);
+    vResult = vmlaq_f32(vResult, vTemp, Position2);
+    // Position3 is the last term: (t^3 - t^2)
+    T3 = vsubq_f32(T3,T2);
+    vResult = vmlaq_f32(vResult, T3, Position3);
+    // Multiply by 0.5f and exit
+    vResult = vmulq_f32(vResult,g_XMOneHalf);
+    return vResult;
+#elif defined(_XM_SSE_INTRINSICS_)
+    static const XMVECTORF32 Catmul2 = {2.0f,2.0f,2.0f,2.0f};
+    static const XMVECTORF32 Catmul3 = {3.0f,3.0f,3.0f,3.0f};
+    static const XMVECTORF32 Catmul4 = {4.0f,4.0f,4.0f,4.0f};
+    static const XMVECTORF32 Catmul5 = {5.0f,5.0f,5.0f,5.0f};
+    // Cache T^2 and T^3
+    XMVECTOR T2 = _mm_mul_ps(T,T);
+    XMVECTOR T3 = _mm_mul_ps(T,T2);
+    // Perform the Position0 term: (2*t^2 - t - t^3)
+    XMVECTOR vResult = _mm_add_ps(T2,T2);
+    vResult = _mm_sub_ps(vResult,T);
+    vResult = _mm_sub_ps(vResult,T3);
+    vResult = _mm_mul_ps(vResult,Position0);
+    // Perform the Position1 term and add: (3*t^3 - 5*t^2 + 2)
+    XMVECTOR vTemp = _mm_mul_ps(T3,Catmul3);
+    XMVECTOR vTemp2 = _mm_mul_ps(T2,Catmul5);
+    vTemp = _mm_sub_ps(vTemp,vTemp2);
+    vTemp = _mm_add_ps(vTemp,Catmul2);
+    vTemp = _mm_mul_ps(vTemp,Position1);
+    vResult = _mm_add_ps(vResult,vTemp);
+    // Perform the Position2 term and add: (4*t^2 - 3*t^3 + t)
+    vTemp = _mm_mul_ps(T2,Catmul4);
+    vTemp2 = _mm_mul_ps(T3,Catmul3);
+    vTemp = _mm_sub_ps(vTemp,vTemp2);
+    vTemp = _mm_add_ps(vTemp,T);
+    vTemp = _mm_mul_ps(vTemp,Position2);
+    vResult = _mm_add_ps(vResult,vTemp);
+    // Position3 is the last term: (t^3 - t^2)
+    T3 = _mm_sub_ps(T3,T2);
+    T3 = _mm_mul_ps(T3,Position3);
+    vResult = _mm_add_ps(vResult,T3);
+    // Multiply by 0.5f and exit
+    vResult = _mm_mul_ps(vResult,g_XMOneHalf);
+    return vResult;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Returns a point specified by barycentric coordinates over a triangle:
+//   Result = Position0 + f * (Position1 - Position0) + g * (Position2 - Position0)
+// (f=0,g=0) yields Position0, (f=1,g=0) yields Position1, (f=0,g=1) yields
+// Position2; f,g are scalar weights applied to every component.
+inline XMVECTOR XMVectorBaryCentric
+(
+ FXMVECTOR Position0,
+ FXMVECTOR Position1,
+ FXMVECTOR Position2,
+ float f,
+ float g
+)
+{
+ // Result = Position0 + f * (Position1 - Position0) + g * (Position2 - Position0)
+
+#if defined(_XM_NO_INTRINSICS_)
+
+ XMVECTOR P10 = XMVectorSubtract(Position1, Position0);
+ XMVECTOR ScaleF = XMVectorReplicate(f);
+
+ XMVECTOR P20 = XMVectorSubtract(Position2, Position0);
+ XMVECTOR ScaleG = XMVectorReplicate(g);
+
+ // Fused multiply-adds: P0 + f*P10, then + g*P20.
+ XMVECTOR Result = XMVectorMultiplyAdd(P10, ScaleF, Position0);
+ Result = XMVectorMultiplyAdd(P20, ScaleG, Result);
+
+ return Result;
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ XMVECTOR R1 = vsubq_f32(Position1,Position0);
+ XMVECTOR SF = vdupq_n_f32(f);
+ XMVECTOR R2 = vsubq_f32(Position2,Position0);
+ XMVECTOR SG = vdupq_n_f32(g);
+ R1 = vmlaq_f32( Position0, R1, SF);
+ return vmlaq_f32( R1, R2, SG );
+#elif defined(_XM_SSE_INTRINSICS_)
+ XMVECTOR R1 = _mm_sub_ps(Position1,Position0);
+ XMVECTOR SF = _mm_set_ps1(f);
+ XMVECTOR R2 = _mm_sub_ps(Position2,Position0);
+ XMVECTOR SG = _mm_set_ps1(g);
+ R1 = _mm_mul_ps(R1,SF);
+ R2 = _mm_mul_ps(R2,SG);
+ R1 = _mm_add_ps(R1,Position0);
+ R1 = _mm_add_ps(R1,R2);
+ return R1;
+#else // _XM_VMX128_INTRINSICS_
+// FIX: this guard previously read '#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)',
+// which was inconsistent with every sibling function in this file (all use a
+// plain '#else' for the VMX128 branch) and contradicted the '#endif' comment below.
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Vector form of XMVectorBaryCentric: the barycentric weights are supplied
+// per-component in F and G instead of as scalars, so each component of the
+// result can use a different (f,g) pair.
+inline XMVECTOR XMVectorBaryCentricV
+(
+ FXMVECTOR Position0,
+ FXMVECTOR Position1,
+ FXMVECTOR Position2,
+ GXMVECTOR F,
+ CXMVECTOR G
+)
+{
+ // Result = Position0 + f * (Position1 - Position0) + g * (Position2 - Position0)
+
+#if defined(_XM_NO_INTRINSICS_)
+
+ XMVECTOR P10 = XMVectorSubtract(Position1, Position0);
+ XMVECTOR P20 = XMVectorSubtract(Position2, Position0);
+
+ // Fused multiply-adds: P0 + F*P10, then + G*P20.
+ XMVECTOR Result = XMVectorMultiplyAdd(P10, F, Position0);
+ Result = XMVectorMultiplyAdd(P20, G, Result);
+
+ return Result;
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ XMVECTOR R1 = vsubq_f32(Position1,Position0);
+ XMVECTOR R2 = vsubq_f32(Position2,Position0);
+ R1 = vmlaq_f32( Position0, R1, F );
+ return vmlaq_f32( R1, R2, G);
+#elif defined(_XM_SSE_INTRINSICS_)
+ XMVECTOR R1 = _mm_sub_ps(Position1,Position0);
+ XMVECTOR R2 = _mm_sub_ps(Position2,Position0);
+ R1 = _mm_mul_ps(R1,F);
+ R2 = _mm_mul_ps(R2,G);
+ R1 = _mm_add_ps(R1,Position0);
+ R1 = _mm_add_ps(R1,R2);
+ return R1;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+/****************************************************************************
+ *
+ * 2D Vector
+ *
+ ****************************************************************************/
+
+//------------------------------------------------------------------------------
+// Comparison operations
+//------------------------------------------------------------------------------
+
+//------------------------------------------------------------------------------
+
+// Returns true when the x and y components of V1 and V2 compare exactly
+// equal; z and w are ignored.  Exact float comparison, so NaN lanes never
+// compare equal.
+inline bool XMVector2Equal
+(
+ FXMVECTOR V1,
+ FXMVECTOR V2
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+ return (((V1.vector4_f32[0] == V2.vector4_f32[0]) && (V1.vector4_f32[1] == V2.vector4_f32[1])) != 0);
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ // Compare the low two lanes; result is all-ones per equal lane, so both
+ // lanes equal <=> the combined 64-bit mask is all ones.
+ __n64 vTemp = vceq_f32( vget_low_f32(V1), vget_low_f32(V2) );
+ return ( vget_lane_u64( vTemp, 0 ) == 0xFFFFFFFFFFFFFFFFU );
+#elif defined(_XM_SSE_INTRINSICS_)
+ XMVECTOR vTemp = _mm_cmpeq_ps(V1,V2);
+// z and w are don't care
+ return (((_mm_movemask_ps(vTemp)&3)==3) != 0);
+#else // _XM_VMX128_INTRINSICS_
+ return XMComparisonAllTrue(XMVector2EqualR(V1, V2));
+#endif
+}
+
+
+//------------------------------------------------------------------------------
+
+// Compares the x and y components of V1 and V2 and returns a comparison
+// record instead of a bool:
+//   XM_CRMASK_CR6TRUE  - both x and y are equal
+//   XM_CRMASK_CR6FALSE - neither x nor y is equal
+//   0                  - mixed (exactly one component equal)
+// Interpret the record with XMComparisonAllTrue / XMComparisonAnyFalse etc.
+inline uint32_t XMVector2EqualR
+(
+ FXMVECTOR V1,
+ FXMVECTOR V2
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+
+ uint32_t CR = 0;
+ if ((V1.vector4_f32[0] == V2.vector4_f32[0]) &&
+ (V1.vector4_f32[1] == V2.vector4_f32[1]))
+ {
+ CR = XM_CRMASK_CR6TRUE;
+ }
+ else if ((V1.vector4_f32[0] != V2.vector4_f32[0]) &&
+ (V1.vector4_f32[1] != V2.vector4_f32[1]))
+ {
+ CR = XM_CRMASK_CR6FALSE;
+ }
+ return CR;
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ __n64 vTemp = vceq_f32( vget_low_f32(V1), vget_low_f32(V2) );
+ uint64_t r = vget_lane_u64( vTemp, 0 );
+ uint32_t CR = 0;
+ // All 64 mask bits set -> both lanes equal; zero -> neither lane equal.
+ if ( r == 0xFFFFFFFFFFFFFFFFU )
+ {
+ CR = XM_CRMASK_CR6TRUE;
+ }
+ else if ( !r )
+ {
+ CR = XM_CRMASK_CR6FALSE;
+ }
+ return CR;
+#elif defined(_XM_SSE_INTRINSICS_)
+ XMVECTOR vTemp = _mm_cmpeq_ps(V1,V2);
+// z and w are don't care
+ int iTest = _mm_movemask_ps(vTemp)&3;
+ uint32_t CR = 0;
+ if (iTest==3)
+ {
+ CR = XM_CRMASK_CR6TRUE;
+ }
+ else if (!iTest)
+ {
+ CR = XM_CRMASK_CR6FALSE;
+ }
+ return CR;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Returns true when the x and y components of V1 and V2 are bitwise equal,
+// treating the lanes as 32-bit integers; z and w are ignored.  Unlike the
+// float compare, this treats identical NaN bit patterns as equal.
+inline bool XMVector2EqualInt
+(
+ FXMVECTOR V1,
+ FXMVECTOR V2
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+ return (((V1.vector4_u32[0] == V2.vector4_u32[0]) && (V1.vector4_u32[1] == V2.vector4_u32[1])) != 0);
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ __n64 vTemp = vceq_u32( vget_low_u32(V1), vget_low_u32(V2) );
+ return ( vget_lane_u64( vTemp, 0 ) == 0xFFFFFFFFFFFFFFFFU );
+#elif defined(_XM_SSE_INTRINSICS_)
+ // Integer compare requires reinterpreting the float register as __m128i.
+ __m128i vTemp = _mm_cmpeq_epi32(_mm_castps_si128(V1),_mm_castps_si128(V2));
+ return (((_mm_movemask_ps(_mm_castsi128_ps(vTemp))&3)==3) != 0);
+#else // _XM_VMX128_INTRINSICS_
+ return XMComparisonAllTrue(XMVector2EqualIntR(V1, V2));
+#endif
+}
+
+//------------------------------------------------------------------------------
+
+// Bitwise (integer) version of XMVector2EqualR.  Compares x and y as 32-bit
+// integers and returns a comparison record:
+//   XM_CRMASK_CR6TRUE  - both components bitwise equal
+//   XM_CRMASK_CR6FALSE - neither component bitwise equal
+//   0                  - mixed result
+inline uint32_t XMVector2EqualIntR
+(
+ FXMVECTOR V1,
+ FXMVECTOR V2
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+
+ uint32_t CR = 0;
+ if ((V1.vector4_u32[0] == V2.vector4_u32[0]) &&
+ (V1.vector4_u32[1] == V2.vector4_u32[1]))
+ {
+ CR = XM_CRMASK_CR6TRUE;
+ }
+ else if ((V1.vector4_u32[0] != V2.vector4_u32[0]) &&
+ (V1.vector4_u32[1] != V2.vector4_u32[1]))
+ {
+ CR = XM_CRMASK_CR6FALSE;
+ }
+ return CR;
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ __n64 vTemp = vceq_u32( vget_low_u32(V1), vget_low_u32(V2) );
+ uint64_t r = vget_lane_u64( vTemp, 0 );
+ uint32_t CR = 0;
+ // All mask bits set -> both lanes equal; zero -> neither lane equal.
+ if ( r == 0xFFFFFFFFFFFFFFFFU )
+ {
+ CR = XM_CRMASK_CR6TRUE;
+ }
+ else if ( !r )
+ {
+ CR = XM_CRMASK_CR6FALSE;
+ }
+ return CR;
+#elif defined(_XM_SSE_INTRINSICS_)
+ __m128i vTemp = _mm_cmpeq_epi32(_mm_castps_si128(V1),_mm_castps_si128(V2));
+ int iTest = _mm_movemask_ps(_mm_castsi128_ps(vTemp))&3;
+ uint32_t CR = 0;
+ if (iTest==3)
+ {
+ CR = XM_CRMASK_CR6TRUE;
+ }
+ else if (!iTest)
+ {
+ CR = XM_CRMASK_CR6FALSE;
+ }
+ return CR;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Returns true when |V1.x - V2.x| <= Epsilon.x and |V1.y - V2.y| <= Epsilon.y
+// (per-component tolerance); z and w are ignored.
+inline bool XMVector2NearEqual
+(
+ FXMVECTOR V1,
+ FXMVECTOR V2,
+ FXMVECTOR Epsilon
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+ float dx = fabsf(V1.vector4_f32[0]-V2.vector4_f32[0]);
+ float dy = fabsf(V1.vector4_f32[1]-V2.vector4_f32[1]);
+ return ((dx <= Epsilon.vector4_f32[0]) &&
+ (dy <= Epsilon.vector4_f32[1]));
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ // NOTE(review): float data is fetched with vget_low_u32 here; the MS ARM
+ // __n64 type is untyped so this reinterprets bits only -- confirm intended.
+ __n64 vDelta = vsub_f32(vget_low_u32(V1), vget_low_u32(V2));
+ // vacle = absolute-value compare: |delta| <= epsilon, per lane.
+ __n64 vTemp = vacle_f32( vDelta, vget_low_u32(Epsilon) );
+ uint64_t r = vget_lane_u64( vTemp, 0 );
+ return ( r == 0xFFFFFFFFFFFFFFFFU );
+#elif defined(_XM_SSE_INTRINSICS_)
+ // Get the difference
+ XMVECTOR vDelta = _mm_sub_ps(V1,V2);
+ // Get the absolute value of the difference
+ XMVECTOR vTemp = _mm_setzero_ps();
+ vTemp = _mm_sub_ps(vTemp,vDelta);
+ vTemp = _mm_max_ps(vTemp,vDelta);
+ vTemp = _mm_cmple_ps(vTemp,Epsilon);
+ // z and w are don't care
+ return (((_mm_movemask_ps(vTemp)&3)==0x3) != 0);
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Returns true when at least one of the x or y components of V1 and V2
+// differ (logical negation of XMVector2Equal); z and w are ignored.
+inline bool XMVector2NotEqual
+(
+ FXMVECTOR V1,
+ FXMVECTOR V2
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+ return (((V1.vector4_f32[0] != V2.vector4_f32[0]) || (V1.vector4_f32[1] != V2.vector4_f32[1])) != 0);
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ // Equality mask not all ones <=> some lane differs.
+ __n64 vTemp = vceq_f32( vget_low_f32(V1), vget_low_f32(V2) );
+ return ( vget_lane_u64( vTemp, 0 ) != 0xFFFFFFFFFFFFFFFFU );
+#elif defined(_XM_SSE_INTRINSICS_)
+ XMVECTOR vTemp = _mm_cmpeq_ps(V1,V2);
+// z and w are don't care
+ return (((_mm_movemask_ps(vTemp)&3)!=3) != 0);
+#else // _XM_VMX128_INTRINSICS_
+ return XMComparisonAnyFalse(XMVector2EqualR(V1, V2));
+#endif
+}
+
+//------------------------------------------------------------------------------
+
+// Returns true when the x or y components of V1 and V2 differ bitwise,
+// treating the lanes as 32-bit integers; z and w are ignored.
+inline bool XMVector2NotEqualInt
+(
+ FXMVECTOR V1,
+ FXMVECTOR V2
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+ return (((V1.vector4_u32[0] != V2.vector4_u32[0]) || (V1.vector4_u32[1] != V2.vector4_u32[1])) != 0);
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ __n64 vTemp = vceq_u32( vget_low_u32(V1), vget_low_u32(V2) );
+ return ( vget_lane_u64( vTemp, 0 ) != 0xFFFFFFFFFFFFFFFFU );
+#elif defined(_XM_SSE_INTRINSICS_)
+ __m128i vTemp = _mm_cmpeq_epi32(_mm_castps_si128(V1),_mm_castps_si128(V2));
+ return (((_mm_movemask_ps(_mm_castsi128_ps(vTemp))&3)!=3) != 0);
+#else // _XM_VMX128_INTRINSICS_
+ return XMComparisonAnyFalse(XMVector2EqualIntR(V1, V2));
+#endif
+}
+
+//------------------------------------------------------------------------------
+
+// Returns true when both V1.x > V2.x and V1.y > V2.y; z and w are ignored.
+inline bool XMVector2Greater
+(
+ FXMVECTOR V1,
+ FXMVECTOR V2
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+ return (((V1.vector4_f32[0] > V2.vector4_f32[0]) && (V1.vector4_f32[1] > V2.vector4_f32[1])) != 0);
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ __n64 vTemp = vcgt_f32( vget_low_f32(V1), vget_low_f32(V2) );
+ return ( vget_lane_u64( vTemp, 0 ) == 0xFFFFFFFFFFFFFFFFU );
+#elif defined(_XM_SSE_INTRINSICS_)
+ XMVECTOR vTemp = _mm_cmpgt_ps(V1,V2);
+// z and w are don't care
+ return (((_mm_movemask_ps(vTemp)&3)==3) != 0);
+#else // _XM_VMX128_INTRINSICS_
+ return XMComparisonAllTrue(XMVector2GreaterR(V1, V2));
+#endif
+}
+
+//------------------------------------------------------------------------------
+
+// Compares x and y of V1 and V2 with '>' and returns a comparison record:
+//   XM_CRMASK_CR6TRUE  - both components greater
+//   XM_CRMASK_CR6FALSE - neither component greater
+//   0                  - mixed result
+inline uint32_t XMVector2GreaterR
+(
+ FXMVECTOR V1,
+ FXMVECTOR V2
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+
+ uint32_t CR = 0;
+ if ((V1.vector4_f32[0] > V2.vector4_f32[0]) &&
+ (V1.vector4_f32[1] > V2.vector4_f32[1]))
+ {
+ CR = XM_CRMASK_CR6TRUE;
+ }
+ else if ((V1.vector4_f32[0] <= V2.vector4_f32[0]) &&
+ (V1.vector4_f32[1] <= V2.vector4_f32[1]))
+ {
+ CR = XM_CRMASK_CR6FALSE;
+ }
+ return CR;
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ __n64 vTemp = vcgt_f32( vget_low_f32(V1), vget_low_f32(V2) );
+ uint64_t r = vget_lane_u64( vTemp, 0 );
+ uint32_t CR = 0;
+ // All mask bits set -> both greater; zero -> neither greater.
+ if ( r == 0xFFFFFFFFFFFFFFFFU )
+ {
+ CR = XM_CRMASK_CR6TRUE;
+ }
+ else if ( !r )
+ {
+ CR = XM_CRMASK_CR6FALSE;
+ }
+ return CR;
+#elif defined(_XM_SSE_INTRINSICS_)
+ XMVECTOR vTemp = _mm_cmpgt_ps(V1,V2);
+ int iTest = _mm_movemask_ps(vTemp)&3;
+ uint32_t CR = 0;
+ if (iTest==3)
+ {
+ CR = XM_CRMASK_CR6TRUE;
+ }
+ else if (!iTest)
+ {
+ CR = XM_CRMASK_CR6FALSE;
+ }
+ return CR;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Returns true when both V1.x >= V2.x and V1.y >= V2.y; z and w are ignored.
+inline bool XMVector2GreaterOrEqual
+(
+ FXMVECTOR V1,
+ FXMVECTOR V2
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+ return (((V1.vector4_f32[0] >= V2.vector4_f32[0]) && (V1.vector4_f32[1] >= V2.vector4_f32[1])) != 0);
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ __n64 vTemp = vcge_f32( vget_low_f32(V1), vget_low_f32(V2) );
+ return ( vget_lane_u64( vTemp, 0 ) == 0xFFFFFFFFFFFFFFFFU );
+#elif defined(_XM_SSE_INTRINSICS_)
+ XMVECTOR vTemp = _mm_cmpge_ps(V1,V2);
+ return (((_mm_movemask_ps(vTemp)&3)==3) != 0);
+#else // _XM_VMX128_INTRINSICS_
+ return XMComparisonAllTrue(XMVector2GreaterOrEqualR(V1, V2));
+#endif
+}
+
+//------------------------------------------------------------------------------
+
+// Compares x and y of V1 and V2 with '>=' and returns a comparison record:
+//   XM_CRMASK_CR6TRUE  - both components greater-or-equal
+//   XM_CRMASK_CR6FALSE - neither component greater-or-equal
+//   0                  - mixed result
+inline uint32_t XMVector2GreaterOrEqualR
+(
+ FXMVECTOR V1,
+ FXMVECTOR V2
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+
+ uint32_t CR = 0;
+ if ((V1.vector4_f32[0] >= V2.vector4_f32[0]) &&
+ (V1.vector4_f32[1] >= V2.vector4_f32[1]))
+ {
+ CR = XM_CRMASK_CR6TRUE;
+ }
+ else if ((V1.vector4_f32[0] < V2.vector4_f32[0]) &&
+ (V1.vector4_f32[1] < V2.vector4_f32[1]))
+ {
+ CR = XM_CRMASK_CR6FALSE;
+ }
+ return CR;
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ __n64 vTemp = vcge_f32( vget_low_f32(V1), vget_low_f32(V2) );
+ uint64_t r = vget_lane_u64( vTemp, 0 );
+ uint32_t CR = 0;
+ // All mask bits set -> both pass; zero -> neither passes.
+ if ( r == 0xFFFFFFFFFFFFFFFFU )
+ {
+ CR = XM_CRMASK_CR6TRUE;
+ }
+ else if ( !r )
+ {
+ CR = XM_CRMASK_CR6FALSE;
+ }
+ return CR;
+#elif defined(_XM_SSE_INTRINSICS_)
+ XMVECTOR vTemp = _mm_cmpge_ps(V1,V2);
+ int iTest = _mm_movemask_ps(vTemp)&3;
+ uint32_t CR = 0;
+ if (iTest == 3)
+ {
+ CR = XM_CRMASK_CR6TRUE;
+ }
+ else if (!iTest)
+ {
+ CR = XM_CRMASK_CR6FALSE;
+ }
+ return CR;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Returns true when both V1.x < V2.x and V1.y < V2.y; z and w are ignored.
+inline bool XMVector2Less
+(
+ FXMVECTOR V1,
+ FXMVECTOR V2
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+ return (((V1.vector4_f32[0] < V2.vector4_f32[0]) && (V1.vector4_f32[1] < V2.vector4_f32[1])) != 0);
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ __n64 vTemp = vclt_f32( vget_low_f32(V1), vget_low_f32(V2) );
+ return ( vget_lane_u64( vTemp, 0 ) == 0xFFFFFFFFFFFFFFFFU );
+#elif defined(_XM_SSE_INTRINSICS_)
+ XMVECTOR vTemp = _mm_cmplt_ps(V1,V2);
+ return (((_mm_movemask_ps(vTemp)&3)==3) != 0);
+#else // _XM_VMX128_INTRINSICS_
+ // V1 < V2 is the same test as V2 > V1 (arguments swapped).
+ return XMComparisonAllTrue(XMVector2GreaterR(V2, V1));
+#endif
+}
+
+//------------------------------------------------------------------------------
+
+// Returns true when both V1.x <= V2.x and V1.y <= V2.y; z and w are ignored.
+inline bool XMVector2LessOrEqual
+(
+ FXMVECTOR V1,
+ FXMVECTOR V2
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+ return (((V1.vector4_f32[0] <= V2.vector4_f32[0]) && (V1.vector4_f32[1] <= V2.vector4_f32[1])) != 0);
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ __n64 vTemp = vcle_f32( vget_low_f32(V1), vget_low_f32(V2) );
+ return ( vget_lane_u64( vTemp, 0 ) == 0xFFFFFFFFFFFFFFFFU );
+#elif defined(_XM_SSE_INTRINSICS_)
+ XMVECTOR vTemp = _mm_cmple_ps(V1,V2);
+ return (((_mm_movemask_ps(vTemp)&3)==3) != 0);
+#else // _XM_VMX128_INTRINSICS_
+ // V1 <= V2 is the same test as V2 >= V1 (arguments swapped).
+ return XMComparisonAllTrue(XMVector2GreaterOrEqualR(V2, V1));
+#endif
+}
+
+//------------------------------------------------------------------------------
+
+// Returns true when -Bounds.x <= V.x <= Bounds.x and
+// -Bounds.y <= V.y <= Bounds.y (symmetric per-component bounds test);
+// z and w are ignored.
+inline bool XMVector2InBounds
+(
+ FXMVECTOR V,
+ FXMVECTOR Bounds
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+ return (((V.vector4_f32[0] <= Bounds.vector4_f32[0] && V.vector4_f32[0] >= -Bounds.vector4_f32[0]) &&
+ (V.vector4_f32[1] <= Bounds.vector4_f32[1] && V.vector4_f32[1] >= -Bounds.vector4_f32[1])) != 0);
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ __n64 VL = vget_low_f32( V );
+ __n64 B = vget_low_f32( Bounds );
+ // Test if less than or equal
+ __n64 vTemp1 = vcle_f32(VL,B);
+ // Negate the bounds
+ __n64 vTemp2 = vneg_f32(B);
+ // Test if greater or equal (Reversed)
+ vTemp2 = vcle_f32(vTemp2,VL);
+ // Blend answers
+ vTemp1 = vand_u32(vTemp1,vTemp2);
+ // x and y in bounds?
+ return ( vget_lane_u64( vTemp1, 0 ) == 0xFFFFFFFFFFFFFFFFU );
+#elif defined(_XM_SSE_INTRINSICS_)
+ // Test if less than or equal
+ XMVECTOR vTemp1 = _mm_cmple_ps(V,Bounds);
+ // Negate the bounds
+ XMVECTOR vTemp2 = _mm_mul_ps(Bounds,g_XMNegativeOne);
+ // Test if greater or equal (Reversed)
+ vTemp2 = _mm_cmple_ps(vTemp2,V);
+ // Blend answers
+ vTemp1 = _mm_and_ps(vTemp1,vTemp2);
+ // x and y in bounds? (z and w are don't care)
+ return (((_mm_movemask_ps(vTemp1)&0x3)==0x3) != 0);
+#else // _XM_VMX128_INTRINSICS_
+ return XMComparisonAllInBounds(XMVector2InBoundsR(V, Bounds));
+#endif
+}
+
+
+//------------------------------------------------------------------------------
+
+// Returns true when the x or y component of V is NaN; z and w are ignored.
+inline bool XMVector2IsNaN
+(
+ FXMVECTOR V
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+ return (XMISNAN(V.vector4_f32[0]) ||
+ XMISNAN(V.vector4_f32[1]));
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ __n64 VL = vget_low_f32( V );
+ // Test against itself. NaN is always not equal
+ __n64 vTempNan = vceq_f32( VL, VL );
+ // If x or y are NaN, the mask is zero
+ return ( vget_lane_u64( vTempNan, 0 ) != 0xFFFFFFFFFFFFFFFFU );
+#elif defined(_XM_SSE_INTRINSICS_)
+ // Test against itself. NaN is always not equal
+ XMVECTOR vTempNan = _mm_cmpneq_ps(V,V);
+ // If x or y are NaN, the mask is non-zero
+ return ((_mm_movemask_ps(vTempNan)&3) != 0);
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Returns true when the x or y component of V is +infinity or -infinity;
+// z and w are ignored.
+inline bool XMVector2IsInfinite
+(
+ FXMVECTOR V
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+
+ return (XMISINF(V.vector4_f32[0]) ||
+ XMISINF(V.vector4_f32[1]));
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ // Mask off the sign bit
+ __n64 vTemp = vand_u32( vget_low_f32( V ) , vget_low_f32( g_XMAbsMask ) );
+ // Compare to infinity
+ vTemp = vceq_f32(vTemp, vget_low_f32( g_XMInfinity) );
+ // If any are infinity, the signs are true.
+ return vget_lane_u64( vTemp, 0 ) != 0;
+#elif defined(_XM_SSE_INTRINSICS_)
+ // Mask off the sign bit
+ __m128 vTemp = _mm_and_ps(V,g_XMAbsMask);
+ // Compare to infinity
+ vTemp = _mm_cmpeq_ps(vTemp,g_XMInfinity);
+ // If x or z are infinity, the signs are true.
+ return ((_mm_movemask_ps(vTemp)&3) != 0);
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+// Computation operations
+//------------------------------------------------------------------------------
+
+//------------------------------------------------------------------------------
+
+// Computes the 2D dot product V1.x*V2.x + V1.y*V2.y and replicates the
+// scalar result into all four lanes of the returned vector.
+inline XMVECTOR XMVector2Dot
+(
+ FXMVECTOR V1,
+ FXMVECTOR V2
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+
+ XMVECTOR Result;
+ // Chained assignment splats the scalar dot product into every lane.
+ Result.vector4_f32[0] =
+ Result.vector4_f32[1] =
+ Result.vector4_f32[2] =
+ Result.vector4_f32[3] = V1.vector4_f32[0] * V2.vector4_f32[0] + V1.vector4_f32[1] * V2.vector4_f32[1];
+ return Result;
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ // Perform the dot product on x and y
+ __n64 vTemp = vmul_f32( vget_low_f32(V1), vget_low_f32(V2) );
+ // Pairwise add sums x*x'+y*y' into both lanes; combine splats to 4 lanes.
+ vTemp = vpadd_f32( vTemp, vTemp );
+ return vcombine_f32( vTemp, vTemp );
+#elif defined(_XM_SSE_INTRINSICS_)
+ // Perform the dot product on x and y
+ XMVECTOR vLengthSq = _mm_mul_ps(V1,V2);
+ // vTemp has y splatted
+ XMVECTOR vTemp = XM_PERMUTE_PS(vLengthSq,_MM_SHUFFLE(1,1,1,1));
+ // x+y
+ vLengthSq = _mm_add_ss(vLengthSq,vTemp);
+ vLengthSq = XM_PERMUTE_PS(vLengthSq,_MM_SHUFFLE(0,0,0,0));
+ return vLengthSq;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Computes the scalar 2D cross product V1.x*V2.y - V1.y*V2.x (the z
+// component of the 3D cross of (x,y,0) vectors) and replicates it into all
+// four lanes of the result.
+inline XMVECTOR XMVector2Cross
+(
+ FXMVECTOR V1,
+ FXMVECTOR V2
+)
+{
+ // [ V1.x*V2.y - V1.y*V2.x, V1.x*V2.y - V1.y*V2.x ]
+
+#if defined(_XM_NO_INTRINSICS_)
+ float fCross = (V1.vector4_f32[0] * V2.vector4_f32[1]) - (V1.vector4_f32[1] * V2.vector4_f32[0]);
+ XMVECTOR vResult = {
+ fCross,
+ fCross,
+ fCross,
+ fCross
+ };
+ return vResult;
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ static const XMVECTORF32 Negate = { 1.f, -1.f, 0, 0 };
+
+ // vrev64 swaps V2 to (y,x); multiply gives (x1*y2, y1*x2).
+ __n64 vTemp = vmul_f32( vget_low_f32( V1 ), vrev64_f32( vget_low_f32( V2 ) ) );
+ // Negate the second product, then pairwise-add to form the difference.
+ vTemp = vmul_f32( vTemp, vget_low_f32( Negate ) );
+ vTemp = vpadd_f32( vTemp, vTemp );
+ return vcombine_f32( vTemp, vTemp );
+#elif defined(_XM_SSE_INTRINSICS_)
+ // Swap x and y
+ XMVECTOR vResult = XM_PERMUTE_PS(V2,_MM_SHUFFLE(0,1,0,1));
+ // Perform the muls
+ vResult = _mm_mul_ps(vResult,V1);
+ // Splat y
+ XMVECTOR vTemp = XM_PERMUTE_PS(vResult,_MM_SHUFFLE(1,1,1,1));
+ // Sub the values
+ vResult = _mm_sub_ss(vResult,vTemp);
+ // Splat the cross product
+ vResult = XM_PERMUTE_PS(vResult,_MM_SHUFFLE(0,0,0,0));
+ return vResult;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Returns the squared length of the x/y components of V, replicated into
+// every lane of the result.  No square root is taken; prefer this over
+// XMVector2Length when only relative magnitudes are needed.
+inline XMVECTOR XMVector2LengthSq
+(
+ FXMVECTOR V
+)
+{
+ // |V.xy|^2 == dot(V.xy, V.xy)
+ XMVECTOR vResult = XMVector2Dot(V, V);
+ return vResult;
+}
+
+//------------------------------------------------------------------------------
+
+// Returns an estimate of 1 / length(V.xy), replicated into all four lanes.
+// Uses the hardware reciprocal-sqrt estimate on SIMD paths, so precision is
+// reduced relative to XMVector2ReciprocalLength.
+inline XMVECTOR XMVector2ReciprocalLengthEst
+(
+ FXMVECTOR V
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+
+ XMVECTOR Result;
+ Result = XMVector2LengthSq(V);
+ Result = XMVectorReciprocalSqrtEst(Result);
+ return Result;
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ __n64 VL = vget_low_f32(V);
+ // Dot2
+ __n64 vTemp = vmul_f32( VL, VL );
+ vTemp = vpadd_f32( vTemp, vTemp );
+ // Reciprocal sqrt (estimate)
+ vTemp = vrsqrte_f32( vTemp );
+ return vcombine_f32( vTemp, vTemp );
+#elif defined(_XM_SSE_INTRINSICS_)
+ // Perform the dot product on x and y
+ XMVECTOR vLengthSq = _mm_mul_ps(V,V);
+ // vTemp has y splatted
+ XMVECTOR vTemp = XM_PERMUTE_PS(vLengthSq,_MM_SHUFFLE(1,1,1,1));
+ // x+y
+ vLengthSq = _mm_add_ss(vLengthSq,vTemp);
+ vLengthSq = _mm_rsqrt_ss(vLengthSq);
+ vLengthSq = XM_PERMUTE_PS(vLengthSq,_MM_SHUFFLE(0,0,0,0));
+ return vLengthSq;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Returns 1 / length(V.xy) at full precision, replicated into all four
+// lanes.  The NEON path refines the rsqrt estimate with two Newton-Raphson
+// iterations; the SSE path uses an exact sqrt and divide.
+inline XMVECTOR XMVector2ReciprocalLength
+(
+ FXMVECTOR V
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+
+ XMVECTOR Result;
+ Result = XMVector2LengthSq(V);
+ Result = XMVectorReciprocalSqrt(Result);
+ return Result;
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ __n64 VL = vget_low_f32(V);
+ // Dot2
+ __n64 vTemp = vmul_f32( VL, VL );
+ vTemp = vpadd_f32( vTemp, vTemp );
+ // Reciprocal sqrt
+ __n64 S0 = vrsqrte_f32(vTemp);
+ // Two Newton-Raphson refinement steps via vrsqrts.
+ __n64 P0 = vmul_f32( vTemp, S0 );
+ __n64 R0 = vrsqrts_f32( P0, S0 );
+ __n64 S1 = vmul_f32( S0, R0 );
+ __n64 P1 = vmul_f32( vTemp, S1 );
+ __n64 R1 = vrsqrts_f32( P1, S1 );
+ __n64 Result = vmul_f32( S1, R1 );
+ return vcombine_f32( Result, Result );
+#elif defined(_XM_SSE_INTRINSICS_)
+ // Perform the dot product on x and y
+ XMVECTOR vLengthSq = _mm_mul_ps(V,V);
+ // vTemp has y splatted
+ XMVECTOR vTemp = XM_PERMUTE_PS(vLengthSq,_MM_SHUFFLE(1,1,1,1));
+ // x+y
+ vLengthSq = _mm_add_ss(vLengthSq,vTemp);
+ vLengthSq = _mm_sqrt_ss(vLengthSq);
+ vLengthSq = _mm_div_ss(g_XMOne,vLengthSq);
+ vLengthSq = XM_PERMUTE_PS(vLengthSq,_MM_SHUFFLE(0,0,0,0));
+ return vLengthSq;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Returns an estimate of length(V.xy), replicated into all four lanes.
+// The NEON path computes lensq * rsqrte(lensq) and selects 0 when the
+// squared length is exactly zero (avoiding 0 * inf = NaN).
+inline XMVECTOR XMVector2LengthEst
+(
+ FXMVECTOR V
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+
+ XMVECTOR Result;
+ Result = XMVector2LengthSq(V);
+ Result = XMVectorSqrtEst(Result);
+ return Result;
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ __n64 VL = vget_low_f32(V);
+ // Dot2
+ __n64 vTemp = vmul_f32( VL, VL );
+ vTemp = vpadd_f32( vTemp, vTemp );
+ const __n64 zero = vdup_n_u32(0);
+ __n64 VEqualsZero = vceq_f32( vTemp, zero );
+ // Sqrt (estimate)
+ __n64 Result = vrsqrte_f32( vTemp );
+ Result = vmul_f32( vTemp, Result );
+ // Force the zero-length case to 0 instead of lensq * inf.
+ Result = vbsl_f32( VEqualsZero, zero, Result );
+ return vcombine_f32( Result, Result );
+#elif defined(_XM_SSE_INTRINSICS_)
+ // Perform the dot product on x and y
+ XMVECTOR vLengthSq = _mm_mul_ps(V,V);
+ // vTemp has y splatted
+ XMVECTOR vTemp = XM_PERMUTE_PS(vLengthSq,_MM_SHUFFLE(1,1,1,1));
+ // x+y
+ vLengthSq = _mm_add_ss(vLengthSq,vTemp);
+ vLengthSq = _mm_sqrt_ss(vLengthSq);
+ vLengthSq = XM_PERMUTE_PS(vLengthSq,_MM_SHUFFLE(0,0,0,0));
+ return vLengthSq;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Returns length(V.xy) = sqrt(V.x^2 + V.y^2) at full precision, replicated
+// into all four lanes of the result.
+inline XMVECTOR XMVector2Length
+(
+ FXMVECTOR V
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+
+ XMVECTOR Result;
+ Result = XMVector2LengthSq(V);
+ Result = XMVectorSqrt(Result);
+ return Result;
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ __n64 VL = vget_low_f32(V);
+ // Dot2
+ __n64 vTemp = vmul_f32( VL, VL );
+ vTemp = vpadd_f32( vTemp, vTemp );
+ const __n64 zero = vdup_n_u32(0);
+ __n64 VEqualsZero = vceq_f32( vTemp, zero );
+ // Sqrt
+ // sqrt(x) computed as x * rsqrt(x), with two Newton-Raphson refinements.
+ __n64 S0 = vrsqrte_f32( vTemp );
+ __n64 P0 = vmul_f32( vTemp, S0 );
+ __n64 R0 = vrsqrts_f32( P0, S0 );
+ __n64 S1 = vmul_f32( S0, R0 );
+ __n64 P1 = vmul_f32( vTemp, S1 );
+ __n64 R1 = vrsqrts_f32( P1, S1 );
+ __n64 Result = vmul_f32( S1, R1 );
+ Result = vmul_f32( vTemp, Result );
+ // Force the zero-length case to 0 instead of 0 * inf.
+ Result = vbsl_f32( VEqualsZero, zero, Result );
+ return vcombine_f32( Result, Result );
+#elif defined(_XM_SSE_INTRINSICS_)
+ // Perform the dot product on x and y
+ XMVECTOR vLengthSq = _mm_mul_ps(V,V);
+ // vTemp has y splatted
+ XMVECTOR vTemp = XM_PERMUTE_PS(vLengthSq,_MM_SHUFFLE(1,1,1,1));
+ // x+y
+ vLengthSq = _mm_add_ss(vLengthSq,vTemp);
+ vLengthSq = XM_PERMUTE_PS(vLengthSq,_MM_SHUFFLE(0,0,0,0));
+ vLengthSq = _mm_sqrt_ps(vLengthSq);
+ return vLengthSq;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+// XMVector2NormalizeEst uses a reciprocal estimate and
+// returns QNaN on zero and infinite vectors.
+
+// Fast approximate normalize of the x/y components of V (all four lanes are
+// scaled by the estimated reciprocal length).  As noted above, returns QNaN
+// for zero and infinite-length inputs rather than handling them specially.
+inline XMVECTOR XMVector2NormalizeEst
+(
+ FXMVECTOR V
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+
+ XMVECTOR Result;
+ Result = XMVector2ReciprocalLength(V);
+ Result = XMVectorMultiply(V, Result);
+ return Result;
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ __n64 VL = vget_low_f32(V);
+ // Dot2
+ __n64 vTemp = vmul_f32( VL, VL );
+ vTemp = vpadd_f32( vTemp, vTemp );
+ // Reciprocal sqrt (estimate)
+ vTemp = vrsqrte_f32( vTemp );
+ // Normalize
+ __n64 Result = vmul_f32( VL, vTemp );
+ return vcombine_f32( Result, Result );
+#elif defined(_XM_SSE_INTRINSICS_)
+ // Perform the dot product on x and y
+ XMVECTOR vLengthSq = _mm_mul_ps(V,V);
+ // vTemp has y splatted
+ XMVECTOR vTemp = XM_PERMUTE_PS(vLengthSq,_MM_SHUFFLE(1,1,1,1));
+ // x+y
+ vLengthSq = _mm_add_ss(vLengthSq,vTemp);
+ vLengthSq = _mm_rsqrt_ss(vLengthSq);
+ vLengthSq = XM_PERMUTE_PS(vLengthSq,_MM_SHUFFLE(0,0,0,0));
+ vLengthSq = _mm_mul_ps(vLengthSq,V);
+ return vLengthSq;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Full-precision normalize of V based on its x/y length (all four lanes are
+// scaled).  Special cases: a zero-length input yields zero components; an
+// infinite-length input yields QNaN components.
+inline XMVECTOR XMVector2Normalize
+(
+ FXMVECTOR V
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+
+ XMVECTOR vResult = XMVector2Length( V );
+ float fLength = vResult.vector4_f32[0];
+
+ // Prevent divide by zero
+ if (fLength > 0) {
+ fLength = 1.0f/fLength;
+ }
+
+ // Note: scales all four components, not just x and y.
+ vResult.vector4_f32[0] = V.vector4_f32[0]*fLength;
+ vResult.vector4_f32[1] = V.vector4_f32[1]*fLength;
+ vResult.vector4_f32[2] = V.vector4_f32[2]*fLength;
+ vResult.vector4_f32[3] = V.vector4_f32[3]*fLength;
+ return vResult;
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ __n64 VL = vget_low_f32(V);
+ // Dot2
+ __n64 vTemp = vmul_f32( VL, VL );
+ vTemp = vpadd_f32( vTemp, vTemp );
+ // Record zero / infinite squared-length cases before refining.
+ __n64 VEqualsZero = vceq_f32( vTemp, vdup_n_u32(0) );
+ __n64 VEqualsInf = vceq_f32( vTemp, vget_low_f32(g_XMInfinity) );
+ // Reciprocal sqrt (2 iterations of Newton-Raphson)
+ __n64 S0 = vrsqrte_f32( vTemp );
+ __n64 P0 = vmul_f32( vTemp, S0 );
+ __n64 R0 = vrsqrts_f32( P0, S0 );
+ __n64 S1 = vmul_f32( S0, R0 );
+ __n64 P1 = vmul_f32( vTemp, S1 );
+ __n64 R1 = vrsqrts_f32( P1, S1 );
+ vTemp = vmul_f32( S1, R1 );
+ // Normalize
+ __n64 Result = vmul_f32( VL, vTemp );
+ // Select the special-case outputs: 0 for zero length, QNaN for infinite.
+ Result = vbsl_f32( VEqualsZero, vdup_n_f32(0), Result );
+ Result = vbsl_f32( VEqualsInf, vget_low_f32(g_XMQNaN), Result );
+ return vcombine_f32( Result, Result );
+#elif defined(_XM_SSE_INTRINSICS_)
+ // Perform the dot product on x and y only
+ XMVECTOR vLengthSq = _mm_mul_ps(V,V);
+ XMVECTOR vTemp = XM_PERMUTE_PS(vLengthSq,_MM_SHUFFLE(1,1,1,1));
+ vLengthSq = _mm_add_ss(vLengthSq,vTemp);
+ vLengthSq = XM_PERMUTE_PS(vLengthSq,_MM_SHUFFLE(0,0,0,0));
+ // Prepare for the division
+ XMVECTOR vResult = _mm_sqrt_ps(vLengthSq);
+ // Create zero with a single instruction
+ XMVECTOR vZeroMask = _mm_setzero_ps();
+ // Test for a divide by zero (Must be FP to detect -0.0)
+ vZeroMask = _mm_cmpneq_ps(vZeroMask,vResult);
+ // Failsafe on zero (Or epsilon) length planes
+ // If the length is infinity, set the elements to zero
+ vLengthSq = _mm_cmpneq_ps(vLengthSq,g_XMInfinity);
+ // Reciprocal mul to perform the normalization
+ vResult = _mm_div_ps(V,vResult);
+ // Any that are infinity, set to zero
+ vResult = _mm_and_ps(vResult,vZeroMask);
+ // Select qnan or result based on infinite length
+ XMVECTOR vTemp1 = _mm_andnot_ps(vLengthSq,g_XMQNaN);
+ XMVECTOR vTemp2 = _mm_and_ps(vResult,vLengthSq);
+ vResult = _mm_or_ps(vTemp1,vTemp2);
+ return vResult;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Clamps the 2D length of V to the range [LengthMin, LengthMax].
+// Scalar convenience wrapper: replicates each bound across a vector and
+// defers to XMVector2ClampLengthV, which performs the actual clamp.
+inline XMVECTOR XMVector2ClampLength
+(
+ FXMVECTOR V,
+ float LengthMin,
+ float LengthMax
+)
+{
+ const XMVECTOR vMin = XMVectorReplicate(LengthMin);
+ const XMVECTOR vMax = XMVectorReplicate(LengthMax);
+ return XMVector2ClampLengthV(V, vMin, vMax);
+}
+
+//------------------------------------------------------------------------------
+
+// Clamps the 2D length of V to [LengthMin, LengthMax], where both bounds
+// must have identical x and y components, be non-negative, and satisfy
+// LengthMax >= LengthMin (all enforced by the asserts below).  If the
+// length is already in range, V is returned unchanged (no precision loss).
+inline XMVECTOR XMVector2ClampLengthV
+(
+ FXMVECTOR V,
+ FXMVECTOR LengthMin,
+ FXMVECTOR LengthMax
+)
+{
+ assert((XMVectorGetY(LengthMin) == XMVectorGetX(LengthMin)));
+ assert((XMVectorGetY(LengthMax) == XMVectorGetX(LengthMax)));
+ assert(XMVector2GreaterOrEqual(LengthMin, g_XMZero));
+ assert(XMVector2GreaterOrEqual(LengthMax, g_XMZero));
+ assert(XMVector2GreaterOrEqual(LengthMax, LengthMin));
+
+ XMVECTOR LengthSq = XMVector2LengthSq(V);
+
+ const XMVECTOR Zero = XMVectorZero();
+
+ XMVECTOR RcpLength = XMVectorReciprocalSqrt(LengthSq);
+
+ // Masks for the degenerate cases: infinite or zero squared length.
+ XMVECTOR InfiniteLength = XMVectorEqualInt(LengthSq, g_XMInfinity.v);
+ XMVECTOR ZeroLength = XMVectorEqual(LengthSq, Zero);
+
+ // Length = lensq * rsqrt(lensq) = sqrt(lensq)
+ XMVECTOR Length = XMVectorMultiply(LengthSq, RcpLength);
+
+ XMVECTOR Normal = XMVectorMultiply(V, RcpLength);
+
+ // If length is both non-zero and finite, keep the computed Length/Normal;
+ // otherwise fall back to LengthSq (propagates the degenerate values).
+ XMVECTOR Select = XMVectorEqualInt(InfiniteLength, ZeroLength);
+ Length = XMVectorSelect(LengthSq, Length, Select);
+ Normal = XMVectorSelect(LengthSq, Normal, Select);
+
+ XMVECTOR ControlMax = XMVectorGreater(Length, LengthMax);
+ XMVECTOR ControlMin = XMVectorLess(Length, LengthMin);
+
+ XMVECTOR ClampLength = XMVectorSelect(Length, LengthMax, ControlMax);
+ ClampLength = XMVectorSelect(ClampLength, LengthMin, ControlMin);
+
+ XMVECTOR Result = XMVectorMultiply(Normal, ClampLength);
+
+ // Preserve the original vector (with no precision loss) if the length falls within the given range
+ XMVECTOR Control = XMVectorEqualInt(ControlMax, ControlMin);
+ Result = XMVectorSelect(Result, V, Control);
+
+ return Result;
+}
+
+//------------------------------------------------------------------------------
+
+// Reflects the 2D vector Incident across the line whose normal is Normal:
+//   Result = Incident - 2 * dot(Incident, Normal) * Normal
+inline XMVECTOR XMVector2Reflect
+(
+ FXMVECTOR Incident,
+ FXMVECTOR Normal
+)
+{
+ // 2 * dot(I, N), splatted across all lanes by XMVector2Dot.
+ XMVECTOR vDot2 = XMVector2Dot(Incident, Normal);
+ vDot2 = XMVectorAdd(vDot2, vDot2);
+ // Incident - vDot2 * Normal
+ return XMVectorNegativeMultiplySubtract(vDot2, Normal, Incident);
+}
+
+//------------------------------------------------------------------------------
+
+inline XMVECTOR XMVector2Refract
+(
+ FXMVECTOR Incident,
+ FXMVECTOR Normal,
+ float RefractionIndex
+)
+{
+ XMVECTOR Index = XMVectorReplicate(RefractionIndex);
+ return XMVector2RefractV(Incident, Normal, Index);
+}
+
+//------------------------------------------------------------------------------
+
// Returns the refraction of a 2D incident vector through a surface with the
// given normal, using a per-component (vector) refraction index.
// Lanes where the radicand 1 - idx^2 * (1 - dot^2) is not positive (total
// internal reflection) produce zero in the result.
inline XMVECTOR XMVector2RefractV
(
    FXMVECTOR Incident,
    FXMVECTOR Normal,
    FXMVECTOR RefractionIndex
)
{
    // Result = RefractionIndex * Incident - Normal * (RefractionIndex * dot(Incident, Normal) +
    // sqrt(1 - RefractionIndex * RefractionIndex * (1 - dot(Incident, Normal) * dot(Incident, Normal))))

#if defined(_XM_NO_INTRINSICS_)

    // Scalar 2D dot product of Incident and Normal.
    float IDotN = (Incident.vector4_f32[0]*Normal.vector4_f32[0])+(Incident.vector4_f32[1]*Normal.vector4_f32[1]);
    // R = 1.0f - RefractionIndex * RefractionIndex * (1.0f - IDotN * IDotN)
    float RY = 1.0f-(IDotN*IDotN);
    float RX = 1.0f-(RY*RefractionIndex.vector4_f32[0]*RefractionIndex.vector4_f32[0]);
    RY = 1.0f-(RY*RefractionIndex.vector4_f32[1]*RefractionIndex.vector4_f32[1]);
    // Negative radicand means total internal reflection: the lane is zeroed.
    if (RX>=0.0f) {
        RX = (RefractionIndex.vector4_f32[0]*Incident.vector4_f32[0])-(Normal.vector4_f32[0]*((RefractionIndex.vector4_f32[0]*IDotN)+sqrtf(RX)));
    } else {
        RX = 0.0f;
    }
    if (RY>=0.0f) {
        RY = (RefractionIndex.vector4_f32[1]*Incident.vector4_f32[1])-(Normal.vector4_f32[1]*((RefractionIndex.vector4_f32[1]*IDotN)+sqrtf(RY)));
    } else {
        RY = 0.0f;
    }

    // z and w of the result are always zero.
    XMVECTOR vResult;
    vResult.vector4_f32[0] = RX;
    vResult.vector4_f32[1] = RY;
    vResult.vector4_f32[2] = 0.0f;
    vResult.vector4_f32[3] = 0.0f;
    return vResult;

#elif defined(_XM_ARM_NEON_INTRINSICS_)
    // Work on the low 64 bits only (x and y lanes).
    __n64 IL = vget_low_f32( Incident );
    __n64 NL = vget_low_f32( Normal );
    __n64 RIL = vget_low_f32( RefractionIndex );
    // Get the 2D Dot product of Incident-Normal
    __n64 vTemp = vmul_f32(IL, NL);
    __n64 IDotN = vpadd_f32( vTemp, vTemp );
    // vTemp = 1.0f - RefractionIndex * RefractionIndex * (1.0f - IDotN * IDotN)
    vTemp = vmls_f32( vget_low_f32( g_XMOne ), IDotN, IDotN);
    vTemp = vmul_f32(vTemp,RIL);
    vTemp = vmls_f32(vget_low_f32( g_XMOne ), vTemp, RIL );
    // If any terms are <=0, sqrt() will fail, punt to zero
    __n64 vMask = vcgt_f32(vTemp, vget_low_f32(g_XMZero) );
    // Sqrt(vTemp) via reciprocal-sqrt estimate plus two Newton-Raphson steps.
    __n64 S0 = vrsqrte_f32(vTemp);
    __n64 P0 = vmul_f32( vTemp, S0 );
    __n64 R0 = vrsqrts_f32( P0, S0 );
    __n64 S1 = vmul_f32( S0, R0 );
    __n64 P1 = vmul_f32( vTemp, S1 );
    __n64 R1 = vrsqrts_f32( P1, S1 );
    __n64 S2 = vmul_f32( S1, R1 );
    vTemp = vmul_f32( vTemp, S2 );
    // R = RefractionIndex * IDotN + sqrt(R)
    vTemp = vmla_f32( vTemp, RIL, IDotN );
    // Result = RefractionIndex * Incident - Normal * R
    __n64 vResult = vmul_f32(RIL,IL);
    vResult = vmls_f32( vResult, vTemp, NL );
    // Zero out lanes that hit total internal reflection.
    vResult = vand_u32(vResult,vMask);
    return vcombine_f32(vResult, vResult);
#elif defined(_XM_SSE_INTRINSICS_)
    // Result = RefractionIndex * Incident - Normal * (RefractionIndex * dot(Incident, Normal) +
    // sqrt(1 - RefractionIndex * RefractionIndex * (1 - dot(Incident, Normal) * dot(Incident, Normal))))
    // Get the 2D Dot product of Incident-Normal
    XMVECTOR IDotN = XMVector2Dot(Incident, Normal);
    // vTemp = 1.0f - RefractionIndex * RefractionIndex * (1.0f - IDotN * IDotN)
    XMVECTOR vTemp = _mm_mul_ps(IDotN,IDotN);
    vTemp = _mm_sub_ps(g_XMOne,vTemp);
    vTemp = _mm_mul_ps(vTemp,RefractionIndex);
    vTemp = _mm_mul_ps(vTemp,RefractionIndex);
    vTemp = _mm_sub_ps(g_XMOne,vTemp);
    // If any terms are <=0, sqrt() will fail, punt to zero
    XMVECTOR vMask = _mm_cmpgt_ps(vTemp,g_XMZero);
    // R = RefractionIndex * IDotN + sqrt(R)
    vTemp = _mm_sqrt_ps(vTemp);
    XMVECTOR vResult = _mm_mul_ps(RefractionIndex,IDotN);
    vTemp = _mm_add_ps(vTemp,vResult);
    // Result = RefractionIndex * Incident - Normal * R
    vResult = _mm_mul_ps(RefractionIndex,Incident);
    vTemp = _mm_mul_ps(vTemp,Normal);
    vResult = _mm_sub_ps(vResult,vTemp);
    // Zero out lanes that hit total internal reflection.
    vResult = _mm_and_ps(vResult,vMask);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
+//------------------------------------------------------------------------------
+
// Returns a vector orthogonal (perpendicular) to the 2D input:
// (x, y) -> (-y, x). The z and w components of the result are zero.
inline XMVECTOR XMVector2Orthogonal
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;
    Result.vector4_f32[0] = -V.vector4_f32[1];
    Result.vector4_f32[1] = V.vector4_f32[0];
    Result.vector4_f32[2] = 0.f;
    Result.vector4_f32[3] = 0.f;
    return Result;

#elif defined(_XM_ARM_NEON_INTRINSICS_)
    // Swap x and y in the low half, then negate the new x.
    static const XMVECTORF32 Negate = { -1.f, 1.f, 0, 0 };
    const __n64 zero = vdup_n_f32(0);

    __n64 VL = vget_low_f32( V );
    __n64 Result = vmul_f32( vrev64_f32( VL ), vget_low_f32( Negate ) );
    return vcombine_f32( Result, zero );
#elif defined(_XM_SSE_INTRINSICS_)
    // Shuffle to (y, x, z, w) then flip the sign of the x lane.
    XMVECTOR vResult = XM_PERMUTE_PS(V,_MM_SHUFFLE(3,2,0,1));
    vResult = _mm_mul_ps(vResult,g_XMNegateX);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
+//------------------------------------------------------------------------------
+
+inline XMVECTOR XMVector2AngleBetweenNormalsEst
+(
+ FXMVECTOR N1,
+ FXMVECTOR N2
+)
+{
+#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
+
+ XMVECTOR Result = XMVector2Dot(N1, N2);
+ Result = XMVectorClamp(Result, g_XMNegativeOne.v, g_XMOne.v);
+ Result = XMVectorACosEst(Result);
+ return Result;
+
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+inline XMVECTOR XMVector2AngleBetweenNormals
+(
+ FXMVECTOR N1,
+ FXMVECTOR N2
+)
+{
+#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
+
+ XMVECTOR Result = XMVector2Dot(N1, N2);
+ Result = XMVectorClamp(Result, g_XMNegativeOne, g_XMOne);
+ Result = XMVectorACos(Result);
+ return Result;
+
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+inline XMVECTOR XMVector2AngleBetweenVectors
+(
+ FXMVECTOR V1,
+ FXMVECTOR V2
+)
+{
+#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
+
+ XMVECTOR L1 = XMVector2ReciprocalLength(V1);
+ XMVECTOR L2 = XMVector2ReciprocalLength(V2);
+
+ XMVECTOR Dot = XMVector2Dot(V1, V2);
+
+ L1 = XMVectorMultiply(L1, L2);
+
+ XMVECTOR CosAngle = XMVectorMultiply(Dot, L1);
+ CosAngle = XMVectorClamp(CosAngle, g_XMNegativeOne.v, g_XMOne.v);
+
+ return XMVectorACos(CosAngle);
+
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+inline XMVECTOR XMVector2LinePointDistance
+(
+ FXMVECTOR LinePoint1,
+ FXMVECTOR LinePoint2,
+ FXMVECTOR Point
+)
+{
+ // Given a vector PointVector from LinePoint1 to Point and a vector
+ // LineVector from LinePoint1 to LinePoint2, the scaled distance
+ // PointProjectionScale from LinePoint1 to the perpendicular projection
+ // of PointVector onto the line is defined as:
+ //
+ // PointProjectionScale = dot(PointVector, LineVector) / LengthSq(LineVector)
+
+#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
+
+ XMVECTOR PointVector = XMVectorSubtract(Point, LinePoint1);
+ XMVECTOR LineVector = XMVectorSubtract(LinePoint2, LinePoint1);
+
+ XMVECTOR LengthSq = XMVector2LengthSq(LineVector);
+
+ XMVECTOR PointProjectionScale = XMVector2Dot(PointVector, LineVector);
+ PointProjectionScale = XMVectorDivide(PointProjectionScale, LengthSq);
+
+ XMVECTOR DistanceVector = XMVectorMultiply(LineVector, PointProjectionScale);
+ DistanceVector = XMVectorSubtract(PointVector, DistanceVector);
+
+ return XMVector2Length(DistanceVector);
+
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
// Finds the intersection of the two infinite 2D lines defined by
// (Line1Point1, Line1Point2) and (Line2Point1, Line2Point2).
// Returns the intersection point; returns infinity in all lanes if the
// lines are coincident, and QNaN in all lanes if they are parallel but
// not coincident.
inline XMVECTOR XMVector2IntersectLine
(
    FXMVECTOR Line1Point1,
    FXMVECTOR Line1Point2,
    FXMVECTOR Line2Point1,
    GXMVECTOR Line2Point2
)
{
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)

    // Line directions and the offset between the two lines' anchor points.
    XMVECTOR V1 = XMVectorSubtract(Line1Point2, Line1Point1);
    XMVECTOR V2 = XMVectorSubtract(Line2Point2, Line2Point1);
    XMVECTOR V3 = XMVectorSubtract(Line1Point1, Line2Point1);

    // 2D cross products: C1 ~ 0 means the lines are parallel;
    // C2 ~ 0 as well means they are the same line.
    XMVECTOR C1 = XMVector2Cross(V1, V2);
    XMVECTOR C2 = XMVector2Cross(V2, V3);

    XMVECTOR Result;
    const XMVECTOR Zero = XMVectorZero();
    if (XMVector2NearEqual(C1, Zero, g_XMEpsilon.v))
    {
        if (XMVector2NearEqual(C2, Zero, g_XMEpsilon.v))
        {
            // Coincident
            Result = g_XMInfinity.v;
        }
        else
        {
            // Parallel
            Result = g_XMQNaN.v;
        }
    }
    else
    {
        // Intersection point = Line1Point1 + V1 * (C2 / C1)
        XMVECTOR Scale = XMVectorReciprocal(C1);
        Scale = XMVectorMultiply(C2, Scale);
        Result = XMVectorMultiplyAdd(V1, Scale, Line1Point1);
    }

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // Branch-free variant: compute both the intersection and the failure
    // value, then blend them with comparison masks.
    XMVECTOR V1 = _mm_sub_ps(Line1Point2, Line1Point1);
    XMVECTOR V2 = _mm_sub_ps(Line2Point2, Line2Point1);
    XMVECTOR V3 = _mm_sub_ps(Line1Point1, Line2Point1);
    // Generate the cross products
    XMVECTOR C1 = XMVector2Cross(V1, V2);
    XMVECTOR C2 = XMVector2Cross(V2, V3);
    // If C1 is not close to epsilon, use the calculated value
    // (max(-C1, C1) computes the absolute value of C1).
    XMVECTOR vResultMask = _mm_setzero_ps();
    vResultMask = _mm_sub_ps(vResultMask,C1);
    vResultMask = _mm_max_ps(vResultMask,C1);
    // 0xFFFFFFFF if the calculated value is to be used
    vResultMask = _mm_cmpgt_ps(vResultMask,g_XMEpsilon);
    // If C1 is close to epsilon, which fail type is it? INFINITY or NAN?
    XMVECTOR vFailMask = _mm_setzero_ps();
    vFailMask = _mm_sub_ps(vFailMask,C2);
    vFailMask = _mm_max_ps(vFailMask,C2);
    vFailMask = _mm_cmple_ps(vFailMask,g_XMEpsilon);
    XMVECTOR vFail = _mm_and_ps(vFailMask,g_XMInfinity);
    vFailMask = _mm_andnot_ps(vFailMask,g_XMQNaN);
    // vFail is NAN or INF
    vFail = _mm_or_ps(vFail,vFailMask);
    // Intersection point = Line1Point1 + V1 * (C2 / C1)
    XMVECTOR vResult = _mm_div_ps(C2,C1);
    vResult = _mm_mul_ps(vResult,V1);
    vResult = _mm_add_ps(vResult,Line1Point1);
    // Use result, or failure value
    vResult = _mm_and_ps(vResult,vResultMask);
    vResultMask = _mm_andnot_ps(vResultMask,vFail);
    vResult = _mm_or_ps(vResult,vResultMask);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
+//------------------------------------------------------------------------------
+
// Transforms the 2D vector V by matrix M, treating V as (x, y, 0, 1):
// Result = x * M.r[0] + y * M.r[1] + M.r[3]. Row 2 is skipped because
// z is implicitly zero; the translation row M.r[3] is applied.
inline XMVECTOR XMVector2Transform
(
    FXMVECTOR V,
    CXMMATRIX M
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Y = XMVectorSplatY(V);
    XMVECTOR X = XMVectorSplatX(V);

    XMVECTOR Result = XMVectorMultiplyAdd(Y, M.r[1], M.r[3]);
    Result = XMVectorMultiplyAdd(X, M.r[0], Result);

    return Result;

#elif defined(_XM_ARM_NEON_INTRINSICS_)
    // Broadcast x and y from the low half, then fused multiply-add the rows.
    __n64 VL = vget_low_f32( V );
    __n128 Y = vdupq_lane_f32( VL, 1 );
    __n128 Result = vmlaq_f32( M.r[3], Y, M.r[1] );
    __n128 X = vdupq_lane_f32( VL, 0 );
    return vmlaq_f32( Result, X, M.r[0] );
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vResult = XM_PERMUTE_PS(V,_MM_SHUFFLE(0,0,0,0));
    vResult = _mm_mul_ps(vResult,M.r[0]);
    XMVECTOR vTemp = XM_PERMUTE_PS(V,_MM_SHUFFLE(1,1,1,1));
    vTemp = _mm_mul_ps(vTemp,M.r[1]);
    vResult = _mm_add_ps(vResult,vTemp);
    vResult = _mm_add_ps(vResult,M.r[3]);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
+//------------------------------------------------------------------------------
+
// Transforms a stream of 2D vectors by matrix M (as XMVector2Transform does),
// writing 4D results. Strides are in BYTES (the pointers are advanced through
// uint8_t*), which allows the XMFLOAT2/XMFLOAT4 elements to be embedded in
// larger structures. Returns pOutputStream.
// NOTE(review): overlap between input and output streams is not handled here —
// presumably the caller must keep them disjoint; confirm against callers.
_Use_decl_annotations_
inline XMFLOAT4* XMVector2TransformStream
(
    XMFLOAT4* pOutputStream,
    size_t OutputStride,
    const XMFLOAT2* pInputStream,
    size_t InputStride,
    size_t VectorCount,
    CXMMATRIX M
)
{
    assert(pOutputStream != NULL);
    assert(pInputStream != NULL);

#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)

    const uint8_t* pInputVector = (const uint8_t*)pInputStream;
    uint8_t* pOutputVector = (uint8_t*)pOutputStream;

    // Hoist the matrix rows used by the 2D transform out of the loop.
    const XMVECTOR row0 = M.r[0];
    const XMVECTOR row1 = M.r[1];
    const XMVECTOR row3 = M.r[3];

    for (size_t i = 0; i < VectorCount; i++)
    {
        XMVECTOR V = XMLoadFloat2((const XMFLOAT2*)pInputVector);
        XMVECTOR Y = XMVectorSplatY(V);
        XMVECTOR X = XMVectorSplatX(V);

        // Result = x * row0 + y * row1 + row3 (z is implicitly 0, w is 1).
        XMVECTOR Result = XMVectorMultiplyAdd(Y, row1, row3);
        Result = XMVectorMultiplyAdd(X, row0, Result);

        XMStoreFloat4((XMFLOAT4*)pOutputVector, Result);

        pInputVector += InputStride;
        pOutputVector += OutputStride;
    }

    return pOutputStream;

#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}
+
+
+//------------------------------------------------------------------------------
+
+inline XMVECTOR XMVector2TransformCoord
+(
+ FXMVECTOR V,
+ CXMMATRIX M
+)
+{
+#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
+
+ XMVECTOR Y = XMVectorSplatY(V);
+ XMVECTOR X = XMVectorSplatX(V);
+
+ XMVECTOR Result = XMVectorMultiplyAdd(Y, M.r[1], M.r[3]);
+ Result = XMVectorMultiplyAdd(X, M.r[0], Result);
+
+ XMVECTOR W = XMVectorSplatW(Result);
+ return XMVectorDivide( Result, W );
+
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
// Transforms a stream of 2D points by matrix M, dividing each result by its
// transformed w (as XMVector2TransformCoord does) before storing the x/y
// components. Strides are in BYTES. Returns pOutputStream.
_Use_decl_annotations_
inline XMFLOAT2* XMVector2TransformCoordStream
(
    XMFLOAT2* pOutputStream,
    size_t OutputStride,
    const XMFLOAT2* pInputStream,
    size_t InputStride,
    size_t VectorCount,
    CXMMATRIX M
)
{
    assert(pOutputStream != NULL);
    assert(pInputStream != NULL);

#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)

    const uint8_t* pInputVector = (const uint8_t*)pInputStream;
    uint8_t* pOutputVector = (uint8_t*)pOutputStream;

    // Hoist the matrix rows used by the 2D transform out of the loop.
    const XMVECTOR row0 = M.r[0];
    const XMVECTOR row1 = M.r[1];
    const XMVECTOR row3 = M.r[3];

    for (size_t i = 0; i < VectorCount; i++)
    {
        XMVECTOR V = XMLoadFloat2((const XMFLOAT2*)pInputVector);
        XMVECTOR Y = XMVectorSplatY(V);
        XMVECTOR X = XMVectorSplatX(V);

        // Result = x * row0 + y * row1 + row3 (z is implicitly 0, w is 1).
        XMVECTOR Result = XMVectorMultiplyAdd(Y, row1, row3);
        Result = XMVectorMultiplyAdd(X, row0, Result);

        // Perspective divide by the transformed w.
        XMVECTOR W = XMVectorSplatW(Result);

        Result = XMVectorDivide(Result, W);

        XMStoreFloat2((XMFLOAT2*)pOutputVector, Result);

        pInputVector += InputStride;
        pOutputVector += OutputStride;
    }

    return pOutputStream;

#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}
+
+//------------------------------------------------------------------------------
+
// Transforms the 2D direction vector V by matrix M, treating V as
// (x, y, 0, 0): Result = x * M.r[0] + y * M.r[1]. The translation row
// M.r[3] is deliberately NOT applied, since normals are directions.
inline XMVECTOR XMVector2TransformNormal
(
    FXMVECTOR V,
    CXMMATRIX M
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Y = XMVectorSplatY(V);
    XMVECTOR X = XMVectorSplatX(V);

    XMVECTOR Result = XMVectorMultiply(Y, M.r[1]);
    Result = XMVectorMultiplyAdd(X, M.r[0], Result);

    return Result;

#elif defined(_XM_ARM_NEON_INTRINSICS_)
    // Broadcast x and y from the low half, then multiply-accumulate the rows.
    __n64 VL = vget_low_f32( V );
    __n128 Y = vdupq_lane_f32( VL, 1 );
    __n128 Result = vmulq_f32( Y, M.r[1] );
    __n128 X = vdupq_lane_f32( VL, 0 );
    return vmlaq_f32( Result, X, M.r[0] );
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vResult = XM_PERMUTE_PS(V,_MM_SHUFFLE(0,0,0,0));
    vResult = _mm_mul_ps(vResult,M.r[0]);
    XMVECTOR vTemp = XM_PERMUTE_PS(V,_MM_SHUFFLE(1,1,1,1));
    vTemp = _mm_mul_ps(vTemp,M.r[1]);
    vResult = _mm_add_ps(vResult,vTemp);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
+//------------------------------------------------------------------------------
+
// Transforms a stream of 2D direction vectors by matrix M (as
// XMVector2TransformNormal does — no translation applied). Strides are in
// BYTES. Returns pOutputStream.
_Use_decl_annotations_
inline XMFLOAT2* XMVector2TransformNormalStream
(
    XMFLOAT2* pOutputStream,
    size_t OutputStride,
    const XMFLOAT2* pInputStream,
    size_t InputStride,
    size_t VectorCount,
    CXMMATRIX M
)
{
    assert(pOutputStream != NULL);
    assert(pInputStream != NULL);

#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)

    const uint8_t* pInputVector = (const uint8_t*)pInputStream;
    uint8_t* pOutputVector = (uint8_t*)pOutputStream;

    // Only rows 0 and 1 are needed: directions ignore translation (row 3).
    const XMVECTOR row0 = M.r[0];
    const XMVECTOR row1 = M.r[1];

    for (size_t i = 0; i < VectorCount; i++)
    {
        XMVECTOR V = XMLoadFloat2((const XMFLOAT2*)pInputVector);
        XMVECTOR Y = XMVectorSplatY(V);
        XMVECTOR X = XMVectorSplatX(V);

        // Result = x * row0 + y * row1
        XMVECTOR Result = XMVectorMultiply(Y, row1);
        Result = XMVectorMultiplyAdd(X, row0, Result);

        XMStoreFloat2((XMFLOAT2*)pOutputVector, Result);

        pInputVector += InputStride;
        pOutputVector += OutputStride;
    }

    return pOutputStream;

#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}
+
+/****************************************************************************
+ *
+ * 3D Vector
+ *
+ ****************************************************************************/
+
+//------------------------------------------------------------------------------
+// Comparison operations
+//------------------------------------------------------------------------------
+
+//------------------------------------------------------------------------------
+
// Returns true if the x, y and z components of V1 and V2 are all exactly
// equal (floating-point compare; the w component is ignored).
inline bool XMVector3Equal
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_f32[0] == V2.vector4_f32[0]) && (V1.vector4_f32[1] == V2.vector4_f32[1]) && (V1.vector4_f32[2] == V2.vector4_f32[2])) != 0);
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    __n128 vResult = vceqq_f32( V1, V2 );
    // Collapse each 32-bit lane mask to one byte; the low 24 bits of the
    // extracted word then carry the x/y/z results.
    int8x8x2_t vTemp = vzip_u8(vget_low_u8(vResult), vget_high_u8(vResult));
    vTemp = vzip_u16(vTemp.val[0], vTemp.val[1]);
    return ( (vget_lane_u32(vTemp.val[1], 1) & 0xFFFFFFU) == 0xFFFFFFU );
#elif defined(_XM_SSE_INTRINSICS_)
    // Mask off the w bit of the movemask before testing all-of-xyz.
    XMVECTOR vTemp = _mm_cmpeq_ps(V1,V2);
    return (((_mm_movemask_ps(vTemp)&7)==7) != 0);
#else // _XM_VMX128_INTRINSICS_
    return XMComparisonAllTrue(XMVector3EqualR(V1, V2));
#endif
}
+
+//------------------------------------------------------------------------------
+
// Comparison-record variant of XMVector3Equal: returns XM_CRMASK_CR6TRUE if
// all of x/y/z are equal, XM_CRMASK_CR6FALSE if all three differ, and 0 for
// a mixed result (w is ignored).
inline uint32_t XMVector3EqualR
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    uint32_t CR = 0;
    if ((V1.vector4_f32[0] == V2.vector4_f32[0]) &&
        (V1.vector4_f32[1] == V2.vector4_f32[1]) &&
        (V1.vector4_f32[2] == V2.vector4_f32[2]))
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if ((V1.vector4_f32[0] != V2.vector4_f32[0]) &&
        (V1.vector4_f32[1] != V2.vector4_f32[1]) &&
        (V1.vector4_f32[2] != V2.vector4_f32[2]))
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    __n128 vResult = vceqq_f32( V1, V2 );
    // Collapse each 32-bit lane mask to one byte; the low 24 bits of r carry
    // the x/y/z results.
    int8x8x2_t vTemp = vzip_u8(vget_low_u8(vResult), vget_high_u8(vResult));
    vTemp = vzip_u16(vTemp.val[0], vTemp.val[1]);
    uint32_t r = vget_lane_u32(vTemp.val[1], 1) & 0xFFFFFFU;

    uint32_t CR = 0;
    if ( r == 0xFFFFFFU )
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if ( !r )
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmpeq_ps(V1,V2);
    // Low three movemask bits correspond to x/y/z; w is masked off.
    int iTest = _mm_movemask_ps(vTemp)&7;
    uint32_t CR = 0;
    if (iTest==7)
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (!iTest)
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
+//------------------------------------------------------------------------------
+
// Returns true if the x, y and z components of V1 and V2 are bitwise equal,
// comparing the lanes as 32-bit integers (w is ignored). Unlike the float
// compare, this distinguishes -0.0f from +0.0f and matches NaN bit patterns.
inline bool XMVector3EqualInt
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_u32[0] == V2.vector4_u32[0]) && (V1.vector4_u32[1] == V2.vector4_u32[1]) && (V1.vector4_u32[2] == V2.vector4_u32[2])) != 0);
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    __n128 vResult = vceqq_u32( V1, V2 );
    // Collapse each 32-bit lane mask to one byte; the low 24 bits carry x/y/z.
    int8x8x2_t vTemp = vzip_u8(vget_low_u8(vResult), vget_high_u8(vResult));
    vTemp = vzip_u16(vTemp.val[0], vTemp.val[1]);
    return ( (vget_lane_u32(vTemp.val[1], 1) & 0xFFFFFFU) == 0xFFFFFFU );
#elif defined(_XM_SSE_INTRINSICS_)
    __m128i vTemp = _mm_cmpeq_epi32(_mm_castps_si128(V1),_mm_castps_si128(V2));
    return (((_mm_movemask_ps(_mm_castsi128_ps(vTemp))&7)==7) != 0);
#else // _XM_VMX128_INTRINSICS_
    return XMComparisonAllTrue(XMVector3EqualIntR(V1, V2));
#endif
}
+
+//------------------------------------------------------------------------------
+
// Comparison-record variant of XMVector3EqualInt: returns XM_CRMASK_CR6TRUE
// if x/y/z are all bitwise equal, XM_CRMASK_CR6FALSE if all three differ,
// and 0 for a mixed result (w is ignored).
inline uint32_t XMVector3EqualIntR
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    uint32_t CR = 0;
    if ((V1.vector4_u32[0] == V2.vector4_u32[0]) &&
        (V1.vector4_u32[1] == V2.vector4_u32[1]) &&
        (V1.vector4_u32[2] == V2.vector4_u32[2]))
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if ((V1.vector4_u32[0] != V2.vector4_u32[0]) &&
        (V1.vector4_u32[1] != V2.vector4_u32[1]) &&
        (V1.vector4_u32[2] != V2.vector4_u32[2]))
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    __n128 vResult = vceqq_u32( V1, V2 );
    // Collapse each 32-bit lane mask to one byte; the low 24 bits of r carry
    // the x/y/z results.
    int8x8x2_t vTemp = vzip_u8(vget_low_u8(vResult), vget_high_u8(vResult));
    vTemp = vzip_u16(vTemp.val[0], vTemp.val[1]);
    uint32_t r = vget_lane_u32(vTemp.val[1], 1) & 0xFFFFFFU;

    uint32_t CR = 0;
    if ( r == 0xFFFFFFU )
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if ( !r )
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#elif defined(_XM_SSE_INTRINSICS_)
    __m128i vTemp = _mm_cmpeq_epi32(_mm_castps_si128(V1),_mm_castps_si128(V2));
    // Low three movemask bits correspond to x/y/z; w is masked off.
    int iTemp = _mm_movemask_ps(_mm_castsi128_ps(vTemp))&7;
    uint32_t CR = 0;
    if (iTemp==7)
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (!iTemp)
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
+//------------------------------------------------------------------------------
+
// Returns true if V1 and V2 are component-wise within Epsilon of each other
// on x, y and z: |V1 - V2| <= Epsilon per component (w is ignored).
inline bool XMVector3NearEqual
(
    FXMVECTOR V1,
    FXMVECTOR V2,
    FXMVECTOR Epsilon
)
{
#if defined(_XM_NO_INTRINSICS_)
    float dx, dy, dz;

    dx = fabsf(V1.vector4_f32[0]-V2.vector4_f32[0]);
    dy = fabsf(V1.vector4_f32[1]-V2.vector4_f32[1]);
    dz = fabsf(V1.vector4_f32[2]-V2.vector4_f32[2]);
    return (((dx <= Epsilon.vector4_f32[0]) &&
            (dy <= Epsilon.vector4_f32[1]) &&
            (dz <= Epsilon.vector4_f32[2])) != 0);
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    __n128 vDelta = vsubq_f32( V1, V2 );
    // vacleq compares the absolute values: |delta| <= |Epsilon|.
    __n128 vResult = vacleq_f32( vDelta, Epsilon );
    // Collapse each 32-bit lane mask to one byte; the low 24 bits carry x/y/z.
    int8x8x2_t vTemp = vzip_u8(vget_low_u8(vResult), vget_high_u8(vResult));
    vTemp = vzip_u16(vTemp.val[0], vTemp.val[1]);
    return ( (vget_lane_u32(vTemp.val[1], 1) & 0xFFFFFFU) == 0xFFFFFFU );
#elif defined(_XM_SSE_INTRINSICS_)
    // Get the difference
    XMVECTOR vDelta = _mm_sub_ps(V1,V2);
    // Get the absolute value of the difference (max(-delta, delta))
    XMVECTOR vTemp = _mm_setzero_ps();
    vTemp = _mm_sub_ps(vTemp,vDelta);
    vTemp = _mm_max_ps(vTemp,vDelta);
    vTemp = _mm_cmple_ps(vTemp,Epsilon);
    // w is don't care
    return (((_mm_movemask_ps(vTemp)&7)==0x7) != 0);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
+//------------------------------------------------------------------------------
+
// Returns true if ANY of the x, y or z components of V1 and V2 differ
// (floating-point compare; w is ignored). Logical negation of XMVector3Equal.
inline bool XMVector3NotEqual
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_f32[0] != V2.vector4_f32[0]) || (V1.vector4_f32[1] != V2.vector4_f32[1]) || (V1.vector4_f32[2] != V2.vector4_f32[2])) != 0);
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    // Compute the equality mask and test that it is NOT all-ones on x/y/z.
    __n128 vResult = vceqq_f32( V1, V2 );
    int8x8x2_t vTemp = vzip_u8(vget_low_u8(vResult), vget_high_u8(vResult));
    vTemp = vzip_u16(vTemp.val[0], vTemp.val[1]);
    return ( (vget_lane_u32(vTemp.val[1], 1) & 0xFFFFFFU) != 0xFFFFFFU );
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmpeq_ps(V1,V2);
    return (((_mm_movemask_ps(vTemp)&7)!=7) != 0);
#else // _XM_VMX128_INTRINSICS_
    return XMComparisonAnyFalse(XMVector3EqualR(V1, V2));
#endif
}
+
+//------------------------------------------------------------------------------
+
// Returns true if ANY of the x, y or z components of V1 and V2 differ when
// the lanes are compared as 32-bit integers (w is ignored). Logical negation
// of XMVector3EqualInt.
inline bool XMVector3NotEqualInt
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_u32[0] != V2.vector4_u32[0]) || (V1.vector4_u32[1] != V2.vector4_u32[1]) || (V1.vector4_u32[2] != V2.vector4_u32[2])) != 0);
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    // Compute the equality mask and test that it is NOT all-ones on x/y/z.
    __n128 vResult = vceqq_u32( V1, V2 );
    int8x8x2_t vTemp = vzip_u8(vget_low_u8(vResult), vget_high_u8(vResult));
    vTemp = vzip_u16(vTemp.val[0], vTemp.val[1]);
    return ( (vget_lane_u32(vTemp.val[1], 1) & 0xFFFFFFU) != 0xFFFFFFU );
#elif defined(_XM_SSE_INTRINSICS_)
    __m128i vTemp = _mm_cmpeq_epi32(_mm_castps_si128(V1),_mm_castps_si128(V2));
    return (((_mm_movemask_ps(_mm_castsi128_ps(vTemp))&7)!=7) != 0);
#else // _XM_VMX128_INTRINSICS_
    return XMComparisonAnyFalse(XMVector3EqualIntR(V1, V2));
#endif
}
+
+//------------------------------------------------------------------------------
+
// Returns true if every one of the x, y and z components of V1 is strictly
// greater than the corresponding component of V2 (w is ignored).
inline bool XMVector3Greater
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_f32[0] > V2.vector4_f32[0]) && (V1.vector4_f32[1] > V2.vector4_f32[1]) && (V1.vector4_f32[2] > V2.vector4_f32[2])) != 0);
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    __n128 vResult = vcgtq_f32( V1, V2 );
    // Collapse each 32-bit lane mask to one byte; the low 24 bits carry x/y/z.
    int8x8x2_t vTemp = vzip_u8(vget_low_u8(vResult), vget_high_u8(vResult));
    vTemp = vzip_u16(vTemp.val[0], vTemp.val[1]);
    return ( (vget_lane_u32(vTemp.val[1], 1) & 0xFFFFFFU) == 0xFFFFFFU );
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmpgt_ps(V1,V2);
    return (((_mm_movemask_ps(vTemp)&7)==7) != 0);
#else // _XM_VMX128_INTRINSICS_
    return XMComparisonAllTrue(XMVector3GreaterR(V1, V2));
#endif
}
+
+//------------------------------------------------------------------------------
+
// Comparison-record variant of XMVector3Greater: returns XM_CRMASK_CR6TRUE
// if x/y/z of V1 are all strictly greater than V2, XM_CRMASK_CR6FALSE if all
// three are <=, and 0 for a mixed result (w is ignored).
inline uint32_t XMVector3GreaterR
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    uint32_t CR = 0;
    if ((V1.vector4_f32[0] > V2.vector4_f32[0]) &&
        (V1.vector4_f32[1] > V2.vector4_f32[1]) &&
        (V1.vector4_f32[2] > V2.vector4_f32[2]))
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if ((V1.vector4_f32[0] <= V2.vector4_f32[0]) &&
        (V1.vector4_f32[1] <= V2.vector4_f32[1]) &&
        (V1.vector4_f32[2] <= V2.vector4_f32[2]))
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;

#elif defined(_XM_ARM_NEON_INTRINSICS_)
    __n128 vResult = vcgtq_f32( V1, V2 );
    // Collapse each 32-bit lane mask to one byte; the low 24 bits of r carry
    // the x/y/z results.
    int8x8x2_t vTemp = vzip_u8(vget_low_u8(vResult), vget_high_u8(vResult));
    vTemp = vzip_u16(vTemp.val[0], vTemp.val[1]);
    uint32_t r = vget_lane_u32(vTemp.val[1], 1) & 0xFFFFFFU;

    uint32_t CR = 0;
    if ( r == 0xFFFFFFU )
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if ( !r )
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmpgt_ps(V1,V2);
    uint32_t CR = 0;
    // Low three movemask bits correspond to x/y/z; w is masked off.
    int iTest = _mm_movemask_ps(vTemp)&7;
    if (iTest==7)
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (!iTest)
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
+//------------------------------------------------------------------------------
+
// Returns true if every one of the x, y and z components of V1 is greater
// than or equal to the corresponding component of V2 (w is ignored).
inline bool XMVector3GreaterOrEqual
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_f32[0] >= V2.vector4_f32[0]) && (V1.vector4_f32[1] >= V2.vector4_f32[1]) && (V1.vector4_f32[2] >= V2.vector4_f32[2])) != 0);
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    __n128 vResult = vcgeq_f32( V1, V2 );
    // Collapse each 32-bit lane mask to one byte; the low 24 bits carry x/y/z.
    int8x8x2_t vTemp = vzip_u8(vget_low_u8(vResult), vget_high_u8(vResult));
    vTemp = vzip_u16(vTemp.val[0], vTemp.val[1]);
    return ( (vget_lane_u32(vTemp.val[1], 1) & 0xFFFFFFU) == 0xFFFFFFU );
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmpge_ps(V1,V2);
    return (((_mm_movemask_ps(vTemp)&7)==7) != 0);
#else // _XM_VMX128_INTRINSICS_
    return XMComparisonAllTrue(XMVector3GreaterOrEqualR(V1, V2));
#endif
}
+
+//------------------------------------------------------------------------------
+
// Comparison-record variant of XMVector3GreaterOrEqual: returns
// XM_CRMASK_CR6TRUE if x/y/z of V1 are all >= V2, XM_CRMASK_CR6FALSE if all
// three are strictly less, and 0 for a mixed result (w is ignored).
inline uint32_t XMVector3GreaterOrEqualR
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)

    uint32_t CR = 0;
    if ((V1.vector4_f32[0] >= V2.vector4_f32[0]) &&
        (V1.vector4_f32[1] >= V2.vector4_f32[1]) &&
        (V1.vector4_f32[2] >= V2.vector4_f32[2]))
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if ((V1.vector4_f32[0] < V2.vector4_f32[0]) &&
        (V1.vector4_f32[1] < V2.vector4_f32[1]) &&
        (V1.vector4_f32[2] < V2.vector4_f32[2]))
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;

#elif defined(_XM_ARM_NEON_INTRINSICS_)
    __n128 vResult = vcgeq_f32( V1, V2 );
    // Collapse each 32-bit lane mask to one byte; the low 24 bits of r carry
    // the x/y/z results.
    int8x8x2_t vTemp = vzip_u8(vget_low_u8(vResult), vget_high_u8(vResult));
    vTemp = vzip_u16(vTemp.val[0], vTemp.val[1]);
    uint32_t r = vget_lane_u32(vTemp.val[1], 1) & 0xFFFFFFU;

    uint32_t CR = 0;
    if ( r == 0xFFFFFFU )
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if ( !r )
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmpge_ps(V1,V2);
    uint32_t CR = 0;
    // Low three movemask bits correspond to x/y/z; w is masked off.
    int iTest = _mm_movemask_ps(vTemp)&7;
    if (iTest==7)
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (!iTest)
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
+//------------------------------------------------------------------------------
+
// Returns true if every one of the x, y and z components of V1 is strictly
// less than the corresponding component of V2 (w is ignored).
inline bool XMVector3Less
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_f32[0] < V2.vector4_f32[0]) && (V1.vector4_f32[1] < V2.vector4_f32[1]) && (V1.vector4_f32[2] < V2.vector4_f32[2])) != 0);
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    __n128 vResult = vcltq_f32( V1, V2 );
    // Collapse each 32-bit lane mask to one byte; the low 24 bits carry x/y/z.
    int8x8x2_t vTemp = vzip_u8(vget_low_u8(vResult), vget_high_u8(vResult));
    vTemp = vzip_u16(vTemp.val[0], vTemp.val[1]);
    return ( (vget_lane_u32(vTemp.val[1], 1) & 0xFFFFFFU) == 0xFFFFFFU );
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmplt_ps(V1,V2);
    return (((_mm_movemask_ps(vTemp)&7)==7) != 0);
#else // _XM_VMX128_INTRINSICS_
    // Implemented as the reversed greater-than comparison.
    return XMComparisonAllTrue(XMVector3GreaterR(V2, V1));
#endif
}
+
+//------------------------------------------------------------------------------
+
// Returns true if every one of the x, y and z components of V1 is less than
// or equal to the corresponding component of V2 (w is ignored).
inline bool XMVector3LessOrEqual
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_f32[0] <= V2.vector4_f32[0]) && (V1.vector4_f32[1] <= V2.vector4_f32[1]) && (V1.vector4_f32[2] <= V2.vector4_f32[2])) != 0);
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    __n128 vResult = vcleq_f32( V1, V2 );
    // Collapse each 32-bit lane mask to one byte; the low 24 bits carry x/y/z.
    int8x8x2_t vTemp = vzip_u8(vget_low_u8(vResult), vget_high_u8(vResult));
    vTemp = vzip_u16(vTemp.val[0], vTemp.val[1]);
    return ( (vget_lane_u32(vTemp.val[1], 1) & 0xFFFFFFU) == 0xFFFFFFU );
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmple_ps(V1,V2);
    return (((_mm_movemask_ps(vTemp)&7)==7) != 0);
#else // _XM_VMX128_INTRINSICS_
    // Implemented as the reversed greater-or-equal comparison.
    return XMComparisonAllTrue(XMVector3GreaterOrEqualR(V2, V1));
#endif
}
+
+//------------------------------------------------------------------------------
+
// Returns true if each of the x, y and z components of V lies within the
// symmetric range [-Bounds, +Bounds] of the corresponding component
// (w is ignored).
inline bool XMVector3InBounds
(
    FXMVECTOR V,
    FXMVECTOR Bounds
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V.vector4_f32[0] <= Bounds.vector4_f32[0] && V.vector4_f32[0] >= -Bounds.vector4_f32[0]) &&
        (V.vector4_f32[1] <= Bounds.vector4_f32[1] && V.vector4_f32[1] >= -Bounds.vector4_f32[1]) &&
        (V.vector4_f32[2] <= Bounds.vector4_f32[2] && V.vector4_f32[2] >= -Bounds.vector4_f32[2])) != 0);
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    // Test if less than or equal
    __n128 vTemp1 = vcleq_f32(V,Bounds);
    // Negate the bounds
    __n128 vTemp2 = vnegq_f32(Bounds);
    // Test if greater or equal (Reversed)
    vTemp2 = vcleq_f32(vTemp2,V);
    // Blend answers
    vTemp1 = vandq_u32(vTemp1,vTemp2);
    // Collapse each 32-bit lane mask to one byte; low 24 bits carry x/y/z.
    int8x8x2_t vTemp = vzip_u8(vget_low_u8(vTemp1), vget_high_u8(vTemp1));
    vTemp = vzip_u16(vTemp.val[0], vTemp.val[1]);
    return ( (vget_lane_u32(vTemp.val[1], 1) & 0xFFFFFFU) == 0xFFFFFFU );
#elif defined(_XM_SSE_INTRINSICS_)
    // Test if less than or equal
    XMVECTOR vTemp1 = _mm_cmple_ps(V,Bounds);
    // Negate the bounds
    XMVECTOR vTemp2 = _mm_mul_ps(Bounds,g_XMNegativeOne);
    // Test if greater or equal (Reversed)
    vTemp2 = _mm_cmple_ps(vTemp2,V);
    // Blend answers
    vTemp1 = _mm_and_ps(vTemp1,vTemp2);
    // x,y and z in bounds? (w is don't care)
    return (((_mm_movemask_ps(vTemp1)&0x7)==0x7) != 0);
#else
    return XMComparisonAllInBounds(XMVector3InBoundsR(V, Bounds));
#endif
}
+
+
+//------------------------------------------------------------------------------
+
// Returns true if any of the x, y or z components of V is NaN
// (w is ignored).
inline bool XMVector3IsNaN
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    return (XMISNAN(V.vector4_f32[0]) ||
        XMISNAN(V.vector4_f32[1]) ||
        XMISNAN(V.vector4_f32[2]));

#elif defined(_XM_ARM_NEON_INTRINSICS_)
    // Test against itself. NaN is always not equal
    __n128 vTempNan = vceqq_f32( V, V );
    int8x8x2_t vTemp = vzip_u8(vget_low_u8(vTempNan), vget_high_u8(vTempNan));
    vTemp = vzip_u16(vTemp.val[0], vTemp.val[1]);
    // If x or y or z are NaN, the corresponding byte of the zipped mask is zero
    return ( (vget_lane_u32(vTemp.val[1], 1) & 0xFFFFFFU) != 0xFFFFFFU );
#elif defined(_XM_SSE_INTRINSICS_)
    // Test against itself. NaN is always not equal
    XMVECTOR vTempNan = _mm_cmpneq_ps(V,V);
    // If x or y or z are NaN, the mask is non-zero
    return ((_mm_movemask_ps(vTempNan)&7) != 0);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
+//------------------------------------------------------------------------------
+
// Returns true if any of the x, y or z components of V is +/- infinity
// (w is ignored).
inline bool XMVector3IsInfinite
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (XMISINF(V.vector4_f32[0]) ||
        XMISINF(V.vector4_f32[1]) ||
        XMISINF(V.vector4_f32[2]));
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    // Mask off the sign bit so -inf compares equal to +inf
    __n128 vTempInf = vandq_u32( V, g_XMAbsMask );
    // Compare to infinity
    vTempInf = vceqq_f32(vTempInf, g_XMInfinity );
    // If any are infinity, the signs are true.
    int8x8x2_t vTemp = vzip_u8(vget_low_u8(vTempInf), vget_high_u8(vTempInf));
    vTemp = vzip_u16(vTemp.val[0], vTemp.val[1]);
    return ( (vget_lane_u32(vTemp.val[1], 1) & 0xFFFFFFU) != 0 );
#elif defined(_XM_SSE_INTRINSICS_)
    // Mask off the sign bit so -inf compares equal to +inf
    __m128 vTemp = _mm_and_ps(V,g_XMAbsMask);
    // Compare to infinity
    vTemp = _mm_cmpeq_ps(vTemp,g_XMInfinity);
    // If x,y or z are infinity, the signs are true.
    return ((_mm_movemask_ps(vTemp)&7) != 0);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
+//------------------------------------------------------------------------------
+// Computation operations
+//------------------------------------------------------------------------------
+
+//------------------------------------------------------------------------------
+
// Computes the 3D dot product of V1 and V2 (w components ignored) and
// replicates the scalar result into all four lanes of the return vector.
inline XMVECTOR XMVector3Dot
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    float fValue = V1.vector4_f32[0] * V2.vector4_f32[0] + V1.vector4_f32[1] * V2.vector4_f32[1] + V1.vector4_f32[2] * V2.vector4_f32[2];
    XMVECTOR vResult = {
        fValue,
        fValue,
        fValue,
        fValue
    };
    return vResult;

#elif defined(_XM_ARM_NEON_INTRINSICS_)
    // Multiply lanes, then reduce x+y (pairwise add) and add z (splatted)
    __n128 vTemp = vmulq_f32( V1, V2 );
    __n64 v1 = vget_low_f32( vTemp );
    __n64 v2 = vget_high_f32( vTemp );
    v1 = vpadd_f32( v1, v1 );
    v2 = vdup_lane_f32( v2, 0 );
    v1 = vadd_f32( v1, v2 );
    return vcombine_f32( v1, v1 );
#elif defined(_XM_SSE_INTRINSICS_)
    // Perform the dot product
    XMVECTOR vDot = _mm_mul_ps(V1,V2);
    // x=Dot.vector4_f32[1], y=Dot.vector4_f32[2]
    XMVECTOR vTemp = XM_PERMUTE_PS(vDot,_MM_SHUFFLE(2,1,2,1));
    // Result.vector4_f32[0] = x+y
    vDot = _mm_add_ss(vDot,vTemp);
    // x=Dot.vector4_f32[2]
    vTemp = XM_PERMUTE_PS(vTemp,_MM_SHUFFLE(1,1,1,1));
    // Result.vector4_f32[0] = (x+y)+z
    vDot = _mm_add_ss(vDot,vTemp);
    // Splat x
    return XM_PERMUTE_PS(vDot,_MM_SHUFFLE(0,0,0,0));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
+//------------------------------------------------------------------------------
+
// Computes the 3D cross product of V1 and V2; the w component of the
// result is set to zero (SSE/scalar paths) per the formula below.
inline XMVECTOR XMVector3Cross
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
    // [ V1.y*V2.z - V1.z*V2.y, V1.z*V2.x - V1.x*V2.z, V1.x*V2.y - V1.y*V2.x ]

#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR vResult = {
        (V1.vector4_f32[1] * V2.vector4_f32[2]) - (V1.vector4_f32[2] * V2.vector4_f32[1]),
        (V1.vector4_f32[2] * V2.vector4_f32[0]) - (V1.vector4_f32[0] * V2.vector4_f32[2]),
        (V1.vector4_f32[0] * V2.vector4_f32[1]) - (V1.vector4_f32[1] * V2.vector4_f32[0]),
        0.0f
    };
    return vResult;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    // Build the yx/zz swizzles from 64-bit halves, multiply-subtract the
    // two product terms, then flip the sign of the y lane at the end.
    __n64 v1xy = vget_low_f32(V1);
    __n64 v2xy = vget_low_f32(V2);

    __n64 v1yx = vrev64_f32( v1xy );
    __n64 v2yx = vrev64_f32( v2xy );

    __n64 v1zz = vdup_lane_f32( vget_high_f32(V1), 0 );
    __n64 v2zz = vdup_lane_f32( vget_high_f32(V2), 0 );

    __n128 vResult = vmulq_f32( vcombine_f32(v1yx,v1xy), vcombine_f32(v2zz,v2yx) );
    vResult = vmlsq_f32( vResult, vcombine_f32(v1zz,v1yx), vcombine_f32(v2yx,v2xy) );
    return veorq_u32( vResult, g_XMFlipY );
#elif defined(_XM_SSE_INTRINSICS_)
    // y1,z1,x1,w1
    XMVECTOR vTemp1 = XM_PERMUTE_PS(V1,_MM_SHUFFLE(3,0,2,1));
    // z2,x2,y2,w2
    XMVECTOR vTemp2 = XM_PERMUTE_PS(V2,_MM_SHUFFLE(3,1,0,2));
    // Perform the left operation
    XMVECTOR vResult = _mm_mul_ps(vTemp1,vTemp2);
    // z1,x1,y1,w1
    vTemp1 = XM_PERMUTE_PS(vTemp1,_MM_SHUFFLE(3,0,2,1));
    // y2,z2,x2,w2
    vTemp2 = XM_PERMUTE_PS(vTemp2,_MM_SHUFFLE(3,1,0,2));
    // Perform the right operation
    vTemp1 = _mm_mul_ps(vTemp1,vTemp2);
    // Subtract the right from left, and return answer
    vResult = _mm_sub_ps(vResult,vTemp1);
    // Set w to zero
    return _mm_and_ps(vResult,g_XMMask3);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
+//------------------------------------------------------------------------------
+
+inline XMVECTOR XMVector3LengthSq
+(
+ FXMVECTOR V
+)
+{
+ return XMVector3Dot(V, V);
+}
+
+//------------------------------------------------------------------------------
+
// Computes an estimate of 1 / |V| (3D length) using the hardware
// reciprocal-square-root estimate; result is replicated into all lanes.
inline XMVECTOR XMVector3ReciprocalLengthEst
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;

    Result = XMVector3LengthSq(V);
    Result = XMVectorReciprocalSqrtEst(Result);

    return Result;

#elif defined(_XM_ARM_NEON_INTRINSICS_)
    // Dot3
    __n128 vTemp = vmulq_f32( V, V );
    __n64 v1 = vget_low_f32( vTemp );
    __n64 v2 = vget_high_f32( vTemp );
    v1 = vpadd_f32( v1, v1 );
    v2 = vdup_lane_f32( v2, 0 );
    v1 = vadd_f32( v1, v2 );
    // Reciprocal sqrt (estimate)
    v2 = vrsqrte_f32( v1 );
    return vcombine_f32(v2, v2);
#elif defined(_XM_SSE_INTRINSICS_)
    // Perform the dot product on x,y and z
    XMVECTOR vLengthSq = _mm_mul_ps(V,V);
    // vTemp has z and y
    XMVECTOR vTemp = XM_PERMUTE_PS(vLengthSq,_MM_SHUFFLE(1,2,1,2));
    // x+z, y
    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
    // y,y,y,y
    vTemp = XM_PERMUTE_PS(vTemp,_MM_SHUFFLE(1,1,1,1));
    // x+z+y,??,??,??
    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
    // Splat the length squared
    vLengthSq = XM_PERMUTE_PS(vLengthSq,_MM_SHUFFLE(0,0,0,0));
    // Get the reciprocal sqrt (estimate)
    vLengthSq = _mm_rsqrt_ps(vLengthSq);
    return vLengthSq;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
+//------------------------------------------------------------------------------
+
// Computes 1 / |V| (3D length) to full precision; result is replicated
// into all four lanes. NEON refines the estimate with two Newton-Raphson
// iterations; SSE uses sqrt followed by a true divide.
inline XMVECTOR XMVector3ReciprocalLength
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;

    Result = XMVector3LengthSq(V);
    Result = XMVectorReciprocalSqrt(Result);

    return Result;

#elif defined(_XM_ARM_NEON_INTRINSICS_)
    // Dot3
    __n128 vTemp = vmulq_f32( V, V );
    __n64 v1 = vget_low_f32( vTemp );
    __n64 v2 = vget_high_f32( vTemp );
    v1 = vpadd_f32( v1, v1 );
    v2 = vdup_lane_f32( v2, 0 );
    v1 = vadd_f32( v1, v2 );
    // Reciprocal sqrt: estimate plus two Newton-Raphson refinement steps
    __n64 S0 = vrsqrte_f32(v1);
    __n64 P0 = vmul_f32( v1, S0 );
    __n64 R0 = vrsqrts_f32( P0, S0 );
    __n64 S1 = vmul_f32( S0, R0 );
    __n64 P1 = vmul_f32( v1, S1 );
    __n64 R1 = vrsqrts_f32( P1, S1 );
    __n64 Result = vmul_f32( S1, R1 );
    return vcombine_f32( Result, Result );
#elif defined(_XM_SSE_INTRINSICS_)
    // Perform the dot product
    XMVECTOR vDot = _mm_mul_ps(V,V);
    // x=Dot.y, y=Dot.z
    XMVECTOR vTemp = XM_PERMUTE_PS(vDot,_MM_SHUFFLE(2,1,2,1));
    // Result.x = x+y
    vDot = _mm_add_ss(vDot,vTemp);
    // x=Dot.z
    vTemp = XM_PERMUTE_PS(vTemp,_MM_SHUFFLE(1,1,1,1));
    // Result.x = (x+y)+z
    vDot = _mm_add_ss(vDot,vTemp);
    // Splat x
    vDot = XM_PERMUTE_PS(vDot,_MM_SHUFFLE(0,0,0,0));
    // Get the length
    vDot = _mm_sqrt_ps(vDot);
    // Get the reciprocal
    vDot = _mm_div_ps(g_XMOne,vDot);
    return vDot;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
+//------------------------------------------------------------------------------
+
// Computes an estimate of the 3D length of V; result is replicated
// into all four lanes. The NEON path guards the zero-length case so
// the rsqrt estimate cannot produce infinity * 0.
inline XMVECTOR XMVector3LengthEst
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;

    Result = XMVector3LengthSq(V);
    Result = XMVectorSqrtEst(Result);

    return Result;

#elif defined(_XM_ARM_NEON_INTRINSICS_)
    // Dot3
    __n128 vTemp = vmulq_f32( V, V );
    __n64 v1 = vget_low_f32( vTemp );
    __n64 v2 = vget_high_f32( vTemp );
    v1 = vpadd_f32( v1, v1 );
    v2 = vdup_lane_f32( v2, 0 );
    v1 = vadd_f32( v1, v2 );
    const __n64 zero = vdup_n_u32(0);
    __n64 VEqualsZero = vceq_f32( v1, zero );
    // Sqrt (estimate): sqrt(x) = x * rsqrt(x), forced to 0 when x == 0
    __n64 Result = vrsqrte_f32( v1 );
    Result = vmul_f32( v1, Result );
    Result = vbsl_f32( VEqualsZero, zero, Result );
    return vcombine_f32( Result, Result );
#elif defined(_XM_SSE_INTRINSICS_)
    // Perform the dot product on x,y and z
    XMVECTOR vLengthSq = _mm_mul_ps(V,V);
    // vTemp has z and y
    XMVECTOR vTemp = XM_PERMUTE_PS(vLengthSq,_MM_SHUFFLE(1,2,1,2));
    // x+z, y
    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
    // y,y,y,y
    vTemp = XM_PERMUTE_PS(vTemp,_MM_SHUFFLE(1,1,1,1));
    // x+z+y,??,??,??
    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
    // Splat the length squared
    vLengthSq = XM_PERMUTE_PS(vLengthSq,_MM_SHUFFLE(0,0,0,0));
    // Get the length
    vLengthSq = _mm_sqrt_ps(vLengthSq);
    return vLengthSq;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
+//------------------------------------------------------------------------------
+
// Computes the 3D length of V to full precision; result is replicated
// into all four lanes. NEON refines rsqrt with two Newton-Raphson steps
// and guards the zero-length case explicitly.
inline XMVECTOR XMVector3Length
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;

    Result = XMVector3LengthSq(V);
    Result = XMVectorSqrt(Result);

    return Result;

#elif defined(_XM_ARM_NEON_INTRINSICS_)
    // Dot3
    __n128 vTemp = vmulq_f32( V, V );
    __n64 v1 = vget_low_f32( vTemp );
    __n64 v2 = vget_high_f32( vTemp );
    v1 = vpadd_f32( v1, v1 );
    v2 = vdup_lane_f32( v2, 0 );
    v1 = vadd_f32( v1, v2 );
    const __n64 zero = vdup_n_u32(0);
    __n64 VEqualsZero = vceq_f32( v1, zero );
    // Sqrt: refined rsqrt, then sqrt(x) = x * rsqrt(x), forced to 0 when x == 0
    __n64 S0 = vrsqrte_f32( v1 );
    __n64 P0 = vmul_f32( v1, S0 );
    __n64 R0 = vrsqrts_f32( P0, S0 );
    __n64 S1 = vmul_f32( S0, R0 );
    __n64 P1 = vmul_f32( v1, S1 );
    __n64 R1 = vrsqrts_f32( P1, S1 );
    __n64 Result = vmul_f32( S1, R1 );
    Result = vmul_f32( v1, Result );
    Result = vbsl_f32( VEqualsZero, zero, Result );
    return vcombine_f32( Result, Result );
#elif defined(_XM_SSE_INTRINSICS_)
    // Perform the dot product on x,y and z
    XMVECTOR vLengthSq = _mm_mul_ps(V,V);
    // vTemp has z and y
    XMVECTOR vTemp = XM_PERMUTE_PS(vLengthSq,_MM_SHUFFLE(1,2,1,2));
    // x+z, y
    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
    // y,y,y,y
    vTemp = XM_PERMUTE_PS(vTemp,_MM_SHUFFLE(1,1,1,1));
    // x+z+y,??,??,??
    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
    // Splat the length squared
    vLengthSq = XM_PERMUTE_PS(vLengthSq,_MM_SHUFFLE(0,0,0,0));
    // Get the length
    vLengthSq = _mm_sqrt_ps(vLengthSq);
    return vLengthSq;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
+//------------------------------------------------------------------------------
+// XMVector3NormalizeEst uses a reciprocal estimate and
+// returns QNaN on zero and infinite vectors.
+
// Normalizes the xyz components of V using a fast reciprocal-sqrt
// estimate. Per the note above, zero and infinite-length inputs are NOT
// guarded and yield QNaN components.
inline XMVECTOR XMVector3NormalizeEst
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;
    Result = XMVector3ReciprocalLength(V);
    Result = XMVectorMultiply(V, Result);
    return Result;

#elif defined(_XM_ARM_NEON_INTRINSICS_)
    // Dot3
    __n128 vTemp = vmulq_f32( V, V );
    __n64 v1 = vget_low_f32( vTemp );
    __n64 v2 = vget_high_f32( vTemp );
    v1 = vpadd_f32( v1, v1 );
    v2 = vdup_lane_f32( v2, 0 );
    v1 = vadd_f32( v1, v2 );
    // Reciprocal sqrt (estimate)
    v2 = vrsqrte_f32( v1 );
    // Normalize
    return vmulq_f32( V, vcombine_f32(v2,v2) );
#elif defined(_XM_SSE_INTRINSICS_)
    // Perform the dot product
    XMVECTOR vDot = _mm_mul_ps(V,V);
    // x=Dot.y, y=Dot.z
    XMVECTOR vTemp = XM_PERMUTE_PS(vDot,_MM_SHUFFLE(2,1,2,1));
    // Result.x = x+y
    vDot = _mm_add_ss(vDot,vTemp);
    // x=Dot.z
    vTemp = XM_PERMUTE_PS(vTemp,_MM_SHUFFLE(1,1,1,1));
    // Result.x = (x+y)+z
    vDot = _mm_add_ss(vDot,vTemp);
    // Splat x
    vDot = XM_PERMUTE_PS(vDot,_MM_SHUFFLE(0,0,0,0));
    // Get the reciprocal sqrt (estimate)
    vDot = _mm_rsqrt_ps(vDot);
    // Perform the normalization
    vDot = _mm_mul_ps(vDot,V);
    return vDot;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
+//------------------------------------------------------------------------------
+
// Normalizes the xyz components of V to full precision.
// Degenerate cases: a zero-length input yields a zero vector; an
// infinite-length input yields QNaN components (see intrinsic paths).
// Note the scalar path also scales w by the reciprocal length.
inline XMVECTOR XMVector3Normalize
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
    float fLength;
    XMVECTOR vResult;

    vResult = XMVector3Length( V );
    fLength = vResult.vector4_f32[0];

    // Prevent divide by zero
    if (fLength > 0) {
        fLength = 1.0f/fLength;
    }

    vResult.vector4_f32[0] = V.vector4_f32[0]*fLength;
    vResult.vector4_f32[1] = V.vector4_f32[1]*fLength;
    vResult.vector4_f32[2] = V.vector4_f32[2]*fLength;
    vResult.vector4_f32[3] = V.vector4_f32[3]*fLength;
    return vResult;

#elif defined(_XM_ARM_NEON_INTRINSICS_)
    // Dot3
    __n128 vTemp = vmulq_f32( V, V );
    __n64 v1 = vget_low_f32( vTemp );
    __n64 v2 = vget_high_f32( vTemp );
    v1 = vpadd_f32( v1, v1 );
    v2 = vdup_lane_f32( v2, 0 );
    v1 = vadd_f32( v1, v2 );
    // Build masks for the zero and infinite squared-length cases
    __n64 VEqualsZero = vceq_f32( v1, vdup_n_u32(0) );
    __n64 VEqualsInf = vceq_f32( v1, vget_low_f32(g_XMInfinity) );
    // Reciprocal sqrt (2 iterations of Newton-Raphson)
    __n64 S0 = vrsqrte_f32( v1 );
    __n64 P0 = vmul_f32( v1, S0 );
    __n64 R0 = vrsqrts_f32( P0, S0 );
    __n64 S1 = vmul_f32( S0, R0 );
    __n64 P1 = vmul_f32( v1, S1 );
    __n64 R1 = vrsqrts_f32( P1, S1 );
    v2 = vmul_f32( S1, R1 );
    // Normalize, then patch in 0 for zero-length and QNaN for infinite-length
    __n128 vResult = vmulq_f32( V, vcombine_f32(v2,v2) );
    vResult = vbslq_f32( vcombine_f32(VEqualsZero,VEqualsZero), vdupq_n_f32(0), vResult );
    return vbslq_f32( vcombine_f32(VEqualsInf,VEqualsInf), g_XMQNaN, vResult );
#elif defined(_XM_SSE_INTRINSICS_)
    // Perform the dot product on x,y and z only
    XMVECTOR vLengthSq = _mm_mul_ps(V,V);
    XMVECTOR vTemp = XM_PERMUTE_PS(vLengthSq,_MM_SHUFFLE(2,1,2,1));
    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
    vTemp = XM_PERMUTE_PS(vTemp,_MM_SHUFFLE(1,1,1,1));
    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
    vLengthSq = XM_PERMUTE_PS(vLengthSq,_MM_SHUFFLE(0,0,0,0));
    // Prepare for the division
    XMVECTOR vResult = _mm_sqrt_ps(vLengthSq);
    // Create zero with a single instruction
    XMVECTOR vZeroMask = _mm_setzero_ps();
    // Test for a divide by zero (Must be FP to detect -0.0)
    vZeroMask = _mm_cmpneq_ps(vZeroMask,vResult);
    // Failsafe on zero (Or epsilon) length planes
    // If the length is infinity, set the elements to zero
    vLengthSq = _mm_cmpneq_ps(vLengthSq,g_XMInfinity);
    // Divide to perform the normalization
    vResult = _mm_div_ps(V,vResult);
    // Any that are infinity, set to zero
    vResult = _mm_and_ps(vResult,vZeroMask);
    // Select qnan or result based on infinite length
    XMVECTOR vTemp1 = _mm_andnot_ps(vLengthSq,g_XMQNaN);
    XMVECTOR vTemp2 = _mm_and_ps(vResult,vLengthSq);
    vResult = _mm_or_ps(vTemp1,vTemp2);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
+//------------------------------------------------------------------------------
+
+inline XMVECTOR XMVector3ClampLength
+(
+ FXMVECTOR V,
+ float LengthMin,
+ float LengthMax
+)
+{
+ XMVECTOR ClampMax = XMVectorReplicate(LengthMax);
+ XMVECTOR ClampMin = XMVectorReplicate(LengthMin);
+
+ return XMVector3ClampLengthV(V, ClampMin, ClampMax);
+}
+
+//------------------------------------------------------------------------------
+
+inline XMVECTOR XMVector3ClampLengthV
+(
+ FXMVECTOR V,
+ FXMVECTOR LengthMin,
+ FXMVECTOR LengthMax
+)
+{
+ assert((XMVectorGetY(LengthMin) == XMVectorGetX(LengthMin)) && (XMVectorGetZ(LengthMin) == XMVectorGetX(LengthMin)));
+ assert((XMVectorGetY(LengthMax) == XMVectorGetX(LengthMax)) && (XMVectorGetZ(LengthMax) == XMVectorGetX(LengthMax)));
+ assert(XMVector3GreaterOrEqual(LengthMin, XMVectorZero()));
+ assert(XMVector3GreaterOrEqual(LengthMax, XMVectorZero()));
+ assert(XMVector3GreaterOrEqual(LengthMax, LengthMin));
+
+ XMVECTOR LengthSq = XMVector3LengthSq(V);
+
+ const XMVECTOR Zero = XMVectorZero();
+
+ XMVECTOR RcpLength = XMVectorReciprocalSqrt(LengthSq);
+
+ XMVECTOR InfiniteLength = XMVectorEqualInt(LengthSq, g_XMInfinity.v);
+ XMVECTOR ZeroLength = XMVectorEqual(LengthSq, Zero);
+
+ XMVECTOR Normal = XMVectorMultiply(V, RcpLength);
+
+ XMVECTOR Length = XMVectorMultiply(LengthSq, RcpLength);
+
+ XMVECTOR Select = XMVectorEqualInt(InfiniteLength, ZeroLength);
+ Length = XMVectorSelect(LengthSq, Length, Select);
+ Normal = XMVectorSelect(LengthSq, Normal, Select);
+
+ XMVECTOR ControlMax = XMVectorGreater(Length, LengthMax);
+ XMVECTOR ControlMin = XMVectorLess(Length, LengthMin);
+
+ XMVECTOR ClampLength = XMVectorSelect(Length, LengthMax, ControlMax);
+ ClampLength = XMVectorSelect(ClampLength, LengthMin, ControlMin);
+
+ XMVECTOR Result = XMVectorMultiply(Normal, ClampLength);
+
+ // Preserve the original vector (with no precision loss) if the length falls within the given range
+ XMVECTOR Control = XMVectorEqualInt(ControlMax, ControlMin);
+ Result = XMVectorSelect(Result, V, Control);
+
+ return Result;
+}
+
+//------------------------------------------------------------------------------
+
+inline XMVECTOR XMVector3Reflect
+(
+ FXMVECTOR Incident,
+ FXMVECTOR Normal
+)
+{
+ // Result = Incident - (2 * dot(Incident, Normal)) * Normal
+
+ XMVECTOR Result = XMVector3Dot(Incident, Normal);
+ Result = XMVectorAdd(Result, Result);
+ Result = XMVectorNegativeMultiplySubtract(Result, Normal, Incident);
+
+ return Result;
+}
+
+//------------------------------------------------------------------------------
+
+inline XMVECTOR XMVector3Refract
+(
+ FXMVECTOR Incident,
+ FXMVECTOR Normal,
+ float RefractionIndex
+)
+{
+ XMVECTOR Index = XMVectorReplicate(RefractionIndex);
+ return XMVector3RefractV(Incident, Normal, Index);
+}
+
+//------------------------------------------------------------------------------
+
// Refracts the incident vector across the given normal; RefractionIndex
// holds the (replicated) ratio of indices of refraction. Returns the
// zero vector on total internal reflection.
inline XMVECTOR XMVector3RefractV
(
    FXMVECTOR Incident,
    FXMVECTOR Normal,
    FXMVECTOR RefractionIndex
)
{
    // Result = RefractionIndex * Incident - Normal * (RefractionIndex * dot(Incident, Normal) +
    // sqrt(1 - RefractionIndex * RefractionIndex * (1 - dot(Incident, Normal) * dot(Incident, Normal))))

#if defined(_XM_NO_INTRINSICS_)

    const XMVECTOR Zero = XMVectorZero();

    XMVECTOR IDotN = XMVector3Dot(Incident, Normal);

    // R = 1.0f - RefractionIndex * RefractionIndex * (1.0f - IDotN * IDotN)
    XMVECTOR R = XMVectorNegativeMultiplySubtract(IDotN, IDotN, g_XMOne.v);
    R = XMVectorMultiply(R, RefractionIndex);
    R = XMVectorNegativeMultiplySubtract(R, RefractionIndex, g_XMOne.v);

    if (XMVector4LessOrEqual(R, Zero))
    {
        // Total internal reflection
        return Zero;
    }
    else
    {
        // R = RefractionIndex * IDotN + sqrt(R)
        R = XMVectorSqrt(R);
        R = XMVectorMultiplyAdd(RefractionIndex, IDotN, R);

        // Result = RefractionIndex * Incident - Normal * R
        XMVECTOR Result = XMVectorMultiply(RefractionIndex, Incident);
        Result = XMVectorNegativeMultiplySubtract(Normal, R, Result);

        return Result;
    }

#elif defined(_XM_ARM_NEON_INTRINSICS_)
    XMVECTOR IDotN = XMVector3Dot(Incident,Normal);

    // R = 1.0f - RefractionIndex * RefractionIndex * (1.0f - IDotN * IDotN)
    __n128 R = vmlsq_f32( g_XMOne, IDotN, IDotN);
    R = vmulq_f32(R, RefractionIndex);
    R = vmlsq_f32(g_XMOne, R, RefractionIndex );

    // Zip the R <= 0 comparison mask down to test all four lanes at once
    __n128 vResult = vcleq_f32(R,g_XMZero);
    int8x8x2_t vTemp = vzip_u8(vget_low_u8(vResult), vget_high_u8(vResult));
    vTemp = vzip_u16(vTemp.val[0], vTemp.val[1]);
    if ( vget_lane_u32(vTemp.val[1], 1) == 0xFFFFFFFFU )
    {
        // Total internal reflection
        vResult = g_XMZero;
    }
    else
    {
        // Sqrt(R): refined reciprocal sqrt times R
        __n128 S0 = vrsqrteq_f32(R);
        __n128 P0 = vmulq_f32( R, S0 );
        __n128 R0 = vrsqrtsq_f32( P0, S0 );
        __n128 S1 = vmulq_f32( S0, R0 );
        __n128 P1 = vmulq_f32( R, S1 );
        __n128 R1 = vrsqrtsq_f32( P1, S1 );
        __n128 S2 = vmulq_f32( S1, R1 );
        R = vmulq_f32( R, S2 );
        // R = RefractionIndex * IDotN + sqrt(R)
        R = vmlaq_f32( R, RefractionIndex, IDotN );
        // Result = RefractionIndex * Incident - Normal * R
        vResult = vmulq_f32(RefractionIndex, Incident);
        vResult = vmlsq_f32( vResult, R, Normal );
    }
    return vResult;
#elif defined(_XM_SSE_INTRINSICS_)
    // Result = RefractionIndex * Incident - Normal * (RefractionIndex * dot(Incident, Normal) +
    // sqrt(1 - RefractionIndex * RefractionIndex * (1 - dot(Incident, Normal) * dot(Incident, Normal))))
    XMVECTOR IDotN = XMVector3Dot(Incident, Normal);
    // R = 1.0f - RefractionIndex * RefractionIndex * (1.0f - IDotN * IDotN)
    XMVECTOR R = _mm_mul_ps(IDotN, IDotN);
    R = _mm_sub_ps(g_XMOne,R);
    R = _mm_mul_ps(R, RefractionIndex);
    R = _mm_mul_ps(R, RefractionIndex);
    R = _mm_sub_ps(g_XMOne,R);

    XMVECTOR vResult = _mm_cmple_ps(R,g_XMZero);
    if (_mm_movemask_ps(vResult)==0x0f)
    {
        // Total internal reflection
        vResult = g_XMZero;
    }
    else
    {
        // R = RefractionIndex * IDotN + sqrt(R)
        R = _mm_sqrt_ps(R);
        vResult = _mm_mul_ps(RefractionIndex,IDotN);
        R = _mm_add_ps(R,vResult);
        // Result = RefractionIndex * Incident - Normal * R
        vResult = _mm_mul_ps(RefractionIndex, Incident);
        R = _mm_mul_ps(R,Normal);
        vResult = _mm_sub_ps(vResult,R);
    }
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
+//------------------------------------------------------------------------------
+
+inline XMVECTOR XMVector3Orthogonal
+(
+ FXMVECTOR V
+)
+{
+#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
+
+ XMVECTOR Zero = XMVectorZero();
+ XMVECTOR Z = XMVectorSplatZ(V);
+ XMVECTOR YZYY = XMVectorSwizzle<XM_SWIZZLE_Y, XM_SWIZZLE_Z, XM_SWIZZLE_Y, XM_SWIZZLE_Y>(V);
+
+ XMVECTOR NegativeV = XMVectorSubtract(Zero, V);
+
+ XMVECTOR ZIsNegative = XMVectorLess(Z, Zero);
+ XMVECTOR YZYYIsNegative = XMVectorLess(YZYY, Zero);
+
+ XMVECTOR S = XMVectorAdd(YZYY, Z);
+ XMVECTOR D = XMVectorSubtract(YZYY, Z);
+
+ XMVECTOR Select = XMVectorEqualInt(ZIsNegative, YZYYIsNegative);
+
+ XMVECTOR R0 = XMVectorPermute<XM_PERMUTE_1X, XM_PERMUTE_0X, XM_PERMUTE_0X, XM_PERMUTE_0X>(NegativeV, S);
+ XMVECTOR R1 = XMVectorPermute<XM_PERMUTE_1X, XM_PERMUTE_0X, XM_PERMUTE_0X, XM_PERMUTE_0X>(V, D);
+
+ return XMVectorSelect(R1, R0, Select);
+
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+inline XMVECTOR XMVector3AngleBetweenNormalsEst
+(
+ FXMVECTOR N1,
+ FXMVECTOR N2
+)
+{
+#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
+
+ XMVECTOR Result = XMVector3Dot(N1, N2);
+ Result = XMVectorClamp(Result, g_XMNegativeOne.v, g_XMOne.v);
+ Result = XMVectorACosEst(Result);
+ return Result;
+
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+inline XMVECTOR XMVector3AngleBetweenNormals
+(
+ FXMVECTOR N1,
+ FXMVECTOR N2
+)
+{
+#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
+
+ XMVECTOR Result = XMVector3Dot(N1, N2);
+ Result = XMVectorClamp(Result, g_XMNegativeOne.v, g_XMOne.v);
+ Result = XMVectorACos(Result);
+ return Result;
+
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+inline XMVECTOR XMVector3AngleBetweenVectors
+(
+ FXMVECTOR V1,
+ FXMVECTOR V2
+)
+{
+#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
+
+ XMVECTOR L1 = XMVector3ReciprocalLength(V1);
+ XMVECTOR L2 = XMVector3ReciprocalLength(V2);
+
+ XMVECTOR Dot = XMVector3Dot(V1, V2);
+
+ L1 = XMVectorMultiply(L1, L2);
+
+ XMVECTOR CosAngle = XMVectorMultiply(Dot, L1);
+ CosAngle = XMVectorClamp(CosAngle, g_XMNegativeOne.v, g_XMOne.v);
+
+ return XMVectorACos(CosAngle);
+
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+inline XMVECTOR XMVector3LinePointDistance
+(
+ FXMVECTOR LinePoint1,
+ FXMVECTOR LinePoint2,
+ FXMVECTOR Point
+)
+{
+ // Given a vector PointVector from LinePoint1 to Point and a vector
+ // LineVector from LinePoint1 to LinePoint2, the scaled distance
+ // PointProjectionScale from LinePoint1 to the perpendicular projection
+ // of PointVector onto the line is defined as:
+ //
+ // PointProjectionScale = dot(PointVector, LineVector) / LengthSq(LineVector)
+
+#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
+
+ XMVECTOR PointVector = XMVectorSubtract(Point, LinePoint1);
+ XMVECTOR LineVector = XMVectorSubtract(LinePoint2, LinePoint1);
+
+ XMVECTOR LengthSq = XMVector3LengthSq(LineVector);
+
+ XMVECTOR PointProjectionScale = XMVector3Dot(PointVector, LineVector);
+ PointProjectionScale = XMVectorDivide(PointProjectionScale, LengthSq);
+
+ XMVECTOR DistanceVector = XMVectorMultiply(LineVector, PointProjectionScale);
+ DistanceVector = XMVectorSubtract(PointVector, DistanceVector);
+
+ return XMVector3Length(DistanceVector);
+
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+_Use_decl_annotations_
+inline void XMVector3ComponentsFromNormal
+(
+ XMVECTOR* pParallel,
+ XMVECTOR* pPerpendicular,
+ FXMVECTOR V,
+ FXMVECTOR Normal
+)
+{
+ assert(pParallel != NULL);
+ assert(pPerpendicular != NULL);
+
+ XMVECTOR Scale = XMVector3Dot(V, Normal);
+
+ XMVECTOR Parallel = XMVectorMultiply(Normal, Scale);
+
+ *pParallel = Parallel;
+ *pPerpendicular = XMVectorSubtract(V, Parallel);
+}
+
+//------------------------------------------------------------------------------
+// Transform a vector using a rotation expressed as a unit quaternion
+
+inline XMVECTOR XMVector3Rotate
+(
+ FXMVECTOR V,
+ FXMVECTOR RotationQuaternion
+)
+{
+#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
+
+ XMVECTOR A = XMVectorSelect(g_XMSelect1110.v, V, g_XMSelect1110.v);
+ XMVECTOR Q = XMQuaternionConjugate(RotationQuaternion);
+ XMVECTOR Result = XMQuaternionMultiply(Q, A);
+ return XMQuaternionMultiply(Result, RotationQuaternion);
+
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+// Transform a vector using the inverse of a rotation expressed as a unit quaternion
+
+inline XMVECTOR XMVector3InverseRotate
+(
+ FXMVECTOR V,
+ FXMVECTOR RotationQuaternion
+)
+{
+#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
+
+ XMVECTOR A = XMVectorSelect(g_XMSelect1110.v, V, g_XMSelect1110.v);
+ XMVECTOR Result = XMQuaternionMultiply(RotationQuaternion, A);
+ XMVECTOR Q = XMQuaternionConjugate(RotationQuaternion);
+ return XMQuaternionMultiply(Result, Q);
+
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
// Transforms the 3D vector V by the matrix M as a point (x,y,z,1):
// the translation row M.r[3] is added and no w-divide is performed.
inline XMVECTOR XMVector3Transform
(
    FXMVECTOR V,
    CXMMATRIX M
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Z = XMVectorSplatZ(V);
    XMVECTOR Y = XMVectorSplatY(V);
    XMVECTOR X = XMVectorSplatX(V);

    // z*row2 + row3, then accumulate y*row1 and x*row0
    XMVECTOR Result = XMVectorMultiplyAdd(Z, M.r[2], M.r[3]);
    Result = XMVectorMultiplyAdd(Y, M.r[1], Result);
    Result = XMVectorMultiplyAdd(X, M.r[0], Result);

    return Result;

#elif defined(_XM_ARM_NEON_INTRINSICS_)
    __n64 VL = vget_low_f32( V );
    XMVECTOR vResult = vdupq_lane_f32( VL, 0 ); // X
    XMVECTOR vTemp = vdupq_lane_f32( VL, 1 ); // Y
    vResult = vmlaq_f32( M.r[3], vResult, M.r[0] );
    vResult = vmlaq_f32( vResult, vTemp, M.r[1] );
    vTemp = vdupq_lane_f32( vget_high_f32( V ), 0 ); // Z
    return vmlaq_f32( vResult, vTemp, M.r[2] );
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vResult = XM_PERMUTE_PS(V,_MM_SHUFFLE(0,0,0,0));
    vResult = _mm_mul_ps(vResult,M.r[0]);
    XMVECTOR vTemp = XM_PERMUTE_PS(V,_MM_SHUFFLE(1,1,1,1));
    vTemp = _mm_mul_ps(vTemp,M.r[1]);
    vResult = _mm_add_ps(vResult,vTemp);
    vTemp = XM_PERMUTE_PS(V,_MM_SHUFFLE(2,2,2,2));
    vTemp = _mm_mul_ps(vTemp,M.r[2]);
    vResult = _mm_add_ps(vResult,vTemp);
    vResult = _mm_add_ps(vResult,M.r[3]);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
+//------------------------------------------------------------------------------
+
// Transforms a stream of XMFLOAT3 points by M (as points: translation
// applied, no w-divide) into a stream of XMFLOAT4 results.
// Strides are in bytes; returns pOutputStream for chaining.
_Use_decl_annotations_
inline XMFLOAT4* XMVector3TransformStream
(
    XMFLOAT4* pOutputStream,
    size_t OutputStride,
    const XMFLOAT3* pInputStream,
    size_t InputStride,
    size_t VectorCount,
    CXMMATRIX M
)
{
    assert(pOutputStream != NULL);
    assert(pInputStream != NULL);

#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)

    // Walk both streams as byte pointers so arbitrary strides work.
    const uint8_t* pInputVector = (const uint8_t*)pInputStream;
    uint8_t* pOutputVector = (uint8_t*)pOutputStream;

    // Hoist the matrix rows out of the loop.
    const XMVECTOR row0 = M.r[0];
    const XMVECTOR row1 = M.r[1];
    const XMVECTOR row2 = M.r[2];
    const XMVECTOR row3 = M.r[3];

    for (size_t i = 0; i < VectorCount; i++)
    {
        XMVECTOR V = XMLoadFloat3((const XMFLOAT3*)pInputVector);
        XMVECTOR Z = XMVectorSplatZ(V);
        XMVECTOR Y = XMVectorSplatY(V);
        XMVECTOR X = XMVectorSplatX(V);

        XMVECTOR Result = XMVectorMultiplyAdd(Z, row2, row3);
        Result = XMVectorMultiplyAdd(Y, row1, Result);
        Result = XMVectorMultiplyAdd(X, row0, Result);

        XMStoreFloat4((XMFLOAT4*)pOutputVector, Result);

        pInputVector += InputStride;
        pOutputVector += OutputStride;
    }

    return pOutputStream;

#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}
+
+
+//------------------------------------------------------------------------------
+
+inline XMVECTOR XMVector3TransformCoord
+(
+ FXMVECTOR V,
+ CXMMATRIX M
+)
+{
+#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
+
+ XMVECTOR Z = XMVectorSplatZ(V);
+ XMVECTOR Y = XMVectorSplatY(V);
+ XMVECTOR X = XMVectorSplatX(V);
+
+ XMVECTOR Result = XMVectorMultiplyAdd(Z, M.r[2], M.r[3]);
+ Result = XMVectorMultiplyAdd(Y, M.r[1], Result);
+ Result = XMVectorMultiplyAdd(X, M.r[0], Result);
+
+ XMVECTOR W = XMVectorSplatW(Result);
+ return XMVectorDivide( Result, W );
+
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
// Transforms a stream of XMFLOAT3 points by M with a homogeneous
// w-divide per element, writing XMFLOAT3 results.
// Strides are in bytes; returns pOutputStream for chaining.
_Use_decl_annotations_
inline XMFLOAT3* XMVector3TransformCoordStream
(
    XMFLOAT3* pOutputStream,
    size_t OutputStride,
    const XMFLOAT3* pInputStream,
    size_t InputStride,
    size_t VectorCount,
    CXMMATRIX M
)
{
    assert(pOutputStream != NULL);
    assert(pInputStream != NULL);

#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)

    // Walk both streams as byte pointers so arbitrary strides work.
    const uint8_t* pInputVector = (const uint8_t*)pInputStream;
    uint8_t* pOutputVector = (uint8_t*)pOutputStream;

    // Hoist the matrix rows out of the loop.
    const XMVECTOR row0 = M.r[0];
    const XMVECTOR row1 = M.r[1];
    const XMVECTOR row2 = M.r[2];
    const XMVECTOR row3 = M.r[3];

    for (size_t i = 0; i < VectorCount; i++)
    {
        XMVECTOR V = XMLoadFloat3((const XMFLOAT3*)pInputVector);
        XMVECTOR Z = XMVectorSplatZ(V);
        XMVECTOR Y = XMVectorSplatY(V);
        XMVECTOR X = XMVectorSplatX(V);

        XMVECTOR Result = XMVectorMultiplyAdd(Z, row2, row3);
        Result = XMVectorMultiplyAdd(Y, row1, Result);
        Result = XMVectorMultiplyAdd(X, row0, Result);

        // Project back to w == 1
        XMVECTOR W = XMVectorSplatW(Result);

        Result = XMVectorDivide(Result, W);

        XMStoreFloat3((XMFLOAT3*)pOutputVector, Result);

        pInputVector += InputStride;
        pOutputVector += OutputStride;
    }

    return pOutputStream;

#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}
+
+//------------------------------------------------------------------------------
+
// Transforms the 3D direction vector V by matrix M with w treated as 0:
// the translation row M.r[3] is ignored, so only rotation/scale/shear apply.
inline XMVECTOR XMVector3TransformNormal
(
    FXMVECTOR V,
    CXMMATRIX M
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Z = XMVectorSplatZ(V);
    XMVECTOR Y = XMVectorSplatY(V);
    XMVECTOR X = XMVectorSplatX(V);

    // Result = X*M[0] + Y*M[1] + Z*M[2]; no M[3] term (w == 0).
    XMVECTOR Result = XMVectorMultiply(Z, M.r[2]);
    Result = XMVectorMultiplyAdd(Y, M.r[1], Result);
    Result = XMVectorMultiplyAdd(X, M.r[0], Result);

    return Result;

#elif defined(_XM_ARM_NEON_INTRINSICS_)
    // Broadcast X/Y/Z lanes with dup-by-lane and accumulate with fused ops.
    __n64 VL = vget_low_f32( V );
    XMVECTOR vResult = vdupq_lane_f32( VL, 0 ); // X
    XMVECTOR vTemp = vdupq_lane_f32( VL, 1 ); // Y
    vResult = vmulq_f32( vResult, M.r[0] );
    vResult = vmlaq_f32( vResult, vTemp, M.r[1] );
    vTemp = vdupq_lane_f32( vget_high_f32( V ), 0 ); // Z
    return vmlaq_f32( vResult, vTemp, M.r[2] );
#elif defined(_XM_SSE_INTRINSICS_)
    // Splat each component via shuffle, multiply by the matching row, and sum.
    XMVECTOR vResult = XM_PERMUTE_PS(V,_MM_SHUFFLE(0,0,0,0));
    vResult = _mm_mul_ps(vResult,M.r[0]);
    XMVECTOR vTemp = XM_PERMUTE_PS(V,_MM_SHUFFLE(1,1,1,1));
    vTemp = _mm_mul_ps(vTemp,M.r[1]);
    vResult = _mm_add_ps(vResult,vTemp);
    vTemp = XM_PERMUTE_PS(V,_MM_SHUFFLE(2,2,2,2));
    vTemp = _mm_mul_ps(vTemp,M.r[2]);
    vResult = _mm_add_ps(vResult,vTemp);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
+//------------------------------------------------------------------------------
+
+_Use_decl_annotations_
+inline XMFLOAT3* XMVector3TransformNormalStream
+(
+ XMFLOAT3* pOutputStream,
+ size_t OutputStride,
+ const XMFLOAT3* pInputStream,
+ size_t InputStride,
+ size_t VectorCount,
+ CXMMATRIX M
+)
+{
+ assert(pOutputStream != NULL);
+ assert(pInputStream != NULL);
+
+#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
+
+ const uint8_t* pInputVector = (const uint8_t*)pInputStream;
+ uint8_t* pOutputVector = (uint8_t*)pOutputStream;
+
+ const XMVECTOR row0 = M.r[0];
+ const XMVECTOR row1 = M.r[1];
+ const XMVECTOR row2 = M.r[2];
+
+ for (size_t i = 0; i < VectorCount; i++)
+ {
+ XMVECTOR V = XMLoadFloat3((const XMFLOAT3*)pInputVector);
+ XMVECTOR Z = XMVectorSplatZ(V);
+ XMVECTOR Y = XMVectorSplatY(V);
+ XMVECTOR X = XMVectorSplatX(V);
+
+ XMVECTOR Result = XMVectorMultiply(Z, row2);
+ Result = XMVectorMultiplyAdd(Y, row1, Result);
+ Result = XMVectorMultiplyAdd(X, row0, Result);
+
+ XMStoreFloat3((XMFLOAT3*)pOutputVector, Result);
+
+ pInputVector += InputStride;
+ pOutputVector += OutputStride;
+ }
+
+ return pOutputStream;
+
+#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+inline XMVECTOR XMVector3Project
+(
+ FXMVECTOR V,
+ float ViewportX,
+ float ViewportY,
+ float ViewportWidth,
+ float ViewportHeight,
+ float ViewportMinZ,
+ float ViewportMaxZ,
+ CXMMATRIX Projection,
+ CXMMATRIX View,
+ CXMMATRIX World
+)
+{
+#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
+
+ const float HalfViewportWidth = ViewportWidth * 0.5f;
+ const float HalfViewportHeight = ViewportHeight * 0.5f;
+
+ XMVECTOR Scale = XMVectorSet(HalfViewportWidth, -HalfViewportHeight, ViewportMaxZ - ViewportMinZ, 0.0f);
+ XMVECTOR Offset = XMVectorSet(ViewportX + HalfViewportWidth, ViewportY + HalfViewportHeight, ViewportMinZ, 0.0f);
+
+ XMMATRIX Transform = XMMatrixMultiply(World, View);
+ Transform = XMMatrixMultiply(Transform, Projection);
+
+ XMVECTOR Result = XMVector3TransformCoord(V, Transform);
+
+ Result = XMVectorMultiplyAdd(Result, Scale, Offset);
+
+ return Result;
+
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+_Use_decl_annotations_
+inline XMFLOAT3* XMVector3ProjectStream
+(
+ XMFLOAT3* pOutputStream,
+ size_t OutputStride,
+ const XMFLOAT3* pInputStream,
+ size_t InputStride,
+ size_t VectorCount,
+ float ViewportX,
+ float ViewportY,
+ float ViewportWidth,
+ float ViewportHeight,
+ float ViewportMinZ,
+ float ViewportMaxZ,
+ CXMMATRIX Projection,
+ CXMMATRIX View,
+ CXMMATRIX World
+)
+{
+ assert(pOutputStream != NULL);
+ assert(pInputStream != NULL);
+
+#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_) || defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
+
+ const float HalfViewportWidth = ViewportWidth * 0.5f;
+ const float HalfViewportHeight = ViewportHeight * 0.5f;
+
+ XMVECTOR Scale = XMVectorSet(HalfViewportWidth, -HalfViewportHeight, ViewportMaxZ - ViewportMinZ, 1.0f);
+ XMVECTOR Offset = XMVectorSet(ViewportX + HalfViewportWidth, ViewportY + HalfViewportHeight, ViewportMinZ, 0.0f);
+
+ XMMATRIX Transform = XMMatrixMultiply(World, View);
+ Transform = XMMatrixMultiply(Transform, Projection);
+
+ const uint8_t* pInputVector = (const uint8_t*)pInputStream;
+ uint8_t* pOutputVector = (uint8_t*)pOutputStream;
+
+ for (size_t i = 0; i < VectorCount; i++)
+ {
+ XMVECTOR V = XMLoadFloat3((const XMFLOAT3*)pInputVector);
+
+ XMVECTOR Result = XMVector3TransformCoord(V, Transform);
+ Result = XMVectorMultiplyAdd(Result, Scale, Offset);
+
+ XMStoreFloat3((XMFLOAT3*)pOutputVector, Result);
+
+ pInputVector += InputStride;
+ pOutputVector += OutputStride;
+ }
+
+ return pOutputStream;
+
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+inline XMVECTOR XMVector3Unproject
+(
+ FXMVECTOR V,
+ float ViewportX,
+ float ViewportY,
+ float ViewportWidth,
+ float ViewportHeight,
+ float ViewportMinZ,
+ float ViewportMaxZ,
+ CXMMATRIX Projection,
+ CXMMATRIX View,
+ CXMMATRIX World
+)
+{
+#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
+
+ static const XMVECTORF32 D = { -1.0f, 1.0f, 0.0f, 0.0f };
+
+ XMVECTOR Scale = XMVectorSet(ViewportWidth * 0.5f, -ViewportHeight * 0.5f, ViewportMaxZ - ViewportMinZ, 1.0f);
+ Scale = XMVectorReciprocal(Scale);
+
+ XMVECTOR Offset = XMVectorSet(-ViewportX, -ViewportY, -ViewportMinZ, 0.0f);
+ Offset = XMVectorMultiplyAdd(Scale, Offset, D.v);
+
+ XMMATRIX Transform = XMMatrixMultiply(World, View);
+ Transform = XMMatrixMultiply(Transform, Projection);
+ Transform = XMMatrixInverse(NULL, Transform);
+
+ XMVECTOR Result = XMVectorMultiplyAdd(V, Scale, Offset);
+
+ return XMVector3TransformCoord(Result, Transform);
+
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+_Use_decl_annotations_
+inline XMFLOAT3* XMVector3UnprojectStream
+(
+ XMFLOAT3* pOutputStream,
+ size_t OutputStride,
+ const XMFLOAT3* pInputStream,
+ size_t InputStride,
+ size_t VectorCount,
+ float ViewportX,
+ float ViewportY,
+ float ViewportWidth,
+ float ViewportHeight,
+ float ViewportMinZ,
+ float ViewportMaxZ,
+ CXMMATRIX Projection,
+ CXMMATRIX View,
+ CXMMATRIX World)
+{
+ assert(pOutputStream != NULL);
+ assert(pInputStream != NULL);
+
+#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(XM_NO_MISALIGNED_VECTOR_ACCESS) || defined(_XM_ARM_NEON_INTRINSICS_)
+
+ static const XMVECTORF32 D = { -1.0f, 1.0f, 0.0f, 0.0f };
+
+ XMVECTOR Scale = XMVectorSet(ViewportWidth * 0.5f, -ViewportHeight * 0.5f, ViewportMaxZ - ViewportMinZ, 1.0f);
+ Scale = XMVectorReciprocal(Scale);
+
+ XMVECTOR Offset = XMVectorSet(-ViewportX, -ViewportY, -ViewportMinZ, 0.0f);
+ Offset = XMVectorMultiplyAdd(Scale, Offset, D.v);
+
+ XMMATRIX Transform = XMMatrixMultiply(World, View);
+ Transform = XMMatrixMultiply(Transform, Projection);
+ Transform = XMMatrixInverse(NULL, Transform);
+
+ const uint8_t* pInputVector = (const uint8_t*)pInputStream;
+ uint8_t* pOutputVector = (uint8_t*)pOutputStream;
+
+ for (size_t i = 0; i < VectorCount; i++)
+ {
+ XMVECTOR V = XMLoadFloat3((const XMFLOAT3*)pInputVector);
+
+ XMVECTOR Result = XMVectorMultiplyAdd(V, Scale, Offset);
+
+ Result = XMVector3TransformCoord(Result, Transform);
+
+ XMStoreFloat3((XMFLOAT3*)pOutputVector, Result);
+
+ pInputVector += InputStride;
+ pOutputVector += OutputStride;
+ }
+
+ return pOutputStream;
+
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+/****************************************************************************
+ *
+ * 4D Vector
+ *
+ ****************************************************************************/
+
+//------------------------------------------------------------------------------
+// Comparison operations
+//------------------------------------------------------------------------------
+
+//------------------------------------------------------------------------------
+
// Returns true only when all four components of V1 and V2 are exactly equal
// (IEEE semantics: a NaN component makes the result false).
inline bool XMVector4Equal
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_f32[0] == V2.vector4_f32[0]) && (V1.vector4_f32[1] == V2.vector4_f32[1]) && (V1.vector4_f32[2] == V2.vector4_f32[2]) && (V1.vector4_f32[3] == V2.vector4_f32[3])) != 0);
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    __n128 vResult = vceqq_f32( V1, V2 );
    // Interleave bytes so one 32-bit extract carries a byte from every lane;
    // all lanes equal yields 0xFFFFFFFF.
    int8x8x2_t vTemp = vzip_u8(vget_low_u8(vResult), vget_high_u8(vResult));
    vTemp = vzip_u16(vTemp.val[0], vTemp.val[1]);
    return ( vget_lane_u32(vTemp.val[1], 1) == 0xFFFFFFFFU );
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmpeq_ps(V1,V2);
    // movemask packs the four lane sign bits; 0x0f means every lane matched.
    return ((_mm_movemask_ps(vTemp)==0x0f) != 0);
#else
    return XMComparisonAllTrue(XMVector4EqualR(V1, V2));
#endif
}
+
+//------------------------------------------------------------------------------
+
// Comparison-record form of XMVector4Equal: returns XM_CRMASK_CR6TRUE when
// all four components are equal, XM_CRMASK_CR6FALSE when all four differ,
// and 0 for a mixed result. Feed into XMComparisonAllTrue/AnyTrue/etc.
inline uint32_t XMVector4EqualR
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)

    uint32_t CR = 0;

    if ((V1.vector4_f32[0] == V2.vector4_f32[0]) &&
        (V1.vector4_f32[1] == V2.vector4_f32[1]) &&
        (V1.vector4_f32[2] == V2.vector4_f32[2]) &&
        (V1.vector4_f32[3] == V2.vector4_f32[3]))
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if ((V1.vector4_f32[0] != V2.vector4_f32[0]) &&
        (V1.vector4_f32[1] != V2.vector4_f32[1]) &&
        (V1.vector4_f32[2] != V2.vector4_f32[2]) &&
        (V1.vector4_f32[3] != V2.vector4_f32[3]))
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;

#elif defined(_XM_ARM_NEON_INTRINSICS_)
    __n128 vResult = vceqq_f32( V1, V2 );
    // Reduce the four lane masks to a single 32-bit value: one byte per lane.
    int8x8x2_t vTemp = vzip_u8(vget_low_u8(vResult), vget_high_u8(vResult));
    vTemp = vzip_u16(vTemp.val[0], vTemp.val[1]);
    uint32_t r = vget_lane_u32(vTemp.val[1], 1);

    uint32_t CR = 0;
    if ( r == 0xFFFFFFFFU )
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if ( !r )
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmpeq_ps(V1,V2);
    int iTest = _mm_movemask_ps(vTemp);
    uint32_t CR = 0;
    if (iTest==0xf)     // All equal?
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (iTest==0)  // All not equal?
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
+//------------------------------------------------------------------------------
+
// Bitwise (integer) equality: true only when all four 32-bit lanes of V1 and
// V2 are bit-identical. Unlike XMVector4Equal, NaN bit patterns compare equal.
inline bool XMVector4EqualInt
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_u32[0] == V2.vector4_u32[0]) && (V1.vector4_u32[1] == V2.vector4_u32[1]) && (V1.vector4_u32[2] == V2.vector4_u32[2]) && (V1.vector4_u32[3] == V2.vector4_u32[3])) != 0);
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    __n128 vResult = vceqq_u32( V1, V2 );
    // Interleave bytes so one 32-bit extract carries a byte from every lane.
    int8x8x2_t vTemp = vzip_u8(vget_low_u8(vResult), vget_high_u8(vResult));
    vTemp = vzip_u16(vTemp.val[0], vTemp.val[1]);
    return ( vget_lane_u32(vTemp.val[1], 1) == 0xFFFFFFFFU );
#elif defined(_XM_SSE_INTRINSICS_)
    // Integer compare on the raw bits; 0xf movemask means all lanes matched.
    __m128i vTemp = _mm_cmpeq_epi32(_mm_castps_si128(V1),_mm_castps_si128(V2));
    return ((_mm_movemask_ps(_mm_castsi128_ps(vTemp))==0xf) != 0);
#else
    return XMComparisonAllTrue(XMVector4EqualIntR(V1, V2));
#endif
}
+
+//------------------------------------------------------------------------------
+
// Comparison-record form of XMVector4EqualInt (bitwise equality):
// XM_CRMASK_CR6TRUE when all four lanes are bit-identical,
// XM_CRMASK_CR6FALSE when all four differ, 0 for a mixed result.
inline uint32_t XMVector4EqualIntR
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    uint32_t CR = 0;
    if (V1.vector4_u32[0] == V2.vector4_u32[0] &&
        V1.vector4_u32[1] == V2.vector4_u32[1] &&
        V1.vector4_u32[2] == V2.vector4_u32[2] &&
        V1.vector4_u32[3] == V2.vector4_u32[3])
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (V1.vector4_u32[0] != V2.vector4_u32[0] &&
        V1.vector4_u32[1] != V2.vector4_u32[1] &&
        V1.vector4_u32[2] != V2.vector4_u32[2] &&
        V1.vector4_u32[3] != V2.vector4_u32[3])
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;

#elif defined(_XM_ARM_NEON_INTRINSICS_)
    __n128 vResult = vceqq_u32( V1, V2 );
    // Reduce the four lane masks to a single 32-bit value: one byte per lane.
    int8x8x2_t vTemp = vzip_u8(vget_low_u8(vResult), vget_high_u8(vResult));
    vTemp = vzip_u16(vTemp.val[0], vTemp.val[1]);
    uint32_t r = vget_lane_u32(vTemp.val[1], 1);

    uint32_t CR = 0;
    if ( r == 0xFFFFFFFFU )
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if ( !r )
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#elif defined(_XM_SSE_INTRINSICS_)
    __m128i vTemp = _mm_cmpeq_epi32(_mm_castps_si128(V1),_mm_castps_si128(V2));
    int iTest = _mm_movemask_ps(_mm_castsi128_ps(vTemp));
    uint32_t CR = 0;
    if (iTest==0xf)     // All equal?
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (iTest==0)  // All not equal?
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
// Returns true when |V1 - V2| <= Epsilon for all four components
// (per-component tolerance; Epsilon components are expected non-negative).
inline bool XMVector4NearEqual
(
    FXMVECTOR V1,
    FXMVECTOR V2,
    FXMVECTOR Epsilon
)
{
#if defined(_XM_NO_INTRINSICS_)
    float dx, dy, dz, dw;

    dx = fabsf(V1.vector4_f32[0]-V2.vector4_f32[0]);
    dy = fabsf(V1.vector4_f32[1]-V2.vector4_f32[1]);
    dz = fabsf(V1.vector4_f32[2]-V2.vector4_f32[2]);
    dw = fabsf(V1.vector4_f32[3]-V2.vector4_f32[3]);
    return (((dx <= Epsilon.vector4_f32[0]) &&
            (dy <= Epsilon.vector4_f32[1]) &&
            (dz <= Epsilon.vector4_f32[2]) &&
            (dw <= Epsilon.vector4_f32[3])) != 0);
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    __n128 vDelta = vsubq_f32( V1, V2 );
    // vacleq compares absolute values: |delta| <= |Epsilon| per lane.
    __n128 vResult = vacleq_f32( vDelta, Epsilon );
    int8x8x2_t vTemp = vzip_u8(vget_low_u8(vResult), vget_high_u8(vResult));
    vTemp = vzip_u16(vTemp.val[0], vTemp.val[1]);
    return ( vget_lane_u32(vTemp.val[1], 1) == 0xFFFFFFFFU );
#elif defined(_XM_SSE_INTRINSICS_)
    // Get the difference
    XMVECTOR vDelta = _mm_sub_ps(V1,V2);
    // Get the absolute value of the difference (max of delta and -delta)
    XMVECTOR vTemp = _mm_setzero_ps();
    vTemp = _mm_sub_ps(vTemp,vDelta);
    vTemp = _mm_max_ps(vTemp,vDelta);
    vTemp = _mm_cmple_ps(vTemp,Epsilon);
    return ((_mm_movemask_ps(vTemp)==0xf) != 0);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
+//------------------------------------------------------------------------------
+
// Returns true when ANY of the four components of V1 and V2 differ
// (logical negation of XMVector4Equal).
inline bool XMVector4NotEqual
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_f32[0] != V2.vector4_f32[0]) || (V1.vector4_f32[1] != V2.vector4_f32[1]) || (V1.vector4_f32[2] != V2.vector4_f32[2]) || (V1.vector4_f32[3] != V2.vector4_f32[3])) != 0);
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    __n128 vResult = vceqq_f32( V1, V2 );
    // Any lane not equal leaves a zero byte in the reduced mask.
    int8x8x2_t vTemp = vzip_u8(vget_low_u8(vResult), vget_high_u8(vResult));
    vTemp = vzip_u16(vTemp.val[0], vTemp.val[1]);
    return ( vget_lane_u32(vTemp.val[1], 1) != 0xFFFFFFFFU );
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmpneq_ps(V1,V2);
    // Non-zero movemask means at least one lane differed.
    return ((_mm_movemask_ps(vTemp)) != 0);
#else
    return XMComparisonAnyFalse(XMVector4EqualR(V1, V2));
#endif
}
+
+//------------------------------------------------------------------------------
+
// Bitwise inequality: true when ANY of the four 32-bit lanes of V1 and V2
// differ (logical negation of XMVector4EqualInt).
inline bool XMVector4NotEqualInt
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_u32[0] != V2.vector4_u32[0]) || (V1.vector4_u32[1] != V2.vector4_u32[1]) || (V1.vector4_u32[2] != V2.vector4_u32[2]) || (V1.vector4_u32[3] != V2.vector4_u32[3])) != 0);
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    __n128 vResult = vceqq_u32( V1, V2 );
    // Any lane not equal leaves a zero byte in the reduced mask.
    int8x8x2_t vTemp = vzip_u8(vget_low_u8(vResult), vget_high_u8(vResult));
    vTemp = vzip_u16(vTemp.val[0], vTemp.val[1]);
    return ( vget_lane_u32(vTemp.val[1], 1) != 0xFFFFFFFFU );
#elif defined(_XM_SSE_INTRINSICS_)
    // Integer compare-for-equal; any clear movemask bit means a lane differed.
    __m128i vTemp = _mm_cmpeq_epi32(_mm_castps_si128(V1),_mm_castps_si128(V2));
    return ((_mm_movemask_ps(_mm_castsi128_ps(vTemp))!=0xF) != 0);
#else
    return XMComparisonAnyFalse(XMVector4EqualIntR(V1, V2));
#endif
}
+
+//------------------------------------------------------------------------------
+
// Returns true only when all four components of V1 are strictly greater
// than the corresponding components of V2.
inline bool XMVector4Greater
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_f32[0] > V2.vector4_f32[0]) && (V1.vector4_f32[1] > V2.vector4_f32[1]) && (V1.vector4_f32[2] > V2.vector4_f32[2]) && (V1.vector4_f32[3] > V2.vector4_f32[3])) != 0);
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    __n128 vResult = vcgtq_f32( V1, V2 );
    // Reduce the four lane masks; all-greater yields 0xFFFFFFFF.
    int8x8x2_t vTemp = vzip_u8(vget_low_u8(vResult), vget_high_u8(vResult));
    vTemp = vzip_u16(vTemp.val[0], vTemp.val[1]);
    return ( vget_lane_u32(vTemp.val[1], 1) == 0xFFFFFFFFU );
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmpgt_ps(V1,V2);
    return ((_mm_movemask_ps(vTemp)==0x0f) != 0);
#else
    return XMComparisonAllTrue(XMVector4GreaterR(V1, V2));
#endif
}
+
+//------------------------------------------------------------------------------
+
// Comparison-record form of XMVector4Greater: XM_CRMASK_CR6TRUE when all
// components of V1 are > V2, XM_CRMASK_CR6FALSE when all are <= V2,
// 0 for a mixed result.
inline uint32_t XMVector4GreaterR
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    uint32_t CR = 0;
    if (V1.vector4_f32[0] > V2.vector4_f32[0] &&
        V1.vector4_f32[1] > V2.vector4_f32[1] &&
        V1.vector4_f32[2] > V2.vector4_f32[2] &&
        V1.vector4_f32[3] > V2.vector4_f32[3])
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (V1.vector4_f32[0] <= V2.vector4_f32[0] &&
        V1.vector4_f32[1] <= V2.vector4_f32[1] &&
        V1.vector4_f32[2] <= V2.vector4_f32[2] &&
        V1.vector4_f32[3] <= V2.vector4_f32[3])
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;

#elif defined(_XM_ARM_NEON_INTRINSICS_)
    __n128 vResult = vcgtq_f32( V1, V2 );
    // Reduce the four lane masks to a single 32-bit value: one byte per lane.
    int8x8x2_t vTemp = vzip_u8(vget_low_u8(vResult), vget_high_u8(vResult));
    vTemp = vzip_u16(vTemp.val[0], vTemp.val[1]);
    uint32_t r = vget_lane_u32(vTemp.val[1], 1);

    uint32_t CR = 0;
    if ( r == 0xFFFFFFFFU )
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if ( !r )
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#elif defined(_XM_SSE_INTRINSICS_)
    uint32_t CR = 0;
    XMVECTOR vTemp = _mm_cmpgt_ps(V1,V2);
    int iTest = _mm_movemask_ps(vTemp);
    if (iTest==0xf) {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (!iTest)
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
+//------------------------------------------------------------------------------
+
// Returns true only when all four components of V1 are greater than or
// equal to the corresponding components of V2.
inline bool XMVector4GreaterOrEqual
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_f32[0] >= V2.vector4_f32[0]) && (V1.vector4_f32[1] >= V2.vector4_f32[1]) && (V1.vector4_f32[2] >= V2.vector4_f32[2]) && (V1.vector4_f32[3] >= V2.vector4_f32[3])) != 0);
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    __n128 vResult = vcgeq_f32( V1, V2 );
    // Reduce the four lane masks; all-true yields 0xFFFFFFFF.
    int8x8x2_t vTemp = vzip_u8(vget_low_u8(vResult), vget_high_u8(vResult));
    vTemp = vzip_u16(vTemp.val[0], vTemp.val[1]);
    return ( vget_lane_u32(vTemp.val[1], 1) == 0xFFFFFFFFU );
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmpge_ps(V1,V2);
    return ((_mm_movemask_ps(vTemp)==0x0f) != 0);
#else
    return XMComparisonAllTrue(XMVector4GreaterOrEqualR(V1, V2));
#endif
}
+
+//------------------------------------------------------------------------------
+
// Comparison-record form of XMVector4GreaterOrEqual: XM_CRMASK_CR6TRUE when
// all components of V1 are >= V2, XM_CRMASK_CR6FALSE when all are < V2,
// 0 for a mixed result.
inline uint32_t XMVector4GreaterOrEqualR
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    uint32_t CR = 0;
    if ((V1.vector4_f32[0] >= V2.vector4_f32[0]) &&
        (V1.vector4_f32[1] >= V2.vector4_f32[1]) &&
        (V1.vector4_f32[2] >= V2.vector4_f32[2]) &&
        (V1.vector4_f32[3] >= V2.vector4_f32[3]))
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if ((V1.vector4_f32[0] < V2.vector4_f32[0]) &&
        (V1.vector4_f32[1] < V2.vector4_f32[1]) &&
        (V1.vector4_f32[2] < V2.vector4_f32[2]) &&
        (V1.vector4_f32[3] < V2.vector4_f32[3]))
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;

#elif defined(_XM_ARM_NEON_INTRINSICS_)
    __n128 vResult = vcgeq_f32( V1, V2 );
    // Reduce the four lane masks to a single 32-bit value: one byte per lane.
    int8x8x2_t vTemp = vzip_u8(vget_low_u8(vResult), vget_high_u8(vResult));
    vTemp = vzip_u16(vTemp.val[0], vTemp.val[1]);
    uint32_t r = vget_lane_u32(vTemp.val[1], 1);

    uint32_t CR = 0;
    if ( r == 0xFFFFFFFFU )
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if ( !r )
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#elif defined(_XM_SSE_INTRINSICS_)
    uint32_t CR = 0;
    XMVECTOR vTemp = _mm_cmpge_ps(V1,V2);
    int iTest = _mm_movemask_ps(vTemp);
    if (iTest==0x0f)
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (!iTest)
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
+//------------------------------------------------------------------------------
+
// Returns true only when all four components of V1 are strictly less than
// the corresponding components of V2.
inline bool XMVector4Less
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_f32[0] < V2.vector4_f32[0]) && (V1.vector4_f32[1] < V2.vector4_f32[1]) && (V1.vector4_f32[2] < V2.vector4_f32[2]) && (V1.vector4_f32[3] < V2.vector4_f32[3])) != 0);
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    __n128 vResult = vcltq_f32( V1, V2 );
    // Reduce the four lane masks; all-true yields 0xFFFFFFFF.
    int8x8x2_t vTemp = vzip_u8(vget_low_u8(vResult), vget_high_u8(vResult));
    vTemp = vzip_u16(vTemp.val[0], vTemp.val[1]);
    return ( vget_lane_u32(vTemp.val[1], 1) == 0xFFFFFFFFU );
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmplt_ps(V1,V2);
    return ((_mm_movemask_ps(vTemp)==0x0f) != 0);
#else
    // V1 < V2 is equivalent to V2 > V1.
    return XMComparisonAllTrue(XMVector4GreaterR(V2, V1));
#endif
}
+
+//------------------------------------------------------------------------------
+
// Returns true only when all four components of V1 are less than or equal
// to the corresponding components of V2.
inline bool XMVector4LessOrEqual
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_f32[0] <= V2.vector4_f32[0]) && (V1.vector4_f32[1] <= V2.vector4_f32[1]) && (V1.vector4_f32[2] <= V2.vector4_f32[2]) && (V1.vector4_f32[3] <= V2.vector4_f32[3])) != 0);
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    __n128 vResult = vcleq_f32( V1, V2 );
    // Reduce the four lane masks; all-true yields 0xFFFFFFFF.
    int8x8x2_t vTemp = vzip_u8(vget_low_u8(vResult), vget_high_u8(vResult));
    vTemp = vzip_u16(vTemp.val[0], vTemp.val[1]);
    return ( vget_lane_u32(vTemp.val[1], 1) == 0xFFFFFFFFU );
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmple_ps(V1,V2);
    return ((_mm_movemask_ps(vTemp)==0x0f) != 0);
#else
    // V1 <= V2 is equivalent to V2 >= V1.
    return XMComparisonAllTrue(XMVector4GreaterOrEqualR(V2, V1));
#endif
}
+
+//------------------------------------------------------------------------------
+
// Returns true only when every component of V lies within the symmetric
// range [-Bounds, +Bounds] for the matching component.
inline bool XMVector4InBounds
(
    FXMVECTOR V,
    FXMVECTOR Bounds
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V.vector4_f32[0] <= Bounds.vector4_f32[0] && V.vector4_f32[0] >= -Bounds.vector4_f32[0]) &&
        (V.vector4_f32[1] <= Bounds.vector4_f32[1] && V.vector4_f32[1] >= -Bounds.vector4_f32[1]) &&
        (V.vector4_f32[2] <= Bounds.vector4_f32[2] && V.vector4_f32[2] >= -Bounds.vector4_f32[2]) &&
        (V.vector4_f32[3] <= Bounds.vector4_f32[3] && V.vector4_f32[3] >= -Bounds.vector4_f32[3])) != 0);
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    // Test if less than or equal
    __n128 vTemp1 = vcleq_f32(V,Bounds);
    // Negate the bounds
    __n128 vTemp2 = vnegq_f32(Bounds);
    // Test if greater or equal (Reversed)
    vTemp2 = vcleq_f32(vTemp2,V);
    // Blend answers
    vTemp1 = vandq_u32(vTemp1,vTemp2);
    // in bounds?
    int8x8x2_t vTemp = vzip_u8(vget_low_u8(vTemp1), vget_high_u8(vTemp1));
    vTemp = vzip_u16(vTemp.val[0], vTemp.val[1]);
    return ( vget_lane_u32(vTemp.val[1], 1) == 0xFFFFFFFFU );
#elif defined(_XM_SSE_INTRINSICS_)
    // Test if less than or equal
    XMVECTOR vTemp1 = _mm_cmple_ps(V,Bounds);
    // Negate the bounds
    XMVECTOR vTemp2 = _mm_mul_ps(Bounds,g_XMNegativeOne);
    // Test if greater or equal (Reversed)
    vTemp2 = _mm_cmple_ps(vTemp2,V);
    // Blend answers
    vTemp1 = _mm_and_ps(vTemp1,vTemp2);
    // All in bounds?
    return ((_mm_movemask_ps(vTemp1)==0x0f) != 0);
#else
    return XMComparisonAllInBounds(XMVector4InBoundsR(V, Bounds));
#endif
}
+
+
+//------------------------------------------------------------------------------
+
// Returns true when ANY of the four components of V is NaN.
inline bool XMVector4IsNaN
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (XMISNAN(V.vector4_f32[0]) ||
            XMISNAN(V.vector4_f32[1]) ||
            XMISNAN(V.vector4_f32[2]) ||
            XMISNAN(V.vector4_f32[3]));
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    // Test against itself. NaN is always not equal
    __n128 vTempNan = vceqq_f32( V, V );
    int8x8x2_t vTemp = vzip_u8(vget_low_u8(vTempNan), vget_high_u8(vTempNan));
    vTemp = vzip_u16(vTemp.val[0], vTemp.val[1]);
    // If any are NaN, the mask is zero
    return ( vget_lane_u32(vTemp.val[1], 1) != 0xFFFFFFFFU );
#elif defined(_XM_SSE_INTRINSICS_)
    // Test against itself. NaN is always not equal
    XMVECTOR vTempNan = _mm_cmpneq_ps(V,V);
    // If any are NaN, the mask is non-zero
    return (_mm_movemask_ps(vTempNan)!=0);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
+//------------------------------------------------------------------------------
+
// Returns true when ANY of the four components of V is +infinity or
// -infinity (sign is masked off before the compare).
inline bool XMVector4IsInfinite
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)

    return (XMISINF(V.vector4_f32[0]) ||
            XMISINF(V.vector4_f32[1]) ||
            XMISINF(V.vector4_f32[2]) ||
            XMISINF(V.vector4_f32[3]));

#elif defined(_XM_ARM_NEON_INTRINSICS_)
    // Mask off the sign bit
    __n128 vTempInf = vandq_u32( V, g_XMAbsMask );
    // Compare to infinity
    vTempInf = vceqq_f32(vTempInf, g_XMInfinity );
    // If any are infinity, the signs are true.
    int8x8x2_t vTemp = vzip_u8(vget_low_u8(vTempInf), vget_high_u8(vTempInf));
    vTemp = vzip_u16(vTemp.val[0], vTemp.val[1]);
    return ( vget_lane_u32(vTemp.val[1], 1) != 0 );
#elif defined(_XM_SSE_INTRINSICS_)
    // Mask off the sign bit
    XMVECTOR vTemp = _mm_and_ps(V,g_XMAbsMask);
    // Compare to infinity
    vTemp = _mm_cmpeq_ps(vTemp,g_XMInfinity);
    // If any are infinity, the signs are true.
    return (_mm_movemask_ps(vTemp) != 0);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
+//------------------------------------------------------------------------------
+// Computation operations
+//------------------------------------------------------------------------------
+
+//------------------------------------------------------------------------------
+
// Computes the 4D dot product of V1 and V2 and replicates the scalar result
// into all four components of the returned vector.
inline XMVECTOR XMVector4Dot
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTOR Result;
    Result.vector4_f32[0] =
    Result.vector4_f32[1] =
    Result.vector4_f32[2] =
    Result.vector4_f32[3] = V1.vector4_f32[0] * V2.vector4_f32[0] + V1.vector4_f32[1] * V2.vector4_f32[1] + V1.vector4_f32[2] * V2.vector4_f32[2] + V1.vector4_f32[3] * V2.vector4_f32[3];
    return Result;

#elif defined(_XM_ARM_NEON_INTRINSICS_)
    // Multiply lanes, then pairwise-add the halves to horizontally reduce.
    __n128 vTemp = vmulq_f32( V1, V2 );
    __n64 v1 = vget_low_f32( vTemp );
    __n64 v2 = vget_high_f32( vTemp );
    v1 = vpadd_f32( v1, v1 );
    v2 = vpadd_f32( v2, v2 );
    v1 = vadd_f32( v1, v2 );
    return vcombine_f32( v1, v1 );
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp2 = V2;
    XMVECTOR vTemp = _mm_mul_ps(V1,vTemp2);
    vTemp2 = _mm_shuffle_ps(vTemp2,vTemp,_MM_SHUFFLE(1,0,0,0)); // Copy X to the Z position and Y to the W position
    vTemp2 = _mm_add_ps(vTemp2,vTemp);          // Add Z = X+Z; W = Y+W;
    vTemp = _mm_shuffle_ps(vTemp,vTemp2,_MM_SHUFFLE(0,3,0,0));  // Copy W to the Z position
    vTemp = _mm_add_ps(vTemp,vTemp2);           // Add Z and W together
    return XM_PERMUTE_PS(vTemp,_MM_SHUFFLE(2,2,2,2));    // Splat Z and return
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
+//------------------------------------------------------------------------------
+
// Computes the 4D cross product of three vectors: the vector orthogonal to
// V1, V2 and V3 (the 4D generalization of the 3D cross product, built from
// 3x3 cofactor determinants of the three inputs).
inline XMVECTOR XMVector4Cross
(
    FXMVECTOR V1,
    FXMVECTOR V2,
    FXMVECTOR V3
)
{
    // [ ((v2.z*v3.w-v2.w*v3.z)*v1.y)-((v2.y*v3.w-v2.w*v3.y)*v1.z)+((v2.y*v3.z-v2.z*v3.y)*v1.w),
    //   ((v2.w*v3.z-v2.z*v3.w)*v1.x)-((v2.w*v3.x-v2.x*v3.w)*v1.z)+((v2.z*v3.x-v2.x*v3.z)*v1.w),
    //   ((v2.y*v3.w-v2.w*v3.y)*v1.x)-((v2.x*v3.w-v2.w*v3.x)*v1.y)+((v2.x*v3.y-v2.y*v3.x)*v1.w),
    //   ((v2.z*v3.y-v2.y*v3.z)*v1.x)-((v2.z*v3.x-v2.x*v3.z)*v1.y)+((v2.y*v3.x-v2.x*v3.y)*v1.z) ]

#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR Result;

    Result.vector4_f32[0] = (((V2.vector4_f32[2]*V3.vector4_f32[3])-(V2.vector4_f32[3]*V3.vector4_f32[2]))*V1.vector4_f32[1])-(((V2.vector4_f32[1]*V3.vector4_f32[3])-(V2.vector4_f32[3]*V3.vector4_f32[1]))*V1.vector4_f32[2])+(((V2.vector4_f32[1]*V3.vector4_f32[2])-(V2.vector4_f32[2]*V3.vector4_f32[1]))*V1.vector4_f32[3]);
    Result.vector4_f32[1] = (((V2.vector4_f32[3]*V3.vector4_f32[2])-(V2.vector4_f32[2]*V3.vector4_f32[3]))*V1.vector4_f32[0])-(((V2.vector4_f32[3]*V3.vector4_f32[0])-(V2.vector4_f32[0]*V3.vector4_f32[3]))*V1.vector4_f32[2])+(((V2.vector4_f32[2]*V3.vector4_f32[0])-(V2.vector4_f32[0]*V3.vector4_f32[2]))*V1.vector4_f32[3]);
    Result.vector4_f32[2] = (((V2.vector4_f32[1]*V3.vector4_f32[3])-(V2.vector4_f32[3]*V3.vector4_f32[1]))*V1.vector4_f32[0])-(((V2.vector4_f32[0]*V3.vector4_f32[3])-(V2.vector4_f32[3]*V3.vector4_f32[0]))*V1.vector4_f32[1])+(((V2.vector4_f32[0]*V3.vector4_f32[1])-(V2.vector4_f32[1]*V3.vector4_f32[0]))*V1.vector4_f32[3]);
    Result.vector4_f32[3] = (((V2.vector4_f32[2]*V3.vector4_f32[1])-(V2.vector4_f32[1]*V3.vector4_f32[2]))*V1.vector4_f32[0])-(((V2.vector4_f32[2]*V3.vector4_f32[0])-(V2.vector4_f32[0]*V3.vector4_f32[2]))*V1.vector4_f32[1])+(((V2.vector4_f32[1]*V3.vector4_f32[0])-(V2.vector4_f32[0]*V3.vector4_f32[1]))*V1.vector4_f32[2]);
    return Result;

#elif defined(_XM_ARM_NEON_INTRINSICS_)
    // Build the permuted operands from 64-bit halves; vbsl with a mask that
    // selects lane 0 from its second operand is used as a cheap 2-way blend.
    const __n64 select = vget_low_f32( g_XMMaskX );

    // Term1: V2zwyz * V3wzwy
    const __n64 v2xy = vget_low_f32(V2);
    const __n64 v2zw = vget_high_f32(V2);
    const __n64 v2yx = vrev64_f32(v2xy);
    const __n64 v2wz = vrev64_f32(v2zw);
    const __n64 v2yz = vbsl_f32( select, v2yx, v2wz );

    const __n64 v3zw = vget_high_f32(V3);
    const __n64 v3wz = vrev64_f32(v3zw);
    const __n64 v3xy = vget_low_f32(V3);
    const __n64 v3wy = vbsl_f32( select, v3wz, v3xy );

    __n128 vTemp1 = vcombine_f32(v2zw,v2yz);
    __n128 vTemp2 = vcombine_f32(v3wz,v3wy);
    __n128 vResult = vmulq_f32( vTemp1, vTemp2 );

    // - V2wzwy * V3zwyz
    const __n64 v2wy = vbsl_f32( select, v2wz, v2xy );

    const __n64 v3yx = vrev64_f32(v3xy);
    const __n64 v3yz = vbsl_f32( select, v3yx, v3wz );

    vTemp1 = vcombine_f32(v2wz,v2wy);
    vTemp2 = vcombine_f32(v3zw,v3yz);
    vResult = vmlsq_f32( vResult, vTemp1, vTemp2 );

    // term1 * V1yxxx
    const __n64 v1xy = vget_low_f32(V1);
    const __n64 v1yx = vrev64_f32(v1xy);

    vTemp1 = vcombine_f32( v1yx, vdup_lane_f32( v1yx, 1 ) );
    vResult = vmulq_f32( vResult, vTemp1 );

    // Term2: V2ywxz * V3wxwx
    const __n64 v2yw = vrev64_f32(v2wy);
    const __n64 v2xz = vbsl_f32( select, v2xy, v2wz );

    const __n64 v3wx = vbsl_f32( select, v3wz, v3yx );

    vTemp1 = vcombine_f32(v2yw,v2xz);
    vTemp2 = vcombine_f32(v3wx,v3wx);
    __n128 vTerm = vmulq_f32( vTemp1, vTemp2 );

    // - V2wxwx * V3ywxz
    const __n64 v2wx = vbsl_f32( select, v2wz, v2yx );

    const __n64 v3yw = vrev64_f32(v3wy);
    const __n64 v3xz = vbsl_f32( select, v3xy, v3wz );

    vTemp1 = vcombine_f32(v2wx,v2wx);
    vTemp2 = vcombine_f32(v3yw,v3xz);
    vTerm = vmlsq_f32( vTerm, vTemp1, vTemp2 );

    // vResult - term2 * V1zzyy
    const __n64 v1zw = vget_high_f32(V1);

    vTemp1 = vcombine_f32( vdup_lane_f32(v1zw, 0), vdup_lane_f32(v1yx, 0) );
    vResult = vmlsq_f32( vResult, vTerm, vTemp1 );

    // Term3: V2yzxy * V3zxyx
    const __n64 v3zx = vrev64_f32(v3xz);

    vTemp1 = vcombine_f32(v2yz,v2xy);
    vTemp2 = vcombine_f32(v3zx,v3yx);
    vTerm = vmulq_f32( vTemp1, vTemp2 );

    // - V2zxyx * V3yzxy
    const __n64 v2zx = vrev64_f32(v2xz);

    vTemp1 = vcombine_f32(v2zx,v2yx);
    vTemp2 = vcombine_f32(v3yz,v3xy);
    vTerm = vmlsq_f32( vTerm, vTemp1, vTemp2 );

    // vResult + term3 * V1wwwz
    const __n64 v1wz = vrev64_f32(v1zw);

    vTemp1 = vcombine_f32( vdup_lane_f32( v1wz, 0 ), v1wz );
    return vmlaq_f32( vResult, vTerm, vTemp1 );
#elif defined(_XM_SSE_INTRINSICS_)
    // Three (permute * permute - permute * permute) * V1-splat terms,
    // accumulated exactly as in the formula above.
    // V2zwyz * V3wzwy
    XMVECTOR vResult = XM_PERMUTE_PS(V2,_MM_SHUFFLE(2,1,3,2));
    XMVECTOR vTemp3 = XM_PERMUTE_PS(V3,_MM_SHUFFLE(1,3,2,3));
    vResult = _mm_mul_ps(vResult,vTemp3);
    // - V2wzwy * V3zwyz
    XMVECTOR vTemp2 = XM_PERMUTE_PS(V2,_MM_SHUFFLE(1,3,2,3));
    vTemp3 = XM_PERMUTE_PS(vTemp3,_MM_SHUFFLE(1,3,0,1));
    vTemp2 = _mm_mul_ps(vTemp2,vTemp3);
    vResult = _mm_sub_ps(vResult,vTemp2);
    // term1 * V1yxxx
    XMVECTOR vTemp1 = XM_PERMUTE_PS(V1,_MM_SHUFFLE(0,0,0,1));
    vResult = _mm_mul_ps(vResult,vTemp1);

    // V2ywxz * V3wxwx
    vTemp2 = XM_PERMUTE_PS(V2,_MM_SHUFFLE(2,0,3,1));
    vTemp3 = XM_PERMUTE_PS(V3,_MM_SHUFFLE(0,3,0,3));
    vTemp3 = _mm_mul_ps(vTemp3,vTemp2);
    // - V2wxwx * V3ywxz
    vTemp2 = XM_PERMUTE_PS(vTemp2,_MM_SHUFFLE(2,1,2,1));
    vTemp1 = XM_PERMUTE_PS(V3,_MM_SHUFFLE(2,0,3,1));
    vTemp2 = _mm_mul_ps(vTemp2,vTemp1);
    vTemp3 = _mm_sub_ps(vTemp3,vTemp2);
    // vResult - temp * V1zzyy
    vTemp1 = XM_PERMUTE_PS(V1,_MM_SHUFFLE(1,1,2,2));
    vTemp1 = _mm_mul_ps(vTemp1,vTemp3);
    vResult = _mm_sub_ps(vResult,vTemp1);

    // V2yzxy * V3zxyx
    vTemp2 = XM_PERMUTE_PS(V2,_MM_SHUFFLE(1,0,2,1));
    vTemp3 = XM_PERMUTE_PS(V3,_MM_SHUFFLE(0,1,0,2));
    vTemp3 = _mm_mul_ps(vTemp3,vTemp2);
    // - V2zxyx * V3yzxy
    vTemp2 = XM_PERMUTE_PS(vTemp2,_MM_SHUFFLE(2,0,2,1));
    vTemp1 = XM_PERMUTE_PS(V3,_MM_SHUFFLE(1,0,2,1));
    vTemp1 = _mm_mul_ps(vTemp1,vTemp2);
    vTemp3 = _mm_sub_ps(vTemp3,vTemp1);
    // vResult + term * V1wwwz
    vTemp1 = XM_PERMUTE_PS(V1,_MM_SHUFFLE(2,3,3,3));
    vTemp3 = _mm_mul_ps(vTemp3,vTemp1);
    vResult = _mm_add_ps(vResult,vTemp3);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
+//------------------------------------------------------------------------------
+
+// Returns the squared 4D length |V|^2 replicated into all four components.
+inline XMVECTOR XMVector4LengthSq
+(
+    FXMVECTOR V
+)
+{
+    // |V|^2 is just the 4D dot product of the vector with itself.
+    XMVECTOR LengthSq = XMVector4Dot(V, V);
+    return LengthSq;
+}
+
+//------------------------------------------------------------------------------
+
+// Returns an estimate of 1/|V| (4D reciprocal length), replicated to all four
+// components.  The "Est" variant trades accuracy for speed by using the
+// hardware reciprocal-sqrt estimate instruction where one exists.
+inline XMVECTOR XMVector4ReciprocalLengthEst
+(
+    FXMVECTOR V
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+
+    XMVECTOR Result;
+
+    Result = XMVector4LengthSq(V);
+    Result = XMVectorReciprocalSqrtEst(Result);
+
+    return Result;
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+    // Dot4: square each lane, then pairwise-add low/high halves so every
+    // lane of v1 holds x*x + y*y + z*z + w*w.
+    __n128 vTemp = vmulq_f32( V, V );
+    __n64 v1 = vget_low_f32( vTemp );
+    __n64 v2 = vget_high_f32( vTemp );
+    v1 = vpadd_f32( v1, v1 );
+    v2 = vpadd_f32( v2, v2 );
+    v1 = vadd_f32( v1, v2 );
+    // Reciprocal sqrt (estimate) -- single VRSQRTE, no Newton-Raphson step
+    v2 = vrsqrte_f32( v1 );
+    return vcombine_f32(v2, v2);
+#elif defined(_XM_SSE_INTRINSICS_)
+    // Perform the dot product on x,y,z and w
+    XMVECTOR vLengthSq = _mm_mul_ps(V,V);
+    // vTemp has z and w
+    XMVECTOR vTemp = XM_PERMUTE_PS(vLengthSq,_MM_SHUFFLE(3,2,3,2));
+    // x+z, y+w
+    vLengthSq = _mm_add_ps(vLengthSq,vTemp);
+    // x+z,x+z,x+z,y+w
+    vLengthSq = XM_PERMUTE_PS(vLengthSq,_MM_SHUFFLE(1,0,0,0));
+    // ??,??,y+w,y+w
+    vTemp = _mm_shuffle_ps(vTemp,vLengthSq,_MM_SHUFFLE(3,3,0,0));
+    // ??,??,x+z+y+w,??
+    vLengthSq = _mm_add_ps(vLengthSq,vTemp);
+    // Splat the length
+    vLengthSq = XM_PERMUTE_PS(vLengthSq,_MM_SHUFFLE(2,2,2,2));
+    // Get the reciprocal (RSQRTPS estimate, ~12 bits of precision)
+    vLengthSq = _mm_rsqrt_ps(vLengthSq);
+    return vLengthSq;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Returns 1/|V| (full-precision 4D reciprocal length) replicated to all four
+// components.  Unlike the Est variant, this path refines the hardware
+// estimate (NEON) or uses sqrt + divide (SSE).
+inline XMVECTOR XMVector4ReciprocalLength
+(
+    FXMVECTOR V
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+
+    XMVECTOR Result;
+
+    Result = XMVector4LengthSq(V);
+    Result = XMVectorReciprocalSqrt(Result);
+
+    return Result;
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+    // Dot4: square + pairwise adds leave the dot product in both lanes of v1.
+    __n128 vTemp = vmulq_f32( V, V );
+    __n64 v1 = vget_low_f32( vTemp );
+    __n64 v2 = vget_high_f32( vTemp );
+    v1 = vpadd_f32( v1, v1 );
+    v2 = vpadd_f32( v2, v2 );
+    v1 = vadd_f32( v1, v2 );
+    // Reciprocal sqrt: VRSQRTE estimate refined by two Newton-Raphson
+    // iterations via VRSQRTS.
+    __n64 S0 = vrsqrte_f32(v1);
+    __n64 P0 = vmul_f32( v1, S0 );
+    __n64 R0 = vrsqrts_f32( P0, S0 );
+    __n64 S1 = vmul_f32( S0, R0 );
+    __n64 P1 = vmul_f32( v1, S1 );
+    __n64 R1 = vrsqrts_f32( P1, S1 );
+    __n64 Result = vmul_f32( S1, R1 );
+    return vcombine_f32( Result, Result );
+#elif defined(_XM_SSE_INTRINSICS_)
+    // Perform the dot product on x,y,z and w
+    XMVECTOR vLengthSq = _mm_mul_ps(V,V);
+    // vTemp has z and w
+    XMVECTOR vTemp = XM_PERMUTE_PS(vLengthSq,_MM_SHUFFLE(3,2,3,2));
+    // x+z, y+w
+    vLengthSq = _mm_add_ps(vLengthSq,vTemp);
+    // x+z,x+z,x+z,y+w
+    vLengthSq = XM_PERMUTE_PS(vLengthSq,_MM_SHUFFLE(1,0,0,0));
+    // ??,??,y+w,y+w
+    vTemp = _mm_shuffle_ps(vTemp,vLengthSq,_MM_SHUFFLE(3,3,0,0));
+    // ??,??,x+z+y+w,??
+    vLengthSq = _mm_add_ps(vLengthSq,vTemp);
+    // Splat the length
+    vLengthSq = XM_PERMUTE_PS(vLengthSq,_MM_SHUFFLE(2,2,2,2));
+    // Get the reciprocal
+    vLengthSq = _mm_sqrt_ps(vLengthSq);
+    // Accurate!  Full divide instead of RCPPS estimate.
+    vLengthSq = _mm_div_ps(g_XMOne,vLengthSq);
+    return vLengthSq;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Returns an estimate of the 4D length |V| replicated to all four components.
+inline XMVECTOR XMVector4LengthEst
+(
+    FXMVECTOR V
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+
+    XMVECTOR Result;
+
+    Result = XMVector4LengthSq(V);
+    Result = XMVectorSqrtEst(Result);
+
+    return Result;
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+    // Dot4
+    __n128 vTemp = vmulq_f32( V, V );
+    __n64 v1 = vget_low_f32( vTemp );
+    __n64 v2 = vget_high_f32( vTemp );
+    v1 = vpadd_f32( v1, v1 );
+    v2 = vpadd_f32( v2, v2 );
+    v1 = vadd_f32( v1, v2 );
+    const __n64 zero = vdup_n_u32(0);
+    __n64 VEqualsZero = vceq_f32( v1, zero );
+    // Sqrt (estimate): sqrt(x) computed as x * rsqrte(x); the rsqrte of a
+    // zero length is infinity, so the zero mask above forces the result to 0
+    // for a zero-length input instead of NaN (0 * inf).
+    __n64 Result = vrsqrte_f32( v1 );
+    Result = vmul_f32( v1, Result );
+    Result = vbsl_f32( VEqualsZero, zero, Result );
+    return vcombine_f32( Result, Result );
+#elif defined(_XM_SSE_INTRINSICS_)
+    // Perform the dot product on x,y,z and w
+    XMVECTOR vLengthSq = _mm_mul_ps(V,V);
+    // vTemp has z and w
+    XMVECTOR vTemp = XM_PERMUTE_PS(vLengthSq,_MM_SHUFFLE(3,2,3,2));
+    // x+z, y+w
+    vLengthSq = _mm_add_ps(vLengthSq,vTemp);
+    // x+z,x+z,x+z,y+w
+    vLengthSq = XM_PERMUTE_PS(vLengthSq,_MM_SHUFFLE(1,0,0,0));
+    // ??,??,y+w,y+w
+    vTemp = _mm_shuffle_ps(vTemp,vLengthSq,_MM_SHUFFLE(3,3,0,0));
+    // ??,??,x+z+y+w,??
+    vLengthSq = _mm_add_ps(vLengthSq,vTemp);
+    // Splat the length
+    vLengthSq = XM_PERMUTE_PS(vLengthSq,_MM_SHUFFLE(2,2,2,2));
+    // Prepare for the division
+    vLengthSq = _mm_sqrt_ps(vLengthSq);
+    return vLengthSq;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Returns the full-precision 4D length |V| replicated to all four components.
+inline XMVECTOR XMVector4Length
+(
+    FXMVECTOR V
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+
+    XMVECTOR Result;
+
+    Result = XMVector4LengthSq(V);
+    Result = XMVectorSqrt(Result);
+
+    return Result;
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+    // Dot4
+    __n128 vTemp = vmulq_f32( V, V );
+    __n64 v1 = vget_low_f32( vTemp );
+    __n64 v2 = vget_high_f32( vTemp );
+    v1 = vpadd_f32( v1, v1 );
+    v2 = vpadd_f32( v2, v2 );
+    v1 = vadd_f32( v1, v2 );
+    const __n64 zero = vdup_n_u32(0);
+    __n64 VEqualsZero = vceq_f32( v1, zero );
+    // Sqrt: rsqrt estimate refined with two Newton-Raphson steps, then
+    // multiplied by |V|^2 to obtain sqrt.  A zero-length input would give
+    // 0 * inf = NaN, so the zero mask forces that case to 0.
+    __n64 S0 = vrsqrte_f32( v1 );
+    __n64 P0 = vmul_f32( v1, S0 );
+    __n64 R0 = vrsqrts_f32( P0, S0 );
+    __n64 S1 = vmul_f32( S0, R0 );
+    __n64 P1 = vmul_f32( v1, S1 );
+    __n64 R1 = vrsqrts_f32( P1, S1 );
+    __n64 Result = vmul_f32( S1, R1 );
+    Result = vmul_f32( v1, Result );
+    Result = vbsl_f32( VEqualsZero, zero, Result );
+    return vcombine_f32( Result, Result );
+#elif defined(_XM_SSE_INTRINSICS_)
+    // Perform the dot product on x,y,z and w
+    XMVECTOR vLengthSq = _mm_mul_ps(V,V);
+    // vTemp has z and w
+    XMVECTOR vTemp = XM_PERMUTE_PS(vLengthSq,_MM_SHUFFLE(3,2,3,2));
+    // x+z, y+w
+    vLengthSq = _mm_add_ps(vLengthSq,vTemp);
+    // x+z,x+z,x+z,y+w
+    vLengthSq = XM_PERMUTE_PS(vLengthSq,_MM_SHUFFLE(1,0,0,0));
+    // ??,??,y+w,y+w
+    vTemp = _mm_shuffle_ps(vTemp,vLengthSq,_MM_SHUFFLE(3,3,0,0));
+    // ??,??,x+z+y+w,??
+    vLengthSq = _mm_add_ps(vLengthSq,vTemp);
+    // Splat the length
+    vLengthSq = XM_PERMUTE_PS(vLengthSq,_MM_SHUFFLE(2,2,2,2));
+    // Prepare for the division
+    vLengthSq = _mm_sqrt_ps(vLengthSq);
+    return vLengthSq;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+// XMVector4NormalizeEst uses a reciprocal estimate and
+// returns QNaN on zero and infinite vectors.
+
+// Fast approximate normalization: V * (1/|V|) using the reciprocal-sqrt
+// estimate.  Per the comment above, zero and infinite vectors yield QNaN
+// rather than being special-cased.
+inline XMVECTOR XMVector4NormalizeEst
+(
+    FXMVECTOR V
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+
+    XMVECTOR Result;
+    Result = XMVector4ReciprocalLength(V);
+    Result = XMVectorMultiply(V, Result);
+    return Result;
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+    // Dot4
+    __n128 vTemp = vmulq_f32( V, V );
+    __n64 v1 = vget_low_f32( vTemp );
+    __n64 v2 = vget_high_f32( vTemp );
+    v1 = vpadd_f32( v1, v1 );
+    v2 = vpadd_f32( v2, v2 );
+    v1 = vadd_f32( v1, v2 );
+    // Reciprocal sqrt (estimate) -- no refinement, no zero/infinity guards
+    v2 = vrsqrte_f32( v1 );
+    // Normalize
+    return vmulq_f32( V, vcombine_f32(v2,v2) );
+#elif defined(_XM_SSE_INTRINSICS_)
+    // Perform the dot product on x,y,z and w
+    XMVECTOR vLengthSq = _mm_mul_ps(V,V);
+    // vTemp has z and w
+    XMVECTOR vTemp = XM_PERMUTE_PS(vLengthSq,_MM_SHUFFLE(3,2,3,2));
+    // x+z, y+w
+    vLengthSq = _mm_add_ps(vLengthSq,vTemp);
+    // x+z,x+z,x+z,y+w
+    vLengthSq = XM_PERMUTE_PS(vLengthSq,_MM_SHUFFLE(1,0,0,0));
+    // ??,??,y+w,y+w
+    vTemp = _mm_shuffle_ps(vTemp,vLengthSq,_MM_SHUFFLE(3,3,0,0));
+    // ??,??,x+z+y+w,??
+    vLengthSq = _mm_add_ps(vLengthSq,vTemp);
+    // Splat the length
+    vLengthSq = XM_PERMUTE_PS(vLengthSq,_MM_SHUFFLE(2,2,2,2));
+    // Get the reciprocal
+    XMVECTOR vResult = _mm_rsqrt_ps(vLengthSq);
+    // Reciprocal mul to perform the normalization
+    vResult = _mm_mul_ps(vResult,V);
+    return vResult;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Full-precision normalization of a 4D vector.  Special cases: a zero-length
+// input returns the zero vector; an input whose squared length is infinite
+// returns QNaN in all components (see the intrinsic paths' masks and the
+// scalar path's divide-by-zero guard).
+inline XMVECTOR XMVector4Normalize
+(
+    FXMVECTOR V
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+    float fLength;
+    XMVECTOR vResult;
+
+    vResult = XMVector4Length( V );
+    fLength = vResult.vector4_f32[0];
+
+    // Prevent divide by zero
+    if (fLength > 0) {
+        fLength = 1.0f/fLength;
+    }
+
+    vResult.vector4_f32[0] = V.vector4_f32[0]*fLength;
+    vResult.vector4_f32[1] = V.vector4_f32[1]*fLength;
+    vResult.vector4_f32[2] = V.vector4_f32[2]*fLength;
+    vResult.vector4_f32[3] = V.vector4_f32[3]*fLength;
+    return vResult;
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+    // Dot4: |V|^2 replicated into both lanes of v1
+    __n128 vTemp = vmulq_f32( V, V );
+    __n64 v1 = vget_low_f32( vTemp );
+    __n64 v2 = vget_high_f32( vTemp );
+    v1 = vpadd_f32( v1, v1 );
+    v2 = vpadd_f32( v2, v2 );
+    v1 = vadd_f32( v1, v2 );
+    // Masks for the two special cases, computed on the squared length
+    __n64 VEqualsZero = vceq_f32( v1, vdup_n_u32(0) );
+    __n64 VEqualsInf = vceq_f32( v1, vget_low_f32(g_XMInfinity) );
+    // Reciprocal sqrt (2 iterations of Newton-Raphson)
+    __n64 S0 = vrsqrte_f32( v1 );
+    __n64 P0 = vmul_f32( v1, S0 );
+    __n64 R0 = vrsqrts_f32( P0, S0 );
+    __n64 S1 = vmul_f32( S0, R0 );
+    __n64 P1 = vmul_f32( v1, S1 );
+    __n64 R1 = vrsqrts_f32( P1, S1 );
+    v2 = vmul_f32( S1, R1 );
+    // Normalize, then patch in 0 for zero-length and QNaN for infinite-length
+    __n128 vResult = vmulq_f32( V, vcombine_f32(v2,v2) );
+    vResult = vbslq_f32( vcombine_f32(VEqualsZero,VEqualsZero), vdupq_n_f32(0), vResult );
+    return vbslq_f32( vcombine_f32(VEqualsInf,VEqualsInf), g_XMQNaN, vResult );
+#elif defined(_XM_SSE_INTRINSICS_)
+    // Perform the dot product on x,y,z and w
+    XMVECTOR vLengthSq = _mm_mul_ps(V,V);
+    // vTemp has z and w
+    XMVECTOR vTemp = XM_PERMUTE_PS(vLengthSq,_MM_SHUFFLE(3,2,3,2));
+    // x+z, y+w
+    vLengthSq = _mm_add_ps(vLengthSq,vTemp);
+    // x+z,x+z,x+z,y+w
+    vLengthSq = XM_PERMUTE_PS(vLengthSq,_MM_SHUFFLE(1,0,0,0));
+    // ??,??,y+w,y+w
+    vTemp = _mm_shuffle_ps(vTemp,vLengthSq,_MM_SHUFFLE(3,3,0,0));
+    // ??,??,x+z+y+w,??
+    vLengthSq = _mm_add_ps(vLengthSq,vTemp);
+    // Splat the length
+    vLengthSq = XM_PERMUTE_PS(vLengthSq,_MM_SHUFFLE(2,2,2,2));
+    // Prepare for the division
+    XMVECTOR vResult = _mm_sqrt_ps(vLengthSq);
+    // Create zero with a single instruction
+    XMVECTOR vZeroMask = _mm_setzero_ps();
+    // Test for a divide by zero (Must be FP to detect -0.0)
+    vZeroMask = _mm_cmpneq_ps(vZeroMask,vResult);
+    // Failsafe on zero (Or epsilon) length planes
+    // If the length is infinity, set the elements to zero
+    vLengthSq = _mm_cmpneq_ps(vLengthSq,g_XMInfinity);
+    // Divide to perform the normalization
+    vResult = _mm_div_ps(V,vResult);
+    // Any that are infinity, set to zero
+    vResult = _mm_and_ps(vResult,vZeroMask);
+    // Select qnan or result based on infinite length
+    XMVECTOR vTemp1 = _mm_andnot_ps(vLengthSq,g_XMQNaN);
+    XMVECTOR vTemp2 = _mm_and_ps(vResult,vLengthSq);
+    vResult = _mm_or_ps(vTemp1,vTemp2);
+    return vResult;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Clamps the length of V into [LengthMin, LengthMax].  Scalar convenience
+// wrapper: replicates the bounds and defers to the vector overload.
+inline XMVECTOR XMVector4ClampLength
+(
+    FXMVECTOR V,
+    float LengthMin,
+    float LengthMax
+)
+{
+    const XMVECTOR vMin = XMVectorReplicate(LengthMin);
+    const XMVECTOR vMax = XMVectorReplicate(LengthMax);
+    return XMVector4ClampLengthV(V, vMin, vMax);
+}
+
+//------------------------------------------------------------------------------
+
+// Clamps the length of V into [LengthMin, LengthMax], where both bounds are
+// replicated vectors (all four components equal, each >= 0, min <= max --
+// enforced by the asserts below).  If the length already lies inside the
+// range the original vector is returned bit-exactly.
+inline XMVECTOR XMVector4ClampLengthV
+(
+    FXMVECTOR V,
+    FXMVECTOR LengthMin,
+    FXMVECTOR LengthMax
+)
+{
+    assert((XMVectorGetY(LengthMin) == XMVectorGetX(LengthMin)) && (XMVectorGetZ(LengthMin) == XMVectorGetX(LengthMin)) && (XMVectorGetW(LengthMin) == XMVectorGetX(LengthMin)));
+    assert((XMVectorGetY(LengthMax) == XMVectorGetX(LengthMax)) && (XMVectorGetZ(LengthMax) == XMVectorGetX(LengthMax)) && (XMVectorGetW(LengthMax) == XMVectorGetX(LengthMax)));
+    assert(XMVector4GreaterOrEqual(LengthMin, XMVectorZero()));
+    assert(XMVector4GreaterOrEqual(LengthMax, XMVectorZero()));
+    assert(XMVector4GreaterOrEqual(LengthMax, LengthMin));
+
+    XMVECTOR LengthSq = XMVector4LengthSq(V);
+
+    const XMVECTOR Zero = XMVectorZero();
+
+    XMVECTOR RcpLength = XMVectorReciprocalSqrt(LengthSq);
+
+    XMVECTOR InfiniteLength = XMVectorEqualInt(LengthSq, g_XMInfinity.v);
+    XMVECTOR ZeroLength = XMVectorEqual(LengthSq, Zero);
+
+    XMVECTOR Normal = XMVectorMultiply(V, RcpLength);
+
+    // Length = |V|^2 * (1/|V|) = |V|
+    XMVECTOR Length = XMVectorMultiply(LengthSq, RcpLength);
+
+    // Select is true only when the length is neither zero nor infinite; in the
+    // degenerate cases fall back to LengthSq (propagating 0 or inf/NaN).
+    XMVECTOR Select = XMVectorEqualInt(InfiniteLength, ZeroLength);
+    Length = XMVectorSelect(LengthSq, Length, Select);
+    Normal = XMVectorSelect(LengthSq, Normal, Select);
+
+    XMVECTOR ControlMax = XMVectorGreater(Length, LengthMax);
+    XMVECTOR ControlMin = XMVectorLess(Length, LengthMin);
+
+    XMVECTOR ClampLength = XMVectorSelect(Length, LengthMax, ControlMax);
+    ClampLength = XMVectorSelect(ClampLength, LengthMin, ControlMin);
+
+    XMVECTOR Result = XMVectorMultiply(Normal, ClampLength);
+
+    // Preserve the original vector (with no precision loss) if the length falls within the given range
+    XMVECTOR Control = XMVectorEqualInt(ControlMax, ControlMin);
+    Result = XMVectorSelect(Result, V, Control);
+
+    return Result;
+}
+
+//------------------------------------------------------------------------------
+
+// Reflects a 4D incident vector across a normal:
+//   Result = Incident - 2 * dot(Incident, Normal) * Normal
+inline XMVECTOR XMVector4Reflect
+(
+    FXMVECTOR Incident,
+    FXMVECTOR Normal
+)
+{
+    // 2*dot is formed by adding the dot product to itself, then the scaled
+    // normal is subtracted from the incident via a negated multiply-add.
+    XMVECTOR TwoDot = XMVector4Dot(Incident, Normal);
+    TwoDot = XMVectorAdd(TwoDot, TwoDot);
+    return XMVectorNegativeMultiplySubtract(TwoDot, Normal, Incident);
+}
+
+//------------------------------------------------------------------------------
+
+// Refracts a 4D incident vector through a surface with the given scalar
+// index of refraction.  Thin wrapper over the vector-index overload.
+inline XMVECTOR XMVector4Refract
+(
+    FXMVECTOR Incident,
+    FXMVECTOR Normal,
+    float RefractionIndex
+)
+{
+    return XMVector4RefractV(Incident, Normal, XMVectorReplicate(RefractionIndex));
+}
+
+//------------------------------------------------------------------------------
+
+// Refracts a 4D incident vector through a surface, with the refraction index
+// replicated in all components of RefractionIndex.  Returns the zero vector
+// on total internal reflection.
+inline XMVECTOR XMVector4RefractV
+(
+    FXMVECTOR Incident,
+    FXMVECTOR Normal,
+    FXMVECTOR RefractionIndex
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+
+    XMVECTOR IDotN;
+    XMVECTOR R;
+    const XMVECTOR Zero = XMVectorZero();
+
+    // Result = RefractionIndex * Incident - Normal * (RefractionIndex * dot(Incident, Normal) +
+    // sqrt(1 - RefractionIndex * RefractionIndex * (1 - dot(Incident, Normal) * dot(Incident, Normal))))
+
+    IDotN = XMVector4Dot(Incident, Normal);
+
+    // R = 1.0f - RefractionIndex * RefractionIndex * (1.0f - IDotN * IDotN)
+    R = XMVectorNegativeMultiplySubtract(IDotN, IDotN, g_XMOne.v);
+    R = XMVectorMultiply(R, RefractionIndex);
+    R = XMVectorNegativeMultiplySubtract(R, RefractionIndex, g_XMOne.v);
+
+    if (XMVector4LessOrEqual(R, Zero))
+    {
+        // Total internal reflection
+        return Zero;
+    }
+    else
+    {
+        XMVECTOR Result;
+
+        // R = RefractionIndex * IDotN + sqrt(R)
+        R = XMVectorSqrt(R);
+        R = XMVectorMultiplyAdd(RefractionIndex, IDotN, R);
+
+        // Result = RefractionIndex * Incident - Normal * R
+        Result = XMVectorMultiply(RefractionIndex, Incident);
+        Result = XMVectorNegativeMultiplySubtract(Normal, R, Result);
+
+        return Result;
+    }
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+    XMVECTOR IDotN = XMVector4Dot(Incident,Normal);
+
+    // R = 1.0f - RefractionIndex * RefractionIndex * (1.0f - IDotN * IDotN)
+    __n128 R = vmlsq_f32( g_XMOne, IDotN, IDotN);
+    R = vmulq_f32(R, RefractionIndex);
+    R = vmlsq_f32(g_XMOne, R, RefractionIndex );
+
+    // Byte/halfword zips interleave the four comparison lanes so a single
+    // 32-bit lane extraction can test all of them at once (a NEON stand-in
+    // for SSE's movemask).  NOTE(review): vzip_u16's result is reassigned to
+    // an int8x8x2_t -- this presumably relies on this compiler's common
+    // __n64x2 representation for all vzip result types; verify on the target
+    // toolchain.
+    __n128 vResult = vcleq_f32(R,g_XMZero);
+    int8x8x2_t vTemp = vzip_u8(vget_low_u8(vResult), vget_high_u8(vResult));
+    vTemp = vzip_u16(vTemp.val[0], vTemp.val[1]);
+    if ( vget_lane_u32(vTemp.val[1], 1) == 0xFFFFFFFFU )
+    {
+        // Total internal reflection
+        vResult = g_XMZero;
+    }
+    else
+    {
+        // Sqrt(R): rsqrt estimate + two Newton-Raphson steps, then R * rsqrt(R)
+        __n128 S0 = vrsqrteq_f32(R);
+        __n128 P0 = vmulq_f32( R, S0 );
+        __n128 R0 = vrsqrtsq_f32( P0, S0 );
+        __n128 S1 = vmulq_f32( S0, R0 );
+        __n128 P1 = vmulq_f32( R, S1 );
+        __n128 R1 = vrsqrtsq_f32( P1, S1 );
+        __n128 S2 = vmulq_f32( S1, R1 );
+        R = vmulq_f32( R, S2 );
+        // R = RefractionIndex * IDotN + sqrt(R)
+        R = vmlaq_f32( R, RefractionIndex, IDotN );
+        // Result = RefractionIndex * Incident - Normal * R
+        vResult = vmulq_f32(RefractionIndex, Incident);
+        vResult = vmlsq_f32( vResult, R, Normal );
+    }
+    return vResult;
+#elif defined(_XM_SSE_INTRINSICS_)
+    XMVECTOR IDotN = XMVector4Dot(Incident,Normal);
+
+    // R = 1.0f - RefractionIndex * RefractionIndex * (1.0f - IDotN * IDotN)
+    XMVECTOR R = _mm_mul_ps(IDotN,IDotN);
+    R = _mm_sub_ps(g_XMOne,R);
+    R = _mm_mul_ps(R, RefractionIndex);
+    R = _mm_mul_ps(R, RefractionIndex);
+    R = _mm_sub_ps(g_XMOne,R);
+
+    // All four lanes <= 0 (movemask 0x0f) means total internal reflection
+    XMVECTOR vResult = _mm_cmple_ps(R,g_XMZero);
+    if (_mm_movemask_ps(vResult)==0x0f)
+    {
+        // Total internal reflection
+        vResult = g_XMZero;
+    }
+    else
+    {
+        // R = RefractionIndex * IDotN + sqrt(R)
+        R = _mm_sqrt_ps(R);
+        vResult = _mm_mul_ps(RefractionIndex, IDotN);
+        R = _mm_add_ps(R,vResult);
+        // Result = RefractionIndex * Incident - Normal * R
+        vResult = _mm_mul_ps(RefractionIndex, Incident);
+        R = _mm_mul_ps(R,Normal);
+        vResult = _mm_sub_ps(vResult,R);
+    }
+    return vResult;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Returns a vector orthogonal to V: (z, w, -x, -y).  Its dot product with V
+// is x*z + y*w - z*x - w*y == 0.
+inline XMVECTOR XMVector4Orthogonal
+(
+    FXMVECTOR V
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+
+    XMVECTOR Result;
+    Result.vector4_f32[0] = V.vector4_f32[2];
+    Result.vector4_f32[1] = V.vector4_f32[3];
+    Result.vector4_f32[2] = -V.vector4_f32[0];
+    Result.vector4_f32[3] = -V.vector4_f32[1];
+    return Result;
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+    static const XMVECTORF32 Negate = { 1.f, 1.f, -1.f, -1.f };
+
+    // Swap the halves (z,w,x,y) then flip the sign of the last two lanes
+    __n128 Result = vcombine_f32( vget_high_f32( V ), vget_low_f32( V ) );
+    return vmulq_f32( Result, Negate );
+#elif defined(_XM_SSE_INTRINSICS_)
+    static const XMVECTORF32 FlipZW = {1.0f,1.0f,-1.0f,-1.0f};
+    XMVECTOR vResult = XM_PERMUTE_PS(V,_MM_SHUFFLE(1,0,3,2));
+    vResult = _mm_mul_ps(vResult,FlipZW);
+    return vResult;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Estimates the radian angle between two already-normalized 4D vectors.
+// The dot product is clamped to [-1, 1] before the arccos estimate so that
+// rounding error cannot push it outside acos' domain.
+inline XMVECTOR XMVector4AngleBetweenNormalsEst
+(
+    FXMVECTOR N1,
+    FXMVECTOR N2
+)
+{
+#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
+
+    XMVECTOR CosAngle = XMVector4Dot(N1, N2);
+    CosAngle = XMVectorClamp(CosAngle, g_XMNegativeOne.v, g_XMOne.v);
+    return XMVectorACosEst(CosAngle);
+
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Computes the radian angle between two already-normalized 4D vectors.
+// The dot product is clamped to [-1, 1] before arccos so that rounding
+// error cannot push it outside acos' domain.
+inline XMVECTOR XMVector4AngleBetweenNormals
+(
+    FXMVECTOR N1,
+    FXMVECTOR N2
+)
+{
+#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
+
+    XMVECTOR CosAngle = XMVector4Dot(N1, N2);
+    CosAngle = XMVectorClamp(CosAngle, g_XMNegativeOne.v, g_XMOne.v);
+    return XMVectorACos(CosAngle);
+
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Computes the radian angle between two arbitrary (not necessarily
+// normalized) 4D vectors: acos( dot(V1,V2) / (|V1| * |V2|) ), with the
+// cosine clamped into acos' domain to absorb rounding error.
+inline XMVECTOR XMVector4AngleBetweenVectors
+(
+    FXMVECTOR V1,
+    FXMVECTOR V2
+)
+{
+#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
+
+    XMVECTOR RcpLengths = XMVectorMultiply(XMVector4ReciprocalLength(V1),
+                                           XMVector4ReciprocalLength(V2));
+
+    XMVECTOR CosAngle = XMVectorMultiply(XMVector4Dot(V1, V2), RcpLengths);
+    CosAngle = XMVectorClamp(CosAngle, g_XMNegativeOne.v, g_XMOne.v);
+
+    return XMVectorACos(CosAngle);
+
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Transforms a 4D vector by a 4x4 matrix (row-vector convention: V * M).
+// Each result component is the dot of V with a column of M, computed by
+// splatting each component of V and accumulating against the matrix rows.
+inline XMVECTOR XMVector4Transform
+(
+    FXMVECTOR V,
+    CXMMATRIX M
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+    float fX = (M.m[0][0]*V.vector4_f32[0])+(M.m[1][0]*V.vector4_f32[1])+(M.m[2][0]*V.vector4_f32[2])+(M.m[3][0]*V.vector4_f32[3]);
+    float fY = (M.m[0][1]*V.vector4_f32[0])+(M.m[1][1]*V.vector4_f32[1])+(M.m[2][1]*V.vector4_f32[2])+(M.m[3][1]*V.vector4_f32[3]);
+    float fZ = (M.m[0][2]*V.vector4_f32[0])+(M.m[1][2]*V.vector4_f32[1])+(M.m[2][2]*V.vector4_f32[2])+(M.m[3][2]*V.vector4_f32[3]);
+    float fW = (M.m[0][3]*V.vector4_f32[0])+(M.m[1][3]*V.vector4_f32[1])+(M.m[2][3]*V.vector4_f32[2])+(M.m[3][3]*V.vector4_f32[3]);
+    XMVECTOR vResult = {
+        fX,
+        fY,
+        fZ,
+        fW
+    };
+    return vResult;
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+    // Splat X/Y from the low half, multiply-accumulate against rows 0 and 1
+    __n64 VL = vget_low_f32( V );
+    XMVECTOR vTemp1 = vdupq_lane_f32( VL, 0 ); // X
+    XMVECTOR vTemp2 = vdupq_lane_f32( VL, 1 ); // Y
+    XMVECTOR vResult = vmulq_f32( vTemp1, M.r[0] );
+    vResult = vmlaq_f32( vResult, vTemp2, M.r[1] );
+    // Then Z/W from the high half against rows 2 and 3
+    __n64 VH = vget_high_f32( V );
+    vTemp1 = vdupq_lane_f32( VH, 0 ); // Z
+    vTemp2 = vdupq_lane_f32( VH, 1 ); // W
+    vResult = vmlaq_f32( vResult, vTemp1, M.r[2] );
+    return vmlaq_f32( vResult, vTemp2, M.r[3] );
+#elif defined(_XM_SSE_INTRINSICS_)
+    // Splat x,y,z and w
+    XMVECTOR vTempX = XM_PERMUTE_PS(V,_MM_SHUFFLE(0,0,0,0));
+    XMVECTOR vTempY = XM_PERMUTE_PS(V,_MM_SHUFFLE(1,1,1,1));
+    XMVECTOR vTempZ = XM_PERMUTE_PS(V,_MM_SHUFFLE(2,2,2,2));
+    XMVECTOR vTempW = XM_PERMUTE_PS(V,_MM_SHUFFLE(3,3,3,3));
+    // Mul by the matrix
+    vTempX = _mm_mul_ps(vTempX,M.r[0]);
+    vTempY = _mm_mul_ps(vTempY,M.r[1]);
+    vTempZ = _mm_mul_ps(vTempZ,M.r[2]);
+    vTempW = _mm_mul_ps(vTempW,M.r[3]);
+    // Add them all together
+    vTempX = _mm_add_ps(vTempX,vTempY);
+    vTempZ = _mm_add_ps(vTempZ,vTempW);
+    vTempX = _mm_add_ps(vTempX,vTempZ);
+    return vTempX;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+// Transforms VectorCount 4D vectors from pInputStream by M, writing results
+// to pOutputStream.  Strides are in bytes; input and output may be packed
+// (stride == sizeof(XMFLOAT4)) or interleaved inside larger structures.
+// Returns pOutputStream.  The matrix rows are hoisted out of the loop so
+// they stay in registers.
+_Use_decl_annotations_
+inline XMFLOAT4* XMVector4TransformStream
+(
+    XMFLOAT4* pOutputStream,
+    size_t OutputStride,
+    const XMFLOAT4* pInputStream,
+    size_t InputStride,
+    size_t VectorCount,
+    CXMMATRIX M
+)
+{
+    assert(pOutputStream != NULL);
+    assert(pInputStream != NULL);
+
+#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(XM_NO_MISALIGNED_VECTOR_ACCESS) || defined(_XM_ARM_NEON_INTRINSICS_)
+
+    // Byte pointers so the user-supplied strides can be applied directly
+    const uint8_t* pInputVector = (const uint8_t*)pInputStream;
+    uint8_t* pOutputVector = (uint8_t*)pOutputStream;
+
+    const XMVECTOR row0 = M.r[0];
+    const XMVECTOR row1 = M.r[1];
+    const XMVECTOR row2 = M.r[2];
+    const XMVECTOR row3 = M.r[3];
+
+    for (size_t i = 0; i < VectorCount; i++)
+    {
+        XMVECTOR V = XMLoadFloat4((const XMFLOAT4*)pInputVector);
+        XMVECTOR W = XMVectorSplatW(V);
+        XMVECTOR Z = XMVectorSplatZ(V);
+        XMVECTOR Y = XMVectorSplatY(V);
+        XMVECTOR X = XMVectorSplatX(V);
+
+        // Result = X*row0 + Y*row1 + Z*row2 + W*row3
+        XMVECTOR Result = XMVectorMultiply(W, row3);
+        Result = XMVectorMultiplyAdd(Z, row2, Result);
+        Result = XMVectorMultiplyAdd(Y, row1, Result);
+        Result = XMVectorMultiplyAdd(X, row0, Result);
+
+        XMStoreFloat4((XMFLOAT4*)pOutputVector, Result);
+
+        pInputVector += InputStride;
+        pOutputVector += OutputStride;
+    }
+
+    return pOutputStream;
+
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+/****************************************************************************
+ *
+ * XMVECTOR operators
+ *
+ ****************************************************************************/
+
+//------------------------------------------------------------------------------
+
+// Unary plus: identity -- returns the vector unchanged.
+inline XMVECTOR operator+ (FXMVECTOR V)
+{
+    return V;
+}
+
+//------------------------------------------------------------------------------
+
+// Unary minus: negates all four components.
+inline XMVECTOR operator- (FXMVECTOR V)
+{
+    return XMVectorNegate(V);
+}
+
+//------------------------------------------------------------------------------
+
+// Component-wise add-assign.
+inline XMVECTOR& operator+= (XMVECTOR& V1, FXMVECTOR V2)
+{
+    return V1 = XMVectorAdd(V1, V2);
+}
+
+//------------------------------------------------------------------------------
+
+// Component-wise subtract-assign.
+inline XMVECTOR& operator-= (XMVECTOR& V1, FXMVECTOR V2)
+{
+    return V1 = XMVectorSubtract(V1, V2);
+}
+
+//------------------------------------------------------------------------------
+
+// Component-wise multiply-assign.
+inline XMVECTOR& operator*= (XMVECTOR& V1, FXMVECTOR V2)
+{
+    return V1 = XMVectorMultiply(V1, V2);
+}
+
+//------------------------------------------------------------------------------
+
+// Component-wise divide-assign.
+inline XMVECTOR& operator/= (XMVECTOR& V1, FXMVECTOR V2)
+{
+    return V1 = XMVectorDivide(V1, V2);
+}
+
+//------------------------------------------------------------------------------
+
+// Scalar multiply-assign: scales every component by S.
+inline XMVECTOR& operator*= (XMVECTOR& V, const float S)
+{
+    return V = XMVectorScale(V, S);
+}
+
+//------------------------------------------------------------------------------
+
+// Scalar divide-assign: scales every component by 1/S.  S must be nonzero.
+inline XMVECTOR& operator/= (XMVECTOR& V, const float S)
+{
+    assert( S != 0.0f );
+    return V = XMVectorScale(V, 1.0f / S);
+}
+
+//------------------------------------------------------------------------------
+
+// Component-wise sum of two vectors.
+inline XMVECTOR operator+ (FXMVECTOR V1, FXMVECTOR V2)
+{
+    return XMVectorAdd(V1, V2);
+}
+
+//------------------------------------------------------------------------------
+
+// Component-wise difference of two vectors.
+inline XMVECTOR operator- (FXMVECTOR V1, FXMVECTOR V2)
+{
+    return XMVectorSubtract(V1, V2);
+}
+
+//------------------------------------------------------------------------------
+
+// Component-wise product of two vectors.
+inline XMVECTOR operator* (FXMVECTOR V1, FXMVECTOR V2)
+{
+    return XMVectorMultiply(V1, V2);
+}
+
+//------------------------------------------------------------------------------
+
+// Component-wise quotient of two vectors.
+inline XMVECTOR operator/ (FXMVECTOR V1, FXMVECTOR V2)
+{
+    return XMVectorDivide(V1, V2);
+}
+
+//------------------------------------------------------------------------------
+
+// Scales every component of V by S.
+inline XMVECTOR operator* (FXMVECTOR V, const float S)
+{
+    return XMVectorScale(V, S);
+}
+
+//------------------------------------------------------------------------------
+
+// Scales every component of V by 1/S.  S must be nonzero.
+inline XMVECTOR operator/ (FXMVECTOR V, const float S)
+{
+    assert( S != 0.0f );
+    return XMVectorScale(V, 1.0f / S);
+}
+
+//------------------------------------------------------------------------------
+
+// Scales every component of V by S (scalar on the left).
+inline XMVECTOR operator* (float S, FXMVECTOR V)
+{
+    return XMVectorScale(V, S);
+}
+
+#if defined(_XM_NO_INTRINSICS_)
+#undef XMISNAN
+#undef XMISINF
+#endif
+
+
diff --git a/Minecraft.Client/PS3/PS3Extras/DirectX/DirectXPackedVector.h b/Minecraft.Client/PS3/PS3Extras/DirectX/DirectXPackedVector.h
new file mode 100644
index 00000000..66df02fd
--- /dev/null
+++ b/Minecraft.Client/PS3/PS3Extras/DirectX/DirectXPackedVector.h
@@ -0,0 +1,995 @@
+//-------------------------------------------------------------------------------------
+// DirectXPackedVector.h -- SIMD C++ Math library
+//
+// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
+// ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
+// PARTICULAR PURPOSE.
+//
+// Copyright (c) Microsoft Corporation. All rights reserved.
+//-------------------------------------------------------------------------------------
+
+#ifdef _MSC_VER
+#pragma once
+#endif
+
+#include "DirectXMath.h"
+
+namespace DirectX
+{
+
+namespace PackedVector
+{
+
+#ifdef _XM_BIGENDIAN_
+#pragma bitfield_order(push)
+#pragma bitfield_order(lsb_to_msb)
+#endif
+
+#pragma warning(push)
+#pragma warning(disable:4201 4365 4324)
+
+//------------------------------------------------------------------------------
+// ARGB Color; 8-8-8-8 bit unsigned normalized integer components packed into
+// a 32 bit integer. The normalized color is packed into 32 bits using 8 bit
+// unsigned, normalized integers for the alpha, red, green, and blue components.
+// The alpha component is stored in the most significant bits and the blue
+// component in the least significant bits (A8R8G8B8):
+// [32] aaaaaaaa rrrrrrrr gggggggg bbbbbbbb [0]
+struct XMCOLOR
+{
+ union
+ {
+ struct
+ {
+ uint8_t b; // Blue: 0/255 to 255/255
+ uint8_t g; // Green: 0/255 to 255/255
+ uint8_t r; // Red: 0/255 to 255/255
+ uint8_t a; // Alpha: 0/255 to 255/255
+ };
+ uint32_t c;
+ };
+
+ XMCOLOR() {}
+ XMCOLOR(uint32_t Color) : c(Color) {}
+ XMCOLOR(float _r, float _g, float _b, float _a);
+ explicit XMCOLOR(_In_reads_(4) const float *pArray);
+
+ operator uint32_t () const { return c; }
+
+ XMCOLOR& operator= (const XMCOLOR& Color) { c = Color.c; return *this; }
+ XMCOLOR& operator= (const uint32_t Color) { c = Color; return *this; }
+};
+
+//------------------------------------------------------------------------------
+// 16 bit floating point number consisting of a sign bit, a 5 bit biased
+// exponent, and a 10 bit mantissa
+typedef uint16_t HALF;
+
+//------------------------------------------------------------------------------
+// 2D Vector; 16 bit floating point components
+struct XMHALF2
+{
+ union
+ {
+ struct
+ {
+ HALF x;
+ HALF y;
+ };
+ uint32_t v;
+ };
+
+ XMHALF2() {}
+ explicit XMHALF2(uint32_t Packed) : v(Packed) {}
+ XMHALF2(HALF _x, HALF _y) : x(_x), y(_y) {}
+ explicit XMHALF2(_In_reads_(2) const HALF *pArray) : x(pArray[0]), y(pArray[1]) {}
+ XMHALF2(float _x, float _y);
+ explicit XMHALF2(_In_reads_(2) const float *pArray);
+
+ XMHALF2& operator= (const XMHALF2& Half2) { x = Half2.x; y = Half2.y; return *this; }
+ XMHALF2& operator= (uint32_t Packed) { v = Packed; return *this; }
+};
+
+//------------------------------------------------------------------------------
+// 2D Vector; 16 bit signed normalized integer components
+struct XMSHORTN2
+{
+ union
+ {
+ struct
+ {
+ int16_t x;
+ int16_t y;
+ };
+ uint32_t v;
+ };
+
+ XMSHORTN2() {}
+ explicit XMSHORTN2(uint32_t Packed) : v(Packed) {}
+ XMSHORTN2(int16_t _x, int16_t _y) : x(_x), y(_y) {}
+ explicit XMSHORTN2(_In_reads_(2) const int16_t *pArray) : x(pArray[0]), y(pArray[1]) {}
+ XMSHORTN2(float _x, float _y);
+ explicit XMSHORTN2(_In_reads_(2) const float *pArray);
+
+ XMSHORTN2& operator= (const XMSHORTN2& ShortN2) { x = ShortN2.x; y = ShortN2.y; return *this; }
+ XMSHORTN2& operator= (uint32_t Packed) { v = Packed; return *this; }
+};
+
+// 2D Vector; 16 bit signed integer components
+struct XMSHORT2
+{
+ union
+ {
+ struct
+ {
+ int16_t x;
+ int16_t y;
+ };
+ uint32_t v;
+ };
+
+ XMSHORT2() {}
+ explicit XMSHORT2(uint32_t Packed) : v(Packed) {}
+ XMSHORT2(int16_t _x, int16_t _y) : x(_x), y(_y) {}
+ explicit XMSHORT2(_In_reads_(2) const int16_t *pArray) : x(pArray[0]), y(pArray[1]) {}
+ XMSHORT2(float _x, float _y);
+ explicit XMSHORT2(_In_reads_(2) const float *pArray);
+
+ XMSHORT2& operator= (const XMSHORT2& Short2) { x = Short2.x; y = Short2.y; return *this; }
+ XMSHORT2& operator= (uint32_t Packed) { v = Packed; return *this; }
+};
+
+// 2D Vector; 16 bit unsigned normalized integer components
+struct XMUSHORTN2
+{
+ union
+ {
+ struct
+ {
+ uint16_t x;
+ uint16_t y;
+ };
+ uint32_t v;
+ };
+
+ XMUSHORTN2() {}
+ explicit XMUSHORTN2(uint32_t Packed) : v(Packed) {}
+ XMUSHORTN2(uint16_t _x, uint16_t _y) : x(_x), y(_y) {}
+ explicit XMUSHORTN2(_In_reads_(2) const uint16_t *pArray) : x(pArray[0]), y(pArray[1]) {}
+ XMUSHORTN2(float _x, float _y);
+ explicit XMUSHORTN2(_In_reads_(2) const float *pArray);
+
+ XMUSHORTN2& operator= (const XMUSHORTN2& UShortN2) { x = UShortN2.x; y = UShortN2.y; return *this; }
+ XMUSHORTN2& operator= (uint32_t Packed) { v = Packed; return *this; }
+};
+
+// 2D Vector; 16 bit unsigned integer components
+struct XMUSHORT2
+{
+ union
+ {
+ struct
+ {
+ uint16_t x;
+ uint16_t y;
+ };
+ uint32_t v;
+ };
+
+ XMUSHORT2() {}
+ explicit XMUSHORT2(uint32_t Packed) : v(Packed) {}
+ XMUSHORT2(uint16_t _x, uint16_t _y) : x(_x), y(_y) {}
+ explicit XMUSHORT2(_In_reads_(2) const uint16_t *pArray) : x(pArray[0]), y(pArray[1]) {}
+ XMUSHORT2(float _x, float _y);
+ explicit XMUSHORT2(_In_reads_(2) const float *pArray);
+
+ XMUSHORT2& operator= (const XMUSHORT2& UShort2) { x = UShort2.x; y = UShort2.y; return *this; }
+ XMUSHORT2& operator= (uint32_t Packed) { v = Packed; return *this; }
+};
+
+//------------------------------------------------------------------------------
+// 2D Vector; 8 bit signed normalized integer components
+struct XMBYTEN2
+{
+ union
+ {
+ struct
+ {
+ int8_t x;
+ int8_t y;
+ };
+ uint16_t v;
+ };
+
+ XMBYTEN2() {}
+ explicit XMBYTEN2(uint16_t Packed) : v(Packed) {}
+ XMBYTEN2(int8_t _x, int8_t _y) : x(_x), y(_y) {}
+ explicit XMBYTEN2(_In_reads_(2) const int8_t *pArray) : x(pArray[0]), y(pArray[1]) {}
+ XMBYTEN2(float _x, float _y);
+ explicit XMBYTEN2(_In_reads_(2) const float *pArray);
+
+ XMBYTEN2& operator= (const XMBYTEN2& ByteN2) { x = ByteN2.x; y = ByteN2.y; return *this; }
+ XMBYTEN2& operator= (uint16_t Packed) { v = Packed; return *this; }
+};
+
+// 2D Vector; 8 bit signed integer components
+struct XMBYTE2
+{
+ union
+ {
+ struct
+ {
+ int8_t x;
+ int8_t y;
+ };
+ uint16_t v;
+ };
+
+ XMBYTE2() {}
+ explicit XMBYTE2(uint16_t Packed) : v(Packed) {}
+ XMBYTE2(int8_t _x, int8_t _y) : x(_x), y(_y) {}
+ explicit XMBYTE2(_In_reads_(2) const int8_t *pArray) : x(pArray[0]), y(pArray[1]) {}
+ XMBYTE2(float _x, float _y);
+ explicit XMBYTE2(_In_reads_(2) const float *pArray);
+
+ XMBYTE2& operator= (const XMBYTE2& Byte2) { x = Byte2.x; y = Byte2.y; return *this; }
+ XMBYTE2& operator= (uint16_t Packed) { v = Packed; return *this; }
+};
+
+// 2D Vector; 8 bit unsigned normalized integer components
+struct XMUBYTEN2
+{
+ union
+ {
+ struct
+ {
+ uint8_t x;
+ uint8_t y;
+ };
+ uint16_t v;
+ };
+
+ XMUBYTEN2() {}
+ explicit XMUBYTEN2(uint16_t Packed) : v(Packed) {}
+ XMUBYTEN2(uint8_t _x, uint8_t _y) : x(_x), y(_y) {}
+ explicit XMUBYTEN2(_In_reads_(2) const uint8_t *pArray) : x(pArray[0]), y(pArray[1]) {}
+ XMUBYTEN2(float _x, float _y);
+ explicit XMUBYTEN2(_In_reads_(2) const float *pArray);
+
+ XMUBYTEN2& operator= (const XMUBYTEN2& UByteN2) { x = UByteN2.x; y = UByteN2.y; return *this; }
+ XMUBYTEN2& operator= (uint16_t Packed) { v = Packed; return *this; }
+};
+
+// 2D Vector; 8 bit unsigned integer components
+struct XMUBYTE2
+{
+ union
+ {
+ struct
+ {
+ uint8_t x;
+ uint8_t y;
+ };
+ uint16_t v;
+ };
+
+ XMUBYTE2() {}
+ explicit XMUBYTE2(uint16_t Packed) : v(Packed) {}
+ XMUBYTE2(uint8_t _x, uint8_t _y) : x(_x), y(_y) {}
+ explicit XMUBYTE2(_In_reads_(2) const uint8_t *pArray) : x(pArray[0]), y(pArray[1]) {}
+ XMUBYTE2(float _x, float _y);
+ explicit XMUBYTE2(_In_reads_(2) const float *pArray);
+
+ XMUBYTE2& operator= (const XMUBYTE2& UByte2) { x = UByte2.x; y = UByte2.y; return *this; }
+ XMUBYTE2& operator= (uint16_t Packed) { v = Packed; return *this; }
+};
+
+//------------------------------------------------------------------------------
+// 3D vector: 5/6/5 unsigned integer components
+struct XMU565
+{
+ union
+ {
+ struct
+ {
+ uint16_t x : 5; // 0 to 31
+ uint16_t y : 6; // 0 to 63
+ uint16_t z : 5; // 0 to 31
+ };
+ uint16_t v;
+ };
+
+ XMU565() {}
+ explicit XMU565(uint16_t Packed) : v(Packed) {}
+ XMU565(uint8_t _x, uint8_t _y, uint8_t _z) : x(_x), y(_y), z(_z) {}
+ explicit XMU565(_In_reads_(3) const int8_t *pArray) : x(pArray[0]), y(pArray[1]), z(pArray[2]) {}
+ XMU565(float _x, float _y, float _z);
+ explicit XMU565(_In_reads_(3) const float *pArray);
+
+ operator uint16_t () const { return v; }
+
+ XMU565& operator= (const XMU565& U565) { v = U565.v; return *this; }
+ XMU565& operator= (uint16_t Packed) { v = Packed; return *this; }
+};
+
+//------------------------------------------------------------------------------
+// 3D vector: 11/11/10 floating-point components
+// The 3D vector is packed into 32 bits as follows: a 5-bit biased exponent
+// and 6-bit mantissa for x component, a 5-bit biased exponent and
+// 6-bit mantissa for y component, a 5-bit biased exponent and a 5-bit
+// mantissa for z. The z component is stored in the most significant bits
+// and the x component in the least significant bits. No sign bits so
+// all partial-precision numbers are positive.
+// (Z10Y11X11): [32] ZZZZZzzz zzzYYYYY yyyyyyXX XXXxxxxx [0]
+struct XMFLOAT3PK
+{
+ union
+ {
+ struct
+ {
+ uint32_t xm : 6; // x-mantissa
+ uint32_t xe : 5; // x-exponent
+ uint32_t ym : 6; // y-mantissa
+ uint32_t ye : 5; // y-exponent
+ uint32_t zm : 5; // z-mantissa
+ uint32_t ze : 5; // z-exponent
+ };
+ uint32_t v;
+ };
+
+ XMFLOAT3PK() {}
+ explicit XMFLOAT3PK(uint32_t Packed) : v(Packed) {}
+ XMFLOAT3PK(float _x, float _y, float _z);
+ explicit XMFLOAT3PK(_In_reads_(3) const float *pArray);
+
+ operator uint32_t () const { return v; }
+
+ XMFLOAT3PK& operator= (const XMFLOAT3PK& float3pk) { v = float3pk.v; return *this; }
+ XMFLOAT3PK& operator= (uint32_t Packed) { v = Packed; return *this; }
+};
+
+//------------------------------------------------------------------------------
+// 3D vector: 9/9/9 floating-point components with shared 5-bit exponent
+// The 3D vector is packed into 32 bits as follows: a 5-bit biased exponent
+// with 9-bit mantissa for the x, y, and z component. The shared exponent
+// is stored in the most significant bits and the x component mantissa is in
+// the least significant bits. No sign bits so all partial-precision numbers
+// are positive.
+// (E5Z9Y9X9): [32] EEEEEzzz zzzzzzyy yyyyyyyx xxxxxxxx [0]
+struct XMFLOAT3SE
+{
+ union
+ {
+ struct
+ {
+ uint32_t xm : 9; // x-mantissa
+ uint32_t ym : 9; // y-mantissa
+ uint32_t zm : 9; // z-mantissa
+ uint32_t e : 5; // shared exponent
+ };
+ uint32_t v;
+ };
+
+ XMFLOAT3SE() {}
+ explicit XMFLOAT3SE(uint32_t Packed) : v(Packed) {}
+ XMFLOAT3SE(float _x, float _y, float _z);
+ explicit XMFLOAT3SE(_In_reads_(3) const float *pArray);
+
+ operator uint32_t () const { return v; }
+
+ XMFLOAT3SE& operator= (const XMFLOAT3SE& float3se) { v = float3se.v; return *this; }
+ XMFLOAT3SE& operator= (uint32_t Packed) { v = Packed; return *this; }
+};
+
+//------------------------------------------------------------------------------
+// 4D Vector; 16 bit floating point components
+struct XMHALF4
+{
+ union
+ {
+ struct
+ {
+ HALF x;
+ HALF y;
+ HALF z;
+ HALF w;
+ };
+ uint64_t v;
+ };
+
+ XMHALF4() {}
+ explicit XMHALF4(uint64_t Packed) : v(Packed) {}
+ XMHALF4(HALF _x, HALF _y, HALF _z, HALF _w) : x(_x), y(_y), z(_z), w(_w) {}
+ explicit XMHALF4(_In_reads_(4) const HALF *pArray) : x(pArray[0]), y(pArray[1]), z(pArray[2]), w(pArray[3]) {}
+ XMHALF4(float _x, float _y, float _z, float _w);
+ explicit XMHALF4(_In_reads_(4) const float *pArray);
+
+ XMHALF4& operator= (const XMHALF4& Half4) { x = Half4.x; y = Half4.y; z = Half4.z; w = Half4.w; return *this; }
+ XMHALF4& operator= (uint64_t Packed) { v = Packed; return *this; }
+};
+
+//------------------------------------------------------------------------------
+// 4D Vector; 16 bit signed normalized integer components
+struct XMSHORTN4
+{
+ union
+ {
+ struct
+ {
+ int16_t x;
+ int16_t y;
+ int16_t z;
+ int16_t w;
+ };
+ uint64_t v;
+ };
+
+ XMSHORTN4() {}
+ explicit XMSHORTN4(uint64_t Packed) : v(Packed) {}
+ XMSHORTN4(int16_t _x, int16_t _y, int16_t _z, int16_t _w) : x(_x), y(_y), z(_z), w(_w) {}
+ explicit XMSHORTN4(_In_reads_(4) const int16_t *pArray) : x(pArray[0]), y(pArray[1]), z(pArray[2]), w(pArray[3]) {}
+ XMSHORTN4(float _x, float _y, float _z, float _w);
+ explicit XMSHORTN4(_In_reads_(4) const float *pArray);
+
+ XMSHORTN4& operator= (const XMSHORTN4& ShortN4) { x = ShortN4.x; y = ShortN4.y; z = ShortN4.z; w = ShortN4.w; return *this; }
+ XMSHORTN4& operator= (uint64_t Packed) { v = Packed; return *this; }
+};
+
+// 4D Vector; 16 bit signed integer components
+struct XMSHORT4
+{
+ union
+ {
+ struct
+ {
+ int16_t x;
+ int16_t y;
+ int16_t z;
+ int16_t w;
+ };
+ uint64_t v;
+ };
+
+ XMSHORT4() {}
+ explicit XMSHORT4(uint64_t Packed) : v(Packed) {}
+ XMSHORT4(int16_t _x, int16_t _y, int16_t _z, int16_t _w) : x(_x), y(_y), z(_z), w(_w) {}
+ explicit XMSHORT4(_In_reads_(4) const int16_t *pArray) : x(pArray[0]), y(pArray[1]), z(pArray[2]), w(pArray[3]) {}
+ XMSHORT4(float _x, float _y, float _z, float _w);
+ explicit XMSHORT4(_In_reads_(4) const float *pArray);
+
+ XMSHORT4& operator= (const XMSHORT4& Short4) { x = Short4.x; y = Short4.y; z = Short4.z; w = Short4.w; return *this; }
+ XMSHORT4& operator= (uint64_t Packed) { v = Packed; return *this; }
+};
+
+// 4D Vector; 16 bit unsigned normalized integer components
+struct XMUSHORTN4
+{
+ union
+ {
+ struct
+ {
+ uint16_t x;
+ uint16_t y;
+ uint16_t z;
+ uint16_t w;
+ };
+ uint64_t v;
+ };
+
+ XMUSHORTN4() {}
+ explicit XMUSHORTN4(uint64_t Packed) : v(Packed) {}
+ XMUSHORTN4(uint16_t _x, uint16_t _y, uint16_t _z, uint16_t _w) : x(_x), y(_y), z(_z), w(_w) {}
+ explicit XMUSHORTN4(_In_reads_(4) const uint16_t *pArray) : x(pArray[0]), y(pArray[1]), z(pArray[2]), w(pArray[3]) {}
+ XMUSHORTN4(float _x, float _y, float _z, float _w);
+ explicit XMUSHORTN4(_In_reads_(4) const float *pArray);
+
+ XMUSHORTN4& operator= (const XMUSHORTN4& UShortN4) { x = UShortN4.x; y = UShortN4.y; z = UShortN4.z; w = UShortN4.w; return *this; }
+ XMUSHORTN4& operator= (uint64_t Packed) { v = Packed; return *this; }
+};
+
+// 4D Vector; 16 bit unsigned integer components
+struct XMUSHORT4
+{
+ union
+ {
+ struct
+ {
+ uint16_t x;
+ uint16_t y;
+ uint16_t z;
+ uint16_t w;
+ };
+ uint64_t v;
+ };
+
+ XMUSHORT4() {}
+ explicit XMUSHORT4(uint64_t Packed) : v(Packed) {}
+ XMUSHORT4(uint16_t _x, uint16_t _y, uint16_t _z, uint16_t _w) : x(_x), y(_y), z(_z), w(_w) {}
+ explicit XMUSHORT4(_In_reads_(4) const uint16_t *pArray) : x(pArray[0]), y(pArray[1]), z(pArray[2]), w(pArray[3]) {}
+ XMUSHORT4(float _x, float _y, float _z, float _w);
+ explicit XMUSHORT4(_In_reads_(4) const float *pArray);
+
+ XMUSHORT4& operator= (const XMUSHORT4& UShort4) { x = UShort4.x; y = UShort4.y; z = UShort4.z; w = UShort4.w; return *this; }
+ XMUSHORT4& operator= (uint32_t Packed) { v = Packed; return *this; }
+};
+
+//------------------------------------------------------------------------------
+// 4D Vector; 10-10-10-2 bit normalized components packed into a 32 bit integer
+// The normalized 4D Vector is packed into 32 bits as follows: a 2 bit unsigned,
+// normalized integer for the w component and 10 bit signed, normalized
+// integers for the z, y, and x components. The w component is stored in the
+// most significant bits and the x component in the least significant bits
+// (W2Z10Y10X10): [32] wwzzzzzz zzzzyyyy yyyyyyxx xxxxxxxx [0]
+struct XMXDECN4
+{
+ union
+ {
+ struct
+ {
+ int32_t x : 10; // -511/511 to 511/511
+ int32_t y : 10; // -511/511 to 511/511
+ int32_t z : 10; // -511/511 to 511/511
+ uint32_t w : 2; // 0/3 to 3/3
+ };
+ uint32_t v;
+ };
+
+ XMXDECN4() {}
+ explicit XMXDECN4(uint32_t Packed) : v(Packed) {}
+ XMXDECN4(float _x, float _y, float _z, float _w);
+ explicit XMXDECN4(_In_reads_(4) const float *pArray);
+
+ operator uint32_t () const { return v; }
+
+ XMXDECN4& operator= (const XMXDECN4& XDecN4) { v = XDecN4.v; return *this; }
+ XMXDECN4& operator= (uint32_t Packed) { v = Packed; return *this; }
+};
+
+// 4D Vector; 10-10-10-2 bit components packed into a 32 bit integer
+// The normalized 4D Vector is packed into 32 bits as follows: a 2 bit unsigned
+// integer for the w component and 10 bit signed integers for the
+// z, y, and x components. The w component is stored in the
+// most significant bits and the x component in the least significant bits
+// (W2Z10Y10X10): [32] wwzzzzzz zzzzyyyy yyyyyyxx xxxxxxxx [0]
+struct XMXDEC4
+{
+ union
+ {
+ struct
+ {
+ int32_t x : 10; // -511 to 511
+ int32_t y : 10; // -511 to 511
+ int32_t z : 10; // -511 to 511
+ uint32_t w : 2; // 0 to 3
+ };
+ uint32_t v;
+ };
+
+ XMXDEC4() {}
+ explicit XMXDEC4(uint32_t Packed) : v(Packed) {}
+ XMXDEC4(float _x, float _y, float _z, float _w);
+ explicit XMXDEC4(_In_reads_(4) const float *pArray);
+
+ operator uint32_t () const { return v; }
+
+ XMXDEC4& operator= (const XMXDEC4& XDec4) { v = XDec4.v; return *this; }
+ XMXDEC4& operator= (uint32_t Packed) { v = Packed; return *this; }
+};
+
+// 4D Vector; 10-10-10-2 bit normalized components packed into a 32 bit integer
+// The normalized 4D Vector is packed into 32 bits as follows: a 2 bit signed,
+// normalized integer for the w component and 10 bit signed, normalized
+// integers for the z, y, and x components. The w component is stored in the
+// most significant bits and the x component in the least significant bits
+// (W2Z10Y10X10): [32] wwzzzzzz zzzzyyyy yyyyyyxx xxxxxxxx [0]
+struct XMDECN4
+{
+ union
+ {
+ struct
+ {
+ int32_t x : 10; // -511/511 to 511/511
+ int32_t y : 10; // -511/511 to 511/511
+ int32_t z : 10; // -511/511 to 511/511
+ int32_t w : 2; // -1/1 to 1/1
+ };
+ uint32_t v;
+ };
+
+ XMDECN4() {}
+ explicit XMDECN4(uint32_t Packed) : v(Packed) {}
+ XMDECN4(float _x, float _y, float _z, float _w);
+ explicit XMDECN4(_In_reads_(4) const float *pArray);
+
+ operator uint32_t () const { return v; }
+
+ XMDECN4& operator= (const XMDECN4& DecN4) { v = DecN4.v; return *this; }
+ XMDECN4& operator= (uint32_t Packed) { v = Packed; return *this; }
+};
+
+// 4D Vector; 10-10-10-2 bit components packed into a 32 bit integer
+// The 4D Vector is packed into 32 bits as follows: a 2 bit signed,
+// integer for the w component and 10 bit signed integers for the
+// z, y, and x components. The w component is stored in the
+// most significant bits and the x component in the least significant bits
+// (W2Z10Y10X10): [32] wwzzzzzz zzzzyyyy yyyyyyxx xxxxxxxx [0]
+struct XMDEC4
+{
+ union
+ {
+ struct
+ {
+ int32_t x : 10; // -511 to 511
+ int32_t y : 10; // -511 to 511
+ int32_t z : 10; // -511 to 511
+ int32_t w : 2; // -1 to 1
+ };
+ uint32_t v;
+ };
+
+ XMDEC4() {}
+ explicit XMDEC4(uint32_t Packed) : v(Packed) {}
+ XMDEC4(float _x, float _y, float _z, float _w);
+ explicit XMDEC4(_In_reads_(4) const float *pArray);
+
+ operator uint32_t () const { return v; }
+
+ XMDEC4& operator= (const XMDEC4& Dec4) { v = Dec4.v; return *this; }
+ XMDEC4& operator= (uint32_t Packed) { v = Packed; return *this; }
+};
+
+// 4D Vector; 10-10-10-2 bit normalized components packed into a 32 bit integer
+// The normalized 4D Vector is packed into 32 bits as follows: a 2 bit unsigned,
+// normalized integer for the w component and 10 bit unsigned, normalized
+// integers for the z, y, and x components. The w component is stored in the
+// most significant bits and the x component in the least significant bits
+// (W2Z10Y10X10): [32] wwzzzzzz zzzzyyyy yyyyyyxx xxxxxxxx [0]
+struct XMUDECN4
+{
+ union
+ {
+ struct
+ {
+ uint32_t x : 10; // 0/1023 to 1023/1023
+ uint32_t y : 10; // 0/1023 to 1023/1023
+ uint32_t z : 10; // 0/1023 to 1023/1023
+ uint32_t w : 2; // 0/3 to 3/3
+ };
+ uint32_t v;
+ };
+
+ XMUDECN4() {}
+ explicit XMUDECN4(uint32_t Packed) : v(Packed) {}
+ XMUDECN4(float _x, float _y, float _z, float _w);
+ explicit XMUDECN4(_In_reads_(4) const float *pArray);
+
+ operator uint32_t () const { return v; }
+
+ XMUDECN4& operator= (const XMUDECN4& UDecN4) { v = UDecN4.v; return *this; }
+ XMUDECN4& operator= (uint32_t Packed) { v = Packed; return *this; }
+};
+
+// 4D Vector; 10-10-10-2 bit components packed into a 32 bit integer
+// The 4D Vector is packed into 32 bits as follows: a 2 bit unsigned,
+// integer for the w component and 10 bit unsigned integers
+// for the z, y, and x components. The w component is stored in the
+// most significant bits and the x component in the least significant bits
+// (W2Z10Y10X10): [32] wwzzzzzz zzzzyyyy yyyyyyxx xxxxxxxx [0]
+struct XMUDEC4
+{
+ union
+ {
+ struct
+ {
+ uint32_t x : 10; // 0 to 1023
+ uint32_t y : 10; // 0 to 1023
+ uint32_t z : 10; // 0 to 1023
+ uint32_t w : 2; // 0 to 3
+ };
+ uint32_t v;
+ };
+
+ XMUDEC4() {}
+ explicit XMUDEC4(uint32_t Packed) : v(Packed) {}
+ XMUDEC4(float _x, float _y, float _z, float _w);
+ explicit XMUDEC4(_In_reads_(4) const float *pArray);
+
+ operator uint32_t () const { return v; }
+
+ XMUDEC4& operator= (const XMUDEC4& UDec4) { v = UDec4.v; return *this; }
+ XMUDEC4& operator= (uint32_t Packed) { v = Packed; return *this; }
+};
+
+//------------------------------------------------------------------------------
+// 4D Vector; 8 bit signed normalized integer components
+struct XMBYTEN4
+{
+ union
+ {
+ struct
+ {
+ int8_t x;
+ int8_t y;
+ int8_t z;
+ int8_t w;
+ };
+ uint32_t v;
+ };
+
+ XMBYTEN4() {}
+ XMBYTEN4(int8_t _x, int8_t _y, int8_t _z, int8_t _w) : x(_x), y(_y), z(_z), w(_w) {}
+ explicit XMBYTEN4(uint32_t Packed) : v(Packed) {}
+ explicit XMBYTEN4(_In_reads_(4) const int8_t *pArray) : x(pArray[0]), y(pArray[1]), z(pArray[2]), w(pArray[3]) {}
+ XMBYTEN4(float _x, float _y, float _z, float _w);
+ explicit XMBYTEN4(_In_reads_(4) const float *pArray);
+
+ XMBYTEN4& operator= (const XMBYTEN4& ByteN4) { x = ByteN4.x; y = ByteN4.y; z = ByteN4.z; w = ByteN4.w; return *this; }
+ XMBYTEN4& operator= (uint32_t Packed) { v = Packed; return *this; }
+};
+
+// 4D Vector; 8 bit signed integer components
+struct XMBYTE4
+{
+ union
+ {
+ struct
+ {
+ int8_t x;
+ int8_t y;
+ int8_t z;
+ int8_t w;
+ };
+ uint32_t v;
+ };
+
+ XMBYTE4() {}
+ XMBYTE4(int8_t _x, int8_t _y, int8_t _z, int8_t _w) : x(_x), y(_y), z(_z), w(_w) {}
+ explicit XMBYTE4(uint32_t Packed) : v(Packed) {}
+ explicit XMBYTE4(_In_reads_(4) const int8_t *pArray) : x(pArray[0]), y(pArray[1]), z(pArray[2]), w(pArray[3]) {}
+ XMBYTE4(float _x, float _y, float _z, float _w);
+ explicit XMBYTE4(_In_reads_(4) const float *pArray);
+
+ XMBYTE4& operator= (const XMBYTE4& Byte4) { x = Byte4.x; y = Byte4.y; z = Byte4.z; w = Byte4.w; return *this; }
+ XMBYTE4& operator= (uint32_t Packed) { v = Packed; return *this; }
+};
+
+// 4D Vector; 8 bit unsigned normalized integer components
+struct XMUBYTEN4
+{
+ union
+ {
+ struct
+ {
+ uint8_t x;
+ uint8_t y;
+ uint8_t z;
+ uint8_t w;
+ };
+ uint32_t v;
+ };
+
+ XMUBYTEN4() {}
+ XMUBYTEN4(uint8_t _x, uint8_t _y, uint8_t _z, uint8_t _w) : x(_x), y(_y), z(_z), w(_w) {}
+ explicit XMUBYTEN4(uint32_t Packed) : v(Packed) {}
+ explicit XMUBYTEN4(_In_reads_(4) const uint8_t *pArray) : x(pArray[0]), y(pArray[1]), z(pArray[2]), w(pArray[3]) {}
+ XMUBYTEN4(float _x, float _y, float _z, float _w);
+ explicit XMUBYTEN4(_In_reads_(4) const float *pArray);
+
+ XMUBYTEN4& operator= (const XMUBYTEN4& UByteN4) { x = UByteN4.x; y = UByteN4.y; z = UByteN4.z; w = UByteN4.w; return *this; }
+ XMUBYTEN4& operator= (uint32_t Packed) { v = Packed; return *this; }
+};
+
+// 4D Vector; 8 bit unsigned integer components
+struct XMUBYTE4
+{
+ union
+ {
+ struct
+ {
+ uint8_t x;
+ uint8_t y;
+ uint8_t z;
+ uint8_t w;
+ };
+ uint32_t v;
+ };
+
+ XMUBYTE4() {}
+ XMUBYTE4(uint8_t _x, uint8_t _y, uint8_t _z, uint8_t _w) : x(_x), y(_y), z(_z), w(_w) {}
+ explicit XMUBYTE4(uint32_t Packed) : v(Packed) {}
+ explicit XMUBYTE4(_In_reads_(4) const uint8_t *pArray) : x(pArray[0]), y(pArray[1]), z(pArray[2]), w(pArray[3]) {}
+ XMUBYTE4(float _x, float _y, float _z, float _w);
+ explicit XMUBYTE4(_In_reads_(4) const float *pArray);
+
+ XMUBYTE4& operator= (const XMUBYTE4& UByte4) { x = UByte4.x; y = UByte4.y; z = UByte4.z; w = UByte4.w; return *this; }
+ XMUBYTE4& operator= (uint32_t Packed) { v = Packed; return *this; }
+};
+
+//------------------------------------------------------------------------------
+// 4D vector; 4 bit unsigned integer components
+struct XMUNIBBLE4
+{
+ union
+ {
+ struct
+ {
+ uint16_t x : 4; // 0 to 15
+ uint16_t y : 4; // 0 to 15
+ uint16_t z : 4; // 0 to 15
+ uint16_t w : 4; // 0 to 15
+ };
+ uint16_t v;
+ };
+
+ XMUNIBBLE4() {}
+ explicit XMUNIBBLE4(uint16_t Packed) : v(Packed) {}
+ XMUNIBBLE4(int8_t _x, int8_t _y, int8_t _z, int8_t _w) : x(_x), y(_y), z(_z), w(_w) {}
+ explicit XMUNIBBLE4(_In_reads_(4) const int8_t *pArray) : x(pArray[0]), y(pArray[1]), z(pArray[2]), w(pArray[3]) {}
+ XMUNIBBLE4(float _x, float _y, float _z, float _w);
+ explicit XMUNIBBLE4(_In_reads_(4) const float *pArray);
+
+ operator uint16_t () const { return v; }
+
+ XMUNIBBLE4& operator= (const XMUNIBBLE4& UNibble4) { v = UNibble4.v; return *this; }
+ XMUNIBBLE4& operator= (uint16_t Packed) { v = Packed; return *this; }
+};
+
+//------------------------------------------------------------------------------
+// 4D vector: 5/5/5/1 unsigned integer components
+struct XMU555
+{
+ union
+ {
+ struct
+ {
+ uint16_t x : 5; // 0 to 31
+ uint16_t y : 5; // 0 to 31
+ uint16_t z : 5; // 0 to 31
+ uint16_t w : 1; // 0 or 1
+ };
+ uint16_t v;
+ };
+
+ XMU555() {}
+ explicit XMU555(uint16_t Packed) : v(Packed) {}
+ XMU555(int8_t _x, int8_t _y, int8_t _z, bool _w) : x(_x), y(_y), z(_z), w(_w ? 0x1 : 0) {}
+ XMU555(_In_reads_(3) const int8_t *pArray, _In_ bool _w) : x(pArray[0]), y(pArray[1]), z(pArray[2]), w(_w ? 0x1 : 0) {}
+ XMU555(float _x, float _y, float _z, bool _w);
+ XMU555(_In_reads_(3) const float *pArray, _In_ bool _w);
+
+ operator uint16_t () const { return v; }
+
+ XMU555& operator= (const XMU555& U555) { v = U555.v; return *this; }
+ XMU555& operator= (uint16_t Packed) { v = Packed; return *this; }
+};
+
+
+#pragma warning(pop)
+
+#ifdef _XM_BIGENDIAN_
+#pragma bitfield_order(pop)
+#endif
+
+
+/****************************************************************************
+ *
+ * Data conversion operations
+ *
+ ****************************************************************************/
+
+float XMConvertHalfToFloat(HALF Value);
+float* XMConvertHalfToFloatStream(_Out_writes_bytes_(sizeof(float)+OutputStride*(HalfCount-1)) float* pOutputStream,
+ _In_ size_t OutputStride,
+ _In_reads_bytes_(sizeof(HALF)+InputStride*(HalfCount-1)) const HALF* pInputStream,
+ _In_ size_t InputStride, _In_ size_t HalfCount);
+HALF XMConvertFloatToHalf(float Value);
+HALF* XMConvertFloatToHalfStream(_Out_writes_bytes_(sizeof(HALF)+OutputStride*(FloatCount-1)) HALF* pOutputStream,
+ _In_ size_t OutputStride,
+ _In_reads_bytes_(sizeof(float)+InputStride*(FloatCount-1)) const float* pInputStream,
+ _In_ size_t InputStride, _In_ size_t FloatCount);
+
+/****************************************************************************
+ *
+ * Load operations
+ *
+ ****************************************************************************/
+
+XMVECTOR XMLoadColor(_In_ const XMCOLOR* pSource);
+
+XMVECTOR XMLoadHalf2(_In_ const XMHALF2* pSource);
+XMVECTOR XMLoadShortN2(_In_ const XMSHORTN2* pSource);
+XMVECTOR XMLoadShort2(_In_ const XMSHORT2* pSource);
+XMVECTOR XMLoadUShortN2(_In_ const XMUSHORTN2* pSource);
+XMVECTOR XMLoadUShort2(_In_ const XMUSHORT2* pSource);
+XMVECTOR XMLoadByteN2(_In_ const XMBYTEN2* pSource);
+XMVECTOR XMLoadByte2(_In_ const XMBYTE2* pSource);
+XMVECTOR XMLoadUByteN2(_In_ const XMUBYTEN2* pSource);
+XMVECTOR XMLoadUByte2(_In_ const XMUBYTE2* pSource);
+
+XMVECTOR XMLoadU565(_In_ const XMU565* pSource);
+XMVECTOR XMLoadFloat3PK(_In_ const XMFLOAT3PK* pSource);
+XMVECTOR XMLoadFloat3SE(_In_ const XMFLOAT3SE* pSource);
+
+XMVECTOR XMLoadHalf4(_In_ const XMHALF4* pSource);
+XMVECTOR XMLoadShortN4(_In_ const XMSHORTN4* pSource);
+XMVECTOR XMLoadShort4(_In_ const XMSHORT4* pSource);
+XMVECTOR XMLoadUShortN4(_In_ const XMUSHORTN4* pSource);
+XMVECTOR XMLoadUShort4(_In_ const XMUSHORT4* pSource);
+XMVECTOR XMLoadXDecN4(_In_ const XMXDECN4* pSource);
+XMVECTOR XMLoadXDec4(_In_ const XMXDEC4* pSource);
+XMVECTOR XMLoadDecN4(_In_ const XMDECN4* pSource);
+XMVECTOR XMLoadDec4(_In_ const XMDEC4* pSource);
+XMVECTOR XMLoadUDecN4(_In_ const XMUDECN4* pSource);
+XMVECTOR XMLoadUDec4(_In_ const XMUDEC4* pSource);
+XMVECTOR XMLoadByteN4(_In_ const XMBYTEN4* pSource);
+XMVECTOR XMLoadByte4(_In_ const XMBYTE4* pSource);
+XMVECTOR XMLoadUByteN4(_In_ const XMUBYTEN4* pSource);
+XMVECTOR XMLoadUByte4(_In_ const XMUBYTE4* pSource);
+XMVECTOR XMLoadUNibble4(_In_ const XMUNIBBLE4* pSource);
+XMVECTOR XMLoadU555(_In_ const XMU555* pSource);
+
+
+/****************************************************************************
+ *
+ * Store operations
+ *
+ ****************************************************************************/
+
+void XMStoreColor(_Out_ XMCOLOR* pDestination, _In_ FXMVECTOR V);
+
+void XMStoreHalf2(_Out_ XMHALF2* pDestination, _In_ FXMVECTOR V);
+void XMStoreShortN2(_Out_ XMSHORTN2* pDestination, _In_ FXMVECTOR V);
+void XMStoreShort2(_Out_ XMSHORT2* pDestination, _In_ FXMVECTOR V);
+void XMStoreUShortN2(_Out_ XMUSHORTN2* pDestination, _In_ FXMVECTOR V);
+void XMStoreUShort2(_Out_ XMUSHORT2* pDestination, _In_ FXMVECTOR V);
+void XMStoreByteN2(_Out_ XMBYTEN2* pDestination, _In_ FXMVECTOR V);
+void XMStoreByte2(_Out_ XMBYTE2* pDestination, _In_ FXMVECTOR V);
+void XMStoreUByteN2(_Out_ XMUBYTEN2* pDestination, _In_ FXMVECTOR V);
+void XMStoreUByte2(_Out_ XMUBYTE2* pDestination, _In_ FXMVECTOR V);
+
+void XMStoreU565(_Out_ XMU565* pDestination, _In_ FXMVECTOR V);
+void XMStoreFloat3PK(_Out_ XMFLOAT3PK* pDestination, _In_ FXMVECTOR V);
+void XMStoreFloat3SE(_Out_ XMFLOAT3SE* pDestination, _In_ FXMVECTOR V);
+
+void XMStoreHalf4(_Out_ XMHALF4* pDestination, _In_ FXMVECTOR V);
+void XMStoreShortN4(_Out_ XMSHORTN4* pDestination, _In_ FXMVECTOR V);
+void XMStoreShort4(_Out_ XMSHORT4* pDestination, _In_ FXMVECTOR V);
+void XMStoreUShortN4(_Out_ XMUSHORTN4* pDestination, _In_ FXMVECTOR V);
+void XMStoreUShort4(_Out_ XMUSHORT4* pDestination, _In_ FXMVECTOR V);
+void XMStoreXDecN4(_Out_ XMXDECN4* pDestination, _In_ FXMVECTOR V);
+void XMStoreXDec4(_Out_ XMXDEC4* pDestination, _In_ FXMVECTOR V);
+void XMStoreDecN4(_Out_ XMDECN4* pDestination, _In_ FXMVECTOR V);
+void XMStoreDec4(_Out_ XMDEC4* pDestination, _In_ FXMVECTOR V);
+void XMStoreUDecN4(_Out_ XMUDECN4* pDestination, _In_ FXMVECTOR V);
+void XMStoreUDec4(_Out_ XMUDEC4* pDestination, _In_ FXMVECTOR V);
+void XMStoreByteN4(_Out_ XMBYTEN4* pDestination, _In_ FXMVECTOR V);
+void XMStoreByte4(_Out_ XMBYTE4* pDestination, _In_ FXMVECTOR V);
+void XMStoreUByteN4(_Out_ XMUBYTEN4* pDestination, _In_ FXMVECTOR V);
+void XMStoreUByte4(_Out_ XMUBYTE4* pDestination, _In_ FXMVECTOR V);
+void XMStoreUNibble4(_Out_ XMUNIBBLE4* pDestination, _In_ FXMVECTOR V);
+void XMStoreU555(_Out_ XMU555* pDestination, _In_ FXMVECTOR V);
+
+
+/****************************************************************************
+ *
+ * Implementation
+ *
+ ****************************************************************************/
+
+#pragma warning(push)
+#pragma warning(disable:4068 4214 4204 4365 4616 6001)
+
+#pragma prefast(push)
+#pragma prefast(disable : 25000, "FXMVECTOR is 16 bytes")
+
+#include "DirectXPackedVector.inl"
+
+#pragma prefast(pop)
+#pragma warning(pop)
+
+}; // namespace PackedVector
+
+}; // namespace DirectX
+
+
diff --git a/Minecraft.Client/PS3/PS3Extras/DirectX/DirectXPackedVector.inl b/Minecraft.Client/PS3/PS3Extras/DirectX/DirectXPackedVector.inl
new file mode 100644
index 00000000..b4ed1a77
--- /dev/null
+++ b/Minecraft.Client/PS3/PS3Extras/DirectX/DirectXPackedVector.inl
@@ -0,0 +1,3545 @@
+//-------------------------------------------------------------------------------------
+// DirectXPackedVector.inl -- SIMD C++ Math library
+//
+// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
+// ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
+// PARTICULAR PURPOSE.
+//
+// Copyright (c) Microsoft Corporation. All rights reserved.
+//-------------------------------------------------------------------------------------
+
+#ifdef _MSC_VER
+#pragma once
+#endif
+
+
+/****************************************************************************
+ *
+ * Data conversion
+ *
+ ****************************************************************************/
+
+//------------------------------------------------------------------------------
+
+// Converts a half-precision (s10e5) value to a single-precision float by
+// unpacking the sign/exponent/mantissa fields and rebiasing the exponent
+// (+112 == 127 - 15).  Denormalized halves are renormalized to fit the
+// float's wider exponent range; +/-0 is preserved.
+inline float PackedVector::XMConvertHalfToFloat
+(
+    HALF Value
+)
+{
+#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
+
+    uint32_t Mantissa = (uint32_t)(Value & 0x03FF);
+
+    uint32_t Exponent;
+    if ((Value & 0x7C00) != 0) // The value is normalized
+    {
+        Exponent = (uint32_t)((Value >> 10) & 0x1F);
+    }
+    else if (Mantissa != 0) // The value is denormalized
+    {
+        // Normalize the value in the resulting float
+        Exponent = 1;
+
+        do
+        {
+            Exponent--;
+            Mantissa <<= 1;
+        } while ((Mantissa & 0x0400) == 0);
+
+        Mantissa &= 0x03FF;
+    }
+    else // The value is zero
+    {
+        // (uint32_t)-112 cancels the +112 rebias below, producing a zero
+        // exponent field (signed zero result).
+        Exponent = (uint32_t)-112;
+    }
+
+    uint32_t Result = ((Value & 0x8000) << 16) | // Sign
+        ((Exponent + 112) << 23) | // Exponent
+        (Mantissa << 13); // Mantissa
+
+    return reinterpret_cast<float*>(&Result)[0]; // type-pun the assembled bits
+#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
+    // NOTE(review): empty (stripped VMX128) branch -- the function falls off
+    // the end here.  Presumably unreachable on the targets this port builds
+    // for; confirm the build never defines only this macro.
+#endif
+}
+
+//------------------------------------------------------------------------------
+// Converts a strided stream of HALF values to a strided stream of floats.
+// InputStride/OutputStride are byte offsets between consecutive elements;
+// the return value is pOutputStream.
+_Use_decl_annotations_
+inline float* PackedVector::XMConvertHalfToFloatStream
+(
+    float* pOutputStream,
+    size_t OutputStride,
+    const HALF* pInputStream,
+    size_t InputStride,
+    size_t HalfCount
+)
+{
+    assert(pOutputStream);
+    assert(pInputStream);
+#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_) || defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
+
+    // Walk both streams with byte-granular cursors so arbitrary strides work.
+    const uint8_t* pSrc = reinterpret_cast<const uint8_t*>(pInputStream);
+    uint8_t* pDst = reinterpret_cast<uint8_t*>(pOutputStream);
+
+    size_t remaining = HalfCount;
+    while (remaining--)
+    {
+        *reinterpret_cast<float*>(pDst) =
+            XMConvertHalfToFloat(*reinterpret_cast<const HALF*>(pSrc));
+        pSrc += InputStride;
+        pDst += OutputStride;
+    }
+
+    return pOutputStream;
+
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Converts a single-precision float to a half (s10e5) with round-to-nearest-
+// even.  Magnitudes too large for a finite half saturate to 0x7FFF (legacy
+// D3DX convention).  NOTE(review): 0x7FFF is a NaN bit pattern under strict
+// IEEE-754 half rules, despite the "Saturate to infinity" comment below --
+// presumably intentional legacy behavior; confirm if IEEE INF semantics are
+// required.  Values below the normalized half range become half denormals.
+inline PackedVector::HALF PackedVector::XMConvertFloatToHalf
+(
+    float Value
+)
+{
+#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
+    uint32_t Result;
+
+    uint32_t IValue = reinterpret_cast<uint32_t *>(&Value)[0];
+    uint32_t Sign = (IValue & 0x80000000U) >> 16U;
+    IValue = IValue & 0x7FFFFFFFU; // Hack off the sign
+
+    // 0x47FFEFFF is the largest float bit pattern that still rounds to a
+    // representable half below.
+    if (IValue > 0x47FFEFFFU)
+    {
+        // The number is too large to be represented as a half. Saturate to infinity.
+        Result = 0x7FFFU;
+    }
+    else
+    {
+        if (IValue < 0x38800000U)
+        {
+            // The number is too small to be represented as a normalized half.
+            // Convert it to a denormalized value.
+            uint32_t Shift = 113U - (IValue >> 23U);
+            IValue = (0x800000U | (IValue & 0x7FFFFFU)) >> Shift;
+        }
+        else
+        {
+            // Rebias the exponent to represent the value as a normalized half.
+            IValue += 0xC8000000U;
+        }
+
+        // Round to nearest even, then keep the low 15 bits (sign added below).
+        Result = ((IValue + 0x0FFFU + ((IValue >> 13U) & 1U)) >> 13U)&0x7FFFU;
+    }
+    return (HALF)(Result|Sign);
+#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
+    // NOTE(review): empty stripped branch -- falls off the end; presumably
+    // unreachable on targets this port builds for.
+#endif
+}
+
+//------------------------------------------------------------------------------
+// Converts a strided stream of floats to a strided stream of HALF values.
+// InputStride/OutputStride are byte offsets between consecutive elements;
+// the return value is pOutputStream.
+_Use_decl_annotations_
+inline PackedVector::HALF* PackedVector::XMConvertFloatToHalfStream
+(
+    HALF* pOutputStream,
+    size_t OutputStride,
+    const float* pInputStream,
+    size_t InputStride,
+    size_t FloatCount
+)
+{
+    assert(pOutputStream);
+    assert(pInputStream);
+#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_) || defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
+
+    // Byte-granular cursors let the caller use any stride they like.
+    const uint8_t* pSrc = reinterpret_cast<const uint8_t*>(pInputStream);
+    uint8_t* pDst = reinterpret_cast<uint8_t*>(pOutputStream);
+
+    size_t remaining = FloatCount;
+    while (remaining--)
+    {
+        *reinterpret_cast<HALF*>(pDst) =
+            XMConvertFloatToHalf(*reinterpret_cast<const float*>(pSrc));
+        pSrc += InputStride;
+        pDst += OutputStride;
+    }
+    return pOutputStream;
+
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+/****************************************************************************
+ *
+ * Vector and matrix load operations
+ *
+ ****************************************************************************/
+// Loads an A8R8G8B8 XMCOLOR (alpha in bits 24-31, red 16-23, green 8-15,
+// blue 0-7) into a float vector with x=R, y=G, z=B, w=A, each component
+// normalized to [0,1] by 1/255.
+_Use_decl_annotations_
+inline XMVECTOR PackedVector::XMLoadColor
+(
+    const XMCOLOR* pSource
+)
+{
+    assert(pSource);
+#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
+    // int32_t -> Float conversions are done in one instruction.
+    // uint32_t -> Float calls a runtime function. Keep in int32_t
+    int32_t iColor = (int32_t)(pSource->c);
+    XMVECTORF32 vColor = {
+        (float)((iColor >> 16) & 0xFF) * (1.0f/255.0f),
+        (float)((iColor >> 8) & 0xFF) * (1.0f/255.0f),
+        (float)(iColor & 0xFF) * (1.0f/255.0f),
+        (float)((iColor >> 24) & 0xFF) * (1.0f/255.0f)
+    };
+    return vColor.v;
+#elif defined(_XM_SSE_INTRINSICS_)
+    // Splat the color in all four entries
+    __m128i vInt = _mm_set1_epi32(pSource->c);
+    // Shift R&0xFF0000, G&0xFF00, B&0xFF, A&0xFF000000
+    vInt = _mm_and_si128(vInt,g_XMMaskA8R8G8B8);
+    // a is unsigned! Flip the bit to convert the order to signed
+    vInt = _mm_xor_si128(vInt,g_XMFlipA8R8G8B8);
+    // Convert to floating point numbers
+    XMVECTOR vTemp = _mm_cvtepi32_ps(vInt);
+    // RGB + 0, A + 0x80000000.f to undo the signed order.
+    vTemp = _mm_add_ps(vTemp,g_XMFixAA8R8G8B8);
+    // Convert 0-255 to 0.0f-1.0f
+    return _mm_mul_ps(vTemp,g_XMNormalizeA8R8G8B8);
+#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+// Widens the two half-precision components of an XMHALF2 to floats;
+// the z and w lanes are zero-filled.
+_Use_decl_annotations_
+inline XMVECTOR PackedVector::XMLoadHalf2
+(
+    const XMHALF2* pSource
+)
+{
+    assert(pSource);
+#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
+    const float fx = XMConvertHalfToFloat(pSource->x);
+    const float fy = XMConvertHalfToFloat(pSource->y);
+    XMVECTORF32 vResult = { fx, fy, 0.0f, 0.0f };
+    return vResult.v;
+#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+// Loads a 2D SNORM16 vector: each int16 is mapped onto [-1,1] as x/32767,
+// with -32768 clamped to -1 (so both -32768 and -32767 mean -1.0).
+// The z and w lanes are set to zero.
+_Use_decl_annotations_
+inline XMVECTOR PackedVector::XMLoadShortN2
+(
+    const XMSHORTN2* pSource
+)
+{
+    assert(pSource);
+#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
+    XMVECTORF32 vResult = {
+        (pSource->x == -32768) ? -1.f : ((float)pSource->x * (1.0f/32767.0f)),
+        (pSource->y == -32768) ? -1.f : ((float)pSource->y * (1.0f/32767.0f)),
+        0.0f,
+        0.0f
+    };
+    return vResult.v;
+#elif defined(_XM_SSE_INTRINSICS_)
+    // Splat the two shorts in all four entries (WORD alignment okay,
+    // DWORD alignment preferred)
+    __m128 vTemp = _mm_load_ps1(reinterpret_cast<const float *>(&pSource->x));
+    // Mask x&0xFFFF, y&0xFFFF0000,z&0,w&0
+    vTemp = _mm_and_ps(vTemp,g_XMMaskX16Y16);
+    // x needs to be sign extended
+    vTemp = _mm_xor_ps(vTemp,g_XMFlipX16Y16);
+    // Convert to floating point numbers
+    vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp));
+    // x - 0x8000 to undo the signed order.
+    vTemp = _mm_add_ps(vTemp,g_XMFixX16Y16);
+    // Convert -1.0f - 1.0f
+    vTemp = _mm_mul_ps(vTemp,g_XMNormalizeX16Y16);
+    // Clamp result (for case of -32768)
+    return _mm_max_ps( vTemp, g_XMNegativeOne );
+#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+// Loads a 2D signed 16-bit integer vector as unnormalized floats
+// (x and y carry the raw integer values; z and w are set to zero).
+_Use_decl_annotations_
+inline XMVECTOR PackedVector::XMLoadShort2
+(
+    const XMSHORT2* pSource
+)
+{
+    assert(pSource);
+#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
+    XMVECTORF32 vResult = {
+        (float)pSource->x,
+        (float)pSource->y,
+        0.f,
+        0.f
+    };
+    return vResult.v;
+#elif defined(_XM_SSE_INTRINSICS_)
+    // Splat the two shorts in all four entries (WORD alignment okay,
+    // DWORD alignment preferred)
+    __m128 vTemp = _mm_load_ps1(reinterpret_cast<const float *>(&pSource->x));
+    // Mask x&0xFFFF, y&0xFFFF0000,z&0,w&0
+    vTemp = _mm_and_ps(vTemp,g_XMMaskX16Y16);
+    // x needs to be sign extended
+    vTemp = _mm_xor_ps(vTemp,g_XMFlipX16Y16);
+    // Convert to floating point numbers
+    vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp));
+    // x - 0x8000 to undo the signed order.
+    vTemp = _mm_add_ps(vTemp,g_XMFixX16Y16);
+    // Y is 65536 too large
+    return _mm_mul_ps(vTemp,g_XMFixupY16);
+#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+// Loads a 2D UNORM16 vector: each uint16 is mapped onto [0,1] as x/65535.
+// The z and w lanes are set to zero.
+_Use_decl_annotations_
+inline XMVECTOR PackedVector::XMLoadUShortN2
+(
+    const XMUSHORTN2* pSource
+)
+{
+    assert(pSource);
+#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
+    XMVECTORF32 vResult = {
+        (float)pSource->x / 65535.0f,
+        (float)pSource->y / 65535.0f,
+        0.f,
+        0.f
+    };
+    return vResult.v;
+#elif defined(_XM_SSE_INTRINSICS_)
+    // Per-lane normalization (y also undoes the 65536x scale it picks up
+    // from sitting in the high half of the splatted dword).
+    static const XMVECTORF32 FixupY16 = {1.0f/65535.0f,1.0f/(65535.0f*65536.0f),0.0f,0.0f};
+    static const XMVECTORF32 FixaddY16 = {0,32768.0f*65536.0f,0,0};
+    // Splat the two shorts in all four entries (WORD alignment okay,
+    // DWORD alignment preferred)
+    __m128 vTemp = _mm_load_ps1(reinterpret_cast<const float *>(&pSource->x));
+    // Mask x&0xFFFF, y&0xFFFF0000,z&0,w&0
+    vTemp = _mm_and_ps(vTemp,g_XMMaskX16Y16);
+    // y needs to be sign flipped
+    vTemp = _mm_xor_ps(vTemp,g_XMFlipY);
+    // Convert to floating point numbers
+    vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp));
+    // y + 0x8000 to undo the signed order.
+    vTemp = _mm_add_ps(vTemp,FixaddY16);
+    // Y is 65536 times too large
+    vTemp = _mm_mul_ps(vTemp,FixupY16);
+    return vTemp;
+#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+// Loads a 2D unsigned 16-bit integer vector as unnormalized floats
+// (x and y carry the raw integer values; z and w are set to zero).
+_Use_decl_annotations_
+inline XMVECTOR PackedVector::XMLoadUShort2
+(
+    const XMUSHORT2* pSource
+)
+{
+    assert(pSource);
+#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
+    XMVECTORF32 vResult = {
+        (float)pSource->x,
+        (float)pSource->y,
+        0.f,
+        0.f
+    };
+    return vResult.v;
+#elif defined(_XM_SSE_INTRINSICS_)
+    static const XMVECTORF32 FixaddY16 = {0,32768.0f,0,0};
+    // Splat the two shorts in all four entries (WORD alignment okay,
+    // DWORD alignment preferred)
+    __m128 vTemp = _mm_load_ps1(reinterpret_cast<const float *>(&pSource->x));
+    // Mask x&0xFFFF, y&0xFFFF0000,z&0,w&0
+    vTemp = _mm_and_ps(vTemp,g_XMMaskX16Y16);
+    // y needs to be sign flipped
+    vTemp = _mm_xor_ps(vTemp,g_XMFlipY);
+    // Convert to floating point numbers
+    vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp));
+    // Y is 65536 times too large
+    vTemp = _mm_mul_ps(vTemp,g_XMFixupY16);
+    // y + 0x8000 to undo the signed order.
+    vTemp = _mm_add_ps(vTemp,FixaddY16);
+    return vTemp;
+#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+// Loads a 2D SNORM8 vector: each int8 maps onto [-1,1] as x/127, with
+// -128 clamped to -1 (so both -128 and -127 mean -1.0).  z = w = 0.
+_Use_decl_annotations_
+inline XMVECTOR PackedVector::XMLoadByteN2
+(
+    const XMBYTEN2* pSource
+)
+{
+    assert(pSource);
+    const float fx = (pSource->x == -128) ? -1.f : ((float)pSource->x * (1.0f/127.0f));
+    const float fy = (pSource->y == -128) ? -1.f : ((float)pSource->y * (1.0f/127.0f));
+    XMVECTORF32 vResult = { fx, fy, 0.0f, 0.0f };
+    return vResult.v;
+}
+
+//------------------------------------------------------------------------------
+// Loads a 2D signed 8-bit integer vector as unnormalized floats (z = w = 0).
+_Use_decl_annotations_
+inline XMVECTOR PackedVector::XMLoadByte2
+(
+    const XMBYTE2* pSource
+)
+{
+    assert(pSource);
+    const float fx = (float)pSource->x;
+    const float fy = (float)pSource->y;
+    XMVECTORF32 vResult = { fx, fy, 0.0f, 0.0f };
+    return vResult.v;
+}
+
+//------------------------------------------------------------------------------
+// Loads a 2D UNORM8 vector: each uint8 maps onto [0,1] via 1/255 (z = w = 0).
+_Use_decl_annotations_
+inline XMVECTOR PackedVector::XMLoadUByteN2
+(
+    const XMUBYTEN2* pSource
+)
+{
+    assert(pSource);
+    const float fx = (float)pSource->x * (1.0f/255.0f);
+    const float fy = (float)pSource->y * (1.0f/255.0f);
+    XMVECTORF32 vResult = { fx, fy, 0.0f, 0.0f };
+    return vResult.v;
+}
+
+//------------------------------------------------------------------------------
+// Loads a 2D unsigned 8-bit integer vector as unnormalized floats (z = w = 0).
+_Use_decl_annotations_
+inline XMVECTOR PackedVector::XMLoadUByte2
+(
+    const XMUBYTE2* pSource
+)
+{
+    assert(pSource);
+    const float fx = (float)pSource->x;
+    const float fy = (float)pSource->y;
+    XMVECTORF32 vResult = { fx, fy, 0.0f, 0.0f };
+    return vResult.v;
+}
+
+//------------------------------------------------------------------------------
+// Loads a packed 5:6:5 value as unnormalized floats: x = low 5 bits,
+// y = middle 6 bits, z = top 5 bits, w = 0.
+_Use_decl_annotations_
+inline XMVECTOR PackedVector::XMLoadU565
+(
+    const XMU565* pSource
+)
+{
+    assert(pSource);
+#if defined(_XM_SSE_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
+    static const XMVECTORI32 U565And = {0x1F,0x3F<<5,0x1F<<11,0};
+    static const XMVECTORF32 U565Mul = {1.0f,1.0f/32.0f,1.0f/2048.f,0};
+    // Get the 32 bit value and splat it
+    XMVECTOR vResult = _mm_load_ps1(reinterpret_cast<const float *>(&pSource->v));
+    // Mask off x, y and z
+    vResult = _mm_and_ps(vResult,U565And);
+    // Convert to float
+    vResult = _mm_cvtepi32_ps(_mm_castps_si128(vResult));
+    // Normalize x, y, and z
+    vResult = _mm_mul_ps(vResult,U565Mul);
+    return vResult;
+#else
+    XMVECTORF32 vResult = {
+        float(pSource->v & 0x1F),
+        float((pSource->v >> 5) & 0x3F),
+        float((pSource->v >> 11) & 0x1F),
+        0.f,
+    };
+    return vResult.v;
+#endif // !_XM_SSE_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+// Loads an XMFLOAT3PK (packed 11:11:10 float: x and y are unsigned 6e5
+// floats, z is an unsigned 5e5 float, no sign bits) into the x/y/z lanes
+// of an XMVECTOR.  Each channel is widened by rebiasing the exponent
+// (+112 == 127 - 15) and left-aligning the mantissa in the float's 23-bit
+// mantissa field; denormals are renormalized and INF/NaN propagate.
+// Fixes vs. the original: the z-channel NaN payload is now shifted << 18
+// (consistent with the normalized z path; z has a 5-bit mantissa), and
+// Result[3] is zeroed so the 16-byte aligned load never reads an
+// uninitialized word.
+_Use_decl_annotations_
+inline XMVECTOR PackedVector::XMLoadFloat3PK
+(
+    const XMFLOAT3PK* pSource
+)
+{
+    assert(pSource);
+
+    __declspec(align(16)) uint32_t Result[4];
+    uint32_t Mantissa;
+    uint32_t Exponent;
+
+    // Keep the unused w slot deterministic.
+    Result[3] = 0;
+
+    // X Channel (6-bit mantissa)
+    Mantissa = pSource->xm;
+
+    if ( pSource->xe == 0x1f ) // INF or NAN
+    {
+        Result[0] = 0x7f800000 | (pSource->xm << 17);
+    }
+    else
+    {
+        if ( pSource->xe != 0 ) // The value is normalized
+        {
+            Exponent = pSource->xe;
+        }
+        else if (Mantissa != 0) // The value is denormalized
+        {
+            // Normalize the value in the resulting float
+            Exponent = 1;
+
+            do
+            {
+                Exponent--;
+                Mantissa <<= 1;
+            } while ((Mantissa & 0x40) == 0);
+
+            Mantissa &= 0x3F;
+        }
+        else // The value is zero
+        {
+            Exponent = (uint32_t)-112;
+        }
+
+        Result[0] = ((Exponent + 112) << 23) | (Mantissa << 17);
+    }
+
+    // Y Channel (6-bit mantissa)
+    Mantissa = pSource->ym;
+
+    if ( pSource->ye == 0x1f ) // INF or NAN
+    {
+        Result[1] = 0x7f800000 | (pSource->ym << 17);
+    }
+    else
+    {
+        if ( pSource->ye != 0 ) // The value is normalized
+        {
+            Exponent = pSource->ye;
+        }
+        else if (Mantissa != 0) // The value is denormalized
+        {
+            // Normalize the value in the resulting float
+            Exponent = 1;
+
+            do
+            {
+                Exponent--;
+                Mantissa <<= 1;
+            } while ((Mantissa & 0x40) == 0);
+
+            Mantissa &= 0x3F;
+        }
+        else // The value is zero
+        {
+            Exponent = (uint32_t)-112;
+        }
+
+        Result[1] = ((Exponent + 112) << 23) | (Mantissa << 17);
+    }
+
+    // Z Channel (5-bit mantissa)
+    Mantissa = pSource->zm;
+
+    if ( pSource->ze == 0x1f ) // INF or NAN
+    {
+        // << 18 left-aligns the 5-bit NaN payload, matching the normalized
+        // path below (the original shifted by 17, misplacing the payload).
+        Result[2] = 0x7f800000 | (pSource->zm << 18);
+    }
+    else
+    {
+        if ( pSource->ze != 0 ) // The value is normalized
+        {
+            Exponent = pSource->ze;
+        }
+        else if (Mantissa != 0) // The value is denormalized
+        {
+            // Normalize the value in the resulting float
+            Exponent = 1;
+
+            do
+            {
+                Exponent--;
+                Mantissa <<= 1;
+            } while ((Mantissa & 0x20) == 0);
+
+            Mantissa &= 0x1F;
+        }
+        else // The value is zero
+        {
+            Exponent = (uint32_t)-112;
+        }
+
+        Result[2] = ((Exponent + 112) << 23) | (Mantissa << 18);
+    }
+
+    return XMLoadFloat3A( reinterpret_cast<const XMFLOAT3A*>(&Result) );
+}
+
+//------------------------------------------------------------------------------
+// Loads an XMFLOAT3SE (shared-exponent 9:9:9:5 packed value): the single
+// 5-bit exponent e is applied to all three 9-bit mantissas.
+// NOTE(review): when e != 0 this decoder treats each channel as a
+// *normalized* float (implicit leading 1 on the mantissa), which differs
+// from DXGI_FORMAT_R9G9B9E5_SHAREDEXP where mantissas carry no implicit
+// bit (value = m/512 * 2^(e-15)).  Confirm against the paired
+// XMStoreFloat3SE encoder before changing either side.
+// NOTE(review): Result[3] is left uninitialized; XMLoadFloat3A only
+// defines x/y/z, but a 16-byte load may still touch the stale w word.
+_Use_decl_annotations_
+inline XMVECTOR PackedVector::XMLoadFloat3SE
+(
+    const XMFLOAT3SE* pSource
+)
+{
+    assert(pSource);
+
+    __declspec(align(16)) uint32_t Result[4];
+    uint32_t Mantissa;
+    uint32_t Exponent, ExpBits;
+
+    if ( pSource->e == 0x1f ) // INF or NAN
+    {
+        Result[0] = 0x7f800000 | (pSource->xm << 14);
+        Result[1] = 0x7f800000 | (pSource->ym << 14);
+        Result[2] = 0x7f800000 | (pSource->zm << 14);
+    }
+    else if ( pSource->e != 0 ) // The values are all normalized
+    {
+        Exponent = pSource->e;
+
+        // Shared exponent field, rebias +112 == 127 - 15.
+        ExpBits = (Exponent + 112) << 23;
+
+        Mantissa = pSource->xm;
+        Result[0] = ExpBits | (Mantissa << 14);
+
+        Mantissa = pSource->ym;
+        Result[1] = ExpBits | (Mantissa << 14);
+
+        Mantissa = pSource->zm;
+        Result[2] = ExpBits | (Mantissa << 14);
+    }
+    else
+    {
+        // e == 0: each channel is decoded independently as a denormal.
+        // X Channel
+        Mantissa = pSource->xm;
+
+        if (Mantissa != 0) // The value is denormalized
+        {
+            // Normalize the value in the resulting float
+            Exponent = 1;
+
+            do
+            {
+                Exponent--;
+                Mantissa <<= 1;
+            } while ((Mantissa & 0x200) == 0);
+
+            Mantissa &= 0x1FF;
+        }
+        else // The value is zero
+        {
+            Exponent = (uint32_t)-112;
+        }
+
+        Result[0] = ((Exponent + 112) << 23) | (Mantissa << 14);
+
+        // Y Channel
+        Mantissa = pSource->ym;
+
+        if (Mantissa != 0) // The value is denormalized
+        {
+            // Normalize the value in the resulting float
+            Exponent = 1;
+
+            do
+            {
+                Exponent--;
+                Mantissa <<= 1;
+            } while ((Mantissa & 0x200) == 0);
+
+            Mantissa &= 0x1FF;
+        }
+        else // The value is zero
+        {
+            Exponent = (uint32_t)-112;
+        }
+
+        Result[1] = ((Exponent + 112) << 23) | (Mantissa << 14);
+
+        // Z Channel
+        Mantissa = pSource->zm;
+
+        if (Mantissa != 0) // The value is denormalized
+        {
+            // Normalize the value in the resulting float
+            Exponent = 1;
+
+            do
+            {
+                Exponent--;
+                Mantissa <<= 1;
+            } while ((Mantissa & 0x200) == 0);
+
+            Mantissa &= 0x1FF;
+        }
+        else // The value is zero
+        {
+            Exponent = (uint32_t)-112;
+        }
+
+        Result[2] = ((Exponent + 112) << 23) | (Mantissa << 14);
+    }
+
+    return XMLoadFloat3A( reinterpret_cast<const XMFLOAT3A*>(&Result) );
+}
+
+//------------------------------------------------------------------------------
+// Widens the four half-precision components of an XMHALF4 to floats.
+_Use_decl_annotations_
+inline XMVECTOR PackedVector::XMLoadHalf4
+(
+    const XMHALF4* pSource
+)
+{
+    assert(pSource);
+#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
+    const float fx = XMConvertHalfToFloat(pSource->x);
+    const float fy = XMConvertHalfToFloat(pSource->y);
+    const float fz = XMConvertHalfToFloat(pSource->z);
+    const float fw = XMConvertHalfToFloat(pSource->w);
+    XMVECTORF32 vResult = { fx, fy, fz, fw };
+    return vResult.v;
+#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+// Loads a 4D SNORM16 vector: each int16 maps onto [-1,1] as x/32767,
+// with -32768 clamped to -1.
+_Use_decl_annotations_
+inline XMVECTOR PackedVector::XMLoadShortN4
+(
+    const XMSHORTN4* pSource
+)
+{
+    assert(pSource);
+#if defined(_XM_NO_INTRINSICS_)
+    XMVECTORF32 vResult = {
+        (pSource->x == -32768) ? -1.f : ((float)pSource->x * (1.0f/32767.0f)),
+        (pSource->y == -32768) ? -1.f : ((float)pSource->y * (1.0f/32767.0f)),
+        (pSource->z == -32768) ? -1.f : ((float)pSource->z * (1.0f/32767.0f)),
+        (pSource->w == -32768) ? -1.f : ((float)pSource->w * (1.0f/32767.0f))
+    };
+    return vResult.v;
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+    __n64 vInt = vld1_s16( (const int16_t*)pSource );
+    __n128 V = vmovl_s16( vInt );
+    V = vcvtq_f32_s32( V );
+    const __n128 Scale = vdupq_n_f32( 1.0f/32767.0f );
+    V = vmulq_f32( V, Scale );
+    return vmaxq_f32( V, g_XMNegativeOne );
+#elif defined(_XM_SSE_INTRINSICS_)
+    // Splat the color in all four entries (x,z,y,w)
+    __m128d vIntd = _mm_load1_pd(reinterpret_cast<const double *>(&pSource->x));
+    // Shift x&0ffff,z&0xffff,y&0xffff0000,w&0xffff0000
+    __m128 vTemp = _mm_and_ps(_mm_castpd_ps(vIntd),g_XMMaskX16Y16Z16W16);
+    // x and z are unsigned! Flip the bits to convert the order to signed
+    vTemp = _mm_xor_ps(vTemp,g_XMFlipX16Y16Z16W16);
+    // Convert to floating point numbers
+    vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp));
+    // x and z - 0x8000 to complete the conversion
+    vTemp = _mm_add_ps(vTemp,g_XMFixX16Y16Z16W16);
+    // Convert to -1.0f - 1.0f
+    vTemp = _mm_mul_ps(vTemp,g_XMNormalizeX16Y16Z16W16);
+    // Very important! The entries are x,z,y,w, flip it to x,y,z,w
+    vTemp = XM_PERMUTE_PS(vTemp,_MM_SHUFFLE(3,1,2,0));
+    // Clamp result (for case of -32768)
+    return _mm_max_ps( vTemp, g_XMNegativeOne );
+#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+// Loads a 4D signed 16-bit integer vector as unnormalized floats.
+_Use_decl_annotations_
+inline XMVECTOR PackedVector::XMLoadShort4
+(
+    const XMSHORT4* pSource
+)
+{
+    assert(pSource);
+#if defined(_XM_NO_INTRINSICS_)
+    XMVECTORF32 vResult = {
+        (float)pSource->x,
+        (float)pSource->y,
+        (float)pSource->z,
+        (float)pSource->w
+    };
+    return vResult.v;
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+    __n64 vInt = vld1_s16( (const int16_t*)pSource );
+    __n128 V = vmovl_s16( vInt );
+    return vcvtq_f32_s32( V );
+#elif defined(_XM_SSE_INTRINSICS_)
+    // Splat the color in all four entries (x,z,y,w)
+    __m128d vIntd = _mm_load1_pd(reinterpret_cast<const double *>(&pSource->x));
+    // Shift x&0ffff,z&0xffff,y&0xffff0000,w&0xffff0000
+    __m128 vTemp = _mm_and_ps(_mm_castpd_ps(vIntd),g_XMMaskX16Y16Z16W16);
+    // x and z are unsigned! Flip the bits to convert the order to signed
+    vTemp = _mm_xor_ps(vTemp,g_XMFlipX16Y16Z16W16);
+    // Convert to floating point numbers
+    vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp));
+    // x and z - 0x8000 to complete the conversion
+    vTemp = _mm_add_ps(vTemp,g_XMFixX16Y16Z16W16);
+    // Fix y and w because they are 65536 too large
+    vTemp = _mm_mul_ps(vTemp,g_XMFixupY16W16);
+    // Very important! The entries are x,z,y,w, flip it to x,y,z,w
+    return XM_PERMUTE_PS(vTemp,_MM_SHUFFLE(3,1,2,0));
+#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+// Loads a 4D UNORM16 vector: each uint16 maps onto [0,1] as x/65535.
+_Use_decl_annotations_
+inline XMVECTOR PackedVector::XMLoadUShortN4
+(
+    const XMUSHORTN4* pSource
+)
+{
+    assert(pSource);
+#if defined(_XM_NO_INTRINSICS_)
+    XMVECTORF32 vResult = {
+        (float)pSource->x / 65535.0f,
+        (float)pSource->y / 65535.0f,
+        (float)pSource->z / 65535.0f,
+        (float)pSource->w / 65535.0f
+    };
+    return vResult.v;
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+    __n64 vInt = vld1_u16( (const uint16_t*)pSource );
+    __n128 V = vmovl_u16( vInt );
+    V = vcvtq_f32_u32( V );
+    const __n128 Scale = vdupq_n_f32( 1.0f/65535.0f );
+    return vmulq_f32( V, Scale );
+#elif defined(_XM_SSE_INTRINSICS_)
+    static const XMVECTORF32 FixupY16W16 = {1.0f/65535.0f,1.0f/65535.0f,1.0f/(65535.0f*65536.0f),1.0f/(65535.0f*65536.0f)};
+    static const XMVECTORF32 FixaddY16W16 = {0,0,32768.0f*65536.0f,32768.0f*65536.0f};
+    // Splat the color in all four entries (x,z,y,w)
+    __m128d vIntd = _mm_load1_pd(reinterpret_cast<const double *>(&pSource->x));
+    // Shift x&0ffff,z&0xffff,y&0xffff0000,w&0xffff0000
+    __m128 vTemp = _mm_and_ps(_mm_castpd_ps(vIntd),g_XMMaskX16Y16Z16W16);
+    // y and w are signed! Flip the bits to convert the order to unsigned
+    vTemp = _mm_xor_ps(vTemp,g_XMFlipZW);
+    // Convert to floating point numbers
+    vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp));
+    // y and w + 0x8000 to complete the conversion
+    vTemp = _mm_add_ps(vTemp,FixaddY16W16);
+    // Fix y and w because they are 65536 too large
+    vTemp = _mm_mul_ps(vTemp,FixupY16W16);
+    // Very important! The entries are x,z,y,w, flip it to x,y,z,w
+    return XM_PERMUTE_PS(vTemp,_MM_SHUFFLE(3,1,2,0));
+#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+// Loads a 4D unsigned 16-bit integer vector as unnormalized floats.
+_Use_decl_annotations_
+inline XMVECTOR PackedVector::XMLoadUShort4
+(
+    const XMUSHORT4* pSource
+)
+{
+    assert(pSource);
+#if defined(_XM_NO_INTRINSICS_)
+    XMVECTORF32 vResult = {
+        (float)pSource->x,
+        (float)pSource->y,
+        (float)pSource->z,
+        (float)pSource->w
+    };
+    return vResult.v;
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+    __n64 vInt = vld1_u16( (const uint16_t*)pSource );
+    __n128 V = vmovl_u16( vInt );
+    return vcvtq_f32_u32( V );
+#elif defined(_XM_SSE_INTRINSICS_)
+    static const XMVECTORF32 FixaddY16W16 = {0,0,32768.0f,32768.0f};
+    // Splat the color in all four entries (x,z,y,w)
+    __m128d vIntd = _mm_load1_pd(reinterpret_cast<const double *>(&pSource->x));
+    // Shift x&0ffff,z&0xffff,y&0xffff0000,w&0xffff0000
+    __m128 vTemp = _mm_and_ps(_mm_castpd_ps(vIntd),g_XMMaskX16Y16Z16W16);
+    // y and w are signed! Flip the bits to convert the order to unsigned
+    vTemp = _mm_xor_ps(vTemp,g_XMFlipZW);
+    // Convert to floating point numbers
+    vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp));
+    // Fix y and w because they are 65536 too large
+    vTemp = _mm_mul_ps(vTemp,g_XMFixupY16W16);
+    // y and w + 0x8000 to complete the conversion
+    vTemp = _mm_add_ps(vTemp,FixaddY16W16);
+    // Very important! The entries are x,z,y,w, flip it to x,y,z,w
+    return XM_PERMUTE_PS(vTemp,_MM_SHUFFLE(3,1,2,0));
+#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+// Loads a 2:10:10:10 packed value: x/y/z are SNORM10 components mapped
+// onto [-1,1] (value/511, with -512 clamped to -1); w is the top 2 bits
+// treated as UNORM2 ([0,1] via /3).
+_Use_decl_annotations_
+inline XMVECTOR PackedVector::XMLoadXDecN4
+(
+    const XMXDECN4* pSource
+)
+{
+    assert(pSource);
+#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
+    static const uint32_t SignExtend[] = {0x00000000, 0xFFFFFC00};
+
+    uint32_t ElementX = pSource->v & 0x3FF;
+    uint32_t ElementY = (pSource->v >> 10) & 0x3FF;
+    uint32_t ElementZ = (pSource->v >> 20) & 0x3FF;
+
+    // Bit 9 selects the sign-extension mask; the int16_t cast keeps only the
+    // low 16 bits, which carry the sign-extended 10-bit value.
+    XMVECTORF32 vResult = {
+        (ElementX == 0x200) ? -1.f : ((float)(int16_t)(ElementX | SignExtend[ElementX >> 9]) / 511.0f),
+        (ElementY == 0x200) ? -1.f : ((float)(int16_t)(ElementY | SignExtend[ElementY >> 9]) / 511.0f),
+        (ElementZ == 0x200) ? -1.f : ((float)(int16_t)(ElementZ | SignExtend[ElementZ >> 9]) / 511.0f),
+        (float)(pSource->v >> 30) / 3.0f
+    };
+    return vResult.v;
+#elif defined(_XM_SSE_INTRINSICS_)
+    // Splat the color in all four entries
+    __m128 vTemp = _mm_load_ps1(reinterpret_cast<const float *>(&pSource->v));
+    // Shift R&0xFF0000, G&0xFF00, B&0xFF, A&0xFF000000
+    vTemp = _mm_and_ps(vTemp,g_XMMaskA2B10G10R10);
+    // a is unsigned! Flip the bit to convert the order to signed
+    vTemp = _mm_xor_ps(vTemp,g_XMFlipA2B10G10R10);
+    // Convert to floating point numbers
+    vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp));
+    // RGB + 0, A + 0x80000000.f to undo the signed order.
+    vTemp = _mm_add_ps(vTemp,g_XMFixAA2B10G10R10);
+    // Convert 0-255 to 0.0f-1.0f
+    vTemp = _mm_mul_ps(vTemp,g_XMNormalizeA2B10G10R10);
+    // Clamp result (for case of -512)
+    return _mm_max_ps( vTemp, g_XMNegativeOne );
+#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+// Loads a 2:10:10:10 packed value as unnormalized floats: x/y/z are signed
+// 10-bit integers (-512..511), w is the unsigned 2-bit field (0..3).
+_Use_decl_annotations_
+inline XMVECTOR PackedVector::XMLoadXDec4
+(
+    const XMXDEC4* pSource
+)
+{
+    assert(pSource);
+#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
+    static const uint32_t SignExtend[] = {0x00000000, 0xFFFFFC00};
+
+    uint32_t ElementX = pSource->v & 0x3FF;
+    uint32_t ElementY = (pSource->v >> 10) & 0x3FF;
+    uint32_t ElementZ = (pSource->v >> 20) & 0x3FF;
+
+    XMVECTORF32 vResult = {
+        (float)(int16_t)(ElementX | SignExtend[ElementX >> 9]),
+        (float)(int16_t)(ElementY | SignExtend[ElementY >> 9]),
+        (float)(int16_t)(ElementZ | SignExtend[ElementZ >> 9]),
+        (float)(pSource->v >> 30)
+    };
+    return vResult.v;
+#elif defined(_XM_SSE_INTRINSICS_)
+    static const XMVECTORI32 XDec4Xor = {0x200, 0x200<<10, 0x200<<20, 0x80000000};
+    static const XMVECTORF32 XDec4Add = {-512.0f,-512.0f*1024.0f,-512.0f*1024.0f*1024.0f,32768*65536.0f};
+    // Splat the color in all four entries
+    XMVECTOR vTemp = _mm_load_ps1(reinterpret_cast<const float *>(&pSource->v));
+    // Shift R&0xFF0000, G&0xFF00, B&0xFF, A&0xFF000000
+    vTemp = _mm_and_ps(vTemp,g_XMMaskDec4);
+    // a is unsigned! Flip the bit to convert the order to signed
+    vTemp = _mm_xor_ps(vTemp,XDec4Xor);
+    // Convert to floating point numbers
+    vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp));
+    // RGB + 0, A + 0x80000000.f to undo the signed order.
+    vTemp = _mm_add_ps(vTemp,XDec4Add);
+    // Convert 0-255 to 0.0f-1.0f
+    vTemp = _mm_mul_ps(vTemp,g_XMMulDec4);
+    return vTemp;
+#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+// Loads a 2:10:10:10 packed value: x/y/z are UNORM10 components ([0,1]
+// via /1023), w is the top 2 bits as UNORM2 ([0,1] via /3).
+_Use_decl_annotations_
+inline XMVECTOR PackedVector::XMLoadUDecN4
+(
+    const XMUDECN4* pSource
+)
+{
+    assert(pSource);
+#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
+
+    uint32_t ElementX = pSource->v & 0x3FF;
+    uint32_t ElementY = (pSource->v >> 10) & 0x3FF;
+    uint32_t ElementZ = (pSource->v >> 20) & 0x3FF;
+
+    XMVECTORF32 vResult = {
+        (float)ElementX / 1023.0f,
+        (float)ElementY / 1023.0f,
+        (float)ElementZ / 1023.0f,
+        (float)(pSource->v >> 30) / 3.0f
+    };
+    return vResult.v;
+#elif defined(_XM_SSE_INTRINSICS_)
+    static const XMVECTORF32 UDecN4Mul = {1.0f/1023.0f,1.0f/(1023.0f*1024.0f),1.0f/(1023.0f*1024.0f*1024.0f),1.0f/(3.0f*1024.0f*1024.0f*1024.0f)};
+    // Splat the color in all four entries
+    XMVECTOR vTemp = _mm_load_ps1(reinterpret_cast<const float *>(&pSource->v));
+    // Shift R&0xFF0000, G&0xFF00, B&0xFF, A&0xFF000000
+    vTemp = _mm_and_ps(vTemp,g_XMMaskDec4);
+    // a is unsigned! Flip the bit to convert the order to signed
+    vTemp = _mm_xor_ps(vTemp,g_XMFlipW);
+    // Convert to floating point numbers
+    vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp));
+    // RGB + 0, A + 0x80000000.f to undo the signed order.
+    vTemp = _mm_add_ps(vTemp,g_XMAddUDec4);
+    // Convert 0-255 to 0.0f-1.0f
+    vTemp = _mm_mul_ps(vTemp,UDecN4Mul);
+    return vTemp;
+#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+// Loads a 2:10:10:10 packed value as unnormalized floats: x/y/z are
+// unsigned 10-bit integers (0..1023), w is the unsigned 2-bit field (0..3).
+_Use_decl_annotations_
+inline XMVECTOR PackedVector::XMLoadUDec4
+(
+    const XMUDEC4* pSource
+)
+{
+    assert(pSource);
+#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
+    uint32_t ElementX = pSource->v & 0x3FF;
+    uint32_t ElementY = (pSource->v >> 10) & 0x3FF;
+    uint32_t ElementZ = (pSource->v >> 20) & 0x3FF;
+
+    XMVECTORF32 vResult = {
+        (float)ElementX,
+        (float)ElementY,
+        (float)ElementZ,
+        (float)(pSource->v >> 30)
+    };
+    return vResult.v;
+#elif defined(_XM_SSE_INTRINSICS_)
+    // Splat the color in all four entries
+    XMVECTOR vTemp = _mm_load_ps1(reinterpret_cast<const float *>(&pSource->v));
+    // Shift R&0xFF0000, G&0xFF00, B&0xFF, A&0xFF000000
+    vTemp = _mm_and_ps(vTemp,g_XMMaskDec4);
+    // a is unsigned! Flip the bit to convert the order to signed
+    vTemp = _mm_xor_ps(vTemp,g_XMFlipW);
+    // Convert to floating point numbers
+    vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp));
+    // RGB + 0, A + 0x80000000.f to undo the signed order.
+    vTemp = _mm_add_ps(vTemp,g_XMAddUDec4);
+    // Convert 0-255 to 0.0f-1.0f
+    vTemp = _mm_mul_ps(vTemp,g_XMMulDec4);
+    return vTemp;
+#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+// Loads a fully signed 2:10:10:10 packed value: x/y/z are SNORM10 ([-1,1]
+// via /511, with -512 clamped to -1); w is a 2-bit signed value (-2..1)
+// with -2 clamped to -1.
+_Use_decl_annotations_
+inline XMVECTOR PackedVector::XMLoadDecN4
+(
+    const XMDECN4* pSource
+)
+{
+    assert(pSource);
+#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
+    static const uint32_t SignExtend[] = {0x00000000, 0xFFFFFC00};
+    static const uint32_t SignExtendW[] = {0x00000000, 0xFFFFFFFC};
+
+    uint32_t ElementX = pSource->v & 0x3FF;
+    uint32_t ElementY = (pSource->v >> 10) & 0x3FF;
+    uint32_t ElementZ = (pSource->v >> 20) & 0x3FF;
+    uint32_t ElementW = pSource->v >> 30;
+
+    // Bit 9 (bit 1 for w) selects the sign-extension mask for each field.
+    XMVECTORF32 vResult = {
+        (ElementX == 0x200) ? -1.f : ((float)(int16_t)(ElementX | SignExtend[ElementX >> 9]) / 511.0f),
+        (ElementY == 0x200) ? -1.f : ((float)(int16_t)(ElementY | SignExtend[ElementY >> 9]) / 511.0f),
+        (ElementZ == 0x200) ? -1.f : ((float)(int16_t)(ElementZ | SignExtend[ElementZ >> 9]) / 511.0f),
+        (ElementW == 0x2) ? -1.f : ((float)(int16_t)(ElementW | SignExtendW[(ElementW >> 1) & 1]))
+    };
+    return vResult.v;
+#elif defined(_XM_SSE_INTRINSICS_)
+    static const XMVECTORF32 DecN4Mul = {1.0f/511.0f,1.0f/(511.0f*1024.0f),1.0f/(511.0f*1024.0f*1024.0f),1.0f/(1024.0f*1024.0f*1024.0f)};
+    // Splat the color in all four entries
+    XMVECTOR vTemp = _mm_load_ps1(reinterpret_cast<const float *>(&pSource->v));
+    // Shift R&0xFF0000, G&0xFF00, B&0xFF, A&0xFF000000
+    vTemp = _mm_and_ps(vTemp,g_XMMaskDec4);
+    // a is unsigned! Flip the bit to convert the order to signed
+    vTemp = _mm_xor_ps(vTemp,g_XMXorDec4);
+    // Convert to floating point numbers
+    vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp));
+    // RGB + 0, A + 0x80000000.f to undo the signed order.
+    vTemp = _mm_add_ps(vTemp,g_XMAddDec4);
+    // Convert 0-255 to 0.0f-1.0f
+    vTemp = _mm_mul_ps(vTemp,DecN4Mul);
+    // Clamp result (for case of -512/-1)
+    return _mm_max_ps( vTemp, g_XMNegativeOne );
+#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
// Loads a 10:10:10:2 signed packed value (XMDEC4) into UNNORMALIZED floats:
// x,y,z in [-512.0f, 511.0f], w in [-2.0f, 1.0f].
_Use_decl_annotations_
inline XMVECTOR PackedVector::XMLoadDec4
(
    const XMDEC4* pSource
)
{
    assert(pSource);
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
    // Tables to sign-extend a 10-bit / 2-bit two's-complement field when its
    // top bit is set (indexed by the field's sign bit).
    static const uint32_t SignExtend[] = {0x00000000, 0xFFFFFC00};
    static const uint32_t SignExtendW[] = {0x00000000, 0xFFFFFFFC};

    uint32_t ElementX = pSource->v & 0x3FF;
    uint32_t ElementY = (pSource->v >> 10) & 0x3FF;
    uint32_t ElementZ = (pSource->v >> 20) & 0x3FF;
    uint32_t ElementW = pSource->v >> 30;

    // Low 16 bits of the extended value carry the full signed range, so the
    // int16_t cast yields the correct signed integer before float conversion.
    XMVECTORF32 vResult = {
        (float)(int16_t)(ElementX | SignExtend[ElementX >> 9]),
        (float)(int16_t)(ElementY | SignExtend[ElementY >> 9]),
        (float)(int16_t)(ElementZ | SignExtend[ElementZ >> 9]),
        (float)(int16_t)(ElementW | SignExtendW[ElementW >> 1])
    };
    return vResult.v;
#elif defined(_XM_SSE_INTRINSICS_)
    // Splat the packed dword into all four lanes
    XMVECTOR vTemp = _mm_load_ps1(reinterpret_cast<const float *>(&pSource->v));
    // Isolate each field in its own lane
    vTemp = _mm_and_ps(vTemp,g_XMMaskDec4);
    // Flip each field's sign bit so the signed convert sees the right ordering
    vTemp = _mm_xor_ps(vTemp,g_XMXorDec4);
    // Convert to floating point numbers
    vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp));
    // Undo the bias introduced by the xor above
    vTemp = _mm_add_ps(vTemp,g_XMAddDec4);
    // Divide out each lane's bit position, leaving the raw signed value
    vTemp = _mm_mul_ps(vTemp,g_XMMulDec4);
    return vTemp;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}
+
+//------------------------------------------------------------------------------
// Loads four unsigned 8-bit values (XMUBYTEN4) as normalized floats in
// [0.0f, 1.0f] (value / 255).
_Use_decl_annotations_
inline XMVECTOR PackedVector::XMLoadUByteN4
(
    const XMUBYTEN4* pSource
)
{
    assert(pSource);
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
    XMVECTORF32 vResult = {
        (float)pSource->x / 255.0f,
        (float)pSource->y / 255.0f,
        (float)pSource->z / 255.0f,
        (float)pSource->w / 255.0f
    };
    return vResult.v;
#elif defined(_XM_SSE_INTRINSICS_)
    // Per-lane reciprocal of 255 * 2^bit_offset: normalizes each byte and
    // removes its bit offset in one multiply.
    static const XMVECTORF32 LoadUByteN4Mul = {1.0f/255.0f,1.0f/(255.0f*256.0f),1.0f/(255.0f*65536.0f),1.0f/(255.0f*65536.0f*256.0f)};
    // Splat the four packed bytes into all four lanes
    XMVECTOR vTemp = _mm_load1_ps(reinterpret_cast<const float *>(&pSource->x));
    // Mask x&0ff,y&0xff00,z&0xff0000,w&0xff000000
    vTemp = _mm_and_ps(vTemp,g_XMMaskByte4);
    // w occupies the sign bit; flip it so the signed convert treats it as unsigned
    vTemp = _mm_xor_ps(vTemp,g_XMFlipW);
    // Convert to floating point numbers
    vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp));
    // Add back the offset removed by the sign-bit flip
    vTemp = _mm_add_ps(vTemp,g_XMAddUDec4);
    // Normalize and undo each lane's bit offset
    vTemp = _mm_mul_ps(vTemp,LoadUByteN4Mul);
    return vTemp;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}
+
+//------------------------------------------------------------------------------
// Loads four unsigned 8-bit values (XMUBYTE4) as UNNORMALIZED floats in
// [0.0f, 255.0f].
_Use_decl_annotations_
inline XMVECTOR PackedVector::XMLoadUByte4
(
    const XMUBYTE4* pSource
)
{
    assert(pSource);
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
    XMVECTORF32 vResult = {
        (float)pSource->x,
        (float)pSource->y,
        (float)pSource->z,
        (float)pSource->w
    };
    return vResult.v;
#elif defined(_XM_SSE_INTRINSICS_)
    // Per-lane reciprocal of 2^bit_offset: removes each byte's bit offset.
    static const XMVECTORF32 LoadUByte4Mul = {1.0f,1.0f/256.0f,1.0f/65536.0f,1.0f/(65536.0f*256.0f)};
    // Splat the four packed bytes into all four lanes
    XMVECTOR vTemp = _mm_load1_ps(reinterpret_cast<const float *>(&pSource->x));
    // Mask x&0ff,y&0xff00,z&0xff0000,w&0xff000000
    vTemp = _mm_and_ps(vTemp,g_XMMaskByte4);
    // w occupies the sign bit; flip it so the signed convert treats it as unsigned
    vTemp = _mm_xor_ps(vTemp,g_XMFlipW);
    // Convert to floating point numbers
    vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp));
    // Add back the offset removed by the sign-bit flip
    vTemp = _mm_add_ps(vTemp,g_XMAddUDec4);
    // Undo each lane's bit offset, leaving the raw 0-255 value
    vTemp = _mm_mul_ps(vTemp,LoadUByte4Mul);
    return vTemp;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}
+
+//------------------------------------------------------------------------------
// Loads four signed 8-bit values (XMBYTEN4) as normalized floats in [-1,1]
// (value / 127, with -128 clamped to -1).
_Use_decl_annotations_
inline XMVECTOR PackedVector::XMLoadByteN4
(
    const XMBYTEN4* pSource
)
{
    assert(pSource);
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
    XMVECTORF32 vResult = {
        (pSource->x == -128) ? -1.f : ((float)pSource->x / 127.0f),
        (pSource->y == -128) ? -1.f : ((float)pSource->y / 127.0f),
        (pSource->z == -128) ? -1.f : ((float)pSource->z / 127.0f),
        (pSource->w == -128) ? -1.f : ((float)pSource->w / 127.0f)
    };
    return vResult.v;
#elif defined(_XM_SSE_INTRINSICS_)
    // Per-lane reciprocal of 127 * 2^bit_offset.
    static const XMVECTORF32 LoadByteN4Mul = {1.0f/127.0f,1.0f/(127.0f*256.0f),1.0f/(127.0f*65536.0f),1.0f/(127.0f*65536.0f*256.0f)};
    // Splat the four packed bytes into all four lanes
    XMVECTOR vTemp = _mm_load1_ps(reinterpret_cast<const float *>(&pSource->x));
    // Mask x&0ff,y&0xff00,z&0xff0000,w&0xff000000
    vTemp = _mm_and_ps(vTemp,g_XMMaskByte4);
    // Flip each byte's sign bit so the signed convert sees the right ordering
    vTemp = _mm_xor_ps(vTemp,g_XMXorByte4);
    // Convert to floating point numbers
    vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp));
    // Undo the bias introduced by the xor above
    vTemp = _mm_add_ps(vTemp,g_XMAddByte4);
    // Normalize and undo each lane's bit offset
    vTemp = _mm_mul_ps(vTemp,LoadByteN4Mul);
    // Clamp result (for case of -128)
    return _mm_max_ps( vTemp, g_XMNegativeOne );
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}
+
+//------------------------------------------------------------------------------
// Loads four signed 8-bit values (XMBYTE4) as UNNORMALIZED floats in
// [-128.0f, 127.0f].
_Use_decl_annotations_
inline XMVECTOR PackedVector::XMLoadByte4
(
    const XMBYTE4* pSource
)
{
    assert(pSource);
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
    XMVECTORF32 vResult = {
        (float)pSource->x,
        (float)pSource->y,
        (float)pSource->z,
        (float)pSource->w
    };
    return vResult.v;
#elif defined(_XM_SSE_INTRINSICS_)
    // Per-lane reciprocal of 2^bit_offset: removes each byte's bit offset.
    static const XMVECTORF32 LoadByte4Mul = {1.0f,1.0f/256.0f,1.0f/65536.0f,1.0f/(65536.0f*256.0f)};
    // Splat the four packed bytes into all four lanes
    XMVECTOR vTemp = _mm_load1_ps(reinterpret_cast<const float *>(&pSource->x));
    // Mask x&0ff,y&0xff00,z&0xff0000,w&0xff000000
    vTemp = _mm_and_ps(vTemp,g_XMMaskByte4);
    // Flip each byte's sign bit so the signed convert sees the right ordering
    vTemp = _mm_xor_ps(vTemp,g_XMXorByte4);
    // Convert to floating point numbers
    vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp));
    // Undo the bias introduced by the xor above
    vTemp = _mm_add_ps(vTemp,g_XMAddByte4);
    // Undo each lane's bit offset, leaving the raw signed value
    vTemp = _mm_mul_ps(vTemp,LoadByte4Mul);
    return vTemp;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}
+
+//------------------------------------------------------------------------------
// Loads a 4:4:4:4 unsigned packed value (XMUNIBBLE4) as UNNORMALIZED floats
// in [0.0f, 15.0f].
_Use_decl_annotations_
inline XMVECTOR PackedVector::XMLoadUNibble4
(
    const XMUNIBBLE4* pSource
)
{
    assert(pSource);
#if defined(_XM_SSE_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
    // Per-lane field mask and reciprocal of each field's bit offset.
    static const XMVECTORI32 UNibble4And = {0xF,0xF0,0xF00,0xF000};
    static const XMVECTORF32 UNibble4Mul = {1.0f,1.0f/16.f,1.0f/256.f,1.0f/4096.f};
    // Get the 16 bit value and splat it
    XMVECTOR vResult = _mm_load_ps1(reinterpret_cast<const float *>(&pSource->v));
    // Isolate each nibble in its own lane
    vResult = _mm_and_ps(vResult,UNibble4And);
    // Convert to float
    vResult = _mm_cvtepi32_ps(_mm_castps_si128(vResult));
    // Undo each lane's bit offset
    vResult = _mm_mul_ps(vResult,UNibble4Mul);
    return vResult;
#else
    XMVECTORF32 vResult = {
        float(pSource->v & 0xF),
        float((pSource->v >> 4) & 0xF),
        float((pSource->v >> 8) & 0xF),
        float((pSource->v >> 12) & 0xF)
    };
    return vResult.v;
#endif // !_XM_SSE_INTRINSICS_
}
+
+//------------------------------------------------------------------------------
// Loads a 5:5:5:1 unsigned packed value (XMU555) as UNNORMALIZED floats:
// x,y,z in [0.0f, 31.0f] and w in {0.0f, 1.0f}.
_Use_decl_annotations_
inline XMVECTOR PackedVector::XMLoadU555
(
    const XMU555* pSource
)
{
    assert(pSource);
#if defined(_XM_SSE_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
    // Per-lane field mask and reciprocal of each field's bit offset.
    static const XMVECTORI32 U555And = {0x1F,0x1F<<5,0x1F<<10,0x8000};
    static const XMVECTORF32 U555Mul = {1.0f,1.0f/32.f,1.0f/1024.f,1.0f/32768.f};
    // Get the 16 bit value and splat it
    XMVECTOR vResult = _mm_load_ps1(reinterpret_cast<const float *>(&pSource->v));
    // Isolate each field in its own lane
    vResult = _mm_and_ps(vResult,U555And);
    // Convert to float
    vResult = _mm_cvtepi32_ps(_mm_castps_si128(vResult));
    // Undo each lane's bit offset
    vResult = _mm_mul_ps(vResult,U555Mul);
    return vResult;
#else
    XMVECTORF32 vResult = {
        float(pSource->v & 0x1F),
        float((pSource->v >> 5) & 0x1F),
        float((pSource->v >> 10) & 0x1F),
        float((pSource->v >> 15) & 0x1)
    };
    return vResult.v;
#endif // !_XM_SSE_INTRINSICS_
}
+
+
+/****************************************************************************
+ *
+ * Vector and matrix store operations
+ *
+ ****************************************************************************/
// Saturates V to [0,1], scales to 0-255, and packs it as a 32-bit ARGB color
// (alpha in the top byte, then red, green, blue).
_Use_decl_annotations_
inline void PackedVector::XMStoreColor
(
    XMCOLOR* pDestination,
    FXMVECTOR V
)
{
    assert(pDestination);
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)

    static const XMVECTORF32 Scale = {255.0f, 255.0f, 255.0f, 255.0f};

    // Clamp to [0,1], expand to [0,255], round to nearest integer.
    XMVECTOR N = XMVectorSaturate(V);
    N = XMVectorMultiply(N, Scale.v);
    N = XMVectorRound(N);

    XMFLOAT4A tmp;
    XMStoreFloat4A( &tmp, N );

    // Pack as A8R8G8B8.
    pDestination->c = ((uint32_t)tmp.w << 24) |
                      ((uint32_t)tmp.x << 16) |
                      ((uint32_t)tmp.y << 8) |
                      ((uint32_t)tmp.z);

#elif defined(_XM_SSE_INTRINSICS_)
    static const XMVECTORF32 Scale = {255.0f,255.0f,255.0f,255.0f};
    // Set <0 to 0
    XMVECTOR vResult = _mm_max_ps(V,g_XMZero);
    // Set>1 to 1
    vResult = _mm_min_ps(vResult,g_XMOne);
    // Convert to 0-255
    vResult = _mm_mul_ps(vResult,Scale);
    // Shuffle RGBA to ARGB
    vResult = XM_PERMUTE_PS(vResult,_MM_SHUFFLE(3,0,1,2));
    // Convert to int
    __m128i vInt = _mm_cvtps_epi32(vResult);
    // Mash to shorts
    vInt = _mm_packs_epi32(vInt,vInt);
    // Mash to bytes
    vInt = _mm_packus_epi16(vInt,vInt);
    // Store the low 32 bits (the four packed bytes)
    _mm_store_ss(reinterpret_cast<float *>(&pDestination->c),reinterpret_cast<__m128 *>(&vInt)[0]);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
+//------------------------------------------------------------------------------
+_Use_decl_annotations_
+inline void PackedVector::XMStoreHalf2
+(
+ XMHALF2* pDestination,
+ FXMVECTOR V
+)
+{
+ assert(pDestination);
+#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
+
+ pDestination->x = XMConvertFloatToHalf(XMVectorGetX(V));
+ pDestination->y = XMConvertFloatToHalf(XMVectorGetY(V));
+
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
// Stores V.xy as two signed 16-bit normalized values: input is clamped to
// [-1,1] and scaled by 32767.
_Use_decl_annotations_
inline void PackedVector::XMStoreShortN2
(
    XMSHORTN2* pDestination,
    FXMVECTOR V
)
{
    assert(pDestination);
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)

    static const XMVECTORF32 Scale = {32767.0f, 32767.0f, 32767.0f, 32767.0f};

    // Clamp to [-1,1], expand to [-32767,32767], round to nearest integer.
    XMVECTOR N = XMVectorClamp(V, g_XMNegativeOne.v, g_XMOne.v);
    N = XMVectorMultiply(N, Scale.v);
    N = XMVectorRound(N);

    XMFLOAT4A tmp;
    XMStoreFloat4A( &tmp, N );

    pDestination->x = (int16_t)tmp.x;
    pDestination->y = (int16_t)tmp.y;

#elif defined(_XM_SSE_INTRINSICS_)
    static const XMVECTORF32 Scale = {32767.0f, 32767.0f, 32767.0f, 32767.0f};

    XMVECTOR vResult = _mm_max_ps(V,g_XMNegativeOne);
    vResult = _mm_min_ps(vResult,g_XMOne);
    vResult = _mm_mul_ps(vResult,Scale);
    // Convert to int with rounding, then pack to signed shorts
    __m128i vResulti = _mm_cvtps_epi32(vResult);
    vResulti = _mm_packs_epi32(vResulti,vResulti);
    // Store the low 32 bits (both shorts) in one write
    _mm_store_ss(reinterpret_cast<float *>(&pDestination->x),_mm_castsi128_ps(vResulti));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
+//------------------------------------------------------------------------------
// Stores V.xy as two signed 16-bit integers, clamping to [-32767, 32767].
_Use_decl_annotations_
inline void PackedVector::XMStoreShort2
(
    XMSHORT2* pDestination,
    FXMVECTOR V
)
{
    assert(pDestination);
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)

    static const XMVECTORF32 Min = {-32767.0f, -32767.0f, -32767.0f, -32767.0f};
    static const XMVECTORF32 Max = {32767.0f, 32767.0f, 32767.0f, 32767.0f};

    XMVECTOR N = XMVectorClamp(V, Min, Max);
    N = XMVectorRound(N);

    XMFLOAT4A tmp;
    XMStoreFloat4A( &tmp, N );

    pDestination->x = (int16_t)tmp.x;
    pDestination->y = (int16_t)tmp.y;

#elif defined(_XM_SSE_INTRINSICS_)
    static const XMVECTORF32 Min = {-32767.0f, -32767.0f, -32767.0f, -32767.0f};
    static const XMVECTORF32 Max = {32767.0f, 32767.0f, 32767.0f, 32767.0f};
    // Bounds check
    XMVECTOR vResult = _mm_max_ps(V,Min);
    vResult = _mm_min_ps(vResult,Max);
    // Convert to int with rounding
    __m128i vInt = _mm_cvtps_epi32(vResult);
    // Pack the ints into shorts
    vInt = _mm_packs_epi32(vInt,vInt);
    // Store the low 32 bits (both shorts) in one write
    _mm_store_ss(reinterpret_cast<float *>(&pDestination->x),_mm_castsi128_ps(vInt));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
+//------------------------------------------------------------------------------
// Stores V.xy as two unsigned 16-bit normalized values: input is saturated
// to [0,1] and scaled by 65535 (+0.5 then truncated, i.e. round-half-up).
_Use_decl_annotations_
inline void PackedVector::XMStoreUShortN2
(
    XMUSHORTN2* pDestination,
    FXMVECTOR V
)
{
    assert(pDestination);
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)

    static const XMVECTORF32 Scale = {65535.0f, 65535.0f, 65535.0f, 65535.0f};

    XMVECTOR N = XMVectorSaturate(V);
    N = XMVectorMultiplyAdd(N, Scale.v, g_XMOneHalf.v);
    N = XMVectorTruncate(N);

    XMFLOAT4A tmp;
    XMStoreFloat4A( &tmp, N );

    // int16_t cast wraps to the destination's 16-bit storage; the value is
    // already in [0,65535] so the stored bits are correct.
    pDestination->x = (int16_t)tmp.x;
    pDestination->y = (int16_t)tmp.y;

#elif defined(_XM_SSE_INTRINSICS_)
    static const XMVECTORF32 Scale = {65535.0f, 65535.0f, 65535.0f, 65535.0f};
    // Bounds check
    XMVECTOR vResult = _mm_max_ps(V,g_XMZero);
    vResult = _mm_min_ps(vResult,g_XMOne);
    vResult = _mm_mul_ps(vResult,Scale);
    // Convert to int with rounding
    __m128i vInt = _mm_cvtps_epi32(vResult);
    // Since the SSE pack instruction clamps using signed rules,
    // manually extract the values to store them to memory
    pDestination->x = static_cast<int16_t>(_mm_extract_epi16(vInt,0));
    pDestination->y = static_cast<int16_t>(_mm_extract_epi16(vInt,2));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
+//------------------------------------------------------------------------------
// Stores V.xy as two unsigned 16-bit integers, clamping to [0, 65535].
_Use_decl_annotations_
inline void PackedVector::XMStoreUShort2
(
    XMUSHORT2* pDestination,
    FXMVECTOR V
)
{
    assert(pDestination);
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)

    static const XMVECTORF32 Max = {65535.0f, 65535.0f, 65535.0f, 65535.0f};

    XMVECTOR N = XMVectorClamp(V, XMVectorZero(), Max);
    N = XMVectorRound(N);

    XMFLOAT4A tmp;
    XMStoreFloat4A( &tmp, N );

    // int16_t cast wraps to the destination's 16-bit storage; the value is
    // already in [0,65535] so the stored bits are correct.
    pDestination->x = (int16_t)tmp.x;
    pDestination->y = (int16_t)tmp.y;

#elif defined(_XM_SSE_INTRINSICS_)
    static const XMVECTORF32 Max = {65535.0f, 65535.0f, 65535.0f, 65535.0f};
    // Bounds check
    XMVECTOR vResult = _mm_max_ps(V,g_XMZero);
    vResult = _mm_min_ps(vResult,Max);
    // Convert to int with rounding
    __m128i vInt = _mm_cvtps_epi32(vResult);
    // Since the SSE pack instruction clamps using signed rules,
    // manually extract the values to store them to memory
    pDestination->x = static_cast<int16_t>(_mm_extract_epi16(vInt,0));
    pDestination->y = static_cast<int16_t>(_mm_extract_epi16(vInt,2));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
+//------------------------------------------------------------------------------
+_Use_decl_annotations_
+inline void PackedVector::XMStoreByteN2
+(
+ XMBYTEN2* pDestination,
+ FXMVECTOR V
+)
+{
+ assert(pDestination);
+
+ static const XMVECTORF32 Scale = {127.0f, 127.0f, 127.0f, 127.0f};
+
+ XMVECTOR N = XMVectorClamp(V, g_XMNegativeOne.v, g_XMOne.v);
+ N = XMVectorMultiply(N, Scale.v);
+ N = XMVectorRound(N);
+
+ XMFLOAT4A tmp;
+ XMStoreFloat4A( &tmp, N );
+
+ pDestination->x = (int8_t)tmp.x;
+ pDestination->y = (int8_t)tmp.y;
+}
+
+//------------------------------------------------------------------------------
+_Use_decl_annotations_
+inline void PackedVector::XMStoreByte2
+(
+ XMBYTE2* pDestination,
+ FXMVECTOR V
+)
+{
+ assert(pDestination);
+
+ static const XMVECTORF32 Min = {-127.0f, -127.0f, -127.0f, -127.0f};
+ static const XMVECTORF32 Max = {127.0f, 127.0f, 127.0f, 127.0f};
+
+ XMVECTOR N = XMVectorClamp(V, Min, Max);
+ N = XMVectorRound(N);
+
+ XMFLOAT4A tmp;
+ XMStoreFloat4A( &tmp, N );
+
+ pDestination->x = (int8_t)tmp.x;
+ pDestination->y = (int8_t)tmp.y;
+}
+
+//------------------------------------------------------------------------------
+_Use_decl_annotations_
+inline void PackedVector::XMStoreUByteN2
+(
+ XMUBYTEN2* pDestination,
+ FXMVECTOR V
+)
+{
+ assert(pDestination);
+
+ static const XMVECTORF32 Scale = {255.0f, 255.0f, 255.0f, 255.0f};
+
+ XMVECTOR N = XMVectorSaturate(V);
+ N = XMVectorMultiplyAdd(N, Scale.v, g_XMOneHalf.v);
+ N = XMVectorTruncate(N);
+
+ XMFLOAT4A tmp;
+ XMStoreFloat4A( &tmp, N );
+
+ pDestination->x = (uint8_t)tmp.x;
+ pDestination->y = (uint8_t)tmp.y;
+}
+
+//------------------------------------------------------------------------------
+_Use_decl_annotations_
+inline void PackedVector::XMStoreUByte2
+(
+ XMUBYTE2* pDestination,
+ FXMVECTOR V
+)
+{
+ assert(pDestination);
+
+ static const XMVECTORF32 Max = {255.0f, 255.0f, 255.0f, 255.0f};
+
+ XMVECTOR N = XMVectorClamp(V, XMVectorZero(), Max);
+ N = XMVectorRound(N);
+
+ XMFLOAT4A tmp;
+ XMStoreFloat4A( &tmp, N );
+
+ pDestination->x = (uint8_t)tmp.x;
+ pDestination->y = (uint8_t)tmp.y;
+}
+
+//------------------------------------------------------------------------------
+_Use_decl_annotations_
+inline void PackedVector::XMStoreU565
+(
+ XMU565* pDestination,
+ FXMVECTOR V
+)
+{
+ assert(pDestination);
+#if defined(_XM_SSE_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
+ static const XMVECTORF32 Max = {31.0f, 63.0f, 31.0f, 0.0f};
+ // Bounds check
+ XMVECTOR vResult = _mm_max_ps(V,g_XMZero);
+ vResult = _mm_min_ps(vResult,Max);
+ // Convert to int with rounding
+ __m128i vInt = _mm_cvtps_epi32(vResult);
+ // No SSE operations will write to 16-bit values, so we have to extract them manually
+ uint16_t x = static_cast<uint16_t>(_mm_extract_epi16(vInt,0));
+ uint16_t y = static_cast<uint16_t>(_mm_extract_epi16(vInt,2));
+ uint16_t z = static_cast<uint16_t>(_mm_extract_epi16(vInt,4));
+ pDestination->v = ((z & 0x1F) << 11) |
+ ((y & 0x3F) << 5) |
+ ((x & 0x1F));
+#else
+ static const XMVECTORF32 Max = {31.0f, 63.0f, 31.0f, 0.0f};
+
+ XMVECTOR N = XMVectorClamp(V, XMVectorZero(), Max.v);
+ N = XMVectorRound(N);
+
+ XMFLOAT4A tmp;
+ XMStoreFloat4A( &tmp, N );
+
+ pDestination->v = (((uint16_t)tmp.z & 0x1F) << 11) |
+ (((uint16_t)tmp.y & 0x3F) << 5) |
+ (((uint16_t)tmp.x & 0x1F));
+#endif !_XM_SSE_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+_Use_decl_annotations_
+inline void PackedVector::XMStoreFloat3PK
+(
+ XMFLOAT3PK* pDestination,
+ FXMVECTOR V
+)
+{
+ assert(pDestination);
+
+ __declspec(align(16)) uint32_t IValue[4];
+ XMStoreFloat3A( reinterpret_cast<XMFLOAT3A*>(&IValue), V );
+
+ uint32_t Result[3];
+
+ // X & Y Channels (5-bit exponent, 6-bit mantissa)
+ for(uint32_t j=0; j < 2; ++j)
+ {
+ uint32_t Sign = IValue[j] & 0x80000000;
+ uint32_t I = IValue[j] & 0x7FFFFFFF;
+
+ if ((I & 0x7F800000) == 0x7F800000)
+ {
+ // INF or NAN
+ Result[j] = 0x7c0;
+ if (( I & 0x7FFFFF ) != 0)
+ {
+ Result[j] = 0x7c0 | (((I>>17)|(I>11)|(I>>6)|(I))&0x3f);
+ }
+ else if ( Sign )
+ {
+ // -INF is clamped to 0 since 3PK is positive only
+ Result[j] = 0;
+ }
+ }
+ else if ( Sign )
+ {
+ // 3PK is positive only, so clamp to zero
+ Result[j] = 0;
+ }
+ else if (I > 0x477E0000U)
+ {
+ // The number is too large to be represented as a float11, set to max
+ Result[j] = 0x7BF;
+ }
+ else
+ {
+ if (I < 0x38800000U)
+ {
+ // The number is too small to be represented as a normalized float11
+ // Convert it to a denormalized value.
+ uint32_t Shift = 113U - (I >> 23U);
+ I = (0x800000U | (I & 0x7FFFFFU)) >> Shift;
+ }
+ else
+ {
+ // Rebias the exponent to represent the value as a normalized float11
+ I += 0xC8000000U;
+ }
+
+ Result[j] = ((I + 0xFFFFU + ((I >> 17U) & 1U)) >> 17U)&0x7ffU;
+ }
+ }
+
+ // Z Channel (5-bit exponent, 5-bit mantissa)
+ uint32_t Sign = IValue[2] & 0x80000000;
+ uint32_t I = IValue[2] & 0x7FFFFFFF;
+
+ if ((I & 0x7F800000) == 0x7F800000)
+ {
+ // INF or NAN
+ Result[2] = 0x3e0;
+ if ( I & 0x7FFFFF )
+ {
+ Result[2] = 0x3e0 | (((I>>18)|(I>13)|(I>>3)|(I))&0x1f);
+ }
+ else if ( Sign )
+ {
+ // -INF is clamped to 0 since 3PK is positive only
+ Result[2] = 0;
+ }
+ }
+ else if ( Sign )
+ {
+ // 3PK is positive only, so clamp to zero
+ Result[2] = 0;
+ }
+ else if (I > 0x477C0000U)
+ {
+ // The number is too large to be represented as a float10, set to max
+ Result[2] = 0x3df;
+ }
+ else
+ {
+ if (I < 0x38800000U)
+ {
+ // The number is too small to be represented as a normalized float10
+ // Convert it to a denormalized value.
+ uint32_t Shift = 113U - (I >> 23U);
+ I = (0x800000U | (I & 0x7FFFFFU)) >> Shift;
+ }
+ else
+ {
+ // Rebias the exponent to represent the value as a normalized float10
+ I += 0xC8000000U;
+ }
+
+ Result[2] = ((I + 0x1FFFFU + ((I >> 18U) & 1U)) >> 18U)&0x3ffU;
+ }
+
+ // Pack Result into memory
+ pDestination->v = (Result[0] & 0x7ff)
+ | ( (Result[1] & 0x7ff) << 11 )
+ | ( (Result[2] & 0x3ff) << 22 );
+}
+
+//------------------------------------------------------------------------------
+_Use_decl_annotations_
+inline void PackedVector::XMStoreFloat3SE
+(
+ XMFLOAT3SE* pDestination,
+ FXMVECTOR V
+)
+{
+ assert(pDestination);
+
+ __declspec(align(16)) uint32_t IValue[4];
+ XMStoreFloat3A( reinterpret_cast<XMFLOAT3A*>(&IValue), V );
+
+ uint32_t Exp[3];
+ uint32_t Frac[3];
+
+ // X, Y, Z Channels (5-bit exponent, 9-bit mantissa)
+ for(uint32_t j=0; j < 3; ++j)
+ {
+ uint32_t Sign = IValue[j] & 0x80000000;
+ uint32_t I = IValue[j] & 0x7FFFFFFF;
+
+ if ((I & 0x7F800000) == 0x7F800000)
+ {
+ // INF or NAN
+ Exp[j] = 0x1f;
+ if (( I & 0x7FFFFF ) != 0)
+ {
+ Frac[j] = ((I>>14)|(I>5)|(I))&0x1ff;
+ }
+ else if ( Sign )
+ {
+ // -INF is clamped to 0 since 3SE is positive only
+ Exp[j] = Frac[j] = 0;
+ }
+ }
+ else if ( Sign )
+ {
+ // 3SE is positive only, so clamp to zero
+ Exp[j] = Frac[j] = 0;
+ }
+ else if (I > 0x477FC000U)
+ {
+ // The number is too large, set to max
+ Exp[j] = 0x1e;
+ Frac[j] = 0x1ff;
+ }
+ else
+ {
+ if (I < 0x38800000U)
+ {
+ // The number is too small to be represented as a normalized float11
+ // Convert it to a denormalized value.
+ uint32_t Shift = 113U - (I >> 23U);
+ I = (0x800000U | (I & 0x7FFFFFU)) >> Shift;
+ }
+ else
+ {
+ // Rebias the exponent to represent the value as a normalized float11
+ I += 0xC8000000U;
+ }
+
+ uint32_t T = ((I + 0x1FFFU + ((I >> 14U) & 1U)) >> 14U)&0x3fffU;
+
+ Exp[j] = (T & 0x3E00) >> 9;
+ Frac[j] = T & 0x1ff;
+ }
+ }
+
+ // Adjust to a shared exponent
+ uint32_t T = XMMax( Exp[0], XMMax( Exp[1], Exp[2] ) );
+
+ Frac[0] = Frac[0] >> (T - Exp[0]);
+ Frac[1] = Frac[1] >> (T - Exp[1]);
+ Frac[2] = Frac[2] >> (T - Exp[2]);
+
+ // Store packed into memory
+ pDestination->xm = Frac[0];
+ pDestination->ym = Frac[1];
+ pDestination->zm = Frac[2];
+ pDestination->e = T;
+}
+
+//------------------------------------------------------------------------------
+_Use_decl_annotations_
+inline void PackedVector::XMStoreHalf4
+(
+ XMHALF4* pDestination,
+ FXMVECTOR V
+)
+{
+ assert(pDestination);
+#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
+
+ XMFLOAT4A t;
+ XMStoreFloat4A(&t, V );
+
+ pDestination->x = XMConvertFloatToHalf(t.x);
+ pDestination->y = XMConvertFloatToHalf(t.y);
+ pDestination->z = XMConvertFloatToHalf(t.z);
+ pDestination->w = XMConvertFloatToHalf(t.w);
+
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
// Stores all four components of V as signed 16-bit normalized values: input
// is clamped to [-1,1] and scaled by 32767.
_Use_decl_annotations_
inline void PackedVector::XMStoreShortN4
(
    XMSHORTN4* pDestination,
    FXMVECTOR V
)
{
    assert(pDestination);
#if defined(_XM_NO_INTRINSICS_)

    static const XMVECTORF32 Scale = {32767.0f, 32767.0f, 32767.0f, 32767.0f};

    XMVECTOR N = XMVectorClamp(V, g_XMNegativeOne.v, g_XMOne.v);
    N = XMVectorMultiply(N, Scale.v);
    N = XMVectorRound(N);

    XMFLOAT4A tmp;
    XMStoreFloat4A(&tmp, N );

    pDestination->x = (int16_t)tmp.x;
    pDestination->y = (int16_t)tmp.y;
    pDestination->z = (int16_t)tmp.z;
    pDestination->w = (int16_t)tmp.w;

#elif defined(_XM_ARM_NEON_INTRINSICS_)
    // Clamp, scale, convert to s32, then narrow to four s16 and store.
    __n128 vResult = vmaxq_f32( V, g_XMNegativeOne );
    vResult = vminq_f32( vResult, g_XMOne );
    const __n128 Scale = vdupq_n_f32( 32767.0f );
    vResult = vmulq_f32( vResult, Scale );
    vResult = vcvtq_s32_f32( vResult );
    __n64 vInt = vmovn_s32( vResult );
    vst1_s16( (int16_t*)pDestination, vInt );
#elif defined(_XM_SSE_INTRINSICS_)
    static const XMVECTORF32 Scale = {32767.0f, 32767.0f, 32767.0f, 32767.0f};

    XMVECTOR vResult = _mm_max_ps(V,g_XMNegativeOne);
    vResult = _mm_min_ps(vResult,g_XMOne);
    vResult = _mm_mul_ps(vResult,Scale);
    // Convert to int with rounding, pack to signed shorts, store all 64 bits
    __m128i vResulti = _mm_cvtps_epi32(vResult);
    vResulti = _mm_packs_epi32(vResulti,vResulti);
    _mm_store_sd(reinterpret_cast<double *>(&pDestination->x),_mm_castsi128_pd(vResulti));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
+//------------------------------------------------------------------------------
// Stores all four components of V as signed 16-bit integers, clamping to
// [-32767, 32767].
_Use_decl_annotations_
inline void PackedVector::XMStoreShort4
(
    XMSHORT4* pDestination,
    FXMVECTOR V
)
{
    assert(pDestination);
#if defined(_XM_NO_INTRINSICS_)

    static const XMVECTORF32 Min = {-32767.0f, -32767.0f, -32767.0f, -32767.0f};
    static const XMVECTORF32 Max = {32767.0f, 32767.0f, 32767.0f, 32767.0f};

    XMVECTOR N = XMVectorClamp(V, Min, Max);
    N = XMVectorRound(N);

    XMFLOAT4A tmp;
    XMStoreFloat4A(&tmp, N );

    pDestination->x = (int16_t)tmp.x;
    pDestination->y = (int16_t)tmp.y;
    pDestination->z = (int16_t)tmp.z;
    pDestination->w = (int16_t)tmp.w;

#elif defined(_XM_ARM_NEON_INTRINSICS_)
    static const XMVECTORF32 Min = {-32767.0f, -32767.0f, -32767.0f, -32767.0f};
    static const XMVECTORF32 Max = {32767.0f, 32767.0f, 32767.0f, 32767.0f};

    // Clamp, convert to s32, narrow to four s16 and store.
    __n128 vResult = vmaxq_f32( V, Min );
    vResult = vminq_f32( vResult, Max );
    vResult = vcvtq_s32_f32( vResult );
    __n64 vInt = vmovn_s32( vResult );
    vst1_s16( (int16_t*)pDestination, vInt );
#elif defined(_XM_SSE_INTRINSICS_)
    static const XMVECTORF32 Min = {-32767.0f, -32767.0f, -32767.0f, -32767.0f};
    static const XMVECTORF32 Max = {32767.0f, 32767.0f, 32767.0f, 32767.0f};
    // Bounds check
    XMVECTOR vResult = _mm_max_ps(V,Min);
    vResult = _mm_min_ps(vResult,Max);
    // Convert to int with rounding
    __m128i vInt = _mm_cvtps_epi32(vResult);
    // Pack the ints into shorts
    vInt = _mm_packs_epi32(vInt,vInt);
    // Store all four shorts (64 bits) in one write
    _mm_store_sd(reinterpret_cast<double *>(&pDestination->x),_mm_castsi128_pd(vInt));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
+//------------------------------------------------------------------------------
// Stores all four components of V as unsigned 16-bit normalized values:
// input is saturated to [0,1] and scaled by 65535 (+0.5 then truncated).
_Use_decl_annotations_
inline void PackedVector::XMStoreUShortN4
(
    XMUSHORTN4* pDestination,
    FXMVECTOR V
)
{
    assert(pDestination);
#if defined(_XM_NO_INTRINSICS_)

    static const XMVECTORF32 Scale = {65535.0f, 65535.0f, 65535.0f, 65535.0f};

    XMVECTOR N = XMVectorSaturate(V);
    N = XMVectorMultiplyAdd(N, Scale.v, g_XMOneHalf.v);
    N = XMVectorTruncate(N);

    XMFLOAT4A tmp;
    XMStoreFloat4A(&tmp, N );

    // int16_t cast wraps to the destination's 16-bit storage; the value is
    // already in [0,65535] so the stored bits are correct.
    pDestination->x = (int16_t)tmp.x;
    pDestination->y = (int16_t)tmp.y;
    pDestination->z = (int16_t)tmp.z;
    pDestination->w = (int16_t)tmp.w;

#elif defined(_XM_ARM_NEON_INTRINSICS_)
    // Saturate, scale, convert to u32, narrow to four u16 and store.
    __n128 vResult = vmaxq_f32( V, g_XMZero );
    vResult = vminq_f32( vResult, g_XMOne );
    const __n128 Scale = vdupq_n_f32( 65535.0f );
    vResult = vmulq_f32( vResult, Scale );
    vResult = vcvtq_u32_f32( vResult );
    __n64 vInt = vmovn_u32( vResult );
    vst1_u16( (uint16_t*)pDestination, vInt );
#elif defined(_XM_SSE_INTRINSICS_)
    static const XMVECTORF32 Scale = {65535.0f, 65535.0f, 65535.0f, 65535.0f};
    // Bounds check
    XMVECTOR vResult = _mm_max_ps(V,g_XMZero);
    vResult = _mm_min_ps(vResult,g_XMOne);
    vResult = _mm_mul_ps(vResult,Scale);
    // Convert to int with rounding
    __m128i vInt = _mm_cvtps_epi32(vResult);
    // Since the SSE pack instruction clamps using signed rules,
    // manually extract the values to store them to memory
    pDestination->x = static_cast<int16_t>(_mm_extract_epi16(vInt,0));
    pDestination->y = static_cast<int16_t>(_mm_extract_epi16(vInt,2));
    pDestination->z = static_cast<int16_t>(_mm_extract_epi16(vInt,4));
    pDestination->w = static_cast<int16_t>(_mm_extract_epi16(vInt,6));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
+//------------------------------------------------------------------------------
// Stores all four components of V as unsigned 16-bit integers, clamping to
// [0, 65535].
_Use_decl_annotations_
inline void PackedVector::XMStoreUShort4
(
    XMUSHORT4* pDestination,
    FXMVECTOR V
)
{
    assert(pDestination);
#if defined(_XM_NO_INTRINSICS_)

    static const XMVECTORF32 Max = {65535.0f, 65535.0f, 65535.0f, 65535.0f};

    XMVECTOR N = XMVectorClamp(V, XMVectorZero(), Max);
    N = XMVectorRound(N);

    XMFLOAT4A tmp;
    XMStoreFloat4A(&tmp, N );

    // int16_t cast wraps to the destination's 16-bit storage; the value is
    // already in [0,65535] so the stored bits are correct.
    pDestination->x = (int16_t)tmp.x;
    pDestination->y = (int16_t)tmp.y;
    pDestination->z = (int16_t)tmp.z;
    pDestination->w = (int16_t)tmp.w;

#elif defined(_XM_ARM_NEON_INTRINSICS_)
    static const XMVECTORF32 Max = {65535.0f, 65535.0f, 65535.0f, 65535.0f};

    // Clamp, convert to u32, narrow to four u16 and store.
    __n128 vResult = vmaxq_f32( V, g_XMZero );
    vResult = vminq_f32( vResult, Max );
    vResult = vcvtq_u32_f32( vResult );
    __n64 vInt = vmovn_u32( vResult );
    vst1_u16( (uint16_t*)pDestination, vInt );
#elif defined(_XM_SSE_INTRINSICS_)
    static const XMVECTORF32 Max = {65535.0f, 65535.0f, 65535.0f, 65535.0f};
    // Bounds check
    XMVECTOR vResult = _mm_max_ps(V,g_XMZero);
    vResult = _mm_min_ps(vResult,Max);
    // Convert to int with rounding
    __m128i vInt = _mm_cvtps_epi32(vResult);
    // Since the SSE pack instruction clamps using signed rules,
    // manually extract the values to store them to memory
    pDestination->x = static_cast<int16_t>(_mm_extract_epi16(vInt,0));
    pDestination->y = static_cast<int16_t>(_mm_extract_epi16(vInt,2));
    pDestination->z = static_cast<int16_t>(_mm_extract_epi16(vInt,4));
    pDestination->w = static_cast<int16_t>(_mm_extract_epi16(vInt,6));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
+
+//------------------------------------------------------------------------------
+// Packs a float4 into an XMXDECN4: x/y/z are clamped to [-1, 1] and stored
+// as signed-normalized 10-bit fields; w is clamped to [0, 1], scaled to
+// [0, 3], and stored unsigned in the top 2 bits.
+_Use_decl_annotations_
+inline void PackedVector::XMStoreXDecN4
+(
+    XMXDECN4* pDestination,
+    FXMVECTOR V
+)
+{
+    assert(pDestination);
+#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
+
+    static const XMVECTORF32 Min = {-1.0f, -1.0f, -1.0f, 0.0f};
+    static const XMVECTORF32 Scale = {511.0f, 511.0f, 511.0f, 3.0f};
+
+    XMVECTOR N = XMVectorClamp(V, Min.v, g_XMOne.v);
+    N = XMVectorMultiply(N, Scale.v);
+    N = XMVectorRound(N);
+
+    XMFLOAT4A tmp;
+    XMStoreFloat4A(&tmp, N );
+
+    // Assemble the 32-bit word: w in bits 30-31, z/y/x in 10-bit fields.
+    pDestination->v = ((uint32_t)tmp.w << 30) |
+                      (((int32_t)tmp.z & 0x3FF) << 20) |
+                      (((int32_t)tmp.y & 0x3FF) << 10) |
+                      (((int32_t)tmp.x & 0x3FF));
+
+#elif defined(_XM_SSE_INTRINSICS_)
+    static const XMVECTORF32 Min = {-1.0f, -1.0f, -1.0f, 0.0f};
+    // Each lane's scale also shifts the value into its final bit position.
+    // w is only scaled up to bit 29 (3*2^30 would overflow the signed
+    // 32-bit conversion); the add below doubles it into bit 30.
+    static const XMVECTORF32 Scale = {511.0f, 511.0f*1024.0f, 511.0f*1048576.0f,3.0f*536870912.0f};
+    static const XMVECTORI32 ScaleMask = {0x3FF,0x3FF<<10,0x3FF<<20,0x3<<29};
+    XMVECTOR vResult = _mm_max_ps(V,Min);
+    vResult = _mm_min_ps(vResult,g_XMOne);
+    // Scale by multiplication
+    vResult = _mm_mul_ps(vResult,Scale);
+    // Convert to int (W is unsigned)
+    __m128i vResulti = _mm_cvtps_epi32(vResult);
+    // Mask off any fraction
+    vResulti = _mm_and_si128(vResulti,ScaleMask);
+    // To fix W, add itself to shift it up to <<30 instead of <<29
+    __m128i vResultw = _mm_and_si128(vResulti,g_XMMaskW);
+    vResulti = _mm_add_epi32(vResulti,vResultw);
+    // Do a horizontal or of all 4 entries
+    vResult = XM_PERMUTE_PS(_mm_castsi128_ps(vResulti),_MM_SHUFFLE(0,3,2,1));
+    vResulti = _mm_or_si128(vResulti,_mm_castps_si128(vResult));
+    vResult = XM_PERMUTE_PS(vResult,_MM_SHUFFLE(0,3,2,1));
+    vResulti = _mm_or_si128(vResulti,_mm_castps_si128(vResult));
+    vResult = XM_PERMUTE_PS(vResult,_MM_SHUFFLE(0,3,2,1));
+    vResulti = _mm_or_si128(vResulti,_mm_castps_si128(vResult));
+    _mm_store_ss(reinterpret_cast<float *>(&pDestination->v),_mm_castsi128_ps(vResulti));
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+// Packs a float4 into an XMXDEC4: x/y/z are clamped to [-511, 511] and
+// stored as signed 10-bit fields; w is clamped to [0, 3] and stored in the
+// top 2 bits. Fractional parts are truncated (no rounding step).
+_Use_decl_annotations_
+inline void PackedVector::XMStoreXDec4
+(
+    XMXDEC4* pDestination,
+    FXMVECTOR V
+)
+{
+    assert(pDestination);
+#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
+
+    static const XMVECTORF32 Min = {-511.0f, -511.0f, -511.0f, 0.0f};
+    static const XMVECTORF32 Max = {511.0f, 511.0f, 511.0f, 3.0f};
+
+    XMVECTOR N = XMVectorClamp(V, Min, Max);
+
+    XMFLOAT4A tmp;
+    XMStoreFloat4A(&tmp, N );
+
+    pDestination->v = ((uint32_t)tmp.w << 30) |
+                      (((int32_t)tmp.z & 0x3FF) << 20) |
+                      (((int32_t)tmp.y & 0x3FF) << 10) |
+                      (((int32_t)tmp.x & 0x3FF));
+
+#elif defined(_XM_SSE_INTRINSICS_)
+    // y and w are scaled one bit low (note the /2 factors and <<(n-1)
+    // masks); after the horizontal OR merges y|w into one lane, a single
+    // add-to-self doubles both into final position without overflowing the
+    // signed 32-bit conversion.
+    static const XMVECTORF32 MinXDec4 = {-511.0f,-511.0f,-511.0f, 0.0f};
+    static const XMVECTORF32 MaxXDec4 = { 511.0f, 511.0f, 511.0f, 3.0f};
+    static const XMVECTORF32 ScaleXDec4 = {1.0f,1024.0f/2.0f,1024.0f*1024.0f,1024.0f*1024.0f*1024.0f/2.0f};
+    static const XMVECTORI32 MaskXDec4= {0x3FF,0x3FF<<(10-1),0x3FF<<20,0x3<<(30-1)};
+    // Clamp to bounds
+    XMVECTOR vResult = _mm_max_ps(V,MinXDec4);
+    vResult = _mm_min_ps(vResult,MaxXDec4);
+    // Scale by multiplication
+    vResult = _mm_mul_ps(vResult,ScaleXDec4);
+    // Convert to int
+    __m128i vResulti = _mm_cvttps_epi32(vResult);
+    // Mask off any fraction
+    vResulti = _mm_and_si128(vResulti,MaskXDec4);
+    // Do a horizontal or of 4 entries
+    __m128i vResulti2 = _mm_shuffle_epi32(vResulti,_MM_SHUFFLE(3,2,3,2));
+    // x = x|z, y = y|w
+    vResulti = _mm_or_si128(vResulti,vResulti2);
+    // Move Z to the x position
+    vResulti2 = _mm_shuffle_epi32(vResulti,_MM_SHUFFLE(1,1,1,1));
+    // Perform a single bit left shift on y|w
+    vResulti2 = _mm_add_epi32(vResulti2,vResulti2);
+    // i = x|y|z|w
+    vResulti = _mm_or_si128(vResulti,vResulti2);
+    _mm_store_ss(reinterpret_cast<float *>(&pDestination->v),_mm_castsi128_ps(vResulti));
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+// Packs a float4 into an XMUDECN4: x/y/z are saturated to [0, 1] and stored
+// as unsigned-normalized 10-bit fields; w is saturated and scaled to [0, 3]
+// in the top 2 bits. Fractions are truncated (no rounding step).
+_Use_decl_annotations_
+inline void PackedVector::XMStoreUDecN4
+(
+    XMUDECN4* pDestination,
+    FXMVECTOR V
+)
+{
+    assert(pDestination);
+#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
+
+    static const XMVECTORF32 Scale = {1023.0f, 1023.0f, 1023.0f, 3.0f};
+
+    XMVECTOR N = XMVectorSaturate(V);
+    N = XMVectorMultiply(N, Scale.v);
+
+    XMFLOAT4A tmp;
+    XMStoreFloat4A(&tmp, N );
+
+    pDestination->v = ((uint32_t)tmp.w << 30) |
+                      (((uint32_t)tmp.z & 0x3FF) << 20) |
+                      (((uint32_t)tmp.y & 0x3FF) << 10) |
+                      (((uint32_t)tmp.x & 0x3FF));
+
+#elif defined(_XM_SSE_INTRINSICS_)
+    // y and w are scaled one bit low (*0.5f factors, <<(n-1) masks); after
+    // the horizontal OR merges y|w into one lane, a single add-to-self
+    // doubles both into final position, keeping the conversion in signed
+    // 32-bit range.
+    static const XMVECTORF32 ScaleUDecN4 = {1023.0f,1023.0f*1024.0f*0.5f,1023.0f*1024.0f*1024.0f,3.0f*1024.0f*1024.0f*1024.0f*0.5f};
+    static const XMVECTORI32 MaskUDecN4= {0x3FF,0x3FF<<(10-1),0x3FF<<20,0x3<<(30-1)};
+    // Clamp to bounds
+    XMVECTOR vResult = _mm_max_ps(V,g_XMZero);
+    vResult = _mm_min_ps(vResult,g_XMOne);
+    // Scale by multiplication
+    vResult = _mm_mul_ps(vResult,ScaleUDecN4);
+    // Convert to int
+    __m128i vResulti = _mm_cvttps_epi32(vResult);
+    // Mask off any fraction
+    vResulti = _mm_and_si128(vResulti,MaskUDecN4);
+    // Do a horizontal or of 4 entries
+    __m128i vResulti2 = _mm_shuffle_epi32(vResulti,_MM_SHUFFLE(3,2,3,2));
+    // x = x|z, y = y|w
+    vResulti = _mm_or_si128(vResulti,vResulti2);
+    // Move Z to the x position
+    vResulti2 = _mm_shuffle_epi32(vResulti,_MM_SHUFFLE(1,1,1,1));
+    // Perform a left shift by one bit on y|w
+    vResulti2 = _mm_add_epi32(vResulti2,vResulti2);
+    // i = x|y|z|w
+    vResulti = _mm_or_si128(vResulti,vResulti2);
+    _mm_store_ss(reinterpret_cast<float *>(&pDestination->v),_mm_castsi128_ps(vResulti));
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+// Packs a float4 into an XMUDEC4: x/y/z are clamped to [0, 1023] and stored
+// as unsigned 10-bit fields; w is clamped to [0, 3] in the top 2 bits.
+// Fractions are truncated (no rounding step).
+_Use_decl_annotations_
+inline void PackedVector::XMStoreUDec4
+(
+    XMUDEC4* pDestination,
+    FXMVECTOR V
+)
+{
+    assert(pDestination);
+#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
+
+    static const XMVECTORF32 Max = {1023.0f, 1023.0f, 1023.0f, 3.0f};
+
+    XMVECTOR N = XMVectorClamp(V, XMVectorZero(), Max);
+
+    XMFLOAT4A tmp;
+    XMStoreFloat4A(&tmp, N );
+
+    pDestination->v = ((uint32_t)tmp.w << 30) |
+                      (((uint32_t)tmp.z & 0x3FF) << 20) |
+                      (((uint32_t)tmp.y & 0x3FF) << 10) |
+                      (((uint32_t)tmp.x & 0x3FF));
+
+#elif defined(_XM_SSE_INTRINSICS_)
+    // y and w are scaled one bit low (/2 factors, <<(n-1) masks) and later
+    // doubled into position by the add-to-self, keeping the float->int
+    // conversion in signed 32-bit range.
+    static const XMVECTORF32 MaxUDec4 = { 1023.0f, 1023.0f, 1023.0f, 3.0f};
+    static const XMVECTORF32 ScaleUDec4 = {1.0f,1024.0f/2.0f,1024.0f*1024.0f,1024.0f*1024.0f*1024.0f/2.0f};
+    static const XMVECTORI32 MaskUDec4= {0x3FF,0x3FF<<(10-1),0x3FF<<20,0x3<<(30-1)};
+    // Clamp to bounds
+    XMVECTOR vResult = _mm_max_ps(V,g_XMZero);
+    vResult = _mm_min_ps(vResult,MaxUDec4);
+    // Scale by multiplication
+    vResult = _mm_mul_ps(vResult,ScaleUDec4);
+    // Convert to int
+    __m128i vResulti = _mm_cvttps_epi32(vResult);
+    // Mask off any fraction
+    vResulti = _mm_and_si128(vResulti,MaskUDec4);
+    // Do a horizontal or of 4 entries
+    __m128i vResulti2 = _mm_shuffle_epi32(vResulti,_MM_SHUFFLE(3,2,3,2));
+    // x = x|z, y = y|w
+    vResulti = _mm_or_si128(vResulti,vResulti2);
+    // Move Z to the x position
+    vResulti2 = _mm_shuffle_epi32(vResulti,_MM_SHUFFLE(1,1,1,1));
+    // Perform a left shift by one bit on y|w
+    vResulti2 = _mm_add_epi32(vResulti2,vResulti2);
+    // i = x|y|z|w
+    vResulti = _mm_or_si128(vResulti,vResulti2);
+    _mm_store_ss(reinterpret_cast<float *>(&pDestination->v),_mm_castsi128_ps(vResulti));
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+// Packs a float4 into an XMDECN4: x/y/z are clamped to [-1, 1] and stored
+// as signed-normalized 10-bit fields; w is clamped to [-1, 1] (scale 1.0)
+// and stored in the signed 2-bit top field. Fractions are truncated.
+_Use_decl_annotations_
+inline void PackedVector::XMStoreDecN4
+(
+    XMDECN4* pDestination,
+    FXMVECTOR V
+)
+{
+    assert(pDestination);
+#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
+
+    static const XMVECTORF32 Scale = {511.0f, 511.0f, 511.0f, 1.0f};
+
+    XMVECTOR N = XMVectorClamp(V, g_XMNegativeOne.v, g_XMOne.v);
+    N = XMVectorMultiply(N, Scale.v);
+
+    XMFLOAT4A tmp;
+    XMStoreFloat4A(&tmp, N );
+
+    // NOTE(review): left-shifting a negative (int32_t)tmp.w is technically
+    // undefined in C++; this relies on two's-complement behavior.
+    pDestination->v = ((int32_t)tmp.w << 30) |
+                      (((int32_t)tmp.z & 0x3FF) << 20) |
+                      (((int32_t)tmp.y & 0x3FF) << 10) |
+                      (((int32_t)tmp.x & 0x3FF));
+
+#elif defined(_XM_SSE_INTRINSICS_)
+    // Scales double as bit-shifts; all lanes fit the signed 32-bit
+    // conversion here, so no half-scale/doubling trick is needed.
+    static const XMVECTORF32 ScaleDecN4 = {511.0f,511.0f*1024.0f,511.0f*1024.0f*1024.0f,1.0f*1024.0f*1024.0f*1024.0f};
+    static const XMVECTORI32 MaskDecN4= {0x3FF,0x3FF<<10,0x3FF<<20,0x3<<30};
+    // Clamp to bounds
+    XMVECTOR vResult = _mm_max_ps(V,g_XMNegativeOne);
+    vResult = _mm_min_ps(vResult,g_XMOne);
+    // Scale by multiplication
+    vResult = _mm_mul_ps(vResult,ScaleDecN4);
+    // Convert to int
+    __m128i vResulti = _mm_cvttps_epi32(vResult);
+    // Mask off any fraction
+    vResulti = _mm_and_si128(vResulti,MaskDecN4);
+    // Do a horizontal or of 4 entries
+    __m128i vResulti2 = _mm_shuffle_epi32(vResulti,_MM_SHUFFLE(3,2,3,2));
+    // x = x|z, y = y|w
+    vResulti = _mm_or_si128(vResulti,vResulti2);
+    // Move Z to the x position
+    vResulti2 = _mm_shuffle_epi32(vResulti,_MM_SHUFFLE(1,1,1,1));
+    // i = x|y|z|w
+    vResulti = _mm_or_si128(vResulti,vResulti2);
+    _mm_store_ss(reinterpret_cast<float *>(&pDestination->v),_mm_castsi128_ps(vResulti));
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+// Packs a float4 into an XMDEC4: x/y/z are clamped to [-511, 511] and
+// stored as signed 10-bit fields; w is clamped to [-1, 1] and stored in the
+// signed 2-bit top field. Fractions are truncated (no rounding step).
+_Use_decl_annotations_
+inline void PackedVector::XMStoreDec4
+(
+    XMDEC4* pDestination,
+    FXMVECTOR V
+)
+{
+    assert(pDestination);
+#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
+
+    static const XMVECTORF32 Min = {-511.0f, -511.0f, -511.0f, -1.0f};
+    static const XMVECTORF32 Max = {511.0f, 511.0f, 511.0f, 1.0f};
+
+    XMVECTOR N = XMVectorClamp(V, Min, Max);
+
+    XMFLOAT4A tmp;
+    XMStoreFloat4A(&tmp, N );
+
+    pDestination->v = ((int32_t)tmp.w << 30) |
+                      (((int32_t)tmp.z & 0x3FF) << 20) |
+                      (((int32_t)tmp.y & 0x3FF) << 10) |
+                      (((int32_t)tmp.x & 0x3FF));
+
+#elif defined(_XM_SSE_INTRINSICS_)
+    // Scales double as bit-shifts into each lane's final position.
+    static const XMVECTORF32 MinDec4 = {-511.0f,-511.0f,-511.0f,-1.0f};
+    static const XMVECTORF32 MaxDec4 = { 511.0f, 511.0f, 511.0f, 1.0f};
+    static const XMVECTORF32 ScaleDec4 = {1.0f,1024.0f,1024.0f*1024.0f,1024.0f*1024.0f*1024.0f};
+    static const XMVECTORI32 MaskDec4= {0x3FF,0x3FF<<10,0x3FF<<20,0x3<<30};
+    // Clamp to bounds
+    XMVECTOR vResult = _mm_max_ps(V,MinDec4);
+    vResult = _mm_min_ps(vResult,MaxDec4);
+    // Scale by multiplication
+    vResult = _mm_mul_ps(vResult,ScaleDec4);
+    // Convert to int
+    __m128i vResulti = _mm_cvttps_epi32(vResult);
+    // Mask off any fraction
+    vResulti = _mm_and_si128(vResulti,MaskDec4);
+    // Do a horizontal or of 4 entries
+    __m128i vResulti2 = _mm_shuffle_epi32(vResulti,_MM_SHUFFLE(3,2,3,2));
+    // x = x|z, y = y|w
+    vResulti = _mm_or_si128(vResulti,vResulti2);
+    // Move Z to the x position
+    vResulti2 = _mm_shuffle_epi32(vResulti,_MM_SHUFFLE(1,1,1,1));
+    // i = x|y|z|w
+    vResulti = _mm_or_si128(vResulti,vResulti2);
+    _mm_store_ss(reinterpret_cast<float *>(&pDestination->v),_mm_castsi128_ps(vResulti));
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+// Packs a float4 into an XMUBYTEN4: each component is saturated to [0, 1],
+// scaled by 255, and stored as an unsigned byte. The scalar/NEON path
+// rounds to nearest; the SSE path truncates after scaling.
+_Use_decl_annotations_
+inline void PackedVector::XMStoreUByteN4
+(
+    XMUBYTEN4* pDestination,
+    FXMVECTOR V
+)
+{
+    assert(pDestination);
+#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
+
+    static const XMVECTORF32 Scale = {255.0f, 255.0f, 255.0f, 255.0f};
+
+    XMVECTOR N = XMVectorSaturate(V);
+    N = XMVectorMultiply(N, Scale.v);
+    N = XMVectorRound(N);
+
+    XMFLOAT4A tmp;
+    XMStoreFloat4A(&tmp, N );
+
+    pDestination->x = (uint8_t)tmp.x;
+    pDestination->y = (uint8_t)tmp.y;
+    pDestination->z = (uint8_t)tmp.z;
+    pDestination->w = (uint8_t)tmp.w;
+
+#elif defined(_XM_SSE_INTRINSICS_)
+    // y and w are scaled one bit low (*0.5f factors, <<(n-1) masks) and
+    // doubled into position later, keeping the float->int conversion in
+    // signed 32-bit range (255*2^24 would overflow).
+    static const XMVECTORF32 ScaleUByteN4 = {255.0f,255.0f*256.0f*0.5f,255.0f*256.0f*256.0f,255.0f*256.0f*256.0f*256.0f*0.5f};
+    static const XMVECTORI32 MaskUByteN4 = {0xFF,0xFF<<(8-1),0xFF<<16,0xFF<<(24-1)};
+    // Clamp to bounds
+    XMVECTOR vResult = _mm_max_ps(V,g_XMZero);
+    vResult = _mm_min_ps(vResult,g_XMOne);
+    // Scale by multiplication
+    vResult = _mm_mul_ps(vResult,ScaleUByteN4);
+    // Convert to int
+    __m128i vResulti = _mm_cvttps_epi32(vResult);
+    // Mask off any fraction
+    vResulti = _mm_and_si128(vResulti,MaskUByteN4);
+    // Do a horizontal or of 4 entries
+    __m128i vResulti2 = _mm_shuffle_epi32(vResulti,_MM_SHUFFLE(3,2,3,2));
+    // x = x|z, y = y|w
+    vResulti = _mm_or_si128(vResulti,vResulti2);
+    // Move Z to the x position
+    vResulti2 = _mm_shuffle_epi32(vResulti,_MM_SHUFFLE(1,1,1,1));
+    // Perform a single bit left shift to fix y|w
+    vResulti2 = _mm_add_epi32(vResulti2,vResulti2);
+    // i = x|y|z|w
+    vResulti = _mm_or_si128(vResulti,vResulti2);
+    _mm_store_ss(reinterpret_cast<float *>(&pDestination->v),_mm_castsi128_ps(vResulti));
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+// Packs a float4 into an XMUBYTE4: each component is clamped to [0, 255]
+// and stored as an unsigned byte. The scalar/NEON path rounds to nearest;
+// the SSE path truncates after scaling.
+_Use_decl_annotations_
+inline void PackedVector::XMStoreUByte4
+(
+    XMUBYTE4* pDestination,
+    FXMVECTOR V
+)
+{
+    assert(pDestination);
+#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
+
+    static const XMVECTORF32 Max = {255.0f, 255.0f, 255.0f, 255.0f};
+
+    XMVECTOR N = XMVectorClamp(V, XMVectorZero(), Max);
+    N = XMVectorRound(N);
+
+    XMFLOAT4A tmp;
+    XMStoreFloat4A(&tmp, N );
+
+    pDestination->x = (uint8_t)tmp.x;
+    pDestination->y = (uint8_t)tmp.y;
+    pDestination->z = (uint8_t)tmp.z;
+    pDestination->w = (uint8_t)tmp.w;
+
+#elif defined(_XM_SSE_INTRINSICS_)
+    // y and w are scaled one bit low (*0.5f factors, <<(n-1) masks) and
+    // doubled into position later to stay within the signed 32-bit
+    // conversion range.
+    static const XMVECTORF32 MaxUByte4 = { 255.0f, 255.0f, 255.0f, 255.0f};
+    static const XMVECTORF32 ScaleUByte4 = {1.0f,256.0f*0.5f,256.0f*256.0f,256.0f*256.0f*256.0f*0.5f};
+    static const XMVECTORI32 MaskUByte4 = {0xFF,0xFF<<(8-1),0xFF<<16,0xFF<<(24-1)};
+    // Clamp to bounds
+    XMVECTOR vResult = _mm_max_ps(V,g_XMZero);
+    vResult = _mm_min_ps(vResult,MaxUByte4);
+    // Scale by multiplication
+    vResult = _mm_mul_ps(vResult,ScaleUByte4);
+    // Convert to int
+    __m128i vResulti = _mm_cvttps_epi32(vResult);
+    // Mask off any fraction
+    vResulti = _mm_and_si128(vResulti,MaskUByte4);
+    // Do a horizontal or of 4 entries
+    __m128i vResulti2 = _mm_shuffle_epi32(vResulti,_MM_SHUFFLE(3,2,3,2));
+    // x = x|z, y = y|w
+    vResulti = _mm_or_si128(vResulti,vResulti2);
+    // Move Z to the x position
+    vResulti2 = _mm_shuffle_epi32(vResulti,_MM_SHUFFLE(1,1,1,1));
+    // Perform a single bit left shift to fix y|w
+    vResulti2 = _mm_add_epi32(vResulti2,vResulti2);
+    // i = x|y|z|w
+    vResulti = _mm_or_si128(vResulti,vResulti2);
+    _mm_store_ss(reinterpret_cast<float *>(&pDestination->v),_mm_castsi128_ps(vResulti));
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+_Use_decl_annotations_
+// Packs a float4 into an XMBYTEN4: each component is clamped to [-1, 1],
+// scaled by 127, and stored as a signed byte. The scalar/NEON path rounds
+// to nearest; the SSE path truncates after scaling.
+inline void PackedVector::XMStoreByteN4
+(
+    XMBYTEN4* pDestination,
+    FXMVECTOR V
+)
+{
+    assert(pDestination);
+#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
+
+    static const XMVECTORF32 Scale = {127.0f, 127.0f, 127.0f, 127.0f};
+
+    XMVECTOR N = XMVectorClamp(V, g_XMNegativeOne.v, g_XMOne.v);
+    // Scale the CLAMPED vector. The previous code multiplied the raw input
+    // V here, silently discarding the clamp and overflowing the int8_t
+    // casts below for out-of-range inputs.
+    N = XMVectorMultiply(N, Scale.v);
+    N = XMVectorRound(N);
+
+    XMFLOAT4A tmp;
+    XMStoreFloat4A(&tmp, N );
+
+    pDestination->x = (int8_t)tmp.x;
+    pDestination->y = (int8_t)tmp.y;
+    pDestination->z = (int8_t)tmp.z;
+    pDestination->w = (int8_t)tmp.w;
+
+#elif defined(_XM_SSE_INTRINSICS_)
+    static const XMVECTORF32 ScaleByteN4 = {127.0f,127.0f*256.0f,127.0f*256.0f*256.0f,127.0f*256.0f*256.0f*256.0f};
+    static const XMVECTORI32 MaskByteN4 = {0xFF,0xFF<<8,0xFF<<16,0xFF<<24};
+    // Clamp to bounds
+    XMVECTOR vResult = _mm_max_ps(V,g_XMNegativeOne);
+    vResult = _mm_min_ps(vResult,g_XMOne);
+    // Scale by multiplication
+    vResult = _mm_mul_ps(vResult,ScaleByteN4);
+    // Convert to int
+    __m128i vResulti = _mm_cvttps_epi32(vResult);
+    // Mask off any fraction
+    vResulti = _mm_and_si128(vResulti,MaskByteN4);
+    // Do a horizontal or of 4 entries
+    __m128i vResulti2 = _mm_shuffle_epi32(vResulti,_MM_SHUFFLE(3,2,3,2));
+    // x = x|z, y = y|w
+    vResulti = _mm_or_si128(vResulti,vResulti2);
+    // Move Z to the x position
+    vResulti2 = _mm_shuffle_epi32(vResulti,_MM_SHUFFLE(1,1,1,1));
+    // i = x|y|z|w
+    vResulti = _mm_or_si128(vResulti,vResulti2);
+    _mm_store_ss(reinterpret_cast<float *>(&pDestination->v),_mm_castsi128_ps(vResulti));
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+// Packs a float4 into an XMBYTE4: each component is clamped to [-127, 127]
+// and stored as a signed byte. The scalar/NEON path rounds to nearest; the
+// SSE path truncates after scaling.
+_Use_decl_annotations_
+inline void PackedVector::XMStoreByte4
+(
+    XMBYTE4* pDestination,
+    FXMVECTOR V
+)
+{
+    assert(pDestination);
+#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
+
+    static const XMVECTORF32 Min = {-127.0f, -127.0f, -127.0f, -127.0f};
+    static const XMVECTORF32 Max = {127.0f, 127.0f, 127.0f, 127.0f};
+
+    XMVECTOR N = XMVectorClamp(V, Min, Max);
+    N = XMVectorRound(N);
+
+    XMFLOAT4A tmp;
+    XMStoreFloat4A(&tmp, N );
+
+    pDestination->x = (int8_t)tmp.x;
+    pDestination->y = (int8_t)tmp.y;
+    pDestination->z = (int8_t)tmp.z;
+    pDestination->w = (int8_t)tmp.w;
+
+#elif defined(_XM_SSE_INTRINSICS_)
+    // Scales double as bit-shifts into each byte's final position.
+    static const XMVECTORF32 MinByte4 = {-127.0f,-127.0f,-127.0f,-127.0f};
+    static const XMVECTORF32 MaxByte4 = { 127.0f, 127.0f, 127.0f, 127.0f};
+    static const XMVECTORF32 ScaleByte4 = {1.0f,256.0f,256.0f*256.0f,256.0f*256.0f*256.0f};
+    static const XMVECTORI32 MaskByte4 = {0xFF,0xFF<<8,0xFF<<16,0xFF<<24};
+    // Clamp to bounds
+    XMVECTOR vResult = _mm_max_ps(V,MinByte4);
+    vResult = _mm_min_ps(vResult,MaxByte4);
+    // Scale by multiplication
+    vResult = _mm_mul_ps(vResult,ScaleByte4);
+    // Convert to int
+    __m128i vResulti = _mm_cvttps_epi32(vResult);
+    // Mask off any fraction
+    vResulti = _mm_and_si128(vResulti,MaskByte4);
+    // Do a horizontal or of 4 entries
+    __m128i vResulti2 = _mm_shuffle_epi32(vResulti,_MM_SHUFFLE(3,2,3,2));
+    // x = x|z, y = y|w
+    vResulti = _mm_or_si128(vResulti,vResulti2);
+    // Move Z to the x position
+    vResulti2 = _mm_shuffle_epi32(vResulti,_MM_SHUFFLE(1,1,1,1));
+    // i = x|y|z|w
+    vResulti = _mm_or_si128(vResulti,vResulti2);
+    _mm_store_ss(reinterpret_cast<float *>(&pDestination->v),_mm_castsi128_ps(vResulti));
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+_Use_decl_annotations_
+// Packs a float4 into an XMUNIBBLE4: each component is clamped to [0, 15],
+// rounded to nearest, and stored as a 4-bit field (w in the top nibble).
+inline void PackedVector::XMStoreUNibble4
+(
+    XMUNIBBLE4* pDestination,
+    FXMVECTOR V
+)
+{
+    assert(pDestination);
+#if defined(_XM_SSE_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
+    static const XMVECTORF32 Max = {15.0f,15.0f,15.0f,15.0f};
+    // Bounds check
+    XMVECTOR vResult = _mm_max_ps(V,g_XMZero);
+    vResult = _mm_min_ps(vResult,Max);
+    // Convert to int with rounding
+    __m128i vInt = _mm_cvtps_epi32(vResult);
+    // No SSE operations will write to 16-bit values, so we have to extract them manually
+    uint16_t x = static_cast<uint16_t>(_mm_extract_epi16(vInt,0));
+    uint16_t y = static_cast<uint16_t>(_mm_extract_epi16(vInt,2));
+    uint16_t z = static_cast<uint16_t>(_mm_extract_epi16(vInt,4));
+    uint16_t w = static_cast<uint16_t>(_mm_extract_epi16(vInt,6));
+    pDestination->v = ((w & 0xF) << 12) |
+                      ((z & 0xF) << 8) |
+                      ((y & 0xF) << 4) |
+                      ((x & 0xF));
+#else
+    static const XMVECTORF32 Max = {15.0f,15.0f,15.0f,15.0f};
+
+    XMVECTOR N = XMVectorClamp(V, XMVectorZero(), Max.v);
+    N = XMVectorRound(N);
+
+    XMFLOAT4A tmp;
+    XMStoreFloat4A(&tmp, N );
+
+    pDestination->v = (((uint16_t)tmp.w & 0xF) << 12) |
+                      (((uint16_t)tmp.z & 0xF) << 8) |
+                      (((uint16_t)tmp.y & 0xF) << 4) |
+                      (((uint16_t)tmp.x & 0xF));
+// Fix: the trailing condition text must be a comment -- bare tokens after
+// #endif are non-standard and warn/error on conforming compilers.
+#endif // !_XM_SSE_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+_Use_decl_annotations_
+// Packs a float4 into an XMU555: x/y/z are clamped to [0, 31] and stored as
+// 5-bit fields; w becomes the single top flag bit (set when w is positive).
+inline void PackedVector::XMStoreU555
+(
+    XMU555* pDestination,
+    FXMVECTOR V
+)
+{
+    assert(pDestination);
+#if defined(_XM_SSE_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
+    static const XMVECTORF32 Max = {31.0f, 31.0f, 31.0f, 1.0f};
+    // Bounds check
+    XMVECTOR vResult = _mm_max_ps(V,g_XMZero);
+    vResult = _mm_min_ps(vResult,Max);
+    // Convert to int with rounding
+    __m128i vInt = _mm_cvtps_epi32(vResult);
+    // No SSE operations will write to 16-bit values, so we have to extract them manually
+    uint16_t x = static_cast<uint16_t>(_mm_extract_epi16(vInt,0));
+    uint16_t y = static_cast<uint16_t>(_mm_extract_epi16(vInt,2));
+    uint16_t z = static_cast<uint16_t>(_mm_extract_epi16(vInt,4));
+    uint16_t w = static_cast<uint16_t>(_mm_extract_epi16(vInt,6));
+    pDestination->v = ((w) ? 0x8000 : 0) |
+                      ((z & 0x1F) << 10) |
+                      ((y & 0x1F) << 5) |
+                      ((x & 0x1F));
+#else
+    static const XMVECTORF32 Max = {31.0f, 31.0f, 31.0f, 1.0f};
+
+    XMVECTOR N = XMVectorClamp(V, XMVectorZero(), Max.v);
+    N = XMVectorRound(N);
+
+    XMFLOAT4A tmp;
+    XMStoreFloat4A(&tmp, N );
+
+    pDestination->v = ((tmp.w > 0.f) ? 0x8000 : 0) |
+                      (((uint16_t)tmp.z & 0x1F) << 10) |
+                      (((uint16_t)tmp.y & 0x1F) << 5) |
+                      (((uint16_t)tmp.x & 0x1F));
+// Fix: the trailing condition text must be a comment -- bare tokens after
+// #endif are non-standard and warn/error on conforming compilers.
+#endif // !_XM_SSE_INTRINSICS_
+}
+
+
+/****************************************************************************
+ *
+ * XMCOLOR operators
+ *
+ ****************************************************************************/
+
+//------------------------------------------------------------------------------
+
+inline PackedVector::XMCOLOR::XMCOLOR(float _r, float _g, float _b, float _a)
+{
+    // Assemble the four channels into a vector, then pack it.
+    const XMVECTOR color = XMVectorSet(_r, _g, _b, _a);
+    XMStoreColor(this, color);
+}
+
+//------------------------------------------------------------------------------
+_Use_decl_annotations_
+inline PackedVector::XMCOLOR::XMCOLOR(const float* pArray)
+{
+    // View the caller's four floats as an XMFLOAT4 and pack them.
+    const XMFLOAT4* pSrc = reinterpret_cast<const XMFLOAT4*>(pArray);
+    XMStoreColor(this, XMLoadFloat4(pSrc));
+}
+
+/****************************************************************************
+ *
+ * XMHALF2 operators
+ *
+ ****************************************************************************/
+
+//------------------------------------------------------------------------------
+
+inline PackedVector::XMHALF2::XMHALF2(float _x, float _y)
+{
+    // Convert each component to half precision independently.
+    const HALF hx = XMConvertFloatToHalf(_x);
+    const HALF hy = XMConvertFloatToHalf(_y);
+    x = hx;
+    y = hy;
+}
+
+//------------------------------------------------------------------------------
+_Use_decl_annotations_
+inline PackedVector::XMHALF2::XMHALF2(const float* pArray)
+{
+    assert( pArray != nullptr );
+    // Copy the two source floats out, then convert to half precision.
+    const float fx = pArray[0];
+    const float fy = pArray[1];
+    x = XMConvertFloatToHalf(fx);
+    y = XMConvertFloatToHalf(fy);
+}
+
+/****************************************************************************
+ *
+ * XMSHORTN2 operators
+ *
+ ****************************************************************************/
+
+//------------------------------------------------------------------------------
+
+inline PackedVector::XMSHORTN2::XMSHORTN2(float _x, float _y)
+{
+    // Build a vector with zeroed z/w and delegate to the store routine.
+    const XMVECTOR v = XMVectorSet(_x, _y, 0.0f, 0.0f);
+    XMStoreShortN2(this, v);
+}
+
+//------------------------------------------------------------------------------
+_Use_decl_annotations_
+inline PackedVector::XMSHORTN2::XMSHORTN2(const float* pArray)
+{
+    // View the caller's float pair as an XMFLOAT2 and pack it.
+    const XMFLOAT2* pSrc = reinterpret_cast<const XMFLOAT2*>(pArray);
+    XMStoreShortN2(this, XMLoadFloat2(pSrc));
+}
+
+/****************************************************************************
+ *
+ * XMSHORT2 operators
+ *
+ ****************************************************************************/
+
+//------------------------------------------------------------------------------
+
+inline PackedVector::XMSHORT2::XMSHORT2(float _x, float _y)
+{
+    // Build a vector with zeroed z/w and delegate to the store routine.
+    const XMVECTOR v = XMVectorSet(_x, _y, 0.0f, 0.0f);
+    XMStoreShort2(this, v);
+}
+
+//------------------------------------------------------------------------------
+_Use_decl_annotations_
+inline PackedVector::XMSHORT2::XMSHORT2(const float* pArray)
+{
+    // View the caller's float pair as an XMFLOAT2 and pack it.
+    const XMFLOAT2* pSrc = reinterpret_cast<const XMFLOAT2*>(pArray);
+    XMStoreShort2(this, XMLoadFloat2(pSrc));
+}
+
+/****************************************************************************
+ *
+ * XMUSHORTN2 operators
+ *
+ ****************************************************************************/
+
+//------------------------------------------------------------------------------
+
+inline PackedVector::XMUSHORTN2::XMUSHORTN2(float _x, float _y)
+{
+    // Build a vector with zeroed z/w and delegate to the store routine.
+    const XMVECTOR v = XMVectorSet(_x, _y, 0.0f, 0.0f);
+    XMStoreUShortN2(this, v);
+}
+
+//------------------------------------------------------------------------------
+_Use_decl_annotations_
+inline PackedVector::XMUSHORTN2::XMUSHORTN2(const float* pArray)
+{
+    // View the caller's float pair as an XMFLOAT2 and pack it.
+    const XMFLOAT2* pSrc = reinterpret_cast<const XMFLOAT2*>(pArray);
+    XMStoreUShortN2(this, XMLoadFloat2(pSrc));
+}
+
+/****************************************************************************
+ *
+ * XMUSHORT2 operators
+ *
+ ****************************************************************************/
+
+//------------------------------------------------------------------------------
+
+inline PackedVector::XMUSHORT2::XMUSHORT2(float _x, float _y)
+{
+    // Build a vector with zeroed z/w and delegate to the store routine.
+    const XMVECTOR v = XMVectorSet(_x, _y, 0.0f, 0.0f);
+    XMStoreUShort2(this, v);
+}
+
+//------------------------------------------------------------------------------
+_Use_decl_annotations_
+inline PackedVector::XMUSHORT2::XMUSHORT2(const float* pArray)
+{
+    // View the caller's float pair as an XMFLOAT2 and pack it.
+    const XMFLOAT2* pSrc = reinterpret_cast<const XMFLOAT2*>(pArray);
+    XMStoreUShort2(this, XMLoadFloat2(pSrc));
+}
+
+/****************************************************************************
+ *
+ * XMBYTEN2 operators
+ *
+ ****************************************************************************/
+
+//------------------------------------------------------------------------------
+
+inline PackedVector::XMBYTEN2::XMBYTEN2(float _x, float _y)
+{
+    // Build a vector with zeroed z/w and delegate to the store routine.
+    const XMVECTOR v = XMVectorSet(_x, _y, 0.0f, 0.0f);
+    XMStoreByteN2(this, v);
+}
+
+//------------------------------------------------------------------------------
+_Use_decl_annotations_
+inline PackedVector::XMBYTEN2::XMBYTEN2(const float* pArray)
+{
+    // View the caller's float pair as an XMFLOAT2 and pack it.
+    const XMFLOAT2* pSrc = reinterpret_cast<const XMFLOAT2*>(pArray);
+    XMStoreByteN2(this, XMLoadFloat2(pSrc));
+}
+
+/****************************************************************************
+ *
+ * XMBYTE2 operators
+ *
+ ****************************************************************************/
+
+//------------------------------------------------------------------------------
+
+inline PackedVector::XMBYTE2::XMBYTE2(float _x, float _y)
+{
+    // Build a vector with zeroed z/w and delegate to the store routine.
+    const XMVECTOR v = XMVectorSet(_x, _y, 0.0f, 0.0f);
+    XMStoreByte2(this, v);
+}
+
+//------------------------------------------------------------------------------
+_Use_decl_annotations_
+inline PackedVector::XMBYTE2::XMBYTE2(const float* pArray)
+{
+    // View the caller's float pair as an XMFLOAT2 and pack it.
+    const XMFLOAT2* pSrc = reinterpret_cast<const XMFLOAT2*>(pArray);
+    XMStoreByte2(this, XMLoadFloat2(pSrc));
+}
+
+/****************************************************************************
+ *
+ * XMUBYTEN2 operators
+ *
+ ****************************************************************************/
+
+//------------------------------------------------------------------------------
+
+inline PackedVector::XMUBYTEN2::XMUBYTEN2(float _x, float _y)
+{
+    // Build a vector with zeroed z/w and delegate to the store routine.
+    const XMVECTOR v = XMVectorSet(_x, _y, 0.0f, 0.0f);
+    XMStoreUByteN2(this, v);
+}
+
+//------------------------------------------------------------------------------
+_Use_decl_annotations_
+inline PackedVector::XMUBYTEN2::XMUBYTEN2(const float* pArray)
+{
+    // View the caller's float pair as an XMFLOAT2 and pack it.
+    const XMFLOAT2* pSrc = reinterpret_cast<const XMFLOAT2*>(pArray);
+    XMStoreUByteN2(this, XMLoadFloat2(pSrc));
+}
+
+/****************************************************************************
+ *
+ * XMUBYTE2 operators
+ *
+ ****************************************************************************/
+
+//------------------------------------------------------------------------------
+
+inline PackedVector::XMUBYTE2::XMUBYTE2(float _x, float _y)
+{
+    // Build a vector with zeroed z/w and delegate to the store routine.
+    const XMVECTOR v = XMVectorSet(_x, _y, 0.0f, 0.0f);
+    XMStoreUByte2(this, v);
+}
+
+//------------------------------------------------------------------------------
+_Use_decl_annotations_
+inline PackedVector::XMUBYTE2::XMUBYTE2(const float* pArray)
+{
+    // View the caller's float pair as an XMFLOAT2 and pack it.
+    const XMFLOAT2* pSrc = reinterpret_cast<const XMFLOAT2*>(pArray);
+    XMStoreUByte2(this, XMLoadFloat2(pSrc));
+}
+
+/****************************************************************************
+ *
+ * XMU565 operators
+ *
+ ****************************************************************************/
+
+inline PackedVector::XMU565::XMU565(float _x, float _y, float _z)
+{
+    // Build a vector with a zeroed w lane and delegate to the store routine.
+    const XMVECTOR v = XMVectorSet(_x, _y, _z, 0.0f);
+    XMStoreU565(this, v);
+}
+
+_Use_decl_annotations_
+inline PackedVector::XMU565::XMU565(const float *pArray)
+{
+    // View the caller's float triple as an XMFLOAT3 and pack it.
+    const XMFLOAT3* pSrc = reinterpret_cast<const XMFLOAT3*>(pArray);
+    XMStoreU565(this, XMLoadFloat3(pSrc));
+}
+
+/****************************************************************************
+ *
+ * XMFLOAT3PK operators
+ *
+ ****************************************************************************/
+
+inline PackedVector::XMFLOAT3PK::XMFLOAT3PK(float _x, float _y, float _z)
+{
+    // Build a vector with a zeroed w lane and delegate to the store routine.
+    const XMVECTOR v = XMVectorSet(_x, _y, _z, 0.0f);
+    XMStoreFloat3PK(this, v);
+}
+
+_Use_decl_annotations_
+inline PackedVector::XMFLOAT3PK::XMFLOAT3PK(const float *pArray)
+{
+    // View the caller's float triple as an XMFLOAT3 and pack it.
+    const XMFLOAT3* pSrc = reinterpret_cast<const XMFLOAT3*>(pArray);
+    XMStoreFloat3PK(this, XMLoadFloat3(pSrc));
+}
+
+/****************************************************************************
+ *
+ * XMFLOAT3SE operators
+ *
+ ****************************************************************************/
+
+inline PackedVector::XMFLOAT3SE::XMFLOAT3SE(float _x, float _y, float _z)
+{
+    // Build a vector with a zeroed w lane and delegate to the store routine.
+    const XMVECTOR v = XMVectorSet(_x, _y, _z, 0.0f);
+    XMStoreFloat3SE(this, v);
+}
+
+_Use_decl_annotations_
+inline PackedVector::XMFLOAT3SE::XMFLOAT3SE(const float *pArray)
+{
+    // View the caller's float triple as an XMFLOAT3 and pack it.
+    const XMFLOAT3* pSrc = reinterpret_cast<const XMFLOAT3*>(pArray);
+    XMStoreFloat3SE(this, XMLoadFloat3(pSrc));
+}
+
+/****************************************************************************
+ *
+ * XMHALF4 operators
+ *
+ ****************************************************************************/
+
+//------------------------------------------------------------------------------
+
+inline PackedVector::XMHALF4::XMHALF4(float _x, float _y, float _z, float _w)
+{
+    // Convert each lane to half precision independently.
+    const HALF hx = XMConvertFloatToHalf(_x);
+    const HALF hy = XMConvertFloatToHalf(_y);
+    const HALF hz = XMConvertFloatToHalf(_z);
+    const HALF hw = XMConvertFloatToHalf(_w);
+    x = hx;
+    y = hy;
+    z = hz;
+    w = hw;
+}
+
+//------------------------------------------------------------------------------
+
+_Use_decl_annotations_
+inline PackedVector::XMHALF4::XMHALF4(const float* pArray)
+{
+    // Bulk-convert the four source floats straight into the half fields.
+    XMConvertFloatToHalfStream(&x, sizeof(HALF), pArray, sizeof(float), 4);
+}
+
+/****************************************************************************
+ *
+ * XMSHORTN4 operators
+ *
+ ****************************************************************************/
+
+//------------------------------------------------------------------------------
+
+inline PackedVector::XMSHORTN4::XMSHORTN4(float _x, float _y, float _z, float _w)
+{
+    // Assemble the four components into a vector, then pack it.
+    const XMVECTOR v = XMVectorSet(_x, _y, _z, _w);
+    XMStoreShortN4(this, v);
+}
+
+//------------------------------------------------------------------------------
+_Use_decl_annotations_
+inline PackedVector::XMSHORTN4::XMSHORTN4(const float* pArray)
+{
+    // View the caller's four floats as an XMFLOAT4 and pack them.
+    const XMFLOAT4* pSrc = reinterpret_cast<const XMFLOAT4*>(pArray);
+    XMStoreShortN4(this, XMLoadFloat4(pSrc));
+}
+
+/****************************************************************************
+ *
+ * XMSHORT4 operators
+ *
+ ****************************************************************************/
+
+//------------------------------------------------------------------------------
+
+inline PackedVector::XMSHORT4::XMSHORT4(float _x, float _y, float _z, float _w)
+{
+    // Assemble the four components into a vector, then pack it.
+    const XMVECTOR v = XMVectorSet(_x, _y, _z, _w);
+    XMStoreShort4(this, v);
+}
+
+//------------------------------------------------------------------------------
+_Use_decl_annotations_
+inline PackedVector::XMSHORT4::XMSHORT4(const float* pArray)
+{
+    // View the caller's four floats as an XMFLOAT4 and pack them.
+    const XMFLOAT4* pSrc = reinterpret_cast<const XMFLOAT4*>(pArray);
+    XMStoreShort4(this, XMLoadFloat4(pSrc));
+}
+
+/****************************************************************************
+ *
+ * XMUSHORTN4 operators
+ *
+ ****************************************************************************/
+
+//------------------------------------------------------------------------------
+
+// Construct from four component floats: build a vector, then pack it
+// with XMStoreUShortN4.
+inline PackedVector::XMUSHORTN4::XMUSHORTN4(float _x, float _y, float _z, float _w)
+{
+    const XMVECTOR V = XMVectorSet(_x, _y, _z, _w);
+    XMStoreUShortN4(this, V);
+}
+
+//------------------------------------------------------------------------------
+// Construct from an array of four floats, loaded as an XMFLOAT4 and
+// packed with XMStoreUShortN4.
+_Use_decl_annotations_
+inline PackedVector::XMUSHORTN4::XMUSHORTN4(const float* pArray)
+{
+    const XMVECTOR V = XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(pArray));
+    XMStoreUShortN4(this, V);
+}
+
+/****************************************************************************
+ *
+ * XMUSHORT4 operators
+ *
+ ****************************************************************************/
+
+//------------------------------------------------------------------------------
+
+// Construct from four component floats: build a vector, then pack it
+// with XMStoreUShort4.
+inline PackedVector::XMUSHORT4::XMUSHORT4(float _x, float _y, float _z, float _w)
+{
+    const XMVECTOR V = XMVectorSet(_x, _y, _z, _w);
+    XMStoreUShort4(this, V);
+}
+
+//------------------------------------------------------------------------------
+// Construct from an array of four floats, loaded as an XMFLOAT4 and
+// packed with XMStoreUShort4.
+_Use_decl_annotations_
+inline PackedVector::XMUSHORT4::XMUSHORT4(const float* pArray)
+{
+    const XMVECTOR V = XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(pArray));
+    XMStoreUShort4(this, V);
+}
+
+/****************************************************************************
+ *
+ * XMXDECN4 operators
+ *
+ ****************************************************************************/
+
+//------------------------------------------------------------------------------
+
+// Construct from four component floats: build a vector, then pack it
+// with XMStoreXDecN4.
+inline PackedVector::XMXDECN4::XMXDECN4(float _x, float _y, float _z, float _w)
+{
+    const XMVECTOR V = XMVectorSet(_x, _y, _z, _w);
+    XMStoreXDecN4(this, V);
+}
+
+//------------------------------------------------------------------------------
+// Construct from an array of four floats, loaded as an XMFLOAT4 and
+// packed with XMStoreXDecN4.
+_Use_decl_annotations_
+inline PackedVector::XMXDECN4::XMXDECN4(const float* pArray)
+{
+    const XMVECTOR V = XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(pArray));
+    XMStoreXDecN4(this, V);
+}
+
+/****************************************************************************
+ *
+ * XMXDEC4 operators
+ *
+ ****************************************************************************/
+
+//------------------------------------------------------------------------------
+
+// Construct from four component floats: build a vector, then pack it
+// with XMStoreXDec4.
+inline PackedVector::XMXDEC4::XMXDEC4(float _x, float _y, float _z, float _w)
+{
+    const XMVECTOR V = XMVectorSet(_x, _y, _z, _w);
+    XMStoreXDec4(this, V);
+}
+
+//------------------------------------------------------------------------------
+// Construct from an array of four floats, loaded as an XMFLOAT4 and
+// packed with XMStoreXDec4.
+_Use_decl_annotations_
+inline PackedVector::XMXDEC4::XMXDEC4(const float* pArray)
+{
+    const XMVECTOR V = XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(pArray));
+    XMStoreXDec4(this, V);
+}
+
+/****************************************************************************
+ *
+ * XMDECN4 operators
+ *
+ ****************************************************************************/
+
+//------------------------------------------------------------------------------
+
+// Construct from four component floats: build a vector, then pack it
+// with XMStoreDecN4.
+inline PackedVector::XMDECN4::XMDECN4(float _x, float _y, float _z, float _w)
+{
+    const XMVECTOR V = XMVectorSet(_x, _y, _z, _w);
+    XMStoreDecN4(this, V);
+}
+
+//------------------------------------------------------------------------------
+// Construct from an array of four floats, loaded as an XMFLOAT4 and
+// packed with XMStoreDecN4.
+_Use_decl_annotations_
+inline PackedVector::XMDECN4::XMDECN4(const float* pArray)
+{
+    const XMVECTOR V = XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(pArray));
+    XMStoreDecN4(this, V);
+}
+
+/****************************************************************************
+ *
+ * XMDEC4 operators
+ *
+ ****************************************************************************/
+
+//------------------------------------------------------------------------------
+
+// Construct from four component floats: build a vector, then pack it
+// with XMStoreDec4.
+inline PackedVector::XMDEC4::XMDEC4(float _x, float _y, float _z, float _w)
+{
+    const XMVECTOR V = XMVectorSet(_x, _y, _z, _w);
+    XMStoreDec4(this, V);
+}
+
+//------------------------------------------------------------------------------
+// Construct from an array of four floats, loaded as an XMFLOAT4 and
+// packed with XMStoreDec4.
+_Use_decl_annotations_
+inline PackedVector::XMDEC4::XMDEC4(const float* pArray)
+{
+    const XMVECTOR V = XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(pArray));
+    XMStoreDec4(this, V);
+}
+
+/****************************************************************************
+ *
+ * XMUDECN4 operators
+ *
+ ****************************************************************************/
+
+//------------------------------------------------------------------------------
+
+// Construct from four component floats: build a vector, then pack it
+// with XMStoreUDecN4.
+inline PackedVector::XMUDECN4::XMUDECN4(float _x, float _y, float _z, float _w)
+{
+    const XMVECTOR V = XMVectorSet(_x, _y, _z, _w);
+    XMStoreUDecN4(this, V);
+}
+
+//------------------------------------------------------------------------------
+// Construct from an array of four floats, loaded as an XMFLOAT4 and
+// packed with XMStoreUDecN4.
+_Use_decl_annotations_
+inline PackedVector::XMUDECN4::XMUDECN4(const float* pArray)
+{
+    const XMVECTOR V = XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(pArray));
+    XMStoreUDecN4(this, V);
+}
+
+/****************************************************************************
+ *
+ * XMUDEC4 operators
+ *
+ ****************************************************************************/
+
+//------------------------------------------------------------------------------
+
+// Construct from four component floats: build a vector, then pack it
+// with XMStoreUDec4.
+inline PackedVector::XMUDEC4::XMUDEC4(float _x, float _y, float _z, float _w)
+{
+    const XMVECTOR V = XMVectorSet(_x, _y, _z, _w);
+    XMStoreUDec4(this, V);
+}
+
+//------------------------------------------------------------------------------
+// Construct from an array of four floats, loaded as an XMFLOAT4 and
+// packed with XMStoreUDec4.
+_Use_decl_annotations_
+inline PackedVector::XMUDEC4::XMUDEC4(const float* pArray)
+{
+    const XMVECTOR V = XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(pArray));
+    XMStoreUDec4(this, V);
+}
+
+/****************************************************************************
+ *
+ * XMBYTEN4 operators
+ *
+ ****************************************************************************/
+
+//------------------------------------------------------------------------------
+
+// Construct from four component floats: build a vector, then pack it
+// with XMStoreByteN4.
+inline PackedVector::XMBYTEN4::XMBYTEN4(float _x, float _y, float _z, float _w)
+{
+    const XMVECTOR V = XMVectorSet(_x, _y, _z, _w);
+    XMStoreByteN4(this, V);
+}
+
+//------------------------------------------------------------------------------
+// Construct from an array of four floats, loaded as an XMFLOAT4 and
+// packed with XMStoreByteN4.
+_Use_decl_annotations_
+inline PackedVector::XMBYTEN4::XMBYTEN4(const float* pArray)
+{
+    const XMVECTOR V = XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(pArray));
+    XMStoreByteN4(this, V);
+}
+
+/****************************************************************************
+ *
+ * XMBYTE4 operators
+ *
+ ****************************************************************************/
+
+//------------------------------------------------------------------------------
+
+// Construct from four component floats: build a vector, then pack it
+// with XMStoreByte4.
+inline PackedVector::XMBYTE4::XMBYTE4(float _x, float _y, float _z, float _w)
+{
+    const XMVECTOR V = XMVectorSet(_x, _y, _z, _w);
+    XMStoreByte4(this, V);
+}
+
+//------------------------------------------------------------------------------
+// Construct from an array of four floats, loaded as an XMFLOAT4 and
+// packed with XMStoreByte4.
+_Use_decl_annotations_
+inline PackedVector::XMBYTE4::XMBYTE4(const float* pArray)
+{
+    const XMVECTOR V = XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(pArray));
+    XMStoreByte4(this, V);
+}
+
+/****************************************************************************
+ *
+ * XMUBYTEN4 operators
+ *
+ ****************************************************************************/
+
+//------------------------------------------------------------------------------
+
+// Construct from four component floats: build a vector, then pack it
+// with XMStoreUByteN4.
+inline PackedVector::XMUBYTEN4::XMUBYTEN4(float _x, float _y, float _z, float _w)
+{
+    const XMVECTOR V = XMVectorSet(_x, _y, _z, _w);
+    XMStoreUByteN4(this, V);
+}
+
+//------------------------------------------------------------------------------
+// Construct from an array of four floats, loaded as an XMFLOAT4 and
+// packed with XMStoreUByteN4.
+_Use_decl_annotations_
+inline PackedVector::XMUBYTEN4::XMUBYTEN4(const float* pArray)
+{
+    const XMVECTOR V = XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(pArray));
+    XMStoreUByteN4(this, V);
+}
+
+/****************************************************************************
+ *
+ * XMUBYTE4 operators
+ *
+ ****************************************************************************/
+
+//------------------------------------------------------------------------------
+
+// Construct from four component floats: build a vector, then pack it
+// with XMStoreUByte4.
+inline PackedVector::XMUBYTE4::XMUBYTE4(float _x, float _y, float _z, float _w)
+{
+    const XMVECTOR V = XMVectorSet(_x, _y, _z, _w);
+    XMStoreUByte4(this, V);
+}
+
+//------------------------------------------------------------------------------
+// Construct from an array of four floats, loaded as an XMFLOAT4 and
+// packed with XMStoreUByte4.
+_Use_decl_annotations_
+inline PackedVector::XMUBYTE4::XMUBYTE4(const float* pArray)
+{
+    const XMVECTOR V = XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(pArray));
+    XMStoreUByte4(this, V);
+}
+
+/****************************************************************************
+ *
+ * XMUNIBBLE4 operators
+ *
+ ****************************************************************************/
+
+//------------------------------------------------------------------------------
+
+// Construct from four component floats: build a vector, then pack it
+// with XMStoreUNibble4.
+inline PackedVector::XMUNIBBLE4::XMUNIBBLE4(float _x, float _y, float _z, float _w)
+{
+    const XMVECTOR V = XMVectorSet(_x, _y, _z, _w);
+    XMStoreUNibble4(this, V);
+}
+
+//------------------------------------------------------------------------------
+// Construct from an array of four floats, loaded as an XMFLOAT4 and
+// packed with XMStoreUNibble4.
+_Use_decl_annotations_
+inline PackedVector::XMUNIBBLE4::XMUNIBBLE4(const float *pArray)
+{
+    const XMVECTOR V = XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(pArray));
+    XMStoreUNibble4(this, V);
+}
+
+/****************************************************************************
+ *
+ * XMU555 operators
+ *
+ ****************************************************************************/
+
+//------------------------------------------------------------------------------
+
+// Construct from three component floats plus a boolean w: the bool is
+// mapped to 1.0f/0.0f before packing with XMStoreU555.
+inline PackedVector::XMU555::XMU555(float _x, float _y, float _z, bool _w)
+{
+    const float w = _w ? 1.0f : 0.0f;
+    XMStoreU555(this, XMVectorSet(_x, _y, _z, w));
+}
+
+//------------------------------------------------------------------------------
+// Construct from an array of three floats plus a boolean w: the array is
+// loaded as an XMFLOAT3, the bool becomes the W lane (1.0f/0.0f), and the
+// result is packed with XMStoreU555.
+_Use_decl_annotations_
+inline PackedVector::XMU555::XMU555(const float *pArray, bool _w)
+{
+    const XMVECTOR V = XMLoadFloat3(reinterpret_cast<const XMFLOAT3*>(pArray));
+    XMStoreU555(this, XMVectorSetW(V, _w ? 1.0f : 0.0f));
+}
+
+
diff --git a/Minecraft.Client/PS3/PS3Extras/DirectX/no_sal2.h b/Minecraft.Client/PS3/PS3Extras/DirectX/no_sal2.h
new file mode 100644
index 00000000..b66b68cd
--- /dev/null
+++ b/Minecraft.Client/PS3/PS3Extras/DirectX/no_sal2.h
@@ -0,0 +1,1022 @@
+
+/***
+* no_sal2.h - renders the SAL annotations for documenting APIs harmless.
+*
+* Copyright (c) Microsoft Corporation. All rights reserved.
+*
+*Purpose:
+* sal.h provides a set of SAL2 annotations to describe how a function uses its
+* parameters - the assumptions it makes about them, and the guarantees it makes
+* upon finishing. This file redefines all those annotation macros to be harmless.
+* It is designed for use in down-level build environments where the tooling may
+* be unhappy with the standard SAL2 macro definitions.
+*
+* [Public]
+*
+****/
+
+#ifndef _NO_SAL_2_H_
+#define _NO_SAL_2_H_
+
+#ifdef _When_
+#undef _When_
+#endif
+#define _When_(c,a)
+#ifdef _At_
+#undef _At_
+#endif
+#define _At_(t,a)
+#ifdef _At_buffer_
+#undef _At_buffer_
+#endif
+#define _At_buffer_(t,i,c,a)
+#ifdef _Group_
+#undef _Group_
+#endif
+#define _Group_(a)
+#ifdef _Pre_
+#undef _Pre_
+#endif
+#define _Pre_
+#ifdef _Post_
+#undef _Post_
+#endif
+#define _Post_
+#ifdef _Deref_
+#undef _Deref_
+#endif
+#define _Deref_
+#ifdef _Null_
+#undef _Null_
+#endif
+#define _Null_
+#ifdef _Notnull_
+#undef _Notnull_
+#endif
+#define _Notnull_
+#ifdef _Maybenull_
+#undef _Maybenull_
+#endif
+#define _Maybenull_
+#ifdef _Const_
+#undef _Const_
+#endif
+#define _Const_
+#ifdef _Check_return_
+#undef _Check_return_
+#endif
+#define _Check_return_
+#ifdef _Must_inspect_result_
+#undef _Must_inspect_result_
+#endif
+#define _Must_inspect_result_
+#ifdef _Pre_satisfies_
+#undef _Pre_satisfies_
+#endif
+#define _Pre_satisfies_(e)
+#ifdef _Post_satisfies_
+#undef _Post_satisfies_
+#endif
+#define _Post_satisfies_(e)
+#ifdef _Writable_elements_
+#undef _Writable_elements_
+#endif
+#define _Writable_elements_(s)
+#ifdef _Writable_bytes_
+#undef _Writable_bytes_
+#endif
+#define _Writable_bytes_(s)
+#ifdef _Readable_elements_
+#undef _Readable_elements_
+#endif
+#define _Readable_elements_(s)
+#ifdef _Readable_bytes_
+#undef _Readable_bytes_
+#endif
+#define _Readable_bytes_(s)
+#ifdef _Null_terminated_
+#undef _Null_terminated_
+#endif
+#define _Null_terminated_
+#ifdef _NullNull_terminated_
+#undef _NullNull_terminated_
+#endif
+#define _NullNull_terminated_
+#ifdef _Valid_
+#undef _Valid_
+#endif
+#define _Valid_
+#ifdef _Notvalid_
+#undef _Notvalid_
+#endif
+#define _Notvalid_
+#ifdef _Success_
+#undef _Success_
+#endif
+#define _Success_(c)
+#ifdef _Return_type_success_
+#undef _Return_type_success_
+#endif
+#define _Return_type_success_(c)
+#ifdef _On_failure_
+#undef _On_failure_
+#endif
+#define _On_failure_(a)
+#ifdef _Always_
+#undef _Always_
+#endif
+#define _Always_(a)
+#ifdef _Use_decl_annotations_
+#undef _Use_decl_annotations_
+#endif
+#define _Use_decl_annotations_
+#ifdef _Pre_defensive_
+#undef _Pre_defensive_
+#endif
+#define _Pre_defensive_
+#ifdef _Post_defensive_
+#undef _Post_defensive_
+#endif
+#define _Post_defensive_
+#ifdef _Pre_unknown_
+#undef _Pre_unknown_
+#endif
+#define _Pre_unknown_
+#ifdef _Acquires_lock_
+#undef _Acquires_lock_
+#endif
+#define _Acquires_lock_(e)
+#ifdef _Releases_lock_
+#undef _Releases_lock_
+#endif
+#define _Releases_lock_(e)
+#ifdef _Requires_lock_held_
+#undef _Requires_lock_held_
+#endif
+#define _Requires_lock_held_(e)
+#ifdef _Requires_lock_not_held_
+#undef _Requires_lock_not_held_
+#endif
+#define _Requires_lock_not_held_(e)
+#ifdef _Requires_no_locks_held_
+#undef _Requires_no_locks_held_
+#endif
+#define _Requires_no_locks_held_
+#ifdef _Guarded_by_
+#undef _Guarded_by_
+#endif
+#define _Guarded_by_(e)
+#ifdef _Write_guarded_by_
+#undef _Write_guarded_by_
+#endif
+#define _Write_guarded_by_(e)
+#ifdef _Interlocked_
+#undef _Interlocked_
+#endif
+#define _Interlocked_
+#ifdef _Post_same_lock_
+#undef _Post_same_lock_
+#endif
+#define _Post_same_lock_(e1,e2)
+#ifdef _Benign_race_begin_
+#undef _Benign_race_begin_
+#endif
+#define _Benign_race_begin_
+#ifdef _Benign_race_end_
+#undef _Benign_race_end_
+#endif
+#define _Benign_race_end_
+#ifdef _No_competing_thread_
+#undef _No_competing_thread_
+#endif
+#define _No_competing_thread_
+#ifdef _No_competing_thread_begin_
+#undef _No_competing_thread_begin_
+#endif
+#define _No_competing_thread_begin_
+#ifdef _No_competing_thread_end_
+#undef _No_competing_thread_end_
+#endif
+#define _No_competing_thread_end_
+#ifdef _Acquires_shared_lock_
+#undef _Acquires_shared_lock_
+#endif
+#define _Acquires_shared_lock_(e)
+#ifdef _Releases_shared_lock_
+#undef _Releases_shared_lock_
+#endif
+#define _Releases_shared_lock_(e)
+#ifdef _Requires_shared_lock_held_
+#undef _Requires_shared_lock_held_
+#endif
+#define _Requires_shared_lock_held_(e)
+#ifdef _Acquires_exclusive_lock_
+#undef _Acquires_exclusive_lock_
+#endif
+#define _Acquires_exclusive_lock_(e)
+#ifdef _Releases_exclusive_lock_
+#undef _Releases_exclusive_lock_
+#endif
+#define _Releases_exclusive_lock_(e)
+#ifdef _Requires_exclusive_lock_held_
+#undef _Requires_exclusive_lock_held_
+#endif
+#define _Requires_exclusive_lock_held_(e)
+#ifdef _Has_lock_kind_
+#undef _Has_lock_kind_
+#endif
+#define _Has_lock_kind_(n)
+#ifdef _Create_lock_level_
+#undef _Create_lock_level_
+#endif
+#define _Create_lock_level_(n)
+#ifdef _Has_lock_level_
+#undef _Has_lock_level_
+#endif
+#define _Has_lock_level_(n)
+#ifdef _Lock_level_order_
+#undef _Lock_level_order_
+#endif
+#define _Lock_level_order_(n1,n2)
+#ifdef _Analysis_assume_lock_acquired_
+#undef _Analysis_assume_lock_acquired_
+#endif
+#define _Analysis_assume_lock_acquired_(e)
+#ifdef _Analysis_assume_lock_released_
+#undef _Analysis_assume_lock_released_
+#endif
+#define _Analysis_assume_lock_released_(e)
+#ifdef _Analysis_assume_lock_held_
+#undef _Analysis_assume_lock_held_
+#endif
+#define _Analysis_assume_lock_held_(e)
+#ifdef _Analysis_assume_lock_not_held_
+#undef _Analysis_assume_lock_not_held_
+#endif
+#define _Analysis_assume_lock_not_held_(e)
+#ifdef _Analysis_assume_same_lock_
+#undef _Analysis_assume_same_lock_
+#endif
+#define _Analysis_assume_same_lock_(e)
+#ifdef _In_
+#undef _In_
+#endif
+#define _In_
+#ifdef _Out_
+#undef _Out_
+#endif
+#define _Out_
+#ifdef _Inout_
+#undef _Inout_
+#endif
+#define _Inout_
+#ifdef _In_z_
+#undef _In_z_
+#endif
+#define _In_z_
+#ifdef _Inout_z_
+#undef _Inout_z_
+#endif
+#define _Inout_z_
+#ifdef _In_reads_
+#undef _In_reads_
+#endif
+#define _In_reads_(s)
+#ifdef _In_reads_bytes_
+#undef _In_reads_bytes_
+#endif
+#define _In_reads_bytes_(s)
+#ifdef _In_reads_z_
+#undef _In_reads_z_
+#endif
+#define _In_reads_z_(s)
+#ifdef _In_reads_or_z_
+#undef _In_reads_or_z_
+#endif
+#define _In_reads_or_z_(s)
+#ifdef _Out_writes_
+#undef _Out_writes_
+#endif
+#define _Out_writes_(s)
+#ifdef _Out_writes_bytes_
+#undef _Out_writes_bytes_
+#endif
+#define _Out_writes_bytes_(s)
+#ifdef _Out_writes_z_
+#undef _Out_writes_z_
+#endif
+#define _Out_writes_z_(s)
+#ifdef _Inout_updates_
+#undef _Inout_updates_
+#endif
+#define _Inout_updates_(s)
+#ifdef _Inout_updates_bytes_
+#undef _Inout_updates_bytes_
+#endif
+#define _Inout_updates_bytes_(s)
+#ifdef _Inout_updates_z_
+#undef _Inout_updates_z_
+#endif
+#define _Inout_updates_z_(s)
+#ifdef _Out_writes_to_
+#undef _Out_writes_to_
+#endif
+#define _Out_writes_to_(s,c)
+#ifdef _Out_writes_bytes_to_
+#undef _Out_writes_bytes_to_
+#endif
+#define _Out_writes_bytes_to_(s,c)
+#ifdef _Out_writes_all_
+#undef _Out_writes_all_
+#endif
+#define _Out_writes_all_(s)
+#ifdef _Out_writes_bytes_all_
+#undef _Out_writes_bytes_all_
+#endif
+#define _Out_writes_bytes_all_(s)
+#ifdef _Inout_updates_to_
+#undef _Inout_updates_to_
+#endif
+#define _Inout_updates_to_(s,c)
+#ifdef _Inout_updates_bytes_to_
+#undef _Inout_updates_bytes_to_
+#endif
+#define _Inout_updates_bytes_to_(s,c)
+#ifdef _Inout_updates_all_
+#undef _Inout_updates_all_
+#endif
+#define _Inout_updates_all_(s)
+#ifdef _Inout_updates_bytes_all_
+#undef _Inout_updates_bytes_all_
+#endif
+#define _Inout_updates_bytes_all_(s)
+#ifdef _In_reads_to_ptr_
+#undef _In_reads_to_ptr_
+#endif
+#define _In_reads_to_ptr_(p)
+#ifdef _In_reads_to_ptr_z_
+#undef _In_reads_to_ptr_z_
+#endif
+#define _In_reads_to_ptr_z_(p)
+#ifdef _Out_writes_to_ptr_
+#undef _Out_writes_to_ptr_
+#endif
+#define _Out_writes_to_ptr_(p)
+#ifdef _Out_writes_to_ptr_z_
+#undef _Out_writes_to_ptr_z_
+#endif
+#define _Out_writes_to_ptr_z_(p)
+#ifdef _In_opt_
+#undef _In_opt_
+#endif
+#define _In_opt_
+#ifdef _Out_opt_
+#undef _Out_opt_
+#endif
+#define _Out_opt_
+#ifdef _Inout_opt_
+#undef _Inout_opt_
+#endif
+#define _Inout_opt_
+#ifdef _In_opt_z_
+#undef _In_opt_z_
+#endif
+#define _In_opt_z_
+#ifdef _Inout_opt_z_
+#undef _Inout_opt_z_
+#endif
+#define _Inout_opt_z_
+#ifdef _In_reads_opt_
+#undef _In_reads_opt_
+#endif
+#define _In_reads_opt_(s)
+#ifdef _In_reads_opt_z_
+#undef _In_reads_opt_z_
+#endif
+#define _In_reads_opt_z_(s)
+#ifdef _In_reads_bytes_opt_
+#undef _In_reads_bytes_opt_
+#endif
+#define _In_reads_bytes_opt_(s)
+#ifdef _Out_writes_opt_
+#undef _Out_writes_opt_
+#endif
+#define _Out_writes_opt_(s)
+#ifdef _Out_writes_bytes_opt_
+#undef _Out_writes_bytes_opt_
+#endif
+#define _Out_writes_bytes_opt_(s)
+#ifdef _Out_writes_opt_z_
+#undef _Out_writes_opt_z_
+#endif
+#define _Out_writes_opt_z_(s)
+#ifdef _Inout_updates_opt_
+#undef _Inout_updates_opt_
+#endif
+#define _Inout_updates_opt_(s)
+#ifdef _Inout_updates_bytes_opt_
+#undef _Inout_updates_bytes_opt_
+#endif
+#define _Inout_updates_bytes_opt_(s)
+#ifdef _Inout_updates_opt_z_
+#undef _Inout_updates_opt_z_
+#endif
+#define _Inout_updates_opt_z_(s)
+#ifdef _Out_writes_to_opt_
+#undef _Out_writes_to_opt_
+#endif
+#define _Out_writes_to_opt_(s,c)
+#ifdef _Out_writes_bytes_to_opt_
+#undef _Out_writes_bytes_to_opt_
+#endif
+#define _Out_writes_bytes_to_opt_(s,c)
+#ifdef _Out_writes_all_opt_
+#undef _Out_writes_all_opt_
+#endif
+#define _Out_writes_all_opt_(s)
+#ifdef _Out_writes_bytes_all_opt_
+#undef _Out_writes_bytes_all_opt_
+#endif
+#define _Out_writes_bytes_all_opt_(s)
+#ifdef _Inout_updates_to_opt_
+#undef _Inout_updates_to_opt_
+#endif
+#define _Inout_updates_to_opt_(s,c)
+#ifdef _Inout_updates_bytes_to_opt_
+#undef _Inout_updates_bytes_to_opt_
+#endif
+#define _Inout_updates_bytes_to_opt_(s,c)
+#ifdef _Inout_updates_all_opt_
+#undef _Inout_updates_all_opt_
+#endif
+#define _Inout_updates_all_opt_(s)
+#ifdef _Inout_updates_bytes_all_opt_
+#undef _Inout_updates_bytes_all_opt_
+#endif
+#define _Inout_updates_bytes_all_opt_(s)
+#ifdef _In_reads_to_ptr_opt_
+#undef _In_reads_to_ptr_opt_
+#endif
+#define _In_reads_to_ptr_opt_(p)
+#ifdef _In_reads_to_ptr_opt_z_
+#undef _In_reads_to_ptr_opt_z_
+#endif
+#define _In_reads_to_ptr_opt_z_(p)
+#ifdef _Out_writes_to_ptr_opt_
+#undef _Out_writes_to_ptr_opt_
+#endif
+#define _Out_writes_to_ptr_opt_(p)
+#ifdef _Out_writes_to_ptr_opt_z_
+#undef _Out_writes_to_ptr_opt_z_
+#endif
+#define _Out_writes_to_ptr_opt_z_(p)
+#ifdef _Outptr_
+#undef _Outptr_
+#endif
+#define _Outptr_
+#ifdef _Outptr_opt_
+#undef _Outptr_opt_
+#endif
+#define _Outptr_opt_
+#ifdef _Outptr_result_maybenull_
+#undef _Outptr_result_maybenull_
+#endif
+#define _Outptr_result_maybenull_
+#ifdef _Outptr_opt_result_maybenull_
+#undef _Outptr_opt_result_maybenull_
+#endif
+#define _Outptr_opt_result_maybenull_
+#ifdef _Outptr_result_z_
+#undef _Outptr_result_z_
+#endif
+#define _Outptr_result_z_
+#ifdef _Outptr_opt_result_z_
+#undef _Outptr_opt_result_z_
+#endif
+#define _Outptr_opt_result_z_
+#ifdef _Outptr_result_maybenull_z_
+#undef _Outptr_result_maybenull_z_
+#endif
+#define _Outptr_result_maybenull_z_
+#ifdef _Outptr_opt_result_maybenull_z_
+#undef _Outptr_opt_result_maybenull_z_
+#endif
+#define _Outptr_opt_result_maybenull_z_
+#ifdef _COM_Outptr_
+#undef _COM_Outptr_
+#endif
+#define _COM_Outptr_
+#ifdef _COM_Outptr_opt_
+#undef _COM_Outptr_opt_
+#endif
+#define _COM_Outptr_opt_
+#ifdef _COM_Outptr_result_maybenull_
+#undef _COM_Outptr_result_maybenull_
+#endif
+#define _COM_Outptr_result_maybenull_
+#ifdef _COM_Outptr_opt_result_maybenull_
+#undef _COM_Outptr_opt_result_maybenull_
+#endif
+#define _COM_Outptr_opt_result_maybenull_
+#ifdef _Outptr_result_buffer_
+#undef _Outptr_result_buffer_
+#endif
+#define _Outptr_result_buffer_(s)
+#ifdef _Outptr_result_bytebuffer_
+#undef _Outptr_result_bytebuffer_
+#endif
+#define _Outptr_result_bytebuffer_(s)
+#ifdef _Outptr_opt_result_buffer_
+#undef _Outptr_opt_result_buffer_
+#endif
+#define _Outptr_opt_result_buffer_(s)
+#ifdef _Outptr_opt_result_bytebuffer_
+#undef _Outptr_opt_result_bytebuffer_
+#endif
+#define _Outptr_opt_result_bytebuffer_(s)
+#ifdef _Outptr_result_buffer_to_
+#undef _Outptr_result_buffer_to_
+#endif
+#define _Outptr_result_buffer_to_(s,c)
+#ifdef _Outptr_result_bytebuffer_to_
+#undef _Outptr_result_bytebuffer_to_
+#endif
+#define _Outptr_result_bytebuffer_to_(s,c)
+#ifdef _Outptr_opt_result_buffer_to_
+#undef _Outptr_opt_result_buffer_to_
+#endif
+#define _Outptr_opt_result_buffer_to_(s,c)
+#ifdef _Outptr_opt_result_bytebuffer_to_
+#undef _Outptr_opt_result_bytebuffer_to_
+#endif
+#define _Outptr_opt_result_bytebuffer_to_(s,c)
+#ifdef _Ret_
+#undef _Ret_
+#endif
+#define _Ret_
+#ifdef _Ret_valid_
+#undef _Ret_valid_
+#endif
+#define _Ret_valid_
+#ifdef _Ret_z_
+#undef _Ret_z_
+#endif
+#define _Ret_z_
+#ifdef _Ret_writes_
+#undef _Ret_writes_
+#endif
+#define _Ret_writes_(s)
+#ifdef _Ret_writes_bytes_
+#undef _Ret_writes_bytes_
+#endif
+#define _Ret_writes_bytes_(s)
+#ifdef _Ret_writes_z_
+#undef _Ret_writes_z_
+#endif
+#define _Ret_writes_z_(s)
+#ifdef _Ret_writes_to_
+#undef _Ret_writes_to_
+#endif
+#define _Ret_writes_to_(s,c)
+#ifdef _Ret_writes_bytes_to_
+#undef _Ret_writes_bytes_to_
+#endif
+#define _Ret_writes_bytes_to_(s,c)
+#ifdef _Ret_writes_to_ptr_
+#undef _Ret_writes_to_ptr_
+#endif
+#define _Ret_writes_to_ptr_(p)
+#ifdef _Ret_writes_to_ptr_z_
+#undef _Ret_writes_to_ptr_z_
+#endif
+#define _Ret_writes_to_ptr_z_(p)
+#ifdef _Ret_writes_maybenull_
+#undef _Ret_writes_maybenull_
+#endif
+#define _Ret_writes_maybenull_(s)
+#ifdef _Ret_writes_bytes_maybenull_
+#undef _Ret_writes_bytes_maybenull_
+#endif
+#define _Ret_writes_bytes_maybenull_(s)
+#ifdef _Ret_writes_to_maybenull_
+#undef _Ret_writes_to_maybenull_
+#endif
+#define _Ret_writes_to_maybenull_(s,c)
+#ifdef _Ret_writes_bytes_to_maybenull_
+#undef _Ret_writes_bytes_to_maybenull_
+#endif
+#define _Ret_writes_bytes_to_maybenull_(s,c)
+#ifdef _Ret_writes_maybenull_z_
+#undef _Ret_writes_maybenull_z_
+#endif
+#define _Ret_writes_maybenull_z_(s)
+#ifdef _Ret_null_
+#undef _Ret_null_
+#endif
+#define _Ret_null_
+#ifdef _Ret_notnull_
+#undef _Ret_notnull_
+#endif
+#define _Ret_notnull_
+#ifdef _Ret_maybenull_
+#undef _Ret_maybenull_
+#endif
+#define _Ret_maybenull_
+#ifdef _Ret_maybenull_z_
+#undef _Ret_maybenull_z_
+#endif
+#define _Ret_maybenull_z_
+#ifdef _Field_size_
+#undef _Field_size_
+#endif
+#define _Field_size_(s)
+#ifdef _Field_size_opt_
+#undef _Field_size_opt_
+#endif
+#define _Field_size_opt_(s)
+#ifdef _Field_size_bytes_
+#undef _Field_size_bytes_
+#endif
+#define _Field_size_bytes_(s)
+#ifdef _Field_size_bytes_opt_
+#undef _Field_size_bytes_opt_
+#endif
+#define _Field_size_bytes_opt_(s)
+#ifdef _Field_size_part_
+#undef _Field_size_part_
+#endif
+#define _Field_size_part_(s,c)
+#ifdef _Field_size_part_opt_
+#undef _Field_size_part_opt_
+#endif
+#define _Field_size_part_opt_(s,c)
+#ifdef _Field_size_bytes_part_
+#undef _Field_size_bytes_part_
+#endif
+#define _Field_size_bytes_part_(s,c)
+#ifdef _Field_size_bytes_part_opt_
+#undef _Field_size_bytes_part_opt_
+#endif
+#define _Field_size_bytes_part_opt_(s,c)
+#ifdef _Field_size_full_
+#undef _Field_size_full_
+#endif
+#define _Field_size_full_(s)
+#ifdef _Field_size_full_opt_
+#undef _Field_size_full_opt_
+#endif
+#define _Field_size_full_opt_(s)
+#ifdef _Field_size_bytes_full_
+#undef _Field_size_bytes_full_
+#endif
+#define _Field_size_bytes_full_(s)
+#ifdef _Field_size_bytes_full_opt_
+#undef _Field_size_bytes_full_opt_
+#endif
+#define _Field_size_bytes_full_opt_(s)
+#ifdef _Printf_format_string_
+#undef _Printf_format_string_
+#endif
+#define _Printf_format_string_
+#ifdef _Scanf_format_string_
+#undef _Scanf_format_string_
+#endif
+#define _Scanf_format_string_
+#ifdef _Scanf_s_format_string_
+#undef _Scanf_s_format_string_
+#endif
+#define _Scanf_s_format_string_
+#ifdef _Printf_format_string_params_
+#undef _Printf_format_string_params_
+#endif
+#define _Printf_format_string_params_(x)
+#ifdef _Scanf_format_string_params_
+#undef _Scanf_format_string_params_
+#endif
+#define _Scanf_format_string_params_(x)
+#ifdef _Scanf_s_format_string_params_
+#undef _Scanf_s_format_string_params_
+#endif
+#define _Scanf_s_format_string_params_(x)
+#ifdef _In_range_
+#undef _In_range_
+#endif
+#define _In_range_(l,h)
+#ifdef _Out_range_
+#undef _Out_range_
+#endif
+#define _Out_range_(l,h)
+#ifdef _Ret_range_
+#undef _Ret_range_
+#endif
+#define _Ret_range_(l,h)
+#ifdef _Deref_in_range_
+#undef _Deref_in_range_
+#endif
+#define _Deref_in_range_(l,h)
+#ifdef _Deref_out_range_
+#undef _Deref_out_range_
+#endif
+#define _Deref_out_range_(l,h)
+#ifdef _Deref_inout_range_
+#undef _Deref_inout_range_
+#endif
+#define _Deref_inout_range_(l,h)
+#ifdef _Field_range_
+#undef _Field_range_
+#endif
+#define _Field_range_(l,h)
+#ifdef _Pre_equal_to_
+#undef _Pre_equal_to_
+#endif
+#define _Pre_equal_to_(e)
+#ifdef _Post_equal_to_
+#undef _Post_equal_to_
+#endif
+#define _Post_equal_to_(e)
+#ifdef _Struct_size_bytes_
+#undef _Struct_size_bytes_
+#endif
+#define _Struct_size_bytes_(s)
+#ifdef _Analysis_assume_
+#undef _Analysis_assume_
+#endif
+#define _Analysis_assume_
+#ifdef _Analysis_mode_
+#undef _Analysis_mode_
+#endif
+#define _Analysis_mode_(m)
+#ifdef _Analysis_noreturn_
+#undef _Analysis_noreturn_
+#endif
+#define _Analysis_noreturn_
+#ifdef _Raises_SEH_exception_
+#undef _Raises_SEH_exception_
+#endif
+#define _Raises_SEH_exception_
+#ifdef _Maybe_raises_SEH_exception_
+#undef _Maybe_raises_SEH_exception_
+#endif
+#define _Maybe_raises_SEH_exception_
+#ifdef _Function_class_
+#undef _Function_class_
+#endif
+#define _Function_class_(n)
+#ifdef _Literal_
+#undef _Literal_
+#endif
+#define _Literal_
+#ifdef _Notliteral_
+#undef _Notliteral_
+#endif
+#define _Notliteral_
+#ifdef _Enum_is_bitflag_
+#undef _Enum_is_bitflag_
+#endif
+#define _Enum_is_bitflag_
+#ifdef _Strict_type_match_
+#undef _Strict_type_match_
+#endif
+#define _Strict_type_match_
+#ifdef _Points_to_data_
+#undef _Points_to_data_
+#endif
+#define _Points_to_data_
+#ifdef _Interlocked_operand_
+#undef _Interlocked_operand_
+#endif
+#define _Interlocked_operand_
+#ifdef _IRQL_raises_
+#undef _IRQL_raises_
+#endif
+#define _IRQL_raises_(i)
+#ifdef _IRQL_requires_
+#undef _IRQL_requires_
+#endif
+#define _IRQL_requires_(i)
+#ifdef _IRQL_requires_max_
+#undef _IRQL_requires_max_
+#endif
+#define _IRQL_requires_max_(i)
+#ifdef _IRQL_requires_min_
+#undef _IRQL_requires_min_
+#endif
+#define _IRQL_requires_min_(i)
+#ifdef _IRQL_saves_
+#undef _IRQL_saves_
+#endif
+#define _IRQL_saves_
+#ifdef _IRQL_saves_global_
+#undef _IRQL_saves_global_
+#endif
+#define _IRQL_saves_global_(k,s)
+#ifdef _IRQL_restores_
+#undef _IRQL_restores_
+#endif
+#define _IRQL_restores_
+#ifdef _IRQL_restores_global_
+#undef _IRQL_restores_global_
+#endif
+#define _IRQL_restores_global_(k,s)
+#ifdef _IRQL_always_function_min_
+#undef _IRQL_always_function_min_
+#endif
+#define _IRQL_always_function_min_(i)
+#ifdef _IRQL_always_function_max_
+#undef _IRQL_always_function_max_
+#endif
+#define _IRQL_always_function_max_(i)
+#ifdef _IRQL_requires_same_
+#undef _IRQL_requires_same_
+#endif
+#define _IRQL_requires_same_
+#ifdef _IRQL_uses_cancel_
+#undef _IRQL_uses_cancel_
+#endif
+#define _IRQL_uses_cancel_
+#ifdef _IRQL_is_cancel_
+#undef _IRQL_is_cancel_
+#endif
+#define _IRQL_is_cancel_
+#ifdef _Kernel_float_saved_
+#undef _Kernel_float_saved_
+#endif
+#define _Kernel_float_saved_
+#ifdef _Kernel_float_restored_
+#undef _Kernel_float_restored_
+#endif
+#define _Kernel_float_restored_
+#ifdef _Kernel_float_used_
+#undef _Kernel_float_used_
+#endif
+#define _Kernel_float_used_
+#ifdef _Kernel_acquires_resource_
+#undef _Kernel_acquires_resource_
+#endif
+#define _Kernel_acquires_resource_(k)
+#ifdef _Kernel_releases_resource_
+#undef _Kernel_releases_resource_
+#endif
+#define _Kernel_releases_resource_(k)
+#ifdef _Kernel_requires_resource_held_
+#undef _Kernel_requires_resource_held_
+#endif
+#define _Kernel_requires_resource_held_(k)
+#ifdef _Kernel_requires_resource_not_held_
+#undef _Kernel_requires_resource_not_held_
+#endif
+#define _Kernel_requires_resource_not_held_(k)
+#ifdef _Kernel_clear_do_init_
+#undef _Kernel_clear_do_init_
+#endif
+#define _Kernel_clear_do_init_(yn)
+#ifdef _Kernel_IoGetDmaAdapter_
+#undef _Kernel_IoGetDmaAdapter_
+#endif
+#define _Kernel_IoGetDmaAdapter_
+#ifdef _Outref_
+#undef _Outref_
+#endif
+#define _Outref_
+#ifdef _Outref_result_maybenull_
+#undef _Outref_result_maybenull_
+#endif
+#define _Outref_result_maybenull_
+#ifdef _Outref_result_buffer_
+#undef _Outref_result_buffer_
+#endif
+#define _Outref_result_buffer_(s)
+#ifdef _Outref_result_bytebuffer_
+#undef _Outref_result_bytebuffer_
+#endif
+#define _Outref_result_bytebuffer_(s)
+#ifdef _Outref_result_buffer_to_
+#undef _Outref_result_buffer_to_
+#endif
+#define _Outref_result_buffer_to_(s,c)
+#ifdef _Outref_result_bytebuffer_to_
+#undef _Outref_result_bytebuffer_to_
+#endif
+#define _Outref_result_bytebuffer_to_(s,c)
+#ifdef _Outref_result_buffer_all_
+#undef _Outref_result_buffer_all_
+#endif
+#define _Outref_result_buffer_all_(s)
+#ifdef _Outref_result_bytebuffer_all_
+#undef _Outref_result_bytebuffer_all_
+#endif
+#define _Outref_result_bytebuffer_all_(s)
+#ifdef _Outref_result_buffer_maybenull_
+#undef _Outref_result_buffer_maybenull_
+#endif
+#define _Outref_result_buffer_maybenull_(s)
+#ifdef _Outref_result_bytebuffer_maybenull_
+#undef _Outref_result_bytebuffer_maybenull_
+#endif
+#define _Outref_result_bytebuffer_maybenull_(s)
+#ifdef _Outref_result_buffer_to_maybenull_
+#undef _Outref_result_buffer_to_maybenull_
+#endif
+#define _Outref_result_buffer_to_maybenull_(s,c)
+#ifdef _Outref_result_bytebuffer_to_maybenull_
+#undef _Outref_result_bytebuffer_to_maybenull_
+#endif
+#define _Outref_result_bytebuffer_to_maybenull_(s,c)
+#ifdef _Outref_result_buffer_all_maybenull_
+#undef _Outref_result_buffer_all_maybenull_
+#endif
+#define _Outref_result_buffer_all_maybenull_(s)
+#ifdef _Outref_result_bytebuffer_all_maybenull_
+#undef _Outref_result_bytebuffer_all_maybenull_
+#endif
+#define _Outref_result_bytebuffer_all_maybenull_(s)
+#ifdef _In_defensive_
+#undef _In_defensive_
+#endif
+#define _In_defensive_(a)
+#ifdef _Out_defensive_
+#undef _Out_defensive_
+#endif
+#define _Out_defensive_(a)
+#ifdef _Inout_defensive_
+#undef _Inout_defensive_
+#endif
+#define _Inout_defensive_(a)
+#ifdef _Outptr_result_nullonfailure_
+#undef _Outptr_result_nullonfailure_
+#endif
+#define _Outptr_result_nullonfailure_
+#ifdef _Outptr_opt_result_nullonfailure_
+#undef _Outptr_opt_result_nullonfailure_
+#endif
+#define _Outptr_opt_result_nullonfailure_
+#ifdef _Outref_result_nullonfailure_
+#undef _Outref_result_nullonfailure_
+#endif
+#define _Outref_result_nullonfailure_
+#ifdef _Result_nullonfailure_
+#undef _Result_nullonfailure_
+#endif
+#define _Result_nullonfailure_
+#ifdef _Result_zeroonfailure_
+#undef _Result_zeroonfailure_
+#endif
+#define _Result_zeroonfailure_
+#ifdef _Acquires_nonreentrant_lock_
+#undef _Acquires_nonreentrant_lock_
+#endif
+#define _Acquires_nonreentrant_lock_(e)
+#ifdef _Releases_nonreentrant_lock_
+#undef _Releases_nonreentrant_lock_
+#endif
+#define _Releases_nonreentrant_lock_(e)
+#ifdef _Function_ignore_lock_checking_
+#undef _Function_ignore_lock_checking_
+#endif
+#define _Function_ignore_lock_checking_(e)
+#ifdef _Analysis_suppress_lock_checking_
+#undef _Analysis_suppress_lock_checking_
+#endif
+#define _Analysis_suppress_lock_checking_(e)
+#undef _Reserved_
+#define _Reserved_ _Pre_equal_to_(0) _Pre_ _Null_
+#undef _Pre_z_
+#define _Pre_z_ _Pre_ _Null_terminated_
+#undef _Post_z_
+#define _Post_z_ _Post_ _Null_terminated_
+#undef _Prepost_z_
+#define _Prepost_z_ _Pre_z_ _Post_z_
+#undef _Pre_null_
+#define _Pre_null_ _Pre_ _Null_
+#undef _Pre_maybenull_
+#define _Pre_maybenull_ _Pre_ _Maybenull_
+#undef _Pre_notnull_
+#define _Pre_notnull_ _Pre_ _Notnull_
+#undef _Pre_valid_
+#define _Pre_valid_ _Pre_notnull_ _Pre_ _Valid_
+#undef _Pre_opt_valid_
+#define _Pre_opt_valid_ _Pre_maybenull_ _Pre_ _Valid_
+#undef _Post_valid_
+#define _Post_valid_ _Post_ _Valid_
+#undef _Post_invalid_
+#define _Post_invalid_ _Post_ _Deref_ _Notvalid_
+#undef _Post_ptr_invalid_
+#define _Post_ptr_invalid_ _Post_ _Notvalid_
+#undef _Pre_readable_size_
+#define _Pre_readable_size_(s) _Pre_ _Readable_elements_(s) _Pre_ _Valid_
+#undef _Pre_writable_size_
+#define _Pre_writable_size_(s) _Pre_ _Writable_elements_(s)
+#undef _Pre_readable_byte_size_
+#define _Pre_readable_byte_size_(s) _Pre_ _Readable_bytes_(s) _Pre_ _Valid_
+#undef _Pre_writable_byte_size_
+#define _Pre_writable_byte_size_(s) _Pre_ _Writable_bytes_(s)
+#undef _Post_readable_size_
+#define _Post_readable_size_(s) _Post_ _Readable_elements_(s) _Post_ _Valid_
+#undef _Post_writable_size_
+#define _Post_writable_size_(s) _Post_ _Writable_elements_(s)
+#undef _Post_readable_byte_size_
+#define _Post_readable_byte_size_(s) _Post_ _Readable_bytes_(s) _Post_ _Valid_
+#undef _Post_writable_byte_size_
+#define _Post_writable_byte_size_(s) _Post_ _Writable_bytes_(s)
+
+#endif /* _NO_SAL_2_H_ */
diff --git a/Minecraft.Client/PS3/PS3Extras/DirectX/sal.h b/Minecraft.Client/PS3/PS3Extras/DirectX/sal.h
new file mode 100644
index 00000000..3576d7ed
--- /dev/null
+++ b/Minecraft.Client/PS3/PS3Extras/DirectX/sal.h
@@ -0,0 +1,1998 @@
+/***
+*sal.h - markers for documenting the semantics of APIs
+*
+* Copyright (c) Microsoft Corporation. All rights reserved.
+*
+*Purpose:
+* sal.h provides a set of annotations to describe how a function uses its
+* parameters - the assumptions it makes about them, and the guarantees it makes
+* upon finishing.
+*
+* [Public]
+*
+****/
+
+#pragma once
+/*==========================================================================
+
+ The macros are defined in 3 layers:
+
+ _In_\_Out_ Layer:
+ ----------------
+ This layer provides the highest abstraction and its macros should be used
+ in most cases. Its macros start with _In_, _Out_ or _Inout_. For the
+ typical case they provide the most concise annotations.
+
+ _Pre_\_Post_ Layer:
+ ------------------
+ The macros of this layer only should be used when there is no suitable macro
+ in the _In_\_Out_ layer. Its macros start with _Pre_, _Post_, _Ret_,
+ _Deref_pre_ _Deref_post_ and _Deref_ret_. This layer provides the most
+ flexibility for annotations.
+
+ Implementation Abstraction Layer:
+ --------------------------------
+ Macros from this layer should never be used directly. The layer only exists
+ to hide the implementation of the annotation macros.
+
+
+ Annotation Syntax:
+ |--------------|----------|----------------|-----------------------------|
+ | Usage | Nullness | ZeroTerminated | Extent |
+ |--------------|----------|----------------|-----------------------------|
+ | _In_ | <> | <> | <> |
+ | _Out_ | opt_ | z_ | [byte]cap_[c_|x_]( size ) |
+ | _Inout_ | | | [byte]count_[c_|x_]( size ) |
+ | _Deref_out_ | | | ptrdiff_cap_( ptr ) |
+ |--------------| | | ptrdiff_count_( ptr ) |
+ | _Ret_ | | | |
+ | _Deref_ret_ | | | |
+ |--------------| | | |
+ | _Pre_ | | | |
+ | _Post_ | | | |
+ | _Deref_pre_ | | | |
+ | _Deref_post_ | | | |
+ |--------------|----------|----------------|-----------------------------|
+
+ Usage:
+ -----
+ _In_, _Out_, _Inout_, _Pre_, _Post_, _Deref_pre_, _Deref_post_ are for
+ formal parameters.
+ _Ret_, _Deref_ret_ must be used for return values.
+
+ Nullness:
+ --------
+ If the pointer can be NULL the annotation contains _opt. If the macro
+ does not contain '_opt' the pointer may not be NULL.
+
+ String Type:
+ -----------
+ _z: NullTerminated string
+ for _In_ parameters the buffer must have the specified stringtype before the call
+ for _Out_ parameters the buffer must have the specified stringtype after the call
+ for _Inout_ parameters both conditions apply
+
+ Extent Syntax:
+ |------|---------------|---------------|
+ | Unit | Writ\Readable | Argument Type |
+ |------|---------------|---------------|
+ | <> | cap_ | <> |
+ | byte | count_ | c_ |
+ | | | x_ |
+ |------|---------------|---------------|
+
+ 'cap' (capacity) describes the writable size of the buffer and is typically used
+ with _Out_. The default unit is elements. Use 'bytecap' if the size is given in bytes
+ 'count' describes the readable size of the buffer and is typically used with _In_.
+ The default unit is elements. Use 'bytecount' if the size is given in bytes.
+
+ Argument syntax for cap_, bytecap_, count_, bytecount_:
+ (<parameter>|return)[+n] e.g. cch, return, cb+2
+
+ If the buffer size is a constant expression use the c_ postfix.
+ E.g. cap_c_(20), count_c_(MAX_PATH), bytecount_c_(16)
+
+ If the buffer size is given by a limiting pointer use the ptrdiff_ versions
+ of the macros.
+
+ If the buffer size is neither a parameter nor a constant expression use the x_
+ postfix. e.g. bytecount_x_(num*size) x_ annotations accept any arbitrary string.
+ No analysis can be done for x_ annotations but they at least tell the tool that
+ the buffer has some sort of extent description. x_ annotations might be supported
+ by future compiler versions.
+
+============================================================================*/
+
+#define __ATTR_SAL
+
+#ifdef _PREFAST_
+// choose attribute or __declspec implementation
+#ifndef _USE_DECLSPECS_FOR_SAL
+#define _USE_DECLSPECS_FOR_SAL 0
+#endif
+
+#if _USE_DECLSPECS_FOR_SAL
+#undef _USE_ATTRIBUTES_FOR_SAL
+#define _USE_ATTRIBUTES_FOR_SAL 0
+#elif !defined(_USE_ATTRIBUTES_FOR_SAL)
+#if _MSC_VER >= 1500
+#define _USE_ATTRIBUTES_FOR_SAL 1
+#else
+#define _USE_ATTRIBUTES_FOR_SAL 0
+#endif // if _MSC_VER >= 1400
+#endif // if _USE_DECLSPECS_FOR_SAL
+
+
+#if !_USE_DECLSPECS_FOR_SAL
+#if !_USE_ATTRIBUTES_FOR_SAL
+#if _MSC_VER >= 1500
+#undef _USE_ATTRIBUTES_FOR_SAL
+#define _USE_ATTRIBUTES_FOR_SAL 1
+#else
+#undef _USE_DECLSPECS_FOR_SAL
+#define _USE_DECLSPECS_FOR_SAL 1
+#endif // _MSC_VER >= 1400
+#endif // !_USE_ATTRIBUTES_FOR_SAL
+#endif // !_USE_DECLSPECS_FOR_SAL
+
+#endif // #ifdef _PREFAST_
+
+// Disable expansion of SAL macros in non-Prefast mode to
+// improve compiler throughput.
+#ifndef _USE_DECLSPECS_FOR_SAL
+#define _USE_DECLSPECS_FOR_SAL 0
+#endif
+#ifndef _USE_ATTRIBUTES_FOR_SAL
+#define _USE_ATTRIBUTES_FOR_SAL 0
+#endif
+
+// safeguard for MIDL and RC builds
+#if _USE_DECLSPECS_FOR_SAL && ( defined( MIDL_PASS ) || defined(__midl) || defined(RC_INVOKED) || !defined(_PREFAST_) )
+#undef _USE_DECLSPECS_FOR_SAL
+#define _USE_DECLSPECS_FOR_SAL 0
+#endif
+#if _USE_ATTRIBUTES_FOR_SAL && ( !defined(_MSC_EXTENSIONS) || defined( MIDL_PASS ) || defined(__midl) || defined(RC_INVOKED) )
+#undef _USE_ATTRIBUTES_FOR_SAL
+#define _USE_ATTRIBUTES_FOR_SAL 0
+#endif
+
+#if defined(_MSC_EXTENSIONS) && !defined( MIDL_PASS ) && !defined(__midl) && !defined(RC_INVOKED)
+#include "codeanalysis\sourceannotations.h"
+#endif
+
+//============================================================================
+// _In_\_Out_ Layer:
+//============================================================================
+
+// 'in' parameters --------------------------
+
+// input pointer parameter
+// e.g. void SetPoint( _In_ const POINT* pPT );
+#define _In_ _Pre1_impl_(_$notnull) _Deref_pre2_impl_(_$valid, _$readaccess)
+#define _In_opt_ _Pre_opt_valid_ _Deref_pre_readonly_
+
+// nullterminated 'in' parameters.
+// e.g. void CopyStr( _In_z_ const char* szFrom, _Out_z_cap_(cchTo) char* szTo, size_t cchTo );
+#define _In_z_ _Pre_z_ _Deref_pre_readonly_
+#define _In_opt_z_ _Pre_opt_z_ _Deref_pre_readonly_
+
+// 'input' buffers with given size
+
+// e.g. void SetCharRange( _In_count_(cch) const char* rgch, size_t cch )
+// valid buffer extent described by another parameter
+#define _In_count_(size) _Pre_count_(size) _Deref_pre_readonly_
+#define _In_opt_count_(size) _Pre_opt_count_(size) _Deref_pre_readonly_
+#define _In_bytecount_(size) _Pre_bytecount_(size) _Deref_pre_readonly_
+#define _In_opt_bytecount_(size) _Pre_opt_bytecount_(size) _Deref_pre_readonly_
+
+// valid buffer extent described by a constant expression
+#define _In_count_c_(size) _Pre_count_c_(size) _Deref_pre_readonly_
+#define _In_opt_count_c_(size) _Pre_opt_count_c_(size) _Deref_pre_readonly_
+#define _In_bytecount_c_(size) _Pre_bytecount_c_(size) _Deref_pre_readonly_
+#define _In_opt_bytecount_c_(size) _Pre_opt_bytecount_c_(size) _Deref_pre_readonly_
+
+// nullterminated 'input' buffers with given size
+
+// e.g. void SetCharRange( _In_count_(cch) const char* rgch, size_t cch )
+// nullterminated valid buffer extent described by another parameter
+#define _In_z_count_(size) _Pre_z_ _Pre_count_(size) _Deref_pre_readonly_
+#define _In_opt_z_count_(size) _Pre_opt_z_ _Pre_opt_count_(size) _Deref_pre_readonly_
+#define _In_z_bytecount_(size) _Pre_z_ _Pre_bytecount_(size) _Deref_pre_readonly_
+#define _In_opt_z_bytecount_(size) _Pre_opt_z_ _Pre_opt_bytecount_(size) _Deref_pre_readonly_
+
+// nullterminated valid buffer extent described by a constant expression
+#define _In_z_count_c_(size) _Pre_z_ _Pre_count_c_(size) _Deref_pre_readonly_
+#define _In_opt_z_count_c_(size) _Pre_opt_z_ _Pre_opt_count_c_(size) _Deref_pre_readonly_
+#define _In_z_bytecount_c_(size) _Pre_z_ _Pre_bytecount_c_(size) _Deref_pre_readonly_
+#define _In_opt_z_bytecount_c_(size) _Pre_opt_z_ _Pre_opt_bytecount_c_(size) _Deref_pre_readonly_
+
+// buffer capacity is described by another pointer
+// e.g. void Foo( _In_ptrdiff_count_(pchMax) const char* pch, const char* pchMax ) { while pch < pchMax ) pch++; }
+#define _In_ptrdiff_count_(size) _Pre_ptrdiff_count_(size) _Deref_pre_readonly_
+#define _In_opt_ptrdiff_count_(size) _Pre_opt_ptrdiff_count_(size) _Deref_pre_readonly_
+
+// 'x' version for complex expressions that are not supported by the current compiler version
+// e.g. void Set3ColMatrix( _In_count_x_(3*cRows) const Elem* matrix, int cRows );
+#define _In_count_x_(size) _Pre_count_x_(size) _Deref_pre_readonly_
+#define _In_opt_count_x_(size) _Pre_opt_count_x_(size) _Deref_pre_readonly_
+#define _In_bytecount_x_(size) _Pre_bytecount_x_(size) _Deref_pre_readonly_
+#define _In_opt_bytecount_x_(size) _Pre_opt_bytecount_x_(size) _Deref_pre_readonly_
+
+// 'out' parameters --------------------------
+
+// output pointer parameter
+// e.g. void GetPoint( _Out_ POINT* pPT );
+#define _Out_ _Pre_cap_c_(1) _Pre_invalid_
+#define _Out_opt_ _Pre_opt_cap_c_(1) _Pre_invalid_
+
+// 'out' with buffer size
+// e.g. void GetIndeces( _Out_cap_(cIndeces) int* rgIndeces, size_t cIndices );
+// buffer capacity is described by another parameter
+#define _Out_cap_(size) _Pre_cap_(size) _Pre_invalid_
+#define _Out_opt_cap_(size) _Pre_opt_cap_(size) _Pre_invalid_
+#define _Out_bytecap_(size) _Pre_bytecap_(size) _Pre_invalid_
+#define _Out_opt_bytecap_(size) _Pre_opt_bytecap_(size) _Pre_invalid_
+
+// buffer capacity is described by a constant expression
+#define _Out_cap_c_(size) _Pre_cap_c_(size) _Pre_invalid_
+#define _Out_opt_cap_c_(size) _Pre_opt_cap_c_(size) _Pre_invalid_
+#define _Out_bytecap_c_(size) _Pre_bytecap_c_(size) _Pre_invalid_
+#define _Out_opt_bytecap_c_(size) _Pre_opt_bytecap_c_(size) _Pre_invalid_
+
+// buffer capacity is described by another parameter multiplied by a constant expression
+#define _Out_cap_m_(mult,size) _Pre_cap_m_(mult,size) _Pre_invalid_
+#define _Out_opt_cap_m_(mult,size) _Pre_opt_cap_m_(mult,size) _Pre_invalid_
+#define _Out_z_cap_m_(mult,size) _Pre_cap_m_(mult,size) _Pre_invalid_ _Post_z_
+#define _Out_opt_z_cap_m_(mult,size) _Pre_opt_cap_m_(mult,size) _Pre_invalid_ _Post_z_
+
+// buffer capacity is described by another pointer
+// e.g. void Foo( _Out_ptrdiff_cap_(pchMax) char* pch, const char* pchMax ) { while pch < pchMax ) pch++; }
+#define _Out_ptrdiff_cap_(size) _Pre_ptrdiff_cap_(size) _Pre_invalid_
+#define _Out_opt_ptrdiff_cap_(size) _Pre_opt_ptrdiff_cap_(size) _Pre_invalid_
+
+// buffer capacity is described by a complex expression
+#define _Out_cap_x_(size) _Pre_cap_x_(size) _Pre_invalid_
+#define _Out_opt_cap_x_(size) _Pre_opt_cap_x_(size) _Pre_invalid_
+#define _Out_bytecap_x_(size) _Pre_bytecap_x_(size) _Pre_invalid_
+#define _Out_opt_bytecap_x_(size) _Pre_opt_bytecap_x_(size) _Pre_invalid_
+
+// a zero terminated string is filled into a buffer of given capacity
+// e.g. void CopyStr( _In_z_ const char* szFrom, _Out_z_cap_(cchTo) char* szTo, size_t cchTo );
+// buffer capacity is described by another parameter
+#define _Out_z_cap_(size) _Pre_cap_(size) _Pre_invalid_ _Post_z_
+#define _Out_opt_z_cap_(size) _Pre_opt_cap_(size) _Pre_invalid_ _Post_z_
+#define _Out_z_bytecap_(size) _Pre_bytecap_(size) _Pre_invalid_ _Post_z_
+#define _Out_opt_z_bytecap_(size) _Pre_opt_bytecap_(size) _Pre_invalid_ _Post_z_
+
+// buffer capacity is described by a constant expression
+#define _Out_z_cap_c_(size) _Pre_cap_c_(size) _Pre_invalid_ _Post_z_
+#define _Out_opt_z_cap_c_(size) _Pre_opt_cap_c_(size) _Pre_invalid_ _Post_z_
+#define _Out_z_bytecap_c_(size) _Pre_bytecap_c_(size) _Pre_invalid_ _Post_z_
+#define _Out_opt_z_bytecap_c_(size) _Pre_opt_bytecap_c_(size) _Pre_invalid_ _Post_z_
+
+// buffer capacity is described by a complex expression
+#define _Out_z_cap_x_(size) _Pre_cap_x_(size) _Pre_invalid_ _Post_z_
+#define _Out_opt_z_cap_x_(size) _Pre_opt_cap_x_(size) _Pre_invalid_ _Post_z_
+#define _Out_z_bytecap_x_(size) _Pre_bytecap_x_(size) _Pre_invalid_ _Post_z_
+#define _Out_opt_z_bytecap_x_(size) _Pre_opt_bytecap_x_(size) _Pre_invalid_ _Post_z_
+
+// a zero terminated string is filled into a buffer of given capacity
+// e.g. size_t CopyCharRange( _In_count_(cchFrom) const char* rgchFrom, size_t cchFrom, _Out_cap_post_count_(cchTo,return)) char* rgchTo, size_t cchTo );
+#define _Out_cap_post_count_(cap,count) _Pre_cap_(cap) _Pre_invalid_ _Post_count_(count)
+#define _Out_opt_cap_post_count_(cap,count) _Pre_opt_cap_(cap) _Pre_invalid_ _Post_count_(count)
+#define _Out_bytecap_post_bytecount_(cap,count) _Pre_bytecap_(cap) _Pre_invalid_ _Post_bytecount_(count)
+#define _Out_opt_bytecap_post_bytecount_(cap,count) _Pre_opt_bytecap_(cap) _Pre_invalid_ _Post_bytecount_(count)
+
+// a zero terminated string is filled into a buffer of given capacity
+// e.g. size_t CopyStr( _In_z_ const char* szFrom, _Out_z_cap_post_count_(cchTo,return+1) char* szTo, size_t cchTo );
+#define _Out_z_cap_post_count_(cap,count) _Pre_cap_(cap) _Pre_invalid_ _Post_z_count_(count)
+#define _Out_opt_z_cap_post_count_(cap,count) _Pre_opt_cap_(cap) _Pre_invalid_ _Post_z_count_(count)
+#define _Out_z_bytecap_post_bytecount_(cap,count) _Pre_bytecap_(cap) _Pre_invalid_ _Post_z_bytecount_(count)
+#define _Out_opt_z_bytecap_post_bytecount_(cap,count) _Pre_opt_bytecap_(cap) _Pre_invalid_ _Post_z_bytecount_(count)
+
+// only use with dereferenced arguments e.g. '*pcch'
+#define _Out_capcount_(capcount) _Pre_cap_(capcount) _Pre_invalid_ _Post_count_(capcount)
+#define _Out_opt_capcount_(capcount) _Pre_opt_cap_(capcount) _Pre_invalid_ _Post_count_(capcount)
+#define _Out_bytecapcount_(capcount) _Pre_bytecap_(capcount) _Pre_invalid_ _Post_bytecount_(capcount)
+#define _Out_opt_bytecapcount_(capcount) _Pre_opt_bytecap_(capcount) _Pre_invalid_ _Post_bytecount_(capcount)
+
+#define _Out_capcount_x_(capcount) _Pre_cap_x_(capcount) _Pre_invalid_ _Post_count_x_(capcount)
+#define _Out_opt_capcount_x_(capcount) _Pre_opt_cap_x_(capcount) _Pre_invalid_ _Post_count_x_(capcount)
+#define _Out_bytecapcount_x_(capcount) _Pre_bytecap_x_(capcount) _Pre_invalid_ _Post_bytecount_x_(capcount)
+#define _Out_opt_bytecapcount_x_(capcount) _Pre_opt_bytecap_x_(capcount) _Pre_invalid_ _Post_bytecount_x_(capcount)
+
+// e.g. GetString( _Out_z_capcount_(*pLen+1) char* sz, size_t* pLen );
+#define _Out_z_capcount_(capcount) _Pre_cap_(capcount) _Pre_invalid_ _Post_z_count_(capcount)
+#define _Out_opt_z_capcount_(capcount) _Pre_opt_cap_(capcount) _Pre_invalid_ _Post_z_count_(capcount)
+#define _Out_z_bytecapcount_(capcount) _Pre_bytecap_(capcount) _Pre_invalid_ _Post_z_bytecount_(capcount)
+#define _Out_opt_z_bytecapcount_(capcount) _Pre_opt_bytecap_(capcount) _Pre_invalid_ _Post_z_bytecount_(capcount)
+
+// inout parameters ----------------------------
+
+// inout pointer parameter
+// e.g. void ModifyPoint( _Inout_ POINT* pPT );
+#define _Inout_ _Prepost_valid_
+#define _Inout_opt_ _Prepost_opt_valid_
+
+// string buffers
+// e.g. void toupper( _Inout_z_ char* sz );
+#define _Inout_z_ _Prepost_z_
+#define _Inout_opt_z_ _Prepost_opt_z_
+
+// 'inout' buffers with initialized elements before and after the call
+// e.g. void ModifyIndices( _Inout_count_(cIndices) int* rgIndeces, size_t cIndices );
+#define _Inout_count_(size) _Prepost_count_(size)
+#define _Inout_opt_count_(size) _Prepost_opt_count_(size)
+#define _Inout_bytecount_(size) _Prepost_bytecount_(size)
+#define _Inout_opt_bytecount_(size) _Prepost_opt_bytecount_(size)
+
+#define _Inout_count_c_(size) _Prepost_count_c_(size)
+#define _Inout_opt_count_c_(size) _Prepost_opt_count_c_(size)
+#define _Inout_bytecount_c_(size) _Prepost_bytecount_c_(size)
+#define _Inout_opt_bytecount_c_(size) _Prepost_opt_bytecount_c_(size)
+
+// nullterminated 'inout' buffers with initialized elements before and after the call
+// e.g. void ModifyIndices( _Inout_count_(cIndices) int* rgIndeces, size_t cIndices );
+#define _Inout_z_count_(size) _Prepost_z_ _Prepost_count_(size)
+#define _Inout_opt_z_count_(size) _Prepost_z_ _Prepost_opt_count_(size)
+#define _Inout_z_bytecount_(size) _Prepost_z_ _Prepost_bytecount_(size)
+#define _Inout_opt_z_bytecount_(size) _Prepost_z_ _Prepost_opt_bytecount_(size)
+
+#define _Inout_z_count_c_(size) _Prepost_z_ _Prepost_count_c_(size)
+#define _Inout_opt_z_count_c_(size) _Prepost_z_ _Prepost_opt_count_c_(size)
+#define _Inout_z_bytecount_c_(size) _Prepost_z_ _Prepost_bytecount_c_(size)
+#define _Inout_opt_z_bytecount_c_(size) _Prepost_z_ _Prepost_opt_bytecount_c_(size)
+
+#define _Inout_ptrdiff_count_(size) _Pre_ptrdiff_count_(size)
+#define _Inout_opt_ptrdiff_count_(size) _Pre_opt_ptrdiff_count_(size)
+
+#define _Inout_count_x_(size) _Prepost_count_x_(size)
+#define _Inout_opt_count_x_(size) _Prepost_opt_count_x_(size)
+#define _Inout_bytecount_x_(size) _Prepost_bytecount_x_(size)
+#define _Inout_opt_bytecount_x_(size) _Prepost_opt_bytecount_x_(size)
+
+// e.g. void AppendToLPSTR( _In_ LPCSTR szFrom, _Inout_cap_(cchTo) LPSTR* szTo, size_t cchTo );
+#define _Inout_cap_(size) _Pre_valid_cap_(size) _Post_valid_
+#define _Inout_opt_cap_(size) _Pre_opt_valid_cap_(size) _Post_valid_
+#define _Inout_bytecap_(size) _Pre_valid_bytecap_(size) _Post_valid_
+#define _Inout_opt_bytecap_(size) _Pre_opt_valid_bytecap_(size) _Post_valid_
+
+#define _Inout_cap_c_(size) _Pre_valid_cap_c_(size) _Post_valid_
+#define _Inout_opt_cap_c_(size) _Pre_opt_valid_cap_c_(size) _Post_valid_
+#define _Inout_bytecap_c_(size) _Pre_valid_bytecap_c_(size) _Post_valid_
+#define _Inout_opt_bytecap_c_(size) _Pre_opt_valid_bytecap_c_(size) _Post_valid_
+
+#define _Inout_cap_x_(size) _Pre_valid_cap_x_(size) _Post_valid_
+#define _Inout_opt_cap_x_(size) _Pre_opt_valid_cap_x_(size) _Post_valid_
+#define _Inout_bytecap_x_(size) _Pre_valid_bytecap_x_(size) _Post_valid_
+#define _Inout_opt_bytecap_x_(size) _Pre_opt_valid_bytecap_x_(size) _Post_valid_
+
+// inout string buffers with writable size
+// e.g. void AppendStr( _In_z_ const char* szFrom, _Inout_z_cap_(cchTo) char* szTo, size_t cchTo );
+#define _Inout_z_cap_(size) _Pre_z_cap_(size) _Post_z_
+#define _Inout_opt_z_cap_(size) _Pre_opt_z_cap_(size) _Post_z_
+#define _Inout_z_bytecap_(size) _Pre_z_bytecap_(size) _Post_z_
+#define _Inout_opt_z_bytecap_(size) _Pre_opt_z_bytecap_(size) _Post_z_
+
+#define _Inout_z_cap_c_(size) _Pre_z_cap_c_(size) _Post_z_
+#define _Inout_opt_z_cap_c_(size) _Pre_opt_z_cap_c_(size) _Post_z_
+#define _Inout_z_bytecap_c_(size) _Pre_z_bytecap_c_(size) _Post_z_
+#define _Inout_opt_z_bytecap_c_(size) _Pre_opt_z_bytecap_c_(size) _Post_z_
+
+#define _Inout_z_cap_x_(size) _Pre_z_cap_x_(size) _Post_z_
+#define _Inout_opt_z_cap_x_(size) _Pre_opt_z_cap_x_(size) _Post_z_
+#define _Inout_z_bytecap_x_(size) _Pre_z_bytecap_x_(size) _Post_z_
+#define _Inout_opt_z_bytecap_x_(size) _Pre_opt_z_bytecap_x_(size) _Post_z_
+
+// return values -------------------------------
+
+// returning pointers to valid objects
+#define _Ret_ _Ret_valid_
+#define _Ret_opt_ _Ret_opt_valid_
+
+// More _Ret_ annotations are defined below
+
+// Pointer to pointers -------------------------
+
+// e.g. HRESULT HrCreatePoint( _Deref_out_opt_ POINT** ppPT );
+#define _Deref_out_ _Out_ _Deref_pre_invalid_ _Deref_post_valid_
+#define _Deref_out_opt_ _Out_ _Deref_pre_invalid_ _Deref_post_opt_valid_
+#define _Deref_opt_out_ _Out_opt_ _Deref_pre_invalid_ _Deref_post_valid_
+#define _Deref_opt_out_opt_ _Out_opt_ _Deref_pre_invalid_ _Deref_post_opt_valid_
+
+// e.g. void CloneString( _In_z_ const wchar_t* wzFrom, _Deref_out_z_ wchar_t** pWzTo );
+#define _Deref_out_z_ _Out_ _Deref_pre_invalid_ _Deref_post_z_
+#define _Deref_out_opt_z_ _Out_ _Deref_pre_invalid_ _Deref_post_opt_z_
+#define _Deref_opt_out_z_ _Out_opt_ _Deref_pre_invalid_ _Deref_post_z_
+#define _Deref_opt_out_opt_z_ _Out_opt_ _Deref_pre_invalid_ _Deref_post_opt_z_
+
+// More _Deref_ annotations are defined below
+
+// Other annotations
+
+// Check the return value of a function e.g. _Check_return_ ErrorCode Foo();
+#define _Check_return_ _Check_return_impl_
+
+// e.g. MyPrintF( _Printf_format_string_ const wchar_t* wzFormat, ... );
+#define _Printf_format_string_ _Printf_format_string_impl_
+#define _Scanf_format_string_ _Scanf_format_string_impl_
+#define _Scanf_s_format_string_ _Scanf_s_format_string_impl_
+#define _FormatMessage_format_string_
+
+// <expr> indicates whether post conditions apply
+#define _Success_(expr) _Success_impl_(expr)
+
+// annotations to express 'boundedness' of integral value parameter
+#define _In_bound_ _In_bound_impl_
+#define _Out_bound_ _Out_bound_impl_
+#define _Ret_bound_ _Ret_bound_impl_
+#define _Deref_in_bound_ _Deref_in_bound_impl_
+#define _Deref_out_bound_ _Deref_out_bound_impl_
+#define _Deref_inout_bound_ _Deref_in_bound_ _Deref_out_bound_
+#define _Deref_ret_bound_ _Deref_ret_bound_impl_
+
+// annotations to express upper and lower bounds of integral value parameter
+#define _In_range_(lb,ub) _In_range_impl_(lb,ub)
+#define _Out_range_(lb,ub) _Out_range_impl_(lb,ub)
+#define _Ret_range_(lb,ub) _Ret_range_impl_(lb,ub)
+#define _Deref_in_range_(lb,ub) _Deref_in_range_impl_(lb,ub)
+#define _Deref_out_range_(lb,ub) _Deref_out_range_impl_(lb,ub)
+#define _Deref_ret_range_(lb,ub) _Deref_ret_range_impl_(lb,ub)
+
+//============================================================================
+// _Pre_\_Post_ Layer:
+//============================================================================
+
+//
+// _Pre_ annotation ---
+//
+// describing conditions that must be met before the call of the function
+
+// e.g. int strlen( _Pre_z_ const char* sz );
+// buffer is a zero terminated string
+#define _Pre_z_ _Pre2_impl_(_$notnull, _$zterm) _Deref_pre1_impl_(_$valid)
+#define _Pre_opt_z_ _Pre2_impl_(_$maybenull,_$zterm) _Deref_pre1_impl_(_$valid)
+
+// e.g. void FreeMemory( _Pre_bytecap_(cb) _Post_ptr_invalid_ void* pv, size_t cb );
+// buffer capacity described by another parameter
+#define _Pre_cap_(size) _Pre2_impl_(_$notnull, _$cap(size))
+#define _Pre_opt_cap_(size) _Pre2_impl_(_$maybenull,_$cap(size))
+#define _Pre_bytecap_(size) _Pre2_impl_(_$notnull, _$bytecap(size))
+#define _Pre_opt_bytecap_(size) _Pre2_impl_(_$maybenull,_$bytecap(size))
+
+// buffer capacity described by a constant expression
+#define _Pre_cap_c_(size) _Pre2_impl_(_$notnull, _$cap_c(size))
+#define _Pre_opt_cap_c_(size) _Pre2_impl_(_$maybenull,_$cap_c(size))
+#define _Pre_bytecap_c_(size) _Pre2_impl_(_$notnull, _$bytecap_c(size))
+#define _Pre_opt_bytecap_c_(size) _Pre2_impl_(_$maybenull,_$bytecap_c(size))
+
+// buffer capacity is described by another parameter multiplied by a constant expression
+#define _Pre_cap_m_(mult,size) _Pre2_impl_(_$notnull, _$mult(mult,size))
+#define _Pre_opt_cap_m_(mult,size) _Pre2_impl_(_$maybenull,_$mult(mult,size))
+
+// buffer capacity described by size of other buffer, only used by dangerous legacy APIs
+// e.g. int strcpy(_Pre_cap_for_(src) char* dst, const char* src);
+#define _Pre_cap_for_(param) _Pre2_impl_(_$notnull, _$cap_for(param))
+#define _Pre_opt_cap_for_(param) _Pre2_impl_(_$maybenull,_$cap_for(param))
+
+// buffer capacity described by a complex condition
+#define _Pre_cap_x_(size) _Pre2_impl_(_$notnull, _$cap_x(size))
+#define _Pre_opt_cap_x_(size) _Pre2_impl_(_$maybenull,_$cap_x(size))
+#define _Pre_bytecap_x_(size) _Pre2_impl_(_$notnull, _$bytecap_x(size))
+#define _Pre_opt_bytecap_x_(size) _Pre2_impl_(_$maybenull,_$bytecap_x(size))
+
+// buffer capacity described by the difference to another pointer parameter
+#define _Pre_ptrdiff_cap_(ptr) _Pre2_impl_(_$notnull, _$cap_x(__ptrdiff(ptr)))
+#define _Pre_opt_ptrdiff_cap_(ptr) _Pre2_impl_(_$maybenull,_$cap_x(__ptrdiff(ptr)))
+
+// e.g. void AppendStr( _Pre_z_ const char* szFrom, _Pre_z_cap_(cchTo) _Post_z_ char* szTo, size_t cchTo );
+#define _Pre_z_cap_(size) _Pre3_impl_(_$notnull, _$zterm,_$cap(size)) _Deref_pre1_impl_(_$valid)
+#define _Pre_opt_z_cap_(size) _Pre3_impl_(_$maybenull,_$zterm,_$cap(size)) _Deref_pre1_impl_(_$valid)
+#define _Pre_z_bytecap_(size) _Pre3_impl_(_$notnull, _$zterm,_$bytecap(size)) _Deref_pre1_impl_(_$valid)
+#define _Pre_opt_z_bytecap_(size) _Pre3_impl_(_$maybenull,_$zterm,_$bytecap(size)) _Deref_pre1_impl_(_$valid)
+
+#define _Pre_z_cap_c_(size) _Pre3_impl_(_$notnull, _$zterm,_$cap_c(size)) _Deref_pre1_impl_(_$valid)
+#define _Pre_opt_z_cap_c_(size) _Pre3_impl_(_$maybenull,_$zterm,_$cap_c(size)) _Deref_pre1_impl_(_$valid)
+#define _Pre_z_bytecap_c_(size) _Pre3_impl_(_$notnull, _$zterm,_$bytecap_c(size)) _Deref_pre1_impl_(_$valid)
+#define _Pre_opt_z_bytecap_c_(size) _Pre3_impl_(_$maybenull,_$zterm,_$bytecap_c(size)) _Deref_pre1_impl_(_$valid)
+
+#define _Pre_z_cap_x_(size) _Pre3_impl_(_$notnull, _$zterm,_$cap_x(size)) _Deref_pre1_impl_(_$valid)
+#define _Pre_opt_z_cap_x_(size) _Pre3_impl_(_$maybenull,_$zterm,_$cap_x(size)) _Deref_pre1_impl_(_$valid)
+#define _Pre_z_bytecap_x_(size) _Pre3_impl_(_$notnull, _$zterm,_$bytecap_x(size)) _Deref_pre1_impl_(_$valid)
+#define _Pre_opt_z_bytecap_x_(size) _Pre3_impl_(_$maybenull,_$zterm,_$bytecap_x(size)) _Deref_pre1_impl_(_$valid)
+
+// known capacity and valid but unknown readable extent
+#define _Pre_valid_cap_(size) _Pre2_impl_(_$notnull, _$cap(size)) _Deref_pre1_impl_(_$valid)
+#define _Pre_opt_valid_cap_(size) _Pre2_impl_(_$maybenull,_$cap(size)) _Deref_pre1_impl_(_$valid)
+#define _Pre_valid_bytecap_(size) _Pre2_impl_(_$notnull, _$bytecap(size)) _Deref_pre1_impl_(_$valid)
+#define _Pre_opt_valid_bytecap_(size) _Pre2_impl_(_$maybenull,_$bytecap(size)) _Deref_pre1_impl_(_$valid)
+
+#define _Pre_valid_cap_c_(size) _Pre2_impl_(_$notnull, _$cap_c(size)) _Deref_pre1_impl_(_$valid)
+#define _Pre_opt_valid_cap_c_(size) _Pre2_impl_(_$maybenull,_$cap_c(size)) _Deref_pre1_impl_(_$valid)
+#define _Pre_valid_bytecap_c_(size) _Pre2_impl_(_$notnull, _$bytecap_c(size)) _Deref_pre1_impl_(_$valid)
+#define _Pre_opt_valid_bytecap_c_(size) _Pre2_impl_(_$maybenull,_$bytecap_c(size)) _Deref_pre1_impl_(_$valid)
+
+#define _Pre_valid_cap_x_(size) _Pre2_impl_(_$notnull, _$cap_x(size)) _Deref_pre1_impl_(_$valid)
+#define _Pre_opt_valid_cap_x_(size) _Pre2_impl_(_$maybenull,_$cap_x(size)) _Deref_pre1_impl_(_$valid)
+#define _Pre_valid_bytecap_x_(size) _Pre2_impl_(_$notnull, _$bytecap_x(size)) _Deref_pre1_impl_(_$valid)
+#define _Pre_opt_valid_bytecap_x_(size) _Pre2_impl_(_$maybenull,_$bytecap_x(size)) _Deref_pre1_impl_(_$valid)
+
+// e.g. void AppendCharRange( _Pre_count_(cchFrom) const char* rgFrom, size_t cchFrom, _Out_z_cap_(cchTo) char* szTo, size_t cchTo );
+// Valid buffer extent described by another parameter
+#define _Pre_count_(size) _Pre2_impl_(_$notnull, _$count(size)) _Deref_pre1_impl_(_$valid)
+#define _Pre_opt_count_(size) _Pre2_impl_(_$maybenull,_$count(size)) _Deref_pre1_impl_(_$valid)
+#define _Pre_bytecount_(size) _Pre2_impl_(_$notnull, _$bytecount(size)) _Deref_pre1_impl_(_$valid)
+#define _Pre_opt_bytecount_(size) _Pre2_impl_(_$maybenull,_$bytecount(size)) _Deref_pre1_impl_(_$valid)
+
+// Valid buffer extent described by a constant expression
+#define _Pre_count_c_(size) _Pre2_impl_(_$notnull, _$count_c(size)) _Deref_pre1_impl_(_$valid)
+#define _Pre_opt_count_c_(size) _Pre2_impl_(_$maybenull,_$count_c(size)) _Deref_pre1_impl_(_$valid)
+#define _Pre_bytecount_c_(size) _Pre2_impl_(_$notnull, _$bytecount_c(size)) _Deref_pre1_impl_(_$valid)
+#define _Pre_opt_bytecount_c_(size) _Pre2_impl_(_$maybenull,_$bytecount_c(size)) _Deref_pre1_impl_(_$valid)
+
+// Valid buffer extent described by a complex expression
+#define _Pre_count_x_(size) _Pre2_impl_(_$notnull, _$count_x(size)) _Deref_pre1_impl_(_$valid)
+#define _Pre_opt_count_x_(size) _Pre2_impl_(_$maybenull,_$count_x(size)) _Deref_pre1_impl_(_$valid)
+#define _Pre_bytecount_x_(size) _Pre2_impl_(_$notnull, _$bytecount_x(size)) _Deref_pre1_impl_(_$valid)
+#define _Pre_opt_bytecount_x_(size) _Pre2_impl_(_$maybenull,_$bytecount_x(size)) _Deref_pre1_impl_(_$valid)
+
+// Valid buffer extent described by the difference to another pointer parameter
+#define _Pre_ptrdiff_count_(ptr) _Pre2_impl_(_$notnull, _$count_x(__ptrdiff(ptr))) _Deref_pre1_impl_(_$valid)
+#define _Pre_opt_ptrdiff_count_(ptr) _Pre2_impl_(_$maybenull,_$count_x(__ptrdiff(ptr))) _Deref_pre1_impl_(_$valid)
+
+// valid size unknown or indicated by type (e.g.:LPSTR)
+#define _Pre_valid_ _Pre1_impl_(_$notnull) _Deref_pre1_impl_(_$valid)
+#define _Pre_opt_valid_ _Pre1_impl_(_$maybenull) _Deref_pre1_impl_(_$valid)
+
+#define _Pre_invalid_ _Deref_pre1_impl_(_$notvalid)
+
+// used with allocated but not yet initialized objects
+#define _Pre_notnull_ _Pre1_impl_(_$notnull)
+#define _Pre_maybenull_ _Pre1_impl_(_$maybenull)
+#define _Pre_null_ _Pre1_impl_(_$null)
+
+// restrict access rights
+#define _Pre_readonly_ _Pre1_impl_(_$readaccess)
+#define _Pre_writeonly_ _Pre1_impl_(_$writeaccess)
+//
+// _Post_ annotations ---
+//
+// describing conditions that hold after the function call
+
+// void CopyStr( _In_z_ const char* szFrom, _Pre_cap_(cch) _Post_z_ char* szFrom, size_t cchFrom );
+// buffer will be a zero-terminated string after the call
+#define _Post_z_ _Post1_impl_(_$zterm) _Deref_post1_impl_(_$valid)
+
+// char * strncpy(_Out_cap_(_Count) _Post_maybez_ char * _Dest, _In_z_ const char * _Source, _In_ size_t _Count)
+// buffer maybe zero-terminated after the call
+#define _Post_maybez_ _Post1_impl_(_$maybezterm)
+
+// e.g. SIZE_T HeapSize( _In_ HANDLE hHeap, DWORD dwFlags, _Pre_notnull_ _Post_bytecap_(return) LPCVOID lpMem );
+#define _Post_cap_(size) _Post1_impl_(_$cap(size))
+#define _Post_bytecap_(size) _Post1_impl_(_$bytecap(size))
+
+// e.g. int strlen( _In_z_ _Post_count_(return+1) const char* sz );
+#define _Post_count_(size) _Post1_impl_(_$count(size)) _Deref_post1_impl_(_$valid)
+#define _Post_bytecount_(size) _Post1_impl_(_$bytecount(size)) _Deref_post1_impl_(_$valid)
+#define _Post_count_c_(size) _Post1_impl_(_$count_c(size)) _Deref_post1_impl_(_$valid)
+#define _Post_bytecount_c_(size) _Post1_impl_(_$bytecount_c(size)) _Deref_post1_impl_(_$valid)
+#define _Post_count_x_(size) _Post1_impl_(_$count_x(size)) _Deref_post1_impl_(_$valid)
+#define _Post_bytecount_x_(size) _Post1_impl_(_$bytecount_x(size)) _Deref_post1_impl_(_$valid)
+
+// e.g. size_t CopyStr( _In_z_ const char* szFrom, _Pre_cap_(cch) _Post_z_count_(return+1) char* szFrom, size_t cchFrom );
+#define _Post_z_count_(size) _Post2_impl_(_$zterm,_$count(size)) _Deref_post1_impl_(_$valid)
+#define _Post_z_bytecount_(size) _Post2_impl_(_$zterm,_$bytecount(size)) _Deref_post1_impl_(_$valid)
+#define _Post_z_count_c_(size) _Post2_impl_(_$zterm,_$count_c(size)) _Deref_post1_impl_(_$valid)
+#define _Post_z_bytecount_c_(size) _Post2_impl_(_$zterm,_$bytecount_c(size)) _Deref_post1_impl_(_$valid)
+#define _Post_z_count_x_(size) _Post2_impl_(_$zterm,_$count_x(size)) _Deref_post1_impl_(_$valid)
+#define _Post_z_bytecount_x_(size) _Post2_impl_(_$zterm,_$bytecount_x(size)) _Deref_post1_impl_(_$valid)
+
+// e.g. void free( _Post_ptr_invalid_ void* pv );
+#define _Post_ptr_invalid_ _Post1_impl_(_$notvalid)
+
+// e.g. HRESULT InitStruct( _Post_valid_ Struct* pobj );
+#define _Post_valid_ _Deref_post1_impl_(_$valid)
+#define _Post_invalid_ _Deref_post1_impl_(_$notvalid)
+
+// e.g. void ThrowExceptionIfNull( _Post_notnull_ const void* pv );
+#define _Post_notnull_ _Post1_impl_(_$notnull)
+
+//
+// _Ret_ annotations
+//
+// describing conditions that hold for return values after the call
+
+// e.g. _Ret_z_ CString::operator const wchar_t*() const throw();
+#define _Ret_z_ _Ret2_impl_(_$notnull, _$zterm) _Deref_ret1_impl_(_$valid)
+#define _Ret_opt_z_ _Ret2_impl_(_$maybenull,_$zterm) _Deref_ret1_impl_(_$valid)
+
+// e.g. _Ret_opt_bytecap_(cb) void* AllocateMemory( size_t cb );
+// Buffer capacity is described by another parameter
+#define _Ret_cap_(size) _Ret2_impl_(_$notnull, _$cap(size))
+#define _Ret_opt_cap_(size) _Ret2_impl_(_$maybenull,_$cap(size))
+#define _Ret_bytecap_(size) _Ret2_impl_(_$notnull, _$bytecap(size))
+#define _Ret_opt_bytecap_(size) _Ret2_impl_(_$maybenull,_$bytecap(size))
+
+// Buffer capacity is described by a constant expression
+#define _Ret_cap_c_(size) _Ret2_impl_(_$notnull, _$cap_c(size))
+#define _Ret_opt_cap_c_(size) _Ret2_impl_(_$maybenull,_$cap_c(size))
+#define _Ret_bytecap_c_(size) _Ret2_impl_(_$notnull, _$bytecap_c(size))
+#define _Ret_opt_bytecap_c_(size) _Ret2_impl_(_$maybenull,_$bytecap_c(size))
+
+// Buffer capacity is described by a complex condition
+#define _Ret_cap_x_(size) _Ret2_impl_(_$notnull, _$cap_x(size))
+#define _Ret_opt_cap_x_(size) _Ret2_impl_(_$maybenull,_$cap_x(size))
+#define _Ret_bytecap_x_(size) _Ret2_impl_(_$notnull, _$bytecap_x(size))
+#define _Ret_opt_bytecap_x_(size) _Ret2_impl_(_$maybenull,_$bytecap_x(size))
+
+// return value is nullterminated and capacity is given by another parameter
+#define _Ret_z_cap_(size) _Ret3_impl_(_$notnull, _$zterm,_$cap(size)) _Deref_ret1_impl_(_$valid)
+#define _Ret_opt_z_cap_(size) _Ret3_impl_(_$maybenull,_$zterm,_$cap(size)) _Deref_ret1_impl_(_$valid)
+#define _Ret_z_bytecap_(size) _Ret3_impl_(_$notnull, _$zterm,_$bytecap(size)) _Deref_ret1_impl_(_$valid)
+#define _Ret_opt_z_bytecap_(size) _Ret3_impl_(_$maybenull,_$zterm,_$bytecap(size)) _Deref_ret1_impl_(_$valid)
+
+// e.g. _Ret_opt_bytecount_(cb) void* AllocateZeroInitializedMemory( size_t cb );
+// Valid Buffer extent is described by another parameter
+#define _Ret_count_(size) _Ret2_impl_(_$notnull, _$count(size)) _Deref_ret1_impl_(_$valid)
+#define _Ret_opt_count_(size) _Ret2_impl_(_$maybenull,_$count(size)) _Deref_ret1_impl_(_$valid)
+#define _Ret_bytecount_(size) _Ret2_impl_(_$notnull, _$bytecount(size)) _Deref_ret1_impl_(_$valid)
+#define _Ret_opt_bytecount_(size) _Ret2_impl_(_$maybenull,_$bytecount(size)) _Deref_ret1_impl_(_$valid)
+
+// Valid Buffer extent is described by a constant expression
+#define _Ret_count_c_(size) _Ret2_impl_(_$notnull, _$count_c(size)) _Deref_ret1_impl_(_$valid)
+#define _Ret_opt_count_c_(size) _Ret2_impl_(_$maybenull,_$count_c(size)) _Deref_ret1_impl_(_$valid)
+#define _Ret_bytecount_c_(size) _Ret2_impl_(_$notnull, _$bytecount_c(size)) _Deref_ret1_impl_(_$valid)
+#define _Ret_opt_bytecount_c_(size) _Ret2_impl_(_$maybenull,_$bytecount_c(size)) _Deref_ret1_impl_(_$valid)
+
+// Valid Buffer extent is described by a complex expression
+#define _Ret_count_x_(size) _Ret2_impl_(_$notnull, _$count_x(size)) _Deref_ret1_impl_(_$valid)
+#define _Ret_opt_count_x_(size) _Ret2_impl_(_$maybenull,_$count_x(size)) _Deref_ret1_impl_(_$valid)
+#define _Ret_bytecount_x_(size) _Ret2_impl_(_$notnull, _$bytecount_x(size)) _Deref_ret1_impl_(_$valid)
+#define _Ret_opt_bytecount_x_(size) _Ret2_impl_(_$maybenull,_$bytecount_x(size)) _Deref_ret1_impl_(_$valid)
+
+// return value is nullterminated and length is given by another parameter
+#define _Ret_z_count_(size) _Ret3_impl_(_$notnull, _$zterm,_$count(size)) _Deref_ret1_impl_(_$valid)
+#define _Ret_opt_z_count_(size) _Ret3_impl_(_$maybenull,_$zterm,_$count(size)) _Deref_ret1_impl_(_$valid)
+#define _Ret_z_bytecount_(size) _Ret3_impl_(_$notnull, _$zterm,_$bytecount(size)) _Deref_ret1_impl_(_$valid)
+#define _Ret_opt_z_bytecount_(size) _Ret3_impl_(_$maybenull,_$zterm,_$bytecount(size)) _Deref_ret1_impl_(_$valid)
+
+// e.g. _Ret_opt_valid_ LPSTR void* CloneSTR( _Pre_valid_ LPSTR src );
+#define _Ret_valid_ _Ret1_impl_(_$notnull) _Deref_ret1_impl_(_$valid)
+#define _Ret_opt_valid_ _Ret1_impl_(_$maybenull) _Deref_ret1_impl_(_$valid)
+
+// used with allocated but not yet initialized objects
+#define _Ret_notnull_ _Ret1_impl_(_$notnull)
+#define _Ret_maybenull_ _Ret1_impl_(_$maybenull)
+#define _Ret_null_ _Ret1_impl_(_$null)
+
+//
+// _Deref_pre_ ---
+//
+// describing conditions for array elements of dereferenced pointer parameters that must be met before the call
+
+// e.g. void SaveStringArray( _In_count_(cStrings) _Deref_pre_z_ const wchar_t* const rgpwch[] );
+#define _Deref_pre_z_ _Deref_pre2_impl_(_$notnull, _$zterm) _Deref2_pre1_impl_(_$valid)
+#define _Deref_pre_opt_z_ _Deref_pre2_impl_(_$maybenull,_$zterm) _Deref2_pre1_impl_(_$valid)
+
+// e.g. void FillInArrayOfStr32( _In_count_(cStrings) _Deref_pre_cap_c_(32) _Deref_post_z_ wchar_t* const rgpwch[] );
+// buffer capacity is described by another parameter
+#define _Deref_pre_cap_(size) _Deref_pre2_impl_(_$notnull, _$cap(size))
+#define _Deref_pre_opt_cap_(size) _Deref_pre2_impl_(_$maybenull,_$cap(size))
+#define _Deref_pre_bytecap_(size) _Deref_pre2_impl_(_$notnull, _$bytecap(size))
+#define _Deref_pre_opt_bytecap_(size) _Deref_pre2_impl_(_$maybenull,_$bytecap(size))
+
+// buffer capacity is described by a constant expression
+#define _Deref_pre_cap_c_(size) _Deref_pre2_impl_(_$notnull, _$cap_c(size))
+#define _Deref_pre_opt_cap_c_(size) _Deref_pre2_impl_(_$maybenull,_$cap_c(size))
+#define _Deref_pre_bytecap_c_(size) _Deref_pre2_impl_(_$notnull, _$bytecap_c(size))
+#define _Deref_pre_opt_bytecap_c_(size) _Deref_pre2_impl_(_$maybenull,_$bytecap_c(size))
+
+// buffer capacity is described by a complex condition
+#define _Deref_pre_cap_x_(size) _Deref_pre2_impl_(_$notnull, _$cap_x(size))
+#define _Deref_pre_opt_cap_x_(size) _Deref_pre2_impl_(_$maybenull,_$cap_x(size))
+#define _Deref_pre_bytecap_x_(size) _Deref_pre2_impl_(_$notnull, _$bytecap_x(size))
+#define _Deref_pre_opt_bytecap_x_(size) _Deref_pre2_impl_(_$maybenull,_$bytecap_x(size))
+
+// convenience macros for nullterminated buffers with given capacity
+#define _Deref_pre_z_cap_(size) _Deref_pre3_impl_(_$notnull, _$zterm,_$cap(size)) _Deref2_pre1_impl_(_$valid)
+#define _Deref_pre_opt_z_cap_(size) _Deref_pre3_impl_(_$maybenull,_$zterm,_$cap(size)) _Deref2_pre1_impl_(_$valid)
+#define _Deref_pre_z_bytecap_(size) _Deref_pre3_impl_(_$notnull, _$zterm,_$bytecap(size)) _Deref2_pre1_impl_(_$valid)
+#define _Deref_pre_opt_z_bytecap_(size) _Deref_pre3_impl_(_$maybenull,_$zterm,_$bytecap(size)) _Deref2_pre1_impl_(_$valid)
+
+#define _Deref_pre_z_cap_c_(size) _Deref_pre3_impl_(_$notnull, _$zterm,_$cap_c(size)) _Deref2_pre1_impl_(_$valid)
+#define _Deref_pre_opt_z_cap_c_(size) _Deref_pre3_impl_(_$maybenull,_$zterm,_$cap_c(size)) _Deref2_pre1_impl_(_$valid)
+#define _Deref_pre_z_bytecap_c_(size) _Deref_pre3_impl_(_$notnull, _$zterm,_$bytecap_c(size)) _Deref2_pre1_impl_(_$valid)
+#define _Deref_pre_opt_z_bytecap_c_(size) _Deref_pre3_impl_(_$maybenull,_$zterm,_$bytecap_c(size)) _Deref2_pre1_impl_(_$valid)
+
+#define _Deref_pre_z_cap_x_(size) _Deref_pre3_impl_(_$notnull, _$zterm,_$cap_x(size)) _Deref2_pre1_impl_(_$valid)
+#define _Deref_pre_opt_z_cap_x_(size) _Deref_pre3_impl_(_$maybenull,_$zterm,_$cap_x(size)) _Deref2_pre1_impl_(_$valid)
+#define _Deref_pre_z_bytecap_x_(size) _Deref_pre3_impl_(_$notnull, _$zterm,_$bytecap_x(size)) _Deref2_pre1_impl_(_$valid)
+#define _Deref_pre_opt_z_bytecap_x_(size) _Deref_pre3_impl_(_$maybenull,_$zterm,_$bytecap_x(size)) _Deref2_pre1_impl_(_$valid)
+
+// known capacity and valid but unknown readable extent
+#define _Deref_pre_valid_cap_(size) _Deref_pre2_impl_(_$notnull, _$cap(size)) _Deref2_pre1_impl_(_$valid)
+#define _Deref_pre_opt_valid_cap_(size) _Deref_pre2_impl_(_$maybenull,_$cap(size)) _Deref2_pre1_impl_(_$valid)
+#define _Deref_pre_valid_bytecap_(size) _Deref_pre2_impl_(_$notnull, _$bytecap(size)) _Deref2_pre1_impl_(_$valid)
+#define _Deref_pre_opt_valid_bytecap_(size) _Deref_pre2_impl_(_$maybenull,_$bytecap(size)) _Deref2_pre1_impl_(_$valid)
+
+#define _Deref_pre_valid_cap_c_(size) _Deref_pre2_impl_(_$notnull, _$cap_c(size)) _Deref2_pre1_impl_(_$valid)
+#define _Deref_pre_opt_valid_cap_c_(size) _Deref_pre2_impl_(_$maybenull,_$cap_c(size)) _Deref2_pre1_impl_(_$valid)
+#define _Deref_pre_valid_bytecap_c_(size) _Deref_pre2_impl_(_$notnull, _$bytecap_c(size)) _Deref2_pre1_impl_(_$valid)
+#define _Deref_pre_opt_valid_bytecap_c_(size) _Deref_pre2_impl_(_$maybenull,_$bytecap_c(size)) _Deref2_pre1_impl_(_$valid)
+
+#define _Deref_pre_valid_cap_x_(size) _Deref_pre2_impl_(_$notnull, _$cap_x(size)) _Deref2_pre1_impl_(_$valid)
+#define _Deref_pre_opt_valid_cap_x_(size) _Deref_pre2_impl_(_$maybenull,_$cap_x(size)) _Deref2_pre1_impl_(_$valid)
+#define _Deref_pre_valid_bytecap_x_(size) _Deref_pre2_impl_(_$notnull, _$bytecap_x(size)) _Deref2_pre1_impl_(_$valid)
+#define _Deref_pre_opt_valid_bytecap_x_(size) _Deref_pre2_impl_(_$maybenull,_$bytecap_x(size)) _Deref2_pre1_impl_(_$valid)
+
+// e.g. void SaveMatrix( _In_count_(n) _Deref_pre_count_(n) const Elem** matrix, size_t n );
+// valid buffer extent is described by another parameter
+#define _Deref_pre_count_(size) _Deref_pre2_impl_(_$notnull, _$count(size)) _Deref2_pre1_impl_(_$valid)
+#define _Deref_pre_opt_count_(size) _Deref_pre2_impl_(_$maybenull,_$count(size)) _Deref2_pre1_impl_(_$valid)
+#define _Deref_pre_bytecount_(size) _Deref_pre2_impl_(_$notnull, _$bytecount(size)) _Deref2_pre1_impl_(_$valid)
+#define _Deref_pre_opt_bytecount_(size) _Deref_pre2_impl_(_$maybenull,_$bytecount(size)) _Deref2_pre1_impl_(_$valid)
+
+// valid buffer extent is described by a constant expression
+#define _Deref_pre_count_c_(size) _Deref_pre2_impl_(_$notnull, _$count_c(size)) _Deref2_pre1_impl_(_$valid)
+#define _Deref_pre_opt_count_c_(size) _Deref_pre2_impl_(_$maybenull,_$count_c(size)) _Deref2_pre1_impl_(_$valid)
+#define _Deref_pre_bytecount_c_(size) _Deref_pre2_impl_(_$notnull, _$bytecount_c(size)) _Deref2_pre1_impl_(_$valid)
+#define _Deref_pre_opt_bytecount_c_(size) _Deref_pre2_impl_(_$maybenull,_$bytecount_c(size)) _Deref2_pre1_impl_(_$valid)
+
+// valid buffer extent is described by a complex expression
+#define _Deref_pre_count_x_(size) _Deref_pre2_impl_(_$notnull, _$count_x(size)) _Deref2_pre1_impl_(_$valid)
+#define _Deref_pre_opt_count_x_(size) _Deref_pre2_impl_(_$maybenull,_$count_x(size)) _Deref2_pre1_impl_(_$valid)
+#define _Deref_pre_bytecount_x_(size) _Deref_pre2_impl_(_$notnull, _$bytecount_x(size)) _Deref2_pre1_impl_(_$valid)
+#define _Deref_pre_opt_bytecount_x_(size) _Deref_pre2_impl_(_$maybenull,_$bytecount_x(size)) _Deref2_pre1_impl_(_$valid)
+
+// e.g. void PrintStringArray( _In_count_(cElems) _Deref_pre_valid_ LPCSTR rgStr[], size_t cElems );
+#define _Deref_pre_valid_ _Deref_pre1_impl_(_$notnull) _Deref2_pre1_impl_(_$valid)
+#define _Deref_pre_opt_valid_ _Deref_pre1_impl_(_$maybenull) _Deref2_pre1_impl_(_$valid)
+#define _Deref_pre_invalid_ _Deref2_pre1_impl_(_$notvalid)
+
+#define _Deref_pre_notnull_ _Deref_pre1_impl_(_$notnull)
+#define _Deref_pre_maybenull_ _Deref_pre1_impl_(_$maybenull)
+#define _Deref_pre_null_ _Deref_pre1_impl_(_$null)
+
+// restrict access rights
+#define _Deref_pre_readonly_ _Deref_pre1_impl_(_$readaccess)
+#define _Deref_pre_writeonly_ _Deref_pre1_impl_(_$writeaccess)
+
+//
+// _Deref_post_ ---
+//
+// describing conditions for array elements or dereferenced pointer parameters that hold after the call
+
+// e.g. void CloneString( _In_z_ const Wchar_t* wzIn _Out_ _Deref_post_z_ wchar_t** pWzOut );
+#define _Deref_post_z_ _Deref_post2_impl_(_$notnull, _$zterm) _Deref2_post1_impl_(_$valid)
+#define _Deref_post_opt_z_ _Deref_post2_impl_(_$maybenull,_$zterm) _Deref2_post1_impl_(_$valid)
+
+// e.g. HRESULT HrAllocateMemory( size_t cb, _Out_ _Deref_post_bytecap_(cb) void** ppv );
+// buffer capacity is described by another parameter
+#define _Deref_post_cap_(size) _Deref_post2_impl_(_$notnull, _$cap(size))
+#define _Deref_post_opt_cap_(size) _Deref_post2_impl_(_$maybenull,_$cap(size))
+#define _Deref_post_bytecap_(size) _Deref_post2_impl_(_$notnull, _$bytecap(size))
+#define _Deref_post_opt_bytecap_(size) _Deref_post2_impl_(_$maybenull,_$bytecap(size))
+
// buffer capacity is described by a constant expression
// FIX: these previously expanded to _$cap_z(size)/_$bytecap_z(size) — tokens
// that are not defined by either implementation branch of this header (only
// _$cap/_$cap_c/_$cap_x and the bytecap equivalents exist), so any use of
// these macros produced undefined identifiers.  Every sibling "_c" group uses
// the constant-expression helpers _$cap_c/_$bytecap_c; do the same here.
#define _Deref_post_cap_c_(size) _Deref_post2_impl_(_$notnull, _$cap_c(size))
#define _Deref_post_opt_cap_c_(size) _Deref_post2_impl_(_$maybenull,_$cap_c(size))
#define _Deref_post_bytecap_c_(size) _Deref_post2_impl_(_$notnull, _$bytecap_c(size))
#define _Deref_post_opt_bytecap_c_(size) _Deref_post2_impl_(_$maybenull,_$bytecap_c(size))
+
// buffer capacity is described by a complex expression
#define _Deref_post_cap_x_(size) _Deref_post2_impl_(_$notnull, _$cap_x(size))
#define _Deref_post_opt_cap_x_(size) _Deref_post2_impl_(_$maybenull,_$cap_x(size))
#define _Deref_post_bytecap_x_(size) _Deref_post2_impl_(_$notnull, _$bytecap_x(size))
#define _Deref_post_opt_bytecap_x_(size) _Deref_post2_impl_(_$maybenull,_$bytecap_x(size))

// convenience macros for nullterminated buffers with given capacity
#define _Deref_post_z_cap_(size) _Deref_post3_impl_(_$notnull, _$zterm,_$cap(size)) _Deref2_post1_impl_(_$valid)
#define _Deref_post_opt_z_cap_(size) _Deref_post3_impl_(_$maybenull,_$zterm,_$cap(size)) _Deref2_post1_impl_(_$valid)
#define _Deref_post_z_bytecap_(size) _Deref_post3_impl_(_$notnull, _$zterm,_$bytecap(size)) _Deref2_post1_impl_(_$valid)
#define _Deref_post_opt_z_bytecap_(size) _Deref_post3_impl_(_$maybenull,_$zterm,_$bytecap(size)) _Deref2_post1_impl_(_$valid)

#define _Deref_post_z_cap_c_(size) _Deref_post3_impl_(_$notnull, _$zterm,_$cap_c(size)) _Deref2_post1_impl_(_$valid)
#define _Deref_post_opt_z_cap_c_(size) _Deref_post3_impl_(_$maybenull,_$zterm,_$cap_c(size)) _Deref2_post1_impl_(_$valid)
#define _Deref_post_z_bytecap_c_(size) _Deref_post3_impl_(_$notnull, _$zterm,_$bytecap_c(size)) _Deref2_post1_impl_(_$valid)
#define _Deref_post_opt_z_bytecap_c_(size) _Deref_post3_impl_(_$maybenull,_$zterm,_$bytecap_c(size)) _Deref2_post1_impl_(_$valid)

#define _Deref_post_z_cap_x_(size) _Deref_post3_impl_(_$notnull, _$zterm,_$cap_x(size)) _Deref2_post1_impl_(_$valid)
#define _Deref_post_opt_z_cap_x_(size) _Deref_post3_impl_(_$maybenull,_$zterm,_$cap_x(size)) _Deref2_post1_impl_(_$valid)
#define _Deref_post_z_bytecap_x_(size) _Deref_post3_impl_(_$notnull, _$zterm,_$bytecap_x(size)) _Deref2_post1_impl_(_$valid)
#define _Deref_post_opt_z_bytecap_x_(size) _Deref_post3_impl_(_$maybenull,_$zterm,_$bytecap_x(size)) _Deref2_post1_impl_(_$valid)

// known capacity and valid but unknown readable extent
#define _Deref_post_valid_cap_(size) _Deref_post2_impl_(_$notnull, _$cap(size)) _Deref2_post1_impl_(_$valid)
#define _Deref_post_opt_valid_cap_(size) _Deref_post2_impl_(_$maybenull,_$cap(size)) _Deref2_post1_impl_(_$valid)
#define _Deref_post_valid_bytecap_(size) _Deref_post2_impl_(_$notnull, _$bytecap(size)) _Deref2_post1_impl_(_$valid)
#define _Deref_post_opt_valid_bytecap_(size) _Deref_post2_impl_(_$maybenull,_$bytecap(size)) _Deref2_post1_impl_(_$valid)

#define _Deref_post_valid_cap_c_(size) _Deref_post2_impl_(_$notnull, _$cap_c(size)) _Deref2_post1_impl_(_$valid)
#define _Deref_post_opt_valid_cap_c_(size) _Deref_post2_impl_(_$maybenull,_$cap_c(size)) _Deref2_post1_impl_(_$valid)
#define _Deref_post_valid_bytecap_c_(size) _Deref_post2_impl_(_$notnull, _$bytecap_c(size)) _Deref2_post1_impl_(_$valid)
#define _Deref_post_opt_valid_bytecap_c_(size) _Deref_post2_impl_(_$maybenull,_$bytecap_c(size)) _Deref2_post1_impl_(_$valid)

#define _Deref_post_valid_cap_x_(size) _Deref_post2_impl_(_$notnull, _$cap_x(size)) _Deref2_post1_impl_(_$valid)
#define _Deref_post_opt_valid_cap_x_(size) _Deref_post2_impl_(_$maybenull,_$cap_x(size)) _Deref2_post1_impl_(_$valid)
#define _Deref_post_valid_bytecap_x_(size) _Deref_post2_impl_(_$notnull, _$bytecap_x(size)) _Deref2_post1_impl_(_$valid)
#define _Deref_post_opt_valid_bytecap_x_(size) _Deref_post2_impl_(_$maybenull,_$bytecap_x(size)) _Deref2_post1_impl_(_$valid)

// e.g. HRESULT HrAllocateZeroInitializedMemory( size_t cb, _Out_ _Deref_post_bytecount_(cb) void** ppv );
// valid buffer extent is described by another parameter
#define _Deref_post_count_(size) _Deref_post2_impl_(_$notnull, _$count(size)) _Deref2_post1_impl_(_$valid)
#define _Deref_post_opt_count_(size) _Deref_post2_impl_(_$maybenull,_$count(size)) _Deref2_post1_impl_(_$valid)
#define _Deref_post_bytecount_(size) _Deref_post2_impl_(_$notnull, _$bytecount(size)) _Deref2_post1_impl_(_$valid)
#define _Deref_post_opt_bytecount_(size) _Deref_post2_impl_(_$maybenull,_$bytecount(size)) _Deref2_post1_impl_(_$valid)

// valid buffer extent is described by a constant expression
#define _Deref_post_count_c_(size) _Deref_post2_impl_(_$notnull, _$count_c(size)) _Deref2_post1_impl_(_$valid)
#define _Deref_post_opt_count_c_(size) _Deref_post2_impl_(_$maybenull,_$count_c(size)) _Deref2_post1_impl_(_$valid)
#define _Deref_post_bytecount_c_(size) _Deref_post2_impl_(_$notnull, _$bytecount_c(size)) _Deref2_post1_impl_(_$valid)
#define _Deref_post_opt_bytecount_c_(size) _Deref_post2_impl_(_$maybenull,_$bytecount_c(size)) _Deref2_post1_impl_(_$valid)

// valid buffer extent is described by a complex expression
#define _Deref_post_count_x_(size) _Deref_post2_impl_(_$notnull, _$count_x(size)) _Deref2_post1_impl_(_$valid)
#define _Deref_post_opt_count_x_(size) _Deref_post2_impl_(_$maybenull,_$count_x(size)) _Deref2_post1_impl_(_$valid)
#define _Deref_post_bytecount_x_(size) _Deref_post2_impl_(_$notnull, _$bytecount_x(size)) _Deref2_post1_impl_(_$valid)
#define _Deref_post_opt_bytecount_x_(size) _Deref_post2_impl_(_$maybenull,_$bytecount_x(size)) _Deref2_post1_impl_(_$valid)

// e.g. void GetStrings( _Out_count_(cElems) _Deref_post_valid_ LPSTR const rgStr[], size_t cElems );
#define _Deref_post_valid_ _Deref_post1_impl_(_$notnull) _Deref2_post1_impl_(_$valid)
#define _Deref_post_opt_valid_ _Deref_post1_impl_(_$maybenull) _Deref2_post1_impl_(_$valid)

#define _Deref_post_notnull_ _Deref_post1_impl_(_$notnull)
#define _Deref_post_maybenull_ _Deref_post1_impl_(_$maybenull)
#define _Deref_post_null_ _Deref_post1_impl_(_$null)
+
+//
+// _Deref_ret_ ---
+//
+
+#define _Deref_ret_z_ _Deref_ret2_impl_(_$notnull, _$zterm)
+#define _Deref_ret_opt_z_ _Deref_ret2_impl_(_$maybenull,_$zterm)
+
+//
+// special _Deref_ ---
+//
+#define _Deref2_pre_readonly_ _Deref2_pre1_impl_(_$readaccess)
+
// Convenience macros for more concise annotations

//
// _Pre_post ---
//
// describing conditions that hold before and after the function call
// (the "_opt_" applies to the precondition only: NULL may be passed in,
// but the postcondition is stated unconditionally)

#define _Prepost_z_ _Pre_z_ _Post_z_
#define _Prepost_opt_z_ _Pre_opt_z_ _Post_z_

#define _Prepost_count_(size) _Pre_count_(size) _Post_count_(size)
#define _Prepost_opt_count_(size) _Pre_opt_count_(size) _Post_count_(size)
#define _Prepost_bytecount_(size) _Pre_bytecount_(size) _Post_bytecount_(size)
#define _Prepost_opt_bytecount_(size) _Pre_opt_bytecount_(size) _Post_bytecount_(size)
#define _Prepost_count_c_(size) _Pre_count_c_(size) _Post_count_c_(size)
#define _Prepost_opt_count_c_(size) _Pre_opt_count_c_(size) _Post_count_c_(size)
#define _Prepost_bytecount_c_(size) _Pre_bytecount_c_(size) _Post_bytecount_c_(size)
#define _Prepost_opt_bytecount_c_(size) _Pre_opt_bytecount_c_(size) _Post_bytecount_c_(size)
#define _Prepost_count_x_(size) _Pre_count_x_(size) _Post_count_x_(size)
#define _Prepost_opt_count_x_(size) _Pre_opt_count_x_(size) _Post_count_x_(size)
#define _Prepost_bytecount_x_(size) _Pre_bytecount_x_(size) _Post_bytecount_x_(size)
#define _Prepost_opt_bytecount_x_(size) _Pre_opt_bytecount_x_(size) _Post_bytecount_x_(size)

#define _Prepost_valid_ _Pre_valid_ _Post_valid_
#define _Prepost_opt_valid_ _Pre_opt_valid_ _Post_valid_

//
// _Deref_<both> ---
//
// short version for _Deref_pre_<ann> _Deref_post_<ann>
// describing conditions for array elements or dereferenced pointer parameters that hold before and after the call

#define _Deref_prepost_z_ _Deref_pre_z_ _Deref_post_z_
#define _Deref_prepost_opt_z_ _Deref_pre_opt_z_ _Deref_post_opt_z_

#define _Deref_prepost_cap_(size) _Deref_pre_cap_(size) _Deref_post_cap_(size)
#define _Deref_prepost_opt_cap_(size) _Deref_pre_opt_cap_(size) _Deref_post_opt_cap_(size)
#define _Deref_prepost_bytecap_(size) _Deref_pre_bytecap_(size) _Deref_post_bytecap_(size)
#define _Deref_prepost_opt_bytecap_(size) _Deref_pre_opt_bytecap_(size) _Deref_post_opt_bytecap_(size)

#define _Deref_prepost_cap_x_(size) _Deref_pre_cap_x_(size) _Deref_post_cap_x_(size)
#define _Deref_prepost_opt_cap_x_(size) _Deref_pre_opt_cap_x_(size) _Deref_post_opt_cap_x_(size)
#define _Deref_prepost_bytecap_x_(size) _Deref_pre_bytecap_x_(size) _Deref_post_bytecap_x_(size)
#define _Deref_prepost_opt_bytecap_x_(size) _Deref_pre_opt_bytecap_x_(size) _Deref_post_opt_bytecap_x_(size)

#define _Deref_prepost_z_cap_(size) _Deref_pre_z_cap_(size) _Deref_post_z_cap_(size)
#define _Deref_prepost_opt_z_cap_(size) _Deref_pre_opt_z_cap_(size) _Deref_post_opt_z_cap_(size)
#define _Deref_prepost_z_bytecap_(size) _Deref_pre_z_bytecap_(size) _Deref_post_z_bytecap_(size)
#define _Deref_prepost_opt_z_bytecap_(size) _Deref_pre_opt_z_bytecap_(size) _Deref_post_opt_z_bytecap_(size)

#define _Deref_prepost_valid_cap_(size) _Deref_pre_valid_cap_(size) _Deref_post_valid_cap_(size)
#define _Deref_prepost_opt_valid_cap_(size) _Deref_pre_opt_valid_cap_(size) _Deref_post_opt_valid_cap_(size)
#define _Deref_prepost_valid_bytecap_(size) _Deref_pre_valid_bytecap_(size) _Deref_post_valid_bytecap_(size)
#define _Deref_prepost_opt_valid_bytecap_(size) _Deref_pre_opt_valid_bytecap_(size) _Deref_post_opt_valid_bytecap_(size)

#define _Deref_prepost_valid_cap_x_(size) _Deref_pre_valid_cap_x_(size) _Deref_post_valid_cap_x_(size)
#define _Deref_prepost_opt_valid_cap_x_(size) _Deref_pre_opt_valid_cap_x_(size) _Deref_post_opt_valid_cap_x_(size)
#define _Deref_prepost_valid_bytecap_x_(size) _Deref_pre_valid_bytecap_x_(size) _Deref_post_valid_bytecap_x_(size)
#define _Deref_prepost_opt_valid_bytecap_x_(size) _Deref_pre_opt_valid_bytecap_x_(size) _Deref_post_opt_valid_bytecap_x_(size)

#define _Deref_prepost_count_(size) _Deref_pre_count_(size) _Deref_post_count_(size)
#define _Deref_prepost_opt_count_(size) _Deref_pre_opt_count_(size) _Deref_post_opt_count_(size)
#define _Deref_prepost_bytecount_(size) _Deref_pre_bytecount_(size) _Deref_post_bytecount_(size)
#define _Deref_prepost_opt_bytecount_(size) _Deref_pre_opt_bytecount_(size) _Deref_post_opt_bytecount_(size)

#define _Deref_prepost_count_x_(size) _Deref_pre_count_x_(size) _Deref_post_count_x_(size)
#define _Deref_prepost_opt_count_x_(size) _Deref_pre_opt_count_x_(size) _Deref_post_opt_count_x_(size)
#define _Deref_prepost_bytecount_x_(size) _Deref_pre_bytecount_x_(size) _Deref_post_bytecount_x_(size)
#define _Deref_prepost_opt_bytecount_x_(size) _Deref_pre_opt_bytecount_x_(size) _Deref_post_opt_bytecount_x_(size)

#define _Deref_prepost_valid_ _Deref_pre_valid_ _Deref_post_valid_
#define _Deref_prepost_opt_valid_ _Deref_pre_opt_valid_ _Deref_post_opt_valid_

//
// _Deref_<miscellaneous>
//
// used with references to arrays

#define _Deref_out_z_cap_c_(size) _Deref_pre_cap_c_(size) _Deref_pre_invalid_ _Deref_post_z_
#define _Deref_inout_z_cap_c_(size) _Deref_pre_z_cap_c_(size) _Deref_post_z_
#define _Deref_out_z_bytecap_c_(size) _Deref_pre_bytecap_c_(size) _Deref_pre_invalid_ _Deref_post_z_
#define _Deref_inout_z_bytecap_c_(size) _Deref_pre_z_bytecap_c_(size) _Deref_post_z_
#define _Deref_inout_z_ _Deref_prepost_z_
+
+//============================================================================
+// Implementation Layer:
+//============================================================================
+
+#if _USE_ATTRIBUTES_FOR_SAL
+
// Implementation when SAL is expressed as VC++ source annotation attributes
// ([SA_Pre]/[SA_Post]/...): each _$token expands to a named attribute field,
// and the _PreN/_PostN/_RetN helpers bundle 1-3 of them into one attribute.

#define _Check_return_impl_ [returnvalue:SA_Post(MustCheck=SA_Yes)]

#define _Success_impl_(expr) [SA_Success(Condition=#expr)]

#define _Printf_format_string_impl_ [SA_FormatString(Style="printf")]
#define _Scanf_format_string_impl_ [SA_FormatString(Style="scanf")]
#define _Scanf_s_format_string_impl_ [SA_FormatString(Style="scanf_s")]

#define _In_bound_impl_ [SA_PreBound(Deref=0)]
#define _Out_bound_impl_ [SA_PostBound(Deref=0)]
#define _Ret_bound_impl_ [returnvalue:SA_PostBound(Deref=0)]
#define _Deref_in_bound_impl_ [SA_PreBound(Deref=1)]
#define _Deref_out_bound_impl_ [SA_PostBound(Deref=1)]
#define _Deref_ret_bound_impl_ [returnvalue:SA_PostBound(Deref=1)]

// range annotations; #min/#max stringize the bound expressions
#define _In_range_impl_(min,max) [SA_PreRange(MinVal=#min,MaxVal=#max)]
#define _Out_range_impl_(min,max) [SA_PostRange(MinVal=#min,MaxVal=#max)]
#define _Ret_range_impl_(min,max) [returnvalue:SA_PostRange(MinVal=#min,MaxVal=#max)]
#define _Deref_in_range_impl_(min,max) [SA_PreRange(Deref=1,MinVal=#min,MaxVal=#max)]
#define _Deref_out_range_impl_(min,max) [SA_PostRange(Deref=1,MinVal=#min,MaxVal=#max)]
#define _Deref_ret_range_impl_(min,max) [returnvalue:SA_PostRange(Deref=1,MinVal=#min,MaxVal=#max)]

// property tokens used as attribute arguments
#define _$valid Valid=SA_Yes
#define _$maybevalid Valid=SA_Maybe
#define _$notvalid Valid=SA_No

#define _$null Null=SA_Yes
#define _$maybenull Null=SA_Maybe
#define _$notnull Null=SA_No

#define _$zterm NullTerminated=SA_Yes
#define _$maybezterm NullTerminated=SA_Maybe
#define _$notzterm NullTerminated=SA_No

#define _$readaccess Access=SA_Read
#define _$writeaccess Access=SA_Write

// writable capacity: expression (stringized), constant, length-of, complex
#define _$cap(size) WritableElements=#size
#define _$cap_c(size) WritableElementsConst=size
#define _$cap_for(param) WritableElementsLength=#param
#define _$cap_x(size) WritableElements="\n@"#size

#define _$bytecap(size) WritableBytes=#size
#define _$bytecap_c(size) WritableBytesConst=size
#define _$bytecap_x(size) WritableBytes="\n@"#size

#define _$mult(mult,size) ElementSizeConst=mult,_$cap(size)

// readable (initialized) extent
#define _$count(size) ValidElements=#size
#define _$count_c(size) ValidElementsConst=size
#define _$count_x(size) ValidElements="\n@"#size

#define _$bytecount(size) ValidBytes=#size
#define _$bytecount_c(size) ValidBytesConst=size
#define _$bytecount_x(size) ValidBytes="\n@"#size

#define _Pre1_impl_(p1) [SA_Pre(p1)]
#define _Pre2_impl_(p1,p2) [SA_Pre(p1,p2)]
#define _Pre3_impl_(p1,p2,p3) [SA_Pre(p1,p2,p3)]

#define _Post1_impl_(p1) [SA_Post(p1)]
#define _Post2_impl_(p1,p2) [SA_Post(p1,p2)]
#define _Post3_impl_(p1,p2,p3) [SA_Post(p1,p2,p3)]

#define _Ret1_impl_(p1) [returnvalue:SA_Post(p1)]
#define _Ret2_impl_(p1,p2) [returnvalue:SA_Post(p1,p2)]
#define _Ret3_impl_(p1,p2,p3) [returnvalue:SA_Post(p1,p2,p3)]

#define _Deref_pre1_impl_(p1) [SA_Pre(Deref=1,p1)]
#define _Deref_pre2_impl_(p1,p2) [SA_Pre(Deref=1,p1,p2)]
#define _Deref_pre3_impl_(p1,p2,p3) [SA_Pre(Deref=1,p1,p2,p3)]

#define _Deref_post1_impl_(p1) [SA_Post(Deref=1,p1)]
#define _Deref_post2_impl_(p1,p2) [SA_Post(Deref=1,p1,p2)]
#define _Deref_post3_impl_(p1,p2,p3) [SA_Post(Deref=1,p1,p2,p3)]

#define _Deref_ret1_impl_(p1) [returnvalue:SA_Post(Deref=1,p1)]
#define _Deref_ret2_impl_(p1,p2) [returnvalue:SA_Post(Deref=1,p1,p2)]
#define _Deref_ret3_impl_(p1,p2,p3) [returnvalue:SA_Post(Deref=1,p1,p2,p3)]

#define _Deref2_pre1_impl_(p1) [SA_Pre(Deref=2,p1)]
#define _Deref2_post1_impl_(p1) [SA_Post(Deref=2,p1)]
#define _Deref2_ret1_impl_(p1) [returnvalue:SA_Post(Deref=2,p1)]
+
+#elif _USE_DECLSPECS_FOR_SAL
+
// Implementation when SAL is expressed as __declspec strings: each _$token
// expands to a __declspec("SAL_...") marker; _$SPECSTRIZE stringizes an
// expression so it can be spliced into the declspec string literal.
#define _$SPECSTRIZE( x ) #x

#define _Check_return_impl_ __declspec("SAL_checkReturn")

#define _Success_impl_(expr) __declspec("SAL_success("_$SPECSTRIZE(expr)")")

// format-string checking is not supported in the declspec encoding
#define _Printf_format_string_impl_
#define _Scanf_format_string_impl_
#define _Scanf_s_format_string_impl_
+
// bound/range annotations for the declspec encoding.
// FIX: this group previously used _$derefpre/_$derefpost, which are not
// defined anywhere in this header — the deref helpers defined below are
// _$deref_pre/_$deref_post — and _Deref_ret_bound_impl_ was additionally
// missing the _$ prefix on its _$bound token.  Any use of these six macros
// therefore injected stray undeclared identifiers into the declaration.
#define _In_bound_impl_ _$pre _$bound
#define _Out_bound_impl_ _$post _$bound
#define _Ret_bound_impl_ _$post _$bound
#define _Deref_in_bound_impl_ _$deref_pre _$bound
#define _Deref_out_bound_impl_ _$deref_post _$bound
#define _Deref_ret_bound_impl_ _$deref_post _$bound

#define _In_range_impl_(min,max) _$pre _$range(min,max)
#define _Out_range_impl_(min,max) _$post _$range(min,max)
#define _Ret_range_impl_(min,max) _$post _$range(min,max)
#define _Deref_in_range_impl_(min,max) _$deref_pre _$range(min,max)
#define _Deref_out_range_impl_(min,max) _$deref_post _$range(min,max)
#define _Deref_ret_range_impl_(min,max) _$deref_post _$range(min,max)
+
// property tokens: validity / nullness of the annotated pointer
#define _$valid __declspec("SAL_valid")
#define _$maybevalid __declspec("SAL_maybevalid")
#define _$notvalid __declspec("SAL_notvalid")

#define _$null __declspec("SAL_null")
#define _$maybenull __declspec("SAL_maybenull")
#define _$notnull __declspec("SAL_notnull")

// nullterminated == readable up to a 0 sentinel; the maybe/not forms have
// no declspec representation and expand to nothing
#define _$zterm __declspec("SAL_readableTo(sentinel(0))")
#define _$maybezterm
#define _$notzterm

#define _$readaccess __declspec("SAL_readonly")
#define _$writeaccess __declspec("SAL_notreadonly")

// writable capacity expressed as writableTo(...); stringized via _$SPECSTRIZE
#define _$cap(size) __declspec("SAL_writableTo(elementCount("_$SPECSTRIZE(size)"))")
#define _$cap_c(size) __declspec("SAL_writableTo(elementCount("_$SPECSTRIZE(size)"))")
#define _$cap_for(param) __declspec("SAL_writableTo(needsCountFor("_$SPECSTRIZE(param)"))")
#define _$cap_x(size) __declspec("SAL_writableTo(inexpressibleCount('"_$SPECSTRIZE(size)"'))")

#define _$bytecap(size) __declspec("SAL_writableTo(byteCount("_$SPECSTRIZE(size)"))")
#define _$bytecap_c(size) __declspec("SAL_writableTo(byteCount("_$SPECSTRIZE(size)"))")
#define _$bytecap_x(size) __declspec("SAL_writableTo(inexpressibleCount('"_$SPECSTRIZE(size)"'))")

#define _$mult(mult,size) __declspec("SAL_writableTo(inexpressibleCount("_$SPECSTRIZE(mult)"*"_$SPECSTRIZE(size)"))")

// readable (initialized) extent expressed as readableTo(...)
#define _$count(size) __declspec("SAL_readableTo(elementCount("_$SPECSTRIZE(size)"))")
#define _$count_c(size) __declspec("SAL_readableTo(elementCount("_$SPECSTRIZE(size)"))")
#define _$count_x(size) __declspec("SAL_readableTo(inexpressibleCount('"_$SPECSTRIZE(size)"'))")

#define _$bytecount(size) __declspec("SAL_readableTo(byteCount("_$SPECSTRIZE(size)"))")
#define _$bytecount_c(size) __declspec("SAL_readableTo(byteCount("_$SPECSTRIZE(size)"))")
#define _$bytecount_x(size) __declspec("SAL_readableTo(inexpressibleCount('"_$SPECSTRIZE(size)"'))")

// pre/post markers; the deref variants add one SAL_deref level
#define _$pre __declspec("SAL_pre")
#define _$post __declspec("SAL_post")
#define _$deref_pre __declspec("SAL_pre") __declspec("SAL_deref")
#define _$deref_post __declspec("SAL_post") __declspec("SAL_deref")

#define _$bound __declspec("SAL_bound")
#define _$range(min,max) __declspec("SAL_range("_$SPECSTRIZE(min)","_$SPECSTRIZE(max)")")
+
// bundling helpers: prefix each of the 1-3 property tokens with the
// appropriate pre/post/deref marker (returnvalue annotations reuse _$post)
#define _Pre1_impl_(p1) _$pre p1
#define _Pre2_impl_(p1,p2) _$pre p1 _$pre p2
#define _Pre3_impl_(p1,p2,p3) _$pre p1 _$pre p2 _$pre p3

#define _Post1_impl_(p1) _$post p1
#define _Post2_impl_(p1,p2) _$post p1 _$post p2
#define _Post3_impl_(p1,p2,p3) _$post p1 _$post p2 _$post p3

#define _Ret1_impl_(p1) _$post p1
#define _Ret2_impl_(p1,p2) _$post p1 _$post p2
#define _Ret3_impl_(p1,p2,p3) _$post p1 _$post p2 _$post p3

#define _Deref_pre1_impl_(p1) _$deref_pre p1
#define _Deref_pre2_impl_(p1,p2) _$deref_pre p1 _$deref_pre p2
#define _Deref_pre3_impl_(p1,p2,p3) _$deref_pre p1 _$deref_pre p2 _$deref_pre p3

#define _Deref_post1_impl_(p1) _$deref_post p1
#define _Deref_post2_impl_(p1,p2) _$deref_post p1 _$deref_post p2
#define _Deref_post3_impl_(p1,p2,p3) _$deref_post p1 _$deref_post p2 _$deref_post p3

#define _Deref_ret1_impl_(p1) _$deref_post p1
#define _Deref_ret2_impl_(p1,p2) _$deref_post p1 _$deref_post p2
#define _Deref_ret3_impl_(p1,p2,p3) _$deref_post p1 _$deref_post p2 _$deref_post p3

// two dereference levels: deref marker applied twice
#define _Deref2_pre1_impl_(p1) _$deref_pre __declspec("SAL_deref") p1
#define _Deref2_post1_impl_(p1) _$deref_post __declspec("SAL_deref") p1
#define _Deref2_ret1_impl_(p1) _$deref_post __declspec("SAL_deref") p1
+
+#elif defined(_MSC_EXTENSIONS) && !defined( MIDL_PASS ) && !defined(__midl) && !defined(RC_INVOKED) && defined(_PFT_VER) && _MSC_VER >= 1400
+
// minimum attribute expansion for foreground build
// (attributes carry no payload here; they only keep declarations parseable)

#pragma push_macro( "SA" )
#pragma push_macro( "REPEATABLE" )

// C++ uses the bare attribute-class name; C uses the SA_-prefixed form
#ifdef __cplusplus
#define SA( id ) id
#define REPEATABLE [repeatable]
#else // !__cplusplus
#define SA( id ) SA_##id
#define REPEATABLE
#endif // !__cplusplus
+
// minimal parameter-annotation attribute; _$d is a dummy payload field
REPEATABLE
[source_annotation_attribute( SA( Parameter ) )]
struct _$P
{
#ifdef __cplusplus
	_$P();
#endif
	int _$d;
};
typedef struct _$P _$P;
+
// minimal return-value-annotation attribute; _$d is a dummy payload field
REPEATABLE
[source_annotation_attribute( SA( ReturnValue ) )]
struct _$R
{
#ifdef __cplusplus
	_$R();
#endif
	int _$d;
};
typedef struct _$R _$R;
+
// minimal method-annotation attribute (not repeatable); _$d is a dummy field
[source_annotation_attribute( SA( Method ) )]
struct _$M
{
#ifdef __cplusplus
	_$M();
#endif
	int _$d;
};
typedef struct _$M _$M;
+
#pragma pop_macro( "REPEATABLE" )
#pragma pop_macro( "SA" )

// every annotation collapses to a parameter/return/method attribute with a
// dummy argument (_$d=0); the analysis payload is intentionally discarded
#define _Check_return_impl_ [returnvalue:_$R(_$d=0)]

#define _Success_impl_(expr) [_$M(_$d=0)]

#define _Printf_format_string_impl_ [_$P(_$d=0)]
#define _Scanf_format_string_impl_ [_$P(_$d=0)]
#define _Scanf_s_format_string_impl_ [_$P(_$d=0)]

#define _In_bound_impl_ [_$P(_$d=0)]
#define _Out_bound_impl_ [_$P(_$d=0)]
#define _Ret_bound_impl_ [returnvalue:_$R(_$d=0)]
#define _Deref_in_bound_impl_ [_$P(_$d=0)]
#define _Deref_out_bound_impl_ [_$P(_$d=0)]
#define _Deref_ret_bound_impl_ [returnvalue:_$R(_$d=0)]

#define _In_range_impl_(min,max) [_$P(_$d=0)]
#define _Out_range_impl_(min,max) [_$P(_$d=0)]
#define _Ret_range_impl_(min,max) [returnvalue:_$R(_$d=0)]
#define _Deref_in_range_impl_(min,max) [_$P(_$d=0)]
#define _Deref_out_range_impl_(min,max) [_$P(_$d=0)]
#define _Deref_ret_range_impl_(min,max) [returnvalue:_$R(_$d=0)]

#define _Pre1_impl_(p1) [_$P(_$d=0)]
#define _Pre2_impl_(p1,p2) [_$P(_$d=0)]
#define _Pre3_impl_(p1,p2,p3) [_$P(_$d=0)]

#define _Post1_impl_(p1) [_$P(_$d=0)]
#define _Post2_impl_(p1,p2) [_$P(_$d=0)]
#define _Post3_impl_(p1,p2,p3) [_$P(_$d=0)]

#define _Ret1_impl_(p1) [returnvalue:_$R(_$d=0)]
#define _Ret2_impl_(p1,p2) [returnvalue:_$R(_$d=0)]
#define _Ret3_impl_(p1,p2,p3) [returnvalue:_$R(_$d=0)]

#define _Deref_pre1_impl_(p1) [_$P(_$d=0)]
#define _Deref_pre2_impl_(p1,p2) [_$P(_$d=0)]
#define _Deref_pre3_impl_(p1,p2,p3) [_$P(_$d=0)]

#define _Deref_post1_impl_(p1) [_$P(_$d=0)]
#define _Deref_post2_impl_(p1,p2) [_$P(_$d=0)]
#define _Deref_post3_impl_(p1,p2,p3) [_$P(_$d=0)]

#define _Deref_ret1_impl_(p1) [returnvalue:_$R(_$d=0)]
#define _Deref_ret2_impl_(p1,p2) [returnvalue:_$R(_$d=0)]
#define _Deref_ret3_impl_(p1,p2,p3) [returnvalue:_$R(_$d=0)]

// deliberately expand to nothing (commented out) in the minimal encoding
#define _Deref2_pre1_impl_(p1) //[_$P(_$d=0)]
#define _Deref2_post1_impl_(p1) //[_$P(_$d=0)]
#define _Deref2_ret1_impl_(p1) //[_$P(_$d=0)]
+
+#else
+
+#define _Check_return_impl_
+
+#define _Success_impl_(expr)
+
+#define _Printf_format_string_impl_
+#define _Scanf_format_string_impl_
+#define _Scanf_s_format_string_impl_
+
+#define _In_bound_impl_
+#define _Out_bound_impl_
+#define _Ret_bound_impl_
+#define _Deref_in_bound_impl_
+#define _Deref_out_bound_impl_
+#define _Deref_ret_bound_impl_
+
+#define _In_range_impl_(min,max)
+#define _Out_range_impl_(min,max)
+#define _Ret_range_impl_(min,max)
+#define _Deref_in_range_impl_(min,max)
+#define _Deref_out_range_impl_(min,max)
+#define _Deref_ret_range_impl_(min,max)
+
+#define _Pre1_impl_(p1)
+#define _Pre2_impl_(p1,p2)
+#define _Pre3_impl_(p1,p2,p3)
+
+#define _Post1_impl_(p1)
+#define _Post2_impl_(p1,p2)
+#define _Post3_impl_(p1,p2,p3)
+
+#define _Ret1_impl_(p1)
+#define _Ret2_impl_(p1,p2)
+#define _Ret3_impl_(p1,p2,p3)
+
+#define _Deref_pre1_impl_(p1)
+#define _Deref_pre2_impl_(p1,p2)
+#define _Deref_pre3_impl_(p1,p2,p3)
+
+#define _Deref_post1_impl_(p1)
+#define _Deref_post2_impl_(p1,p2)
+#define _Deref_post3_impl_(p1,p2,p3)
+
+#define _Deref_ret1_impl_(p1)
+#define _Deref_ret2_impl_(p1,p2)
+#define _Deref_ret3_impl_(p1,p2,p3)
+
+#define _Deref2_pre1_impl_(p1)
+#define _Deref2_post1_impl_(p1)
+#define _Deref2_ret1_impl_(p1)
+
+#endif
+
+// This section contains the deprecated annotations
+
+/*
+ -------------------------------------------------------------------------------
+ Introduction
+
+ sal.h provides a set of annotations to describe how a function uses its
+ parameters - the assumptions it makes about them, and the guarantees it makes
+ upon finishing.
+
+ Annotations may be placed before either a function parameter's type or its return
+ type, and describe the function's behavior regarding the parameter or return value.
+ There are two classes of annotations: buffer annotations and advanced annotations.
+ Buffer annotations describe how functions use their pointer parameters, and
+ advanced annotations either describe complex/unusual buffer behavior, or provide
+ additional information about a parameter that is not otherwise expressible.
+
+ -------------------------------------------------------------------------------
+ Buffer Annotations
+
+ The most important annotations in sal.h provide a consistent way to annotate
+ buffer parameters or return values for a function. Each of these annotations describes
+ a single buffer (which could be a string, a fixed-length or variable-length array,
+ or just a pointer) that the function interacts with: where it is, how large it is,
+ how much is initialized, and what the function does with it.
+
+ The appropriate macro for a given buffer can be constructed using the table below.
+ Just pick the appropriate values from each category, and combine them together
+ with a leading underscore. Some combinations of values do not make sense as buffer
+ annotations. Only meaningful annotations can be added to your code; for a list of
+ these, see the buffer annotation definitions section.
+
+ Only a single buffer annotation should be used for each parameter.
+
+ |------------|------------|---------|--------|----------|----------|---------------|
+ | Level | Usage | Size | Output | NullTerm | Optional | Parameters |
+ |------------|------------|---------|--------|----------|----------|---------------|
+ | <> | <> | <> | <> | _z | <> | <> |
+ | _deref | _in | _ecount | _full | _nz | _opt | (size) |
+ | _deref_opt | _out | _bcount | _part | | | (size,length) |
+ | | _inout | | | | | |
+ | | | | | | | |
+ |------------|------------|---------|--------|----------|----------|---------------|
+
+ Level: Describes the buffer pointer's level of indirection from the parameter or
+ return value 'p'.
+
+ <> : p is the buffer pointer.
+ _deref : *p is the buffer pointer. p must not be NULL.
+ _deref_opt : *p may be the buffer pointer. p may be NULL, in which case the rest of
+ the annotation is ignored.
+
+ Usage: Describes how the function uses the buffer.
+
+ <> : The buffer is not accessed. If used on the return value or with _deref, the
+ function will provide the buffer, and it will be uninitialized at exit.
+ Otherwise, the caller must provide the buffer. This should only be used
+ for alloc and free functions.
+ _in : The function will only read from the buffer. The caller must provide the
+ buffer and initialize it. Cannot be used with _deref.
+ _out : The function will only write to the buffer. If used on the return value or
+ with _deref, the function will provide the buffer and initialize it.
+ Otherwise, the caller must provide the buffer, and the function will
+ initialize it.
+ _inout : The function may freely read from and write to the buffer. The caller must
+ provide the buffer and initialize it. If used with _deref, the buffer may
+ be reallocated by the function.
+
+ Size: Describes the total size of the buffer. This may be less than the space actually
+ allocated for the buffer, in which case it describes the accessible amount.
+
+ <> : No buffer size is given. If the type specifies the buffer size (such as
+ with LPSTR and LPWSTR), that amount is used. Otherwise, the buffer is one
+ element long. Must be used with _in, _out, or _inout.
+ _ecount : The buffer size is an explicit element count.
+ _bcount : The buffer size is an explicit byte count.
+
+ Output: Describes how much of the buffer will be initialized by the function. For
+ _inout buffers, this also describes how much is initialized at entry. Omit this
+ category for _in buffers; they must be fully initialized by the caller.
+
+ <> : The type specifies how much is initialized. For instance, a function initializing
+ an LPWSTR must NULL-terminate the string.
+ _full : The function initializes the entire buffer.
+ _part : The function initializes part of the buffer, and explicitly indicates how much.
+
+ NullTerm: States whether the presence of a '\0' marks the end of valid elements in the buffer.
+ _z : A '\0' indicates the end of the buffer
+ _nz : The buffer may not be null terminated and a '\0' does not indicate the end of the
+ buffer.
+ Optional: Describes if the buffer itself is optional.
+
+ <> : The pointer to the buffer must not be NULL.
+ _opt : The pointer to the buffer might be NULL. It will be checked before being dereferenced.
+
+ Parameters: Gives explicit counts for the size and length of the buffer.
+
+ <> : There is no explicit count. Use when neither _ecount nor _bcount is used.
+ (size) : Only the buffer's total size is given. Use with _ecount or _bcount but not _part.
+ (size,length) : The buffer's total size and initialized length are given. Use with _ecount_part
+ and _bcount_part.
+
+ -------------------------------------------------------------------------------
+ Buffer Annotation Examples
+
+ LWSTDAPI_(BOOL) StrToIntExA(
+ LPCSTR pszString, -- No annotation required, const implies __in.
+ DWORD dwFlags,
+ __out int *piRet -- A pointer whose dereference will be filled in.
+ );
+
+ void MyPaintingFunction(
+ __in HWND hwndControl, -- An initialized read-only parameter.
+ __in_opt HDC hdcOptional, -- An initialized read-only parameter that might be NULL.
+ __inout IPropertyStore *ppsStore -- An initialized parameter that may be freely used
+ -- and modified.
+ );
+
+ LWSTDAPI_(BOOL) PathCompactPathExA(
+ __out_ecount(cchMax) LPSTR pszOut, -- A string buffer with cch elements that will
+ -- be NULL terminated on exit.
+ LPCSTR pszSrc, -- No annotation required, const implies __in.
+ UINT cchMax,
+ DWORD dwFlags
+ );
+
+ HRESULT SHLocalAllocBytes(
+ size_t cb,
+ __deref_bcount(cb) T **ppv -- A pointer whose dereference will be set to an
+ -- uninitialized buffer with cb bytes.
+ );
+
+ __inout_bcount_full(cb) : A buffer with cb bytes that is fully initialized at
+ entry and exit, and may be written to by this function.
+
+ __out_ecount_part(count, *countOut) : A buffer with count elements that will be
+ partially initialized by this function. The function indicates how much it
+ initialized by setting *countOut.
+
+ -------------------------------------------------------------------------------
+ Advanced Annotations
+
+ Advanced annotations describe behavior that is not expressible with the regular
+ buffer macros. These may be used either to annotate buffer parameters that involve
+ complex or conditional behavior, or to enrich existing annotations with additional
+ information.
+
+ __success(expr) f :
+ <expr> indicates whether function f succeeded or not. If <expr> is true at exit,
+ all the function's guarantees (as given by other annotations) must hold. If <expr>
+ is false at exit, the caller should not expect any of the function's guarantees
+ to hold. If not used, the function must always satisfy its guarantees. Added
+ automatically to functions that indicate success in standard ways, such as by
+ returning an HRESULT.
+
+ __nullterminated p :
+ Pointer p is a buffer that may be read or written up to and including the first
+ NULL character or pointer. May be used on typedefs, which marks valid (properly
+ initialized) instances of that type as being NULL-terminated.
+
+ __nullnullterminated p :
+ Pointer p is a buffer that may be read or written up to and including the first
+ sequence of two NULL characters or pointers. May be used on typedefs, which marks
+ valid instances of that type as being double-NULL terminated.
+
+ __reserved v :
+ Value v must be 0/NULL, reserved for future use.
+
+ __checkReturn v :
+ Return value v must not be ignored by callers of this function.
+
+ __typefix(ctype) v :
+ Value v should be treated as an instance of ctype, rather than its declared type.
+
+ __override f :
+ Specify C#-style 'override' behaviour for overriding virtual methods.
+
+ __callback f :
+ Function f can be used as a function pointer.
+
+ __format_string p :
+ Pointer p is a string that contains % markers in the style of printf.
+
+ __blocksOn(resource) f :
+ Function f blocks on the resource 'resource'.
+
+ __fallthrough :
+ Annotates switch statement labels where fall-through is desired, to distinguish
+ from forgotten break statements.
+
+ -------------------------------------------------------------------------------
+ Advanced Annotation Examples
+
+ __success(return == TRUE) LWSTDAPI_(BOOL)
+ PathCanonicalizeA(__out_ecount(MAX_PATH) LPSTR pszBuf, LPCSTR pszPath) :
+ pszBuf is only guaranteed to be NULL-terminated when TRUE is returned.
+
+ typedef __nullterminated WCHAR* LPWSTR : Initialized LPWSTRs are NULL-terminated strings.
+
+ __out_ecount(cch) __typefix(LPWSTR) void *psz : psz is a buffer parameter which will be
+ a NULL-terminated WCHAR string at exit, and which initially contains cch WCHARs.
+
+ -------------------------------------------------------------------------------
+*/
+
+#define __specstrings
+
+#ifdef __cplusplus
+#ifndef __nothrow
+# define __nothrow __declspec(nothrow)
+#endif
+extern "C" {
+#else
+#ifndef __nothrow
+# define __nothrow
+#endif
+#endif /* #ifdef __cplusplus */
+
+
+/*
+ -------------------------------------------------------------------------------
+ Helper Macro Definitions
+
+ These express behavior common to many of the high-level annotations.
+ DO NOT USE THESE IN YOUR CODE.
+ -------------------------------------------------------------------------------
+*/
+
+/*
+The helper annotations are only understood by the compiler version used by various
+defect detection tools. When the regular compiler is running, they are defined into
+nothing, and do not affect the compiled code.
+*/
+
+#if !defined(__midl) && defined(_PREFAST_)
+
+ /*
+ In the primitive __declspec("SAL_*") annotations "SAL" stands for Standard
+ Annotation Language. These __declspec("SAL_*") annotations are the
+ primitives the compiler understands and all high-level SpecString MACROs
+ will decompose into these primitives.
+ */
+
+ #define SPECSTRINGIZE( x ) #x
+
+ /*
+ __null p
+ __notnull p
+ __maybenull p
+
+ Annotates a pointer p. States that pointer p is null. Commonly used
+ in the negated form __notnull or the possibly null form __maybenull.
+ */
+
+ #define __null __declspec("SAL_null")
+ #define __notnull __declspec("SAL_notnull")
+ #define __maybenull __declspec("SAL_maybenull")
+
+ /*
+ __readonly l
+ __notreadonly l
+ __maybereadonly l
+
+ Annotates a location l. States that location l is not modified after
+ this point. If the annotation is placed on the precondition state of
+ a function, the restriction only applies until the postcondition state
+ of the function. __maybereadonly states that the annotated location
+ may be modified, whereas __notreadonly states that a location must be
+ modified.
+ */
+
+ #define __readonly __declspec("SAL_readonly")
+ #define __notreadonly __declspec("SAL_notreadonly")
+ #define __maybereadonly __declspec("SAL_maybereadonly")
+
+ /*
+ __valid v
+ __notvalid v
+ __maybevalid v
+
+ Annotates any value v. States that the value satisfies all properties of
+ valid values of its type. For example, for a string buffer, valid means
+ that the buffer pointer is either NULL or points to a NULL-terminated string.
+ */
+
+ #define __valid __declspec("SAL_valid")
+ #define __notvalid __declspec("SAL_notvalid")
+ #define __maybevalid __declspec("SAL_maybevalid")
+
+ /*
+ __readableTo(extent) p
+
+ Annotates a buffer pointer p. If the buffer can be read, extent describes
+ how much of the buffer is readable. For a reader of the buffer, this is
+ an explicit permission to read up to that amount, rather than a restriction to
+ read only up to it.
+ */
+
+ #define __readableTo(extent) __declspec("SAL_readableTo("SPECSTRINGIZE(extent)")")
+
+ /*
+
+ __elem_readableTo(size)
+
+ Annotates a buffer pointer p as being readable to size elements.
+ */
+
+ #define __elem_readableTo(size) __declspec("SAL_readableTo(elementCount("SPECSTRINGIZE(size)"))")
+
+ /*
+ __byte_readableTo(size)
+
+ Annotates a buffer pointer p as being readable to size bytes.
+ */
+ #define __byte_readableTo(size) __declspec("SAL_readableTo(byteCount("SPECSTRINGIZE(size)"))")
+
+ /*
+ __writableTo(extent) p
+
+ Annotates a buffer pointer p. If the buffer can be modified, extent
+ describes how much of the buffer is writable (usually the allocation
+ size). For a writer of the buffer, this is an explicit permission to
+ write up to that amount, rather than a restriction to write only up to it.
+ */
+ #define __writableTo(size) __declspec("SAL_writableTo("SPECSTRINGIZE(size)")")
+
+ /*
+ __elem_writableTo(size)
+
+ Annotates a buffer pointer p as being writable to size elements.
+ */
+ #define __elem_writableTo(size) __declspec("SAL_writableTo(elementCount("SPECSTRINGIZE(size)"))")
+
+ /*
+ __byte_writableTo(size)
+
+ Annotates a buffer pointer p as being writable to size bytes.
+ */
+ #define __byte_writableTo(size) __declspec("SAL_writableTo(byteCount("SPECSTRINGIZE(size)"))")
+
+ /*
+ __deref p
+
+ Annotates a pointer p. The next annotation applies one dereference down
+ in the type. If readableTo(p, size) then the next annotation applies to
+ all elements *(p+i) for which i satisfies the size. If p is a pointer
+ to a struct, the next annotation applies to all fields of the struct.
+ */
+ #define __deref __declspec("SAL_deref")
+
+ /*
+ __pre __next_annotation
+
+ The next annotation applies in the precondition state
+ */
+ #define __pre __declspec("SAL_pre")
+
+ /*
+ __post __next_annotation
+
+ The next annotation applies in the postcondition state
+ */
+ #define __post __declspec("SAL_post")
+
+ /*
+ __precond(<expr>)
+
+ When <expr> is true, the next annotation applies in the precondition state
+ (currently not enabled)
+ */
+ #define __precond(expr) __pre
+
+ /*
+ __postcond(<expr>)
+
+ When <expr> is true, the next annotation applies in the postcondition state
+ (currently not enabled)
+ */
+ #define __postcond(expr) __post
+
+ /*
+ __exceptthat
+
+ Given a set of annotations Q containing __exceptthat maybeP, the effect of
+ the except clause is to erase any P or notP annotations (explicit or
+ implied) within Q at the same level of dereferencing that the except
+ clause appears, and to replace it with maybeP.
+
+ Example 1: __valid __exceptthat __maybenull on a pointer p means that the
+ pointer may be null, and is otherwise valid, thus overriding
+ the implicit notnull annotation implied by __valid on
+ pointers.
+
+ Example 2: __valid __deref __exceptthat __maybenull on an int **p means
+ that p is not null (implied by valid), but the elements
+ pointed to by p could be null, and are otherwise valid.
+ */
+ #define __exceptthat __declspec("SAL_except")
+ #define __execeptthat __exceptthat
+
+ /*
+ _refparam
+
+ Added to all out parameter macros to indicate that they are all reference
+ parameters.
+ */
+ #define __refparam __deref __notreadonly
+
+ /*
+ __inner_*
+
+ Helper macros that directly correspond to certain high-level annotations.
+
+ */
+
+ /*
+ Macros to classify the entrypoints and indicate their category.
+
+ Pre-defined control point categories include: RPC, LPC, DeviceDriver, UserToKernel, ISAPI, COM.
+
+ */
+ #define __inner_control_entrypoint(category) __declspec("SAL_entrypoint(controlEntry, "SPECSTRINGIZE(category)")")
+
+ /*
+ Pre-defined data entry point categories include: Registry, File, Network.
+ */
+ #define __inner_data_entrypoint(category) __declspec("SAL_entrypoint(dataEntry, "SPECSTRINGIZE(category)")")
+
+ #define __inner_success(expr) __declspec("SAL_success("SPECSTRINGIZE(expr)")")
+ #define __inner_checkReturn __declspec("SAL_checkReturn")
+ #define __inner_typefix(ctype) __declspec("SAL_typefix("SPECSTRINGIZE(ctype)")")
+ #define __inner_override __declspec("__override")
+ #define __inner_callback __declspec("__callback")
+ #define __inner_blocksOn(resource) __declspec("SAL_blocksOn("SPECSTRINGIZE(resource)")")
+ #define __inner_fallthrough_dec __inline __nothrow void __FallThrough() {}
+ #define __inner_fallthrough __FallThrough();
+
+#else
+ #define __null
+ #define __notnull
+ #define __maybenull
+ #define __readonly
+ #define __notreadonly
+ #define __maybereadonly
+ #define __valid
+ #define __notvalid
+ #define __maybevalid
+ #define __readableTo(extent)
+ #define __elem_readableTo(size)
+ #define __byte_readableTo(size)
+ #define __writableTo(size)
+ #define __elem_writableTo(size)
+ #define __byte_writableTo(size)
+ #define __deref
+ #define __pre
+ #define __post
+ #define __precond(expr)
+ #define __postcond(expr)
+ #define __exceptthat
+ #define __execeptthat
+ #define __inner_success(expr)
+ #define __inner_checkReturn
+ #define __inner_typefix(ctype)
+ #define __inner_override
+ #define __inner_callback
+ #define __inner_blocksOn(resource)
+ #define __inner_fallthrough_dec
+ #define __inner_fallthrough
+ #define __refparam
+ #define __inner_control_entrypoint(category)
+ #define __inner_data_entrypoint(category)
+#endif /* #if !defined(__midl) && defined(_PREFAST_) */
+
+/*
+-------------------------------------------------------------------------------
+Buffer Annotation Definitions
+
+Any of these may be used to directly annotate functions, but only one should
+be used for each parameter. To determine which annotation to use for a given
+buffer, use the table in the buffer annotations section.
+-------------------------------------------------------------------------------
+*/
+
+#define __ecount(size) __notnull __elem_writableTo(size)
+#define __bcount(size) __notnull __byte_writableTo(size)
+#define __in __pre __valid __pre __deref __readonly
+#define __in_ecount(size) __in __pre __elem_readableTo(size)
+#define __in_bcount(size) __in __pre __byte_readableTo(size)
+#define __in_z __in __pre __nullterminated
+#define __in_ecount_z(size) __in_ecount(size) __pre __nullterminated
+#define __in_bcount_z(size) __in_bcount(size) __pre __nullterminated
+#define __in_nz __in
+#define __in_ecount_nz(size) __in_ecount(size)
+#define __in_bcount_nz(size) __in_bcount(size)
+#define __out __ecount(1) __post __valid __refparam
+#define __out_ecount(size) __ecount(size) __post __valid __refparam
+#define __out_bcount(size) __bcount(size) __post __valid __refparam
+#define __out_ecount_part(size,length) __out_ecount(size) __post __elem_readableTo(length)
+#define __out_bcount_part(size,length) __out_bcount(size) __post __byte_readableTo(length)
+#define __out_ecount_full(size) __out_ecount_part(size,size)
+#define __out_bcount_full(size) __out_bcount_part(size,size)
+#define __out_z __post __valid __refparam __post __nullterminated
+#define __out_z_opt __post __valid __refparam __post __nullterminated __exceptthat __maybenull
+#define __out_ecount_z(size) __ecount(size) __post __valid __refparam __post __nullterminated
+#define __out_bcount_z(size) __bcount(size) __post __valid __refparam __post __nullterminated
+#define __out_ecount_part_z(size,length) __out_ecount_part(size,length) __post __nullterminated
+#define __out_bcount_part_z(size,length) __out_bcount_part(size,length) __post __nullterminated
+#define __out_ecount_full_z(size) __out_ecount_full(size) __post __nullterminated
+#define __out_bcount_full_z(size) __out_bcount_full(size) __post __nullterminated
+#define __out_nz __post __valid __refparam __post
+#define __out_nz_opt __post __valid __refparam __post __exceptthat __maybenull
+#define __out_ecount_nz(size) __ecount(size) __post __valid __refparam
+#define __out_bcount_nz(size) __bcount(size) __post __valid __refparam
+#define __inout __pre __valid __post __valid __refparam
+#define __inout_ecount(size) __out_ecount(size) __pre __valid
+#define __inout_bcount(size) __out_bcount(size) __pre __valid
+#define __inout_ecount_part(size,length) __out_ecount_part(size,length) __pre __valid __pre __elem_readableTo(length)
+#define __inout_bcount_part(size,length) __out_bcount_part(size,length) __pre __valid __pre __byte_readableTo(length)
+#define __inout_ecount_full(size) __inout_ecount_part(size,size)
+#define __inout_bcount_full(size) __inout_bcount_part(size,size)
+#define __inout_z __inout __pre __nullterminated __post __nullterminated
+#define __inout_ecount_z(size) __inout_ecount(size) __pre __nullterminated __post __nullterminated
+#define __inout_bcount_z(size) __inout_bcount(size) __pre __nullterminated __post __nullterminated
+#define __inout_nz __inout
+#define __inout_ecount_nz(size) __inout_ecount(size)
+#define __inout_bcount_nz(size) __inout_bcount(size)
+#define __ecount_opt(size) __ecount(size) __exceptthat __maybenull
+#define __bcount_opt(size) __bcount(size) __exceptthat __maybenull
+#define __in_opt __in __exceptthat __maybenull
+#define __in_ecount_opt(size) __in_ecount(size) __exceptthat __maybenull
+#define __in_bcount_opt(size) __in_bcount(size) __exceptthat __maybenull
+#define __in_z_opt __in_opt __pre __nullterminated
+#define __in_ecount_z_opt(size) __in_ecount_opt(size) __pre __nullterminated
+#define __in_bcount_z_opt(size) __in_bcount_opt(size) __pre __nullterminated
+#define __in_nz_opt __in_opt
+#define __in_ecount_nz_opt(size) __in_ecount_opt(size)
+#define __in_bcount_nz_opt(size) __in_bcount_opt(size)
+#define __out_opt __out __exceptthat __maybenull
+#define __out_ecount_opt(size) __out_ecount(size) __exceptthat __maybenull
+#define __out_bcount_opt(size) __out_bcount(size) __exceptthat __maybenull
+#define __out_ecount_part_opt(size,length) __out_ecount_part(size,length) __exceptthat __maybenull
+#define __out_bcount_part_opt(size,length) __out_bcount_part(size,length) __exceptthat __maybenull
+#define __out_ecount_full_opt(size) __out_ecount_full(size) __exceptthat __maybenull
+#define __out_bcount_full_opt(size) __out_bcount_full(size) __exceptthat __maybenull
+#define __out_ecount_z_opt(size) __out_ecount_opt(size) __post __nullterminated
+#define __out_bcount_z_opt(size) __out_bcount_opt(size) __post __nullterminated
+#define __out_ecount_part_z_opt(size,length) __out_ecount_part_opt(size,length) __post __nullterminated
+#define __out_bcount_part_z_opt(size,length) __out_bcount_part_opt(size,length) __post __nullterminated
+#define __out_ecount_full_z_opt(size) __out_ecount_full_opt(size) __post __nullterminated
+#define __out_bcount_full_z_opt(size) __out_bcount_full_opt(size) __post __nullterminated
+#define __out_ecount_nz_opt(size) __out_ecount_opt(size)
+#define __out_bcount_nz_opt(size) __out_bcount_opt(size)
+#define __inout_opt __inout __exceptthat __maybenull
+#define __inout_ecount_opt(size) __inout_ecount(size) __exceptthat __maybenull
+#define __inout_bcount_opt(size) __inout_bcount(size) __exceptthat __maybenull
+#define __inout_ecount_part_opt(size,length) __inout_ecount_part(size,length) __exceptthat __maybenull
+#define __inout_bcount_part_opt(size,length) __inout_bcount_part(size,length) __exceptthat __maybenull
+#define __inout_ecount_full_opt(size) __inout_ecount_full(size) __exceptthat __maybenull
+#define __inout_bcount_full_opt(size) __inout_bcount_full(size) __exceptthat __maybenull
+#define __inout_z_opt __inout_opt __pre __nullterminated __post __nullterminated
+#define __inout_ecount_z_opt(size) __inout_ecount_opt(size) __pre __nullterminated __post __nullterminated
+#define __inout_ecount_z_opt(size) __inout_ecount_opt(size) __pre __nullterminated __post __nullterminated
+#define __inout_bcount_z_opt(size) __inout_bcount_opt(size) __pre __nullterminated __post __nullterminated
+#define __inout_nz_opt __inout_opt
+#define __inout_ecount_nz_opt(size) __inout_ecount_opt(size)
+#define __inout_bcount_nz_opt(size) __inout_bcount_opt(size)
+#define __deref_ecount(size) __ecount(1) __post __elem_readableTo(1) __post __deref __notnull __post __deref __elem_writableTo(size)
+#define __deref_bcount(size) __ecount(1) __post __elem_readableTo(1) __post __deref __notnull __post __deref __byte_writableTo(size)
+#define __deref_out __deref_ecount(1) __post __deref __valid __refparam
+#define __deref_out_ecount(size) __deref_ecount(size) __post __deref __valid __refparam
+#define __deref_out_bcount(size) __deref_bcount(size) __post __deref __valid __refparam
+#define __deref_out_ecount_part(size,length) __deref_out_ecount(size) __post __deref __elem_readableTo(length)
+#define __deref_out_bcount_part(size,length) __deref_out_bcount(size) __post __deref __byte_readableTo(length)
+#define __deref_out_ecount_full(size) __deref_out_ecount_part(size,size)
+#define __deref_out_bcount_full(size) __deref_out_bcount_part(size,size)
+#define __deref_out_z __post __deref __valid __refparam __post __deref __nullterminated
+#define __deref_out_ecount_z(size) __deref_out_ecount(size) __post __deref __nullterminated
+#define __deref_out_bcount_z(size) __deref_out_bcount(size) __post __deref __nullterminated
+#define __deref_out_nz __deref_out
+#define __deref_out_ecount_nz(size) __deref_out_ecount(size)
+#define __deref_out_bcount_nz(size) __deref_out_bcount(size)
+#define __deref_inout __notnull __elem_readableTo(1) __pre __deref __valid __post __deref __valid __refparam
+#define __deref_inout_z __deref_inout __pre __deref __nullterminated __post __deref __nullterminated
+#define __deref_inout_ecount(size) __deref_inout __pre __deref __elem_writableTo(size) __post __deref __elem_writableTo(size)
+#define __deref_inout_bcount(size) __deref_inout __pre __deref __byte_writableTo(size) __post __deref __byte_writableTo(size)
+#define __deref_inout_ecount_part(size,length) __deref_inout_ecount(size) __pre __deref __elem_readableTo(length) __post __deref __elem_readableTo(length)
+#define __deref_inout_bcount_part(size,length) __deref_inout_bcount(size) __pre __deref __byte_readableTo(length) __post __deref __byte_readableTo(length)
+#define __deref_inout_ecount_full(size) __deref_inout_ecount_part(size,size)
+#define __deref_inout_bcount_full(size) __deref_inout_bcount_part(size,size)
+#define __deref_inout_z __deref_inout __pre __deref __nullterminated __post __deref __nullterminated
+#define __deref_inout_ecount_z(size) __deref_inout_ecount(size) __pre __deref __nullterminated __post __deref __nullterminated
+#define __deref_inout_bcount_z(size) __deref_inout_bcount(size) __pre __deref __nullterminated __post __deref __nullterminated
+#define __deref_inout_nz __deref_inout
+#define __deref_inout_ecount_nz(size) __deref_inout_ecount(size)
+#define __deref_inout_bcount_nz(size) __deref_inout_bcount(size)
+#define __deref_ecount_opt(size) __deref_ecount(size) __post __deref __exceptthat __maybenull
+#define __deref_bcount_opt(size) __deref_bcount(size) __post __deref __exceptthat __maybenull
+#define __deref_out_opt __deref_out __post __deref __exceptthat __maybenull
+#define __deref_out_ecount_opt(size) __deref_out_ecount(size) __post __deref __exceptthat __maybenull
+#define __deref_out_bcount_opt(size) __deref_out_bcount(size) __post __deref __exceptthat __maybenull
+#define __deref_out_ecount_part_opt(size,length) __deref_out_ecount_part(size,length) __post __deref __exceptthat __maybenull
+#define __deref_out_bcount_part_opt(size,length) __deref_out_bcount_part(size,length) __post __deref __exceptthat __maybenull
+#define __deref_out_ecount_full_opt(size) __deref_out_ecount_full(size) __post __deref __exceptthat __maybenull
+#define __deref_out_bcount_full_opt(size) __deref_out_bcount_full(size) __post __deref __exceptthat __maybenull
+#define __deref_out_z_opt __post __deref __valid __refparam __execeptthat __maybenull __post __deref __nullterminated
+// Null-terminated (_z) / not-necessarily-terminated (_nz) variants of the
+// optional output annotations above.
+#define __deref_out_ecount_z_opt(size) __deref_out_ecount_opt(size) __post __deref __nullterminated
+#define __deref_out_bcount_z_opt(size) __deref_out_bcount_opt(size) __post __deref __nullterminated
+#define __deref_out_nz_opt __deref_out_opt
+#define __deref_out_ecount_nz_opt(size) __deref_out_ecount_opt(size)
+#define __deref_out_bcount_nz_opt(size) __deref_out_bcount_opt(size)
+// In/out variants: the pointed-to pointer may be NULL both on entry (__pre)
+// and on exit (__post).
+#define __deref_inout_opt __deref_inout __pre __deref __exceptthat __maybenull __post __deref __exceptthat __maybenull
+#define __deref_inout_ecount_opt(size) __deref_inout_ecount(size) __pre __deref __exceptthat __maybenull __post __deref __exceptthat __maybenull
+#define __deref_inout_bcount_opt(size) __deref_inout_bcount(size) __pre __deref __exceptthat __maybenull __post __deref __exceptthat __maybenull
+#define __deref_inout_ecount_part_opt(size,length) __deref_inout_ecount_part(size,length) __pre __deref __exceptthat __maybenull __post __deref __exceptthat __maybenull
+#define __deref_inout_bcount_part_opt(size,length) __deref_inout_bcount_part(size,length) __pre __deref __exceptthat __maybenull __post __deref __exceptthat __maybenull
+#define __deref_inout_ecount_full_opt(size) __deref_inout_ecount_full(size) __pre __deref __exceptthat __maybenull __post __deref __exceptthat __maybenull
+#define __deref_inout_bcount_full_opt(size) __deref_inout_bcount_full(size) __pre __deref __exceptthat __maybenull __post __deref __exceptthat __maybenull
+#define __deref_inout_z_opt __deref_inout_opt __pre __deref __nullterminated __post __deref __nullterminated
+#define __deref_inout_ecount_z_opt(size) __deref_inout_ecount_opt(size) __pre __deref __nullterminated __post __deref __nullterminated
+#define __deref_inout_bcount_z_opt(size) __deref_inout_bcount_opt(size) __pre __deref __nullterminated __post __deref __nullterminated
+#define __deref_inout_nz_opt __deref_inout_opt
+#define __deref_inout_ecount_nz_opt(size) __deref_inout_ecount_opt(size)
+#define __deref_inout_bcount_nz_opt(size) __deref_inout_bcount_opt(size)
+// __deref_opt_* variants: here the pointer-to-pointer PARAMETER itself may be
+// NULL ("__exceptthat __maybenull" applies to the outer pointer, not the
+// dereferenced one as in the *_opt forms above).
+#define __deref_opt_ecount(size) __deref_ecount(size) __exceptthat __maybenull
+#define __deref_opt_bcount(size) __deref_bcount(size) __exceptthat __maybenull
+#define __deref_opt_out __deref_out __exceptthat __maybenull
+#define __deref_opt_out_z __deref_opt_out __post __deref __nullterminated
+#define __deref_opt_out_ecount(size) __deref_out_ecount(size) __exceptthat __maybenull
+#define __deref_opt_out_bcount(size) __deref_out_bcount(size) __exceptthat __maybenull
+#define __deref_opt_out_ecount_part(size,length) __deref_out_ecount_part(size,length) __exceptthat __maybenull
+#define __deref_opt_out_bcount_part(size,length) __deref_out_bcount_part(size,length) __exceptthat __maybenull
+#define __deref_opt_out_ecount_full(size) __deref_out_ecount_full(size) __exceptthat __maybenull
+#define __deref_opt_out_bcount_full(size) __deref_out_bcount_full(size) __exceptthat __maybenull
+#define __deref_opt_inout __deref_inout __exceptthat __maybenull
+#define __deref_opt_inout_ecount(size) __deref_inout_ecount(size) __exceptthat __maybenull
+#define __deref_opt_inout_bcount(size) __deref_inout_bcount(size) __exceptthat __maybenull
+#define __deref_opt_inout_ecount_part(size,length) __deref_inout_ecount_part(size,length) __exceptthat __maybenull
+#define __deref_opt_inout_bcount_part(size,length) __deref_inout_bcount_part(size,length) __exceptthat __maybenull
+#define __deref_opt_inout_ecount_full(size) __deref_inout_ecount_full(size) __exceptthat __maybenull
+#define __deref_opt_inout_bcount_full(size) __deref_inout_bcount_full(size) __exceptthat __maybenull
+#define __deref_opt_inout_z __deref_opt_inout __pre __deref __nullterminated __post __deref __nullterminated
+#define __deref_opt_inout_ecount_z(size) __deref_opt_inout_ecount(size) __pre __deref __nullterminated __post __deref __nullterminated
+#define __deref_opt_inout_bcount_z(size) __deref_opt_inout_bcount(size) __pre __deref __nullterminated __post __deref __nullterminated
+#define __deref_opt_inout_nz __deref_opt_inout
+#define __deref_opt_inout_ecount_nz(size) __deref_opt_inout_ecount(size)
+#define __deref_opt_inout_bcount_nz(size) __deref_opt_inout_bcount(size)
+// __deref_opt_*_opt variants: BOTH the parameter and the pointer stored
+// through it may be NULL.
+#define __deref_opt_ecount_opt(size) __deref_ecount_opt(size) __exceptthat __maybenull
+#define __deref_opt_bcount_opt(size) __deref_bcount_opt(size) __exceptthat __maybenull
+#define __deref_opt_out_opt __deref_out_opt __exceptthat __maybenull
+#define __deref_opt_out_ecount_opt(size) __deref_out_ecount_opt(size) __exceptthat __maybenull
+#define __deref_opt_out_bcount_opt(size) __deref_out_bcount_opt(size) __exceptthat __maybenull
+#define __deref_opt_out_ecount_part_opt(size,length) __deref_out_ecount_part_opt(size,length) __exceptthat __maybenull
+#define __deref_opt_out_bcount_part_opt(size,length) __deref_out_bcount_part_opt(size,length) __exceptthat __maybenull
+#define __deref_opt_out_ecount_full_opt(size) __deref_out_ecount_full_opt(size) __exceptthat __maybenull
+#define __deref_opt_out_bcount_full_opt(size) __deref_out_bcount_full_opt(size) __exceptthat __maybenull
+#define __deref_opt_out_z_opt __post __deref __valid __refparam __exceptthat __maybenull __pre __deref __exceptthat __maybenull __post __deref __exceptthat __maybenull __post __deref __nullterminated
+#define __deref_opt_out_ecount_z_opt(size) __deref_opt_out_ecount_opt(size) __post __deref __nullterminated
+#define __deref_opt_out_bcount_z_opt(size) __deref_opt_out_bcount_opt(size) __post __deref __nullterminated
+#define __deref_opt_out_nz_opt __deref_opt_out_opt
+#define __deref_opt_out_ecount_nz_opt(size) __deref_opt_out_ecount_opt(size)
+#define __deref_opt_out_bcount_nz_opt(size) __deref_opt_out_bcount_opt(size)
+#define __deref_opt_inout_opt __deref_inout_opt __exceptthat __maybenull
+#define __deref_opt_inout_ecount_opt(size) __deref_inout_ecount_opt(size) __exceptthat __maybenull
+#define __deref_opt_inout_bcount_opt(size) __deref_inout_bcount_opt(size) __exceptthat __maybenull
+#define __deref_opt_inout_ecount_part_opt(size,length) __deref_inout_ecount_part_opt(size,length) __exceptthat __maybenull
+#define __deref_opt_inout_bcount_part_opt(size,length) __deref_inout_bcount_part_opt(size,length) __exceptthat __maybenull
+#define __deref_opt_inout_ecount_full_opt(size) __deref_inout_ecount_full_opt(size) __exceptthat __maybenull
+#define __deref_opt_inout_bcount_full_opt(size) __deref_inout_bcount_full_opt(size) __exceptthat __maybenull
+#define __deref_opt_inout_z_opt __deref_opt_inout_opt __pre __deref __nullterminated __post __deref __nullterminated
+#define __deref_opt_inout_ecount_z_opt(size) __deref_opt_inout_ecount_opt(size) __pre __deref __nullterminated __post __deref __nullterminated
+#define __deref_opt_inout_bcount_z_opt(size) __deref_opt_inout_bcount_opt(size) __pre __deref __nullterminated __post __deref __nullterminated
+#define __deref_opt_inout_nz_opt __deref_opt_inout_opt
+#define __deref_opt_inout_ecount_nz_opt(size) __deref_opt_inout_ecount_opt(size)
+#define __deref_opt_inout_bcount_nz_opt(size) __deref_opt_inout_bcount_opt(size)
+
+/*
+-------------------------------------------------------------------------------
+Advanced Annotation Definitions
+
+Any of these may be used to directly annotate functions, and may be used in
+combination with each other or with regular buffer macros. For an explanation
+of each annotation, see the advanced annotations section.
+-------------------------------------------------------------------------------
+*/
+
+// Advanced annotations: thin public wrappers over the __inner_* primitives
+// (which are no-ops outside _PREFAST_ builds). Note __nullnullterminated and
+// __format_string deliberately expand to nothing here.
+#define __success(expr) __inner_success(expr)
+#define __nullterminated __readableTo(sentinel(0))
+#define __nullnullterminated
+#define __reserved __pre __null
+#define __checkReturn __inner_checkReturn
+#define __typefix(ctype) __inner_typefix(ctype)
+#define __override __inner_override
+#define __callback __inner_callback
+#define __format_string
+#define __blocksOn(resource) __inner_blocksOn(resource)
+#define __control_entrypoint(category) __inner_control_entrypoint(category)
+#define __data_entrypoint(category) __inner_data_entrypoint(category)
+
+// Define __fallthrough only once (other headers may define it first);
+// __inner_fallthrough_dec emits any declaration the analyzer needs.
+#ifndef __fallthrough
+    __inner_fallthrough_dec
+    #define __fallthrough __inner_fallthrough
+#endif
+
+// __analysis_assume(expr): tells the static analyzer to take expr as true.
+// Under _PREFAST_ it maps to the __assume intrinsic; otherwise it compiles
+// away to nothing, so expr is never evaluated at runtime.
+#ifndef __analysis_assume
+#ifdef _PREFAST_
+#define __analysis_assume(expr) __assume(expr)
+#else
+#define __analysis_assume(expr)
+#endif
+#endif
+
+// NOTE(review): closing brace for an extern "C" block presumably opened near
+// the top of this header (outside this chunk) — confirm against the file head.
+#ifdef __cplusplus
+}
+#endif
+
+