diff --git a/glm/gtx/simd_vec4.hpp b/glm/gtx/simd_vec4.hpp index aaf4874d..5c010bda 100644 --- a/glm/gtx/simd_vec4.hpp +++ b/glm/gtx/simd_vec4.hpp @@ -379,6 +379,12 @@ namespace glm detail::fvec4SIMD simdNormalize( detail::fvec4SIMD const & x); + //! Returns a vector in the same direction as x but with length of 1. + //! Less accurate but much faster than simdNormalize. + //! (From GLM_GTX_simd_vec4 extension, geometry functions) + detail::fvec4SIMD simdFastNormalize( + detail::fvec4SIMD const & x); + //! If dot(Nref, I) < 0.0, return N, otherwise, return -N. //! (From GLM_GTX_simd_vec4 extension, geometry functions) detail::fvec4SIMD simdFaceforward( @@ -407,12 +413,14 @@ namespace glm detail::fvec4SIMD simdSqrt( detail::fvec4SIMD const & x); - //! Returns the positive square root of x with the nicest quality but very slow + //! Returns the positive square root of x with the nicest quality but very slow. + //! Slightly more accurate but much slower than simdSqrt. //! (From GLM_GTX_simd_vec4 extension, exponential function) detail::fvec4SIMD simdNiceSqrt( detail::fvec4SIMD const & x); - //! Returns the positive square root of x but less accurate than simdSqrt but much faster. + //! Returns the positive square root of x. + //! Less accurate but much faster than simdSqrt. //! (From GLM_GTX_simd_vec4 extension, exponential function) detail::fvec4SIMD simdFastSqrt( detail::fvec4SIMD const & x); @@ -422,8 +430,8 @@ namespace glm detail::fvec4SIMD simdInversesqrt( detail::fvec4SIMD const & x); - //! Returns the reciprocal of the positive square root of x, - //! faster than simdInversesqrt but less accurate. + //! Returns the reciprocal of the positive square root of x. + //! Faster than simdInversesqrt but less accurate. //! 
(From GLM_GTX_simd_vec4 extension, exponential function) detail::fvec4SIMD simdFastInversesqrt( detail::fvec4SIMD const & x); diff --git a/glm/gtx/simd_vec4.inl b/glm/gtx/simd_vec4.inl index 67613d8e..646b555c 100644 --- a/glm/gtx/simd_vec4.inl +++ b/glm/gtx/simd_vec4.inl @@ -585,7 +585,7 @@ namespace glm detail::fvec4SIMD const & y ) { - return detail::sse_dot_ss(x.Data, y.Data); + return detail::sse_dot_ps(x.Data, y.Data); } inline detail::fvec4SIMD simdCross @@ -602,7 +602,21 @@ namespace glm detail::fvec4SIMD const & x ) { - return detail::sse_nrm_ps(x.Data); + __m128 dot0 = detail::sse_dot_ps(x.Data, x.Data); + __m128 isr0 = simdInversesqrt(dot0).Data; + __m128 mul0 = _mm_mul_ps(x.Data, isr0); + return mul0; + } + + inline detail::fvec4SIMD simdFastNormalize + ( + detail::fvec4SIMD const & x + ) + { + __m128 dot0 = detail::sse_dot_ps(x.Data, x.Data); + __m128 isr0 = simdFastInversesqrt(dot0).Data; + __m128 mul0 = _mm_mul_ps(x.Data, isr0); + return mul0; } inline detail::fvec4SIMD simdFaceforward @@ -636,7 +650,7 @@ namespace glm inline detail::fvec4SIMD simdSqrt(detail::fvec4SIMD const & x) { - return _mm_mul_ps(simdInversesqrt(x.Data), x.Data); + return _mm_mul_ps(simdInversesqrt(x.Data).Data, x.Data); } inline detail::fvec4SIMD simdNiceSqrt(detail::fvec4SIMD const & x) @@ -646,7 +660,7 @@ namespace glm inline detail::fvec4SIMD simdFastSqrt(detail::fvec4SIMD const & x) { - return _mm_mul_ps(simdFastInversesqrt(x.Data), x.Data); + return _mm_mul_ps(simdFastInversesqrt(x.Data).Data, x.Data); } // SSE scalar reciprocal sqrt using rsqrt op, plus one Newton-Rhaphson iteration