Updated fast versions

This commit is contained in:
Christophe Riccio 2011-02-08 14:16:54 +00:00
parent a53acffaf4
commit f2a8d5a976
2 changed files with 30 additions and 8 deletions

View file

@ -379,6 +379,12 @@ namespace glm
detail::fvec4SIMD simdNormalize(
detail::fvec4SIMD const & x);
//! Returns a vector in the same direction as x but with length of 1.
//! Less accurate but much faster than simdNormalize.
//! (From GLM_GTX_simd_vec4 extension, geometry functions)
detail::fvec4SIMD simdFastNormalize(
detail::fvec4SIMD const & x);
//! If dot(Nref, I) < 0.0, return N, otherwise, return -N.
//! (From GLM_GTX_simd_vec4 extension, geometry functions)
detail::fvec4SIMD simdFaceforward(
@ -407,12 +413,14 @@ namespace glm
detail::fvec4SIMD simdSqrt(
detail::fvec4SIMD const & x);
//! Returns the positive square root of x with the nicest quality but very slow
//! Returns the positive square root of x with the nicest quality but very slow.
//! Slightly more accurate but much slower than simdSqrt.
//! (From GLM_GTX_simd_vec4 extension, exponential function)
detail::fvec4SIMD simdNiceSqrt(
detail::fvec4SIMD const & x);
//! Returns the positive square root of x but less accurate than simdSqrt but much faster.
//! Returns the positive square root of x.
//! Less accurate but much faster than simdSqrt.
//! (From GLM_GTX_simd_vec4 extension, exponential function)
detail::fvec4SIMD simdFastSqrt(
detail::fvec4SIMD const & x);
@ -422,8 +430,8 @@ namespace glm
detail::fvec4SIMD simdInversesqrt(
detail::fvec4SIMD const & x);
//! Returns the reciprocal of the positive square root of x,
//! faster than simdInversesqrt but less accurate.
//! Returns the reciprocal of the positive square root of x.
//! Faster than simdInversesqrt but less accurate.
//! (From GLM_GTX_simd_vec4 extension, exponential function)
detail::fvec4SIMD simdFastInversesqrt(
detail::fvec4SIMD const & x);

View file

@ -585,7 +585,7 @@ namespace glm
detail::fvec4SIMD const & y
)
{
return detail::sse_dot_ss(x.Data, y.Data);
return detail::sse_dot_ps(x.Data, y.Data);
}
inline detail::fvec4SIMD simdCross
@ -602,7 +602,21 @@ namespace glm
detail::fvec4SIMD const & x
)
{
return detail::sse_nrm_ps(x.Data);
__m128 dot0 = detail::sse_dot_ps(x.Data, x.Data);
__m128 isr0 = simdInversesqrt(dot0).Data;
__m128 mul0 = _mm_mul_ps(x.Data, isr0);
return mul0;
}
//! Returns a vector in the same direction as x but with length of 1.
//! Uses simdFastInversesqrt, so it trades accuracy for speed compared
//! to simdNormalize.
inline detail::fvec4SIMD simdFastNormalize
(
	detail::fvec4SIMD const & x
)
{
	// Dot product of x with itself, i.e. its squared length.
	// NOTE(review): presumably sse_dot_ps replicates the result in every lane - confirm.
	__m128 lengthSquared = detail::sse_dot_ps(x.Data, x.Data);

	// Approximate reciprocal of the length.
	__m128 inverseLength = simdFastInversesqrt(lengthSquared).Data;

	// Scale each component of x by the reciprocal length.
	return _mm_mul_ps(x.Data, inverseLength);
}
inline detail::fvec4SIMD simdFaceforward
@ -636,7 +650,7 @@ namespace glm
inline detail::fvec4SIMD simdSqrt(detail::fvec4SIMD const & x)
{
return _mm_mul_ps(simdInversesqrt(x.Data), x.Data);
return _mm_mul_ps(simdInversesqrt(x.Data).Data, x.Data);
}
inline detail::fvec4SIMD simdNiceSqrt(detail::fvec4SIMD const & x)
@ -646,7 +660,7 @@ namespace glm
inline detail::fvec4SIMD simdFastSqrt(detail::fvec4SIMD const & x)
{
return _mm_mul_ps(simdFastInversesqrt(x.Data), x.Data);
return _mm_mul_ps(simdFastInversesqrt(x.Data).Data, x.Data);
}
// SSE scalar reciprocal sqrt using rsqrt op, plus one Newton-Raphson iteration