From b02890730b6e5873d39a902b41942526e65511f6 Mon Sep 17 00:00:00 2001
From: Christophe Riccio
Date: Mon, 31 Jan 2011 18:14:23 +0000
Subject: [PATCH] Removed some branching

---
 glm/core/func_common.hpp      |  2 +-
 glm/core/func_common.inl      |  2 +-
 glm/core/intrinsic_common.inl | 30 +++++++++++-------------------
 glm/gtx/simd_vec4.inl         | 17 ++++++++++++++++-
 4 files changed, 29 insertions(+), 22 deletions(-)

Review note (this free-form area is ignored by `git am`):
  In the last glm/gtx/simd_vec4.inl hunk the branch-free replacement for
  floor(abs(x)) computed Sub0 = Flr0 - x, never used it, and masked the
  negative lanes from Flr0 = floor(-x). Negative inputs therefore came
  back positive (e.g. -1.5 -> +1). Sub0 is now 0 - Flr0, i.e. -floor(-x),
  and And0 selects Sub0, matching the scalar form used in
  glm/core/func_common.inl: x < 0 ? -floor(-x) : floor(x).
  Line counts per hunk are unchanged; the post-image blob id on the
  simd_vec4.inl "index" line predates this correction.

diff --git a/glm/core/func_common.hpp b/glm/core/func_common.hpp
index df60a378..2a5f0e92 100644
--- a/glm/core/func_common.hpp
+++ b/glm/core/func_common.hpp
@@ -26,7 +26,7 @@ namespace glm
 	template <typename genFIType>
 	genFIType abs(genFIType const & x);
 
-	//! Returns 1.0 if x > 0, 0.0 if x = 0, or -1.0 if x < 0.
+	//! Returns 1.0 if x > 0, 0.0 if x == 0, or -1.0 if x < 0.
 	//! (From GLSL 1.30.08 specification, section 8.3)
 	template <typename genFIType>
 	genFIType sign(genFIType const & x);
diff --git a/glm/core/func_common.inl b/glm/core/func_common.inl
index 70d82123..53990a6c 100644
--- a/glm/core/func_common.inl
+++ b/glm/core/func_common.inl
@@ -189,7 +189,7 @@ namespace glm
 	inline genType trunc(genType const & x)
 	{
 		GLM_STATIC_ASSERT(detail::type<genType>::is_float, "'trunc' only accept floating-point inputs");
-		return x < 0 ? -floor(-x) : floor(x);;
+		return x < 0 ? -floor(-x) : floor(x);
 	}
 
 	template <typename genType>
diff --git a/glm/core/intrinsic_common.inl b/glm/core/intrinsic_common.inl
index ad0d01f0..b11f000d 100644
--- a/glm/core/intrinsic_common.inl
+++ b/glm/core/intrinsic_common.inl
@@ -34,7 +34,7 @@ namespace detail{
 	static const ieee754_QNAN absMask;
 	static const __m128 abs4Mask = _mm_set_ps1(absMask.f);
 
-	//static const __m128 _epi32_sign_mask = _mm_castsi128_ps(_mm_set1_epi32(0x80000000));
+	static const __m128 _epi32_sign_mask = _mm_castsi128_ps(_mm_set1_epi32(0x80000000));
 	//static const __m128 _epi32_inv_sign_mask = _mm_castsi128_ps(_mm_set1_epi32(0x7FFFFFFF));
 	//static const __m128 _epi32_mant_mask = _mm_castsi128_ps(_mm_set1_epi32(0x7F800000));
 	//static const __m128 _epi32_inv_mant_mask = _mm_castsi128_ps(_mm_set1_epi32(0x807FFFFF));
@@ -130,24 +130,16 @@ inline __m128 sse_abs_ps(__m128 x)
 
 inline __m128 sse_sgn_ps(__m128 x)
 {
-	//__m128 cmp0 = _mm_cmpeq_ps(x, zero);
-	//__m128 cmp1 = _mm_cmple_ps(x, zero);
-	//__m128 cmp2 = _mm_cmpge_ps(x, zero);
+	__m128 Neg = _mm_set1_ps(-1.0f);
+	__m128 Pos = _mm_set1_ps(1.0f);
 
-	__m128 result;
-	__m128 cmp0 = _mm_cmpeq_ps(x, glm::detail::zero);
-	if(_mm_movemask_ps(cmp0) == 0)
-		result = glm::detail::zero;
-	else
-	{
-		__m128 cmp1 = _mm_cmpge_ps(x, glm::detail::zero);
-		//__m128 cmp2 = _mm_cmple_ps(x, glm::detail::zero);
-		if(_mm_movemask_ps(cmp1) > 0)
-			result = glm::detail::one;
-		else //if(_mm_movemask_ps(cmp2) > 0)
-			result = glm::detail::minus_one;
-	}
-	return result;
+	__m128 Cmp0 = _mm_cmplt_ps(x, zero);
+	__m128 Cmp1 = _mm_cmpgt_ps(x, zero);
+
+	__m128 And0 = _mm_and_ps(Cmp0, Neg);
+	__m128 And1 = _mm_and_ps(Cmp1, Pos);
+
+	return _mm_or_ps(And0, And1);
 }
 
 //floor
@@ -170,7 +162,7 @@ inline __m128 _mm_trc_ps(__m128 v)
 //round
 inline __m128 sse_rnd_ps(__m128 x)
 {
-	__m128 and0;// = _mm_and_ps(glm::detail::_epi32_sign_mask, x);
+	__m128 and0 = _mm_and_ps(glm::detail::_epi32_sign_mask, x);
 	__m128 or0 = _mm_or_ps(and0, glm::detail::_ps_2pow23);
 	__m128 add0 = _mm_add_ps(x, or0);
 	__m128 sub0 = _mm_sub_ps(add0, or0);
diff --git a/glm/gtx/simd_vec4.inl b/glm/gtx/simd_vec4.inl
index bef92cb2..28b44eb0 100644
--- a/glm/gtx/simd_vec4.inl
+++ b/glm/gtx/simd_vec4.inl
@@ -280,6 +280,11 @@ namespace glm
 		return Result;
 	}
 
+	// Other possible implementation
+	//float abs(float a)
+	//{
+	//	return max(-a, a);
+	//}
 	detail::fvec4SIMD abs
 	(
 		detail::fvec4SIMD const & x
@@ -309,7 +314,17 @@ namespace glm
 		detail::fvec4SIMD const & x
 	)
 	{
-		return detail::sse_flr_ps(detail::sse_abs_ps(x.Data));
+		__m128 Flr0 = detail::sse_flr_ps(_mm_sub_ps(_mm_setzero_ps(), x.Data)); // floor(-x)
+		__m128 Sub0 = _mm_sub_ps(_mm_setzero_ps(), Flr0); // -floor(-x): value for lanes where x < 0
+		__m128 Flr1 = detail::sse_flr_ps(x.Data); // floor(x): value for lanes where !(x < 0)
+
+		__m128 Cmp0 = _mm_cmplt_ps(x.Data, glm::detail::zero);
+		__m128 Cmp1 = _mm_cmpnlt_ps(x.Data, glm::detail::zero);
+
+		__m128 And0 = _mm_and_ps(Sub0, Cmp0);
+		__m128 And1 = _mm_and_ps(Flr1, Cmp1);
+
+		return _mm_or_ps(And0, And1);
 	}
 
 	inline detail::fvec4SIMD round