From b02890730b6e5873d39a902b41942526e65511f6 Mon Sep 17 00:00:00 2001
From: Christophe Riccio <g.truc.creation@gmail.com>
Date: Mon, 31 Jan 2011 18:14:23 +0000
Subject: [PATCH] Removed some branching

---
 glm/core/func_common.hpp      |  2 +-
 glm/core/func_common.inl      |  2 +-
 glm/core/intrinsic_common.inl | 30 +++++++++++-------------------
 glm/gtx/simd_vec4.inl         | 17 ++++++++++++++++-
 4 files changed, 29 insertions(+), 22 deletions(-)
diff --git a/glm/core/func_common.hpp b/glm/core/func_common.hpp
index df60a378..2a5f0e92 100644
--- a/glm/core/func_common.hpp
+++ b/glm/core/func_common.hpp
@@ -26,7 +26,7 @@ namespace glm
 	template <typename genFIType> 
 	genFIType abs(genFIType const & x);
 
-	//! Returns 1.0 if x > 0, 0.0 if x = 0, or -1.0 if x < 0. 
+	//! Returns 1.0 if x > 0, 0.0 if x == 0, or -1.0 if x < 0. 
 	//! (From GLSL 1.30.08 specification, section 8.3)
 	template <typename genFIType> 
 	genFIType sign(genFIType const & x);
diff --git a/glm/core/func_common.inl b/glm/core/func_common.inl
index 70d82123..53990a6c 100644
--- a/glm/core/func_common.inl
+++ b/glm/core/func_common.inl
@@ -189,7 +189,7 @@ namespace glm
     inline genType trunc(genType const & x)
     {
 		GLM_STATIC_ASSERT(detail::type<genType>::is_float, "'trunc' only accept floating-point inputs");
-        return x < 0 ? -floor(-x) : floor(x);;
+        return x < 0 ? -floor(-x) : floor(x);
     }
 
     template <typename valType>
diff --git a/glm/core/intrinsic_common.inl b/glm/core/intrinsic_common.inl
index ad0d01f0..b11f000d 100644
--- a/glm/core/intrinsic_common.inl
+++ b/glm/core/intrinsic_common.inl
@@ -34,7 +34,7 @@ namespace detail{
 	static const ieee754_QNAN absMask;
 	static const __m128 abs4Mask = _mm_set_ps1(absMask.f);
 
-        //static const __m128 _epi32_sign_mask = _mm_castsi128_ps(_mm_set1_epi32(0x80000000));
+	static const __m128 _epi32_sign_mask = _mm_castsi128_ps(_mm_set1_epi32(0x80000000));
         //static const __m128 _epi32_inv_sign_mask = _mm_castsi128_ps(_mm_set1_epi32(0x7FFFFFFF));
         //static const __m128 _epi32_mant_mask = _mm_castsi128_ps(_mm_set1_epi32(0x7F800000));
         //static const __m128 _epi32_inv_mant_mask = _mm_castsi128_ps(_mm_set1_epi32(0x807FFFFF));
@@ -130,24 +130,16 @@ inline __m128 sse_abs_ps(__m128 x)
 
 inline __m128 sse_sgn_ps(__m128 x)
 {
-	//__m128 cmp0 = _mm_cmpeq_ps(x, zero);
-	//__m128 cmp1 = _mm_cmple_ps(x, zero);
-	//__m128 cmp2 = _mm_cmpge_ps(x, zero);
+	__m128 Neg = _mm_set1_ps(-1.0f);
+	__m128 Pos = _mm_set1_ps(1.0f);
 
-	__m128 result;
-	__m128 cmp0 = _mm_cmpeq_ps(x, glm::detail::zero);
-	if(_mm_movemask_ps(cmp0) == 0)
-		result = glm::detail::zero;
-	else
-	{
-		__m128 cmp1 = _mm_cmpge_ps(x, glm::detail::zero);
-		//__m128 cmp2 = _mm_cmple_ps(x, glm::detail::zero);
-		if(_mm_movemask_ps(cmp1) > 0)
-			result = glm::detail::one;
-		else //if(_mm_movemask_ps(cmp2) > 0)
-			result = glm::detail::minus_one;
-	}
-	return result;
+	__m128 Cmp0 = _mm_cmplt_ps(x, zero);
+	__m128 Cmp1 = _mm_cmpgt_ps(x, zero);
+	// Keep -1.0f / 1.0f only in the lanes whose compare mask is all-ones.
+	__m128 And0 = _mm_and_ps(Cmp0, Neg);
+	__m128 And1 = _mm_and_ps(Cmp1, Pos);
+	// A lane sets at most one mask; zero and NaN lanes set neither and yield 0.0f.
+	return _mm_or_ps(And0, And1);
 }
 
 //floor
@@ -170,7 +162,7 @@ inline __m128 _mm_trc_ps(__m128 v)
 //round
 inline __m128 sse_rnd_ps(__m128 x)
 {
-	__m128 and0;// = _mm_and_ps(glm::detail::_epi32_sign_mask, x);
+	__m128 and0 = _mm_and_ps(glm::detail::_epi32_sign_mask, x);
 	__m128 or0 = _mm_or_ps(and0, glm::detail::_ps_2pow23);
 	__m128 add0 = _mm_add_ps(x, or0);
 	__m128 sub0 = _mm_sub_ps(add0, or0);
diff --git a/glm/gtx/simd_vec4.inl b/glm/gtx/simd_vec4.inl
index bef92cb2..28b44eb0 100644
--- a/glm/gtx/simd_vec4.inl
+++ b/glm/gtx/simd_vec4.inl
@@ -280,6 +280,11 @@ namespace glm
 			return Result;
 		}
 
+		// Other possible implementation
+		//float abs(float a)
+		//{
+		//  return max(-a, a);
+		//}
 		detail::fvec4SIMD abs
 		(
 			detail::fvec4SIMD const & x
@@ -309,7 +314,17 @@ namespace glm
 			detail::fvec4SIMD const & x
 		)
 		{
-			return detail::sse_flr_ps(detail::sse_abs_ps(x.Data));
+			// Per lane: x < 0 ? -floor(-x) : floor(x), i.e. round toward zero without branching.
+			__m128 Flr0 = detail::sse_flr_ps(_mm_sub_ps(_mm_setzero_ps(), x.Data));
+			__m128 Neg0 = _mm_sub_ps(_mm_setzero_ps(), Flr0);
+			__m128 Flr1 = detail::sse_flr_ps(x.Data);
+			// All-ones lane masks: Cmp0 where x < 0, Cmp1 for every other lane (NaN included).
+			__m128 Cmp0 = _mm_cmplt_ps(x.Data, glm::detail::zero);
+			__m128 Cmp1 = _mm_cmpnlt_ps(x.Data, glm::detail::zero);
+			// Negative lanes must take -floor(-x); selecting floor(-x) itself would drop the sign.
+			__m128 And0 = _mm_and_ps(Neg0, Cmp0);
+			__m128 And1 = _mm_and_ps(Flr1, Cmp1);
+			return _mm_or_ps(And0, And1);
 		}
 
 		inline detail::fvec4SIMD round