diff --git a/glm/detail/func_geometric_simd.inl b/glm/detail/func_geometric_simd.inl index 56962601..93d93a33 100644 --- a/glm/detail/func_geometric_simd.inl +++ b/glm/detail/func_geometric_simd.inl @@ -9,10 +9,7 @@ namespace detail GLM_FUNC_QUALIFIER static float call(tvec4<float, P> const& x, tvec4<float, P> const& y) { __m128 const dot0 = glm_dot_ss(x.data, y.data); - - float Result = 0; - _mm_store_ss(&Result, dot0); - return Result; + return _mm_cvtss_f32(dot0); } }; }//namespace detail diff --git a/glm/simd/common.h b/glm/simd/common.h index 33959b64..6fa5d720 100644 --- a/glm/simd/common.h +++ b/glm/simd/common.h @@ -3,26 +3,6 @@ #pragma once -#if(GLM_COMPILER & GLM_COMPILER_VC) -#pragma warning(push) -#pragma warning(disable : 4510 4512 4610) -#endif - - union ieee754_QNAN - { - const float f; - struct i - { - const unsigned int mantissa:23, exp:8, sign:1; - }; - - ieee754_QNAN() : f(0.0)/*, mantissa(0x7FFFFF), exp(0xFF), sign(0x0)*/ {} - }; - -#if(GLM_COMPILER & GLM_COMPILER_VC) -#pragma warning(pop) -#endif - static const __m128 GLM_VAR_USED glm_zero = _mm_setzero_ps(); static const __m128 GLM_VAR_USED glm_one = _mm_set_ps1(1.0f); static const __m128 GLM_VAR_USED glm_half = _mm_set_ps1(0.5f); @@ -30,15 +10,13 @@ static const __m128 GLM_VAR_USED glm_minus_one = _mm_set_ps1(-1.0f); static const __m128 GLM_VAR_USED glm_two = _mm_set_ps1(2.0f); static const __m128 GLM_VAR_USED glm_three = _mm_set_ps1(3.0f); -static const ieee754_QNAN glm_abs_mask; -static const __m128 GLM_VAR_USED glm_abs4_mask = _mm_set_ps1(glm_abs_mask.f); static const __m128 GLM_VAR_USED glm_epi32_sign_mask = _mm_castsi128_ps(_mm_set1_epi32(static_cast<int>(0x80000000))); static const __m128 GLM_VAR_USED glm_ps_2pow23 = _mm_set_ps1(8388608.0f); static const __m128 GLM_VAR_USED glm_ps_1 = _mm_set_ps1(1.0f); GLM_FUNC_QUALIFIER __m128 glm_abs_ps(__m128 x) { - return _mm_and_ps(glm_abs4_mask, x); + return _mm_and_ps(x, _mm_castsi128_ps(_mm_set1_epi32(0x7FFFFFFF))); } //sign diff --git a/glm/simd/geometric.h 
b/glm/simd/geometric.h index 1c839d93..4143682d 100644 --- a/glm/simd/geometric.h +++ b/glm/simd/geometric.h @@ -9,6 +9,11 @@ GLM_FUNC_QUALIFIER __m128 glm_dot_ps(__m128 v1, __m128 v2) { # if GLM_ARCH & GLM_ARCH_AVX return _mm_dp_ps(v1, v2, 0xff); +# elif GLM_ARCH & GLM_ARCH_SSE3 + __m128 const Mul0 = _mm_mul_ps(v1, v2); + __m128 const Hadd0 = _mm_hadd_ps(Mul0, Mul0); + __m128 const Hadd1 = _mm_hadd_ps(Hadd0, Hadd0); + return Hadd1; # else __m128 const mul0 = _mm_mul_ps(v1, v2); __m128 const swp0 = _mm_shuffle_ps(mul0, mul0, _MM_SHUFFLE(2, 3, 0, 1)); @@ -21,12 +26,21 @@ GLM_FUNC_QUALIFIER __m128 glm_dot_ps(__m128 v1, __m128 v2) GLM_FUNC_QUALIFIER __m128 glm_dot_ss(__m128 v1, __m128 v2) { - __m128 const mul0 = _mm_mul_ps(v1, v2); - __m128 const mov0 = _mm_movehl_ps(mul0, mul0); - __m128 const add0 = _mm_add_ps(mov0, mul0); - __m128 const swp1 = _mm_shuffle_ps(add0, add0, 1); - __m128 const add1 = _mm_add_ss(add0, swp1); - return add1; +# if GLM_ARCH & GLM_ARCH_AVX + return _mm_dp_ps(v1, v2, 0xff); +# elif GLM_ARCH & GLM_ARCH_SSE3 + __m128 const Mul0 = _mm_mul_ps(v1, v2); + __m128 const Hadd0 = _mm_hadd_ps(Mul0, Mul0); + __m128 const Hadd1 = _mm_hadd_ps(Hadd0, Hadd0); + return Hadd1; +# else + __m128 const mul0 = _mm_mul_ps(v1, v2); + __m128 const mov0 = _mm_movehl_ps(mul0, mul0); + __m128 const add0 = _mm_add_ps(mov0, mul0); + __m128 const swp1 = _mm_shuffle_ps(add0, add0, 1); + __m128 const add1 = _mm_add_ss(add0, swp1); + return add1; +# endif } GLM_FUNC_QUALIFIER __m128 glm_len_ps(__m128 x)