Added specialized SSE2 and AVX bool mix

2025-04-07 22:40:17 +00:00 · 2016-05-26 02:47:43 +02:00 · 2016-05-26 02:47:43 +02:00 · 4797ea9540
commit 4797ea9540
parent 740e6d6e56
6 changed files with 40 additions and 9 deletions
--- a/glm/detail/func_common_simd.inl
+++ b/glm/detail/func_common_simd.inl
@ -1,6 +1,33 @@
+/// @ref core
+/// @file glm/detail/func_common_simd.inl
+
+#if GLM_ARCH & GLM_ARCH_SSE2
+
+#include "../simd/common.h"
+
+#include <immintrin.h>
+
 namespace glm{
 namespace detail
 {
+	template <precision P> // P: precision qualifier shared by both operands, the selector and the result
+	struct compute_mix_vector<float, bool, P, tvec4> // partial specialization: float tvec4 operands selected by a bool tvec4, using SSE intrinsics
+	{
+		GLM_FUNC_QUALIFIER static tvec4<float, P> call(tvec4<float, P> const & x, tvec4<float, P> const & y, tvec4<bool, P> const & a) // returns, per component, y where a is true and x where a is false
+		{
+			__m128i const Load = _mm_set_epi32(-(int)a.w, -(int)a.z, -(int)a.y, -(int)a.x); // bool -> 0/1, negated -> 0 or -1 (all bits set); _mm_set_epi32 takes the highest element first, hence the w..x order
+			__m128 const Mask = _mm_castsi128_ps(Load); // same bits reinterpreted as four floats so the _ps bitwise operations can consume them
+			// Each mask lane is either all ones (take y) or all zeros (take x).
+			tvec4<float, P> Result(uninitialize); // NOTE(review): presumably skips component initialization since .data is overwritten below - confirm against the tvec4 constructor
+#			if 0 && GLM_ARCH & GLM_ARCH_AVX // '&' binds tighter than '&&', so this is 0 && (...): the blendv branch is never compiled
+				Result.data = _mm_blendv_ps(x.data, y.data, Mask); // disabled alternative: single-instruction blend driven by the mask
+#			else
+				Result.data = _mm_or_ps(_mm_and_ps(Mask, y.data), _mm_andnot_ps(Mask, x.data)); // (Mask & y) | (~Mask & x)
+#			endif
+			return Result;
+		}
+	};
+
 /*
 	static const __m128 GLM_VAR_USED zero = _mm_setzero_ps();
 	static const __m128 GLM_VAR_USED one = _mm_set_ps1(1.0f);
@ -107,3 +134,5 @@ namespace detail

 }//namespace detail
 }//namespace glm
+
+#endif//GLM_ARCH & GLM_ARCH_SSE2
--- a/glm/detail/type_vec4.hpp
+++ b/glm/detail/type_vec4.hpp
@ -17,6 +17,12 @@
 namespace glm{
 namespace detail
 {
+	template <int Value> // Value: the integer to expose as a compile-time constant
+	struct shuffle_mask // wraps an int template argument so it can be read back as shuffle_mask<N>::value
+	{
+		enum{value = Value}; // unnamed enum: a constant with no storage in the struct
+	};
+
 	template <typename T>
 	struct simd_data
 	{
--- a/glm/detail/type_vec4_simd.inl
+++ b/glm/detail/type_vec4_simd.inl
@ -1,5 +1,5 @@
 /// @ref core
-/// @file glm/detail/type_tvec4_sse2.inl
+/// @file glm/detail/type_vec4_simd.inl

 #if GLM_ARCH & GLM_ARCH_SSE2

--- a/glm/gtx/simd_vec4.inl
+++ b/glm/gtx/simd_vec4.inl
@ -4,12 +4,6 @@
 namespace glm{
 namespace detail{

-template <int Value>
-struct shuffle_mask
-{
-	enum{value = Value};
-};
-
 //////////////////////////////////////
 // Implicit basic constructors

--- a/glm/simd/common.h
+++ b/glm/simd/common.h
@ -1,6 +1,8 @@
 /// @ref simd
 /// @file glm/simd/common.h

+#pragma once
+
 #if(GLM_COMPILER & GLM_COMPILER_VC)
 #pragma warning(push)
 #pragma warning(disable : 4510 4512 4610)
--- a/test/core/core_func_common.cpp
+++ b/test/core/core_func_common.cpp
@ -444,7 +444,7 @@ namespace mix_

 	entry<glm::vec4, glm::bvec4> TestBVec4[] = 
 	{
-		{glm::vec4(0.0f), glm::vec4(1.0f), glm::bvec4(false), glm::vec4(0.0f)},
+		{glm::vec4(0.0f, 0.0f, 1.0f, 1.0f), glm::vec4(2.0f, 2.0f, 3.0f, 3.0f), glm::bvec4(false, true, false, true), glm::vec4(0.0f, 2.0f, 1.0f, 3.0f)},
 		{glm::vec4(0.0f), glm::vec4(1.0f), glm::bvec4(true), glm::vec4(1.0f)},
 		{glm::vec4(-1.0f), glm::vec4(1.0f), glm::bvec4(false), glm::vec4(-1.0f)},
 		{glm::vec4(-1.0f), glm::vec4(1.0f), glm::bvec4(true), glm::vec4(1.0f)},
@ -1243,10 +1243,10 @@ int main()
 	Error += modf_::test();
 	Error += floatBitsToInt::test();
 	Error += floatBitsToUint::test();
+	Error += mix_::test();
 	Error += step_::test();
 	Error += max_::test();
 	Error += min_::test();
-	Error += mix_::test();
 	Error += round_::test();
 	Error += roundEven::test();
 	Error += isnan_::test();