diff --git a/glm/core/dummy.cpp b/glm/core/dummy.cpp index 9e5e4a10..bc041620 100644 --- a/glm/core/dummy.cpp +++ b/glm/core/dummy.cpp @@ -32,13 +32,11 @@ #define GLM_MESSAGES #include "../glm.hpp" -/* - -*/ - int main() { - //auto d = 90.0_deg; + glm::mat4 A(1.0f); + glm::vec4 B(1.0f); + glm::vec4 C = A * B; - //glm::vec3 v{0, 1, 2}; + return 0; } diff --git a/glm/core/type_mat4x4.inl b/glm/core/type_mat4x4.inl index 8d350dfd..74865095 100644 --- a/glm/core/type_mat4x4.inl +++ b/glm/core/type_mat4x4.inl @@ -708,11 +708,44 @@ namespace detail typename tmat4x4::row_type const & v ) { + __m128 v0 = _mm_shuffle_ps(v.data, v.data, _MM_SHUFFLE(0, 0, 0, 0)); + __m128 v1 = _mm_shuffle_ps(v.data, v.data, _MM_SHUFFLE(1, 1, 1, 1)); + __m128 v2 = _mm_shuffle_ps(v.data, v.data, _MM_SHUFFLE(2, 2, 2, 2)); + __m128 v3 = _mm_shuffle_ps(v.data, v.data, _MM_SHUFFLE(3, 3, 3, 3)); + + __m128 m0 = _mm_mul_ps(m[0].data, v0); + __m128 m1 = _mm_mul_ps(m[1].data, v1); + __m128 a0 = _mm_add_ps(m0, m1); + + __m128 m2 = _mm_mul_ps(m[2].data, v2); + __m128 m3 = _mm_mul_ps(m[3].data, v3); + __m128 a1 = _mm_add_ps(m2, m3); + + __m128 a2 = _mm_add_ps(a0, a1); + + return typename tmat4x4::col_type(a2); +/* + tmat4x4::col_type const Mov0(v[0]); + tmat4x4::col_type const Mov1(v[1]); + tmat4x4::col_type const Mul0 = m[0] * Mov0; + tmat4x4::col_type const Mul1 = m[1] * Mov1; + tmat4x4::col_type const Add0 = Mul0 + Mul1; + tmat4x4::col_type const Mov2(v[2]); + tmat4x4::col_type const Mov3(v[3]); + tmat4x4::col_type const Mul2 = m[2] * Mov2; + tmat4x4::col_type const Mul3 = m[3] * Mov3; + tmat4x4::col_type const Add1 = Mul2 + Mul3; + tmat4x4::col_type const Add2 = Add0 + Add1; + return Add2; +*/ + +/* return typename tmat4x4::col_type( - m[0][0] * v.x + m[1][0] * v.y + m[2][0] * v.z + m[3][0] * v.w, - m[0][1] * v.x + m[1][1] * v.y + m[2][1] * v.z + m[3][1] * v.w, - m[0][2] * v.x + m[1][2] * v.y + m[2][2] * v.z + m[3][2] * v.w, - m[0][3] * v.x + m[1][3] * v.y + m[2][3] * v.z + m[3][3] * v.w); + m[0][0] * 
v[0] + m[1][0] * v[1] + m[2][0] * v[2] + m[3][0] * v[3], + m[0][1] * v[0] + m[1][1] * v[1] + m[2][1] * v[2] + m[3][1] * v[3], + m[0][2] * v[0] + m[1][2] * v[1] + m[2][2] * v[2] + m[3][2] * v[3], + m[0][3] * v[0] + m[1][3] * v[1] + m[2][3] * v[2] + m[3][3] * v[3]); +*/ } template @@ -723,10 +756,10 @@ namespace detail ) { return typename tmat4x4::row_type( - m[0][0] * v.x + m[0][1] * v.y + m[0][2] * v.z + m[0][3] * v.w, - m[1][0] * v.x + m[1][1] * v.y + m[1][2] * v.z + m[1][3] * v.w, - m[2][0] * v.x + m[2][1] * v.y + m[2][2] * v.z + m[2][3] * v.w, - m[3][0] * v.x + m[3][1] * v.y + m[3][2] * v.z + m[3][3] * v.w); + m[0][0] * v[0] + m[0][1] * v[1] + m[0][2] * v[2] + m[0][3] * v[3], + m[1][0] * v[0] + m[1][1] * v[1] + m[1][2] * v[2] + m[1][3] * v[3], + m[2][0] * v[0] + m[2][1] * v[1] + m[2][2] * v[2] + m[2][3] * v[3], + m[3][0] * v[0] + m[3][1] * v[1] + m[3][2] * v[2] + m[3][3] * v[3]); } template diff --git a/glm/core/type_vec4.hpp b/glm/core/type_vec4.hpp index f41a6f5b..38adbb05 100644 --- a/glm/core/type_vec4.hpp +++ b/glm/core/type_vec4.hpp @@ -69,6 +69,7 @@ namespace detail struct {value_type r, g, b, a;}; struct {value_type s, t, p, q;}; struct {value_type x, y, z, w;}; + __m128 data; }; # elif(GLM_COMPONENT == GLM_COMPONENT_CXX98) union {value_type x, r, s;}; @@ -115,6 +116,8 @@ namespace detail value_type const & s1, value_type const & s2, value_type const & s3); + GLM_FUNC_DECL explicit tvec4( + __m128 const & v); ////////////////////////////////////// // Convertion scalar constructors diff --git a/glm/core/type_vec4.inl b/glm/core/type_vec4.inl index f8df571d..5af25212 100644 --- a/glm/core/type_vec4.inl +++ b/glm/core/type_vec4.inl @@ -115,6 +115,14 @@ namespace detail w(s4) {} + template + GLM_FUNC_QUALIFIER tvec4::tvec4 + ( + __m128 const & v + ) : + data(v) + {} + ////////////////////////////////////// // Swizzle constructors