Added SIMD optimization for geometric functions

2025-04-07 22:40:17 +00:00 · 2016-05-28 20:17:34 +02:00 · 2016-05-28 20:17:34 +02:00 · fb66c79ca4
commit fb66c79ca4
parent 29fa0f1607
2 changed files with 54 additions and 6 deletions
--- a/glm/detail/func_geometric.inl
+++ b/glm/detail/func_geometric.inl
@@ -51,6 +51,31 @@ namespace detail
 			return (tmp.x + tmp.y) + (tmp.z + tmp.w);
 		}
 	};
+
+	template <typename T, precision P> // Generic 3-component cross product; the public cross() in this patch forwards to call().
+	struct compute_cross
+	{
+		GLM_FUNC_QUALIFIER static tvec3<T, P> call(tvec3<T, P> const & x, tvec3<T, P> const & y) // returns x cross y as a new tvec3<T, P>
+		{
+			GLM_STATIC_ASSERT(std::numeric_limits<T>::is_iec559, "'cross' accepts only floating-point inputs"); // rejects T that is not an IEC 559 (IEEE 754) type
+
+			return tvec3<T, P>(
+				x.y * y.z - y.y * x.z, // x component
+				x.z * y.x - y.z * x.x, // y component
+				x.x * y.y - y.x * x.y); // z component
+		}
+	};
+
+	template <typename T, precision P, template <typename, precision> class vecType> // Generic normalize for any vecType<T, P>; specializations may replace it.
+	struct compute_normalize
+	{
+		GLM_FUNC_QUALIFIER static vecType<T, P> call(vecType<T, P> const & v) // returns v scaled by inversesqrt(dot(v, v))
+		{
+			GLM_STATIC_ASSERT(std::numeric_limits<T>::is_iec559, "'normalize' accepts only floating-point inputs"); // rejects T that is not an IEC 559 (IEEE 754) type
+
+			return v * inversesqrt(dot(v, v)); // dot(v, v) is the squared length; a zero-length v is not special-cased here
+		}
+	};
 }//namespace detail

 	// length
@@ -104,12 +129,7 @@ namespace detail
 	template <typename T, precision P>
 	GLM_FUNC_QUALIFIER tvec3<T, P> cross(tvec3<T, P> const & x, tvec3<T, P> const & y)
 	{
-		GLM_STATIC_ASSERT(std::numeric_limits<T>::is_iec559, "'cross' accepts only floating-point inputs");
-
-		return tvec3<T, P>(
-			x.y * y.z - y.y * x.z,
-			x.z * y.x - y.z * x.x,
-			x.x * y.y - y.x * x.y);
+		return detail::compute_cross<T, P>::call(x, y); // forward to detail::compute_cross so a specialization (e.g. the float one in func_geometric_simd.inl) can be selected
 	}

 	// normalize
--- a/glm/detail/func_geometric_simd.inl
+++ b/glm/detail/func_geometric_simd.inl
@@ -14,6 +14,34 @@ namespace detail
 			return _mm_cvtss_f32(dot0);
 		}
 	};
+
+	template <precision P> // Partial specialization of compute_cross for float, built on SSE __m128 values.
+	struct compute_cross<float, P>
+	{
+		GLM_FUNC_QUALIFIER static tvec3<float, P> call(tvec3<float, P> const & a, tvec3<float, P> const & b) // returns a cross b
+		{
+			__m128 const set0 = _mm_set_ps(0.0f, a.z, a.y, a.x); // _mm_set_ps lists lanes high-to-low, so lanes 0..3 hold (a.x, a.y, a.z, 0)
+			__m128 const set1 = _mm_set_ps(0.0f, b.z, b.y, b.x); // same packing for b
+			__m128 const xpd0 = glm_f32v4_xpd(set0, set1); // presumably the 4-lane cross-product helper; defined outside this diff, confirm
+
+			tvec4<float, P> result(uninitialize); // presumably skips member initialization; data is overwritten on the next line
+			result.data = xpd0;
+
+			return tvec3<float, P>(result); // narrow the tvec4 to tvec3; presumably keeps x, y, z and drops w, confirm against the tvec3 constructor
+		}
+	};
+
+	template <precision P> // Partial specialization of compute_normalize for tvec4<float, P>, operating on its __m128 data member.
+	struct compute_normalize<float, P, tvec4>
+	{
+		GLM_FUNC_QUALIFIER static tvec4<float, P> call(tvec4<float, P> const & v) // returns a tvec4 whose data is glm_f32v4_nrm(v.data)
+		{
+			__m128 const nrm0 = glm_f32v4_nrm(v.data); // presumably the SIMD normalize helper; defined outside this diff, confirm
+			tvec4<float, P> result(uninitialize); // presumably skips member initialization; data is overwritten on the next line
+			result.data = nrm0;
+			return result;
+		}
+	};
 }//namespace detail
 }//namespace glm