Added SIMD optimization for geometric functions

This commit is contained in:
Christophe Riccio 2016-05-28 20:17:34 +02:00
parent 29fa0f1607
commit fb66c79ca4
2 changed files with 54 additions and 6 deletions

View file

@ -51,6 +51,31 @@ namespace detail
return (tmp.x + tmp.y) + (tmp.z + tmp.w);
}
};
/// Scalar implementation of the 3-component cross product.
/// Kept as a class template so that it can be specialized per element type.
template <typename T, precision P>
struct compute_cross
{
	/// Computes the cross product of x and y one component at a time.
	/// @param x Left-hand operand.
	/// @param y Right-hand operand.
	/// @return The vector (x.y*y.z - y.y*x.z, x.z*y.x - y.z*x.x, x.x*y.y - y.x*x.y).
	GLM_FUNC_QUALIFIER static tvec3<T, P> call(tvec3<T, P> const & x, tvec3<T, P> const & y)
	{
		// Rejects element types that are not IEC 559 (IEEE 754) floating-point at compile time.
		GLM_STATIC_ASSERT(std::numeric_limits<T>::is_iec559, "'cross' accepts only floating-point inputs");

		T const crossX = x.y * y.z - y.y * x.z;
		T const crossY = x.z * y.x - y.z * x.x;
		T const crossZ = x.x * y.y - y.x * x.y;

		return tvec3<T, P>(crossX, crossY, crossZ);
	}
};
/// Generic implementation of vector normalization for any vector template.
/// Kept as a class template so that it can be specialized per element/vector type.
template <typename T, precision P, template <typename, precision> class vecType>
struct compute_normalize
{
	/// Scales v by the inverse square root of its dot product with itself.
	/// @param v Vector to normalize.
	/// @return v multiplied by inversesqrt(dot(v, v)).
	/// No guard is applied here: dot(v, v) is handed to inversesqrt unchanged.
	GLM_FUNC_QUALIFIER static vecType<T, P> call(vecType<T, P> const & v)
	{
		// Rejects element types that are not IEC 559 (IEEE 754) floating-point at compile time.
		GLM_STATIC_ASSERT(std::numeric_limits<T>::is_iec559, "'normalize' accepts only floating-point inputs");

		T const inverseLength = inversesqrt(dot(v, v));
		return v * inverseLength;
	}
};
}//namespace detail
// length
@ -104,12 +129,7 @@ namespace detail
/// Cross product of two 3-component vectors.
///
/// Delegates to detail::compute_cross<T, P> so that the implementation is
/// chosen by template specialization (generic scalar code, or a specialized
/// version when one is visible for T). The floating-point requirement is
/// enforced by the generic detail::compute_cross, so it is not repeated here.
///
/// @param x Left-hand operand.
/// @param y Right-hand operand.
/// @return The cross product of x and y.
template <typename T, precision P>
GLM_FUNC_QUALIFIER tvec3<T, P> cross(tvec3<T, P> const & x, tvec3<T, P> const & y)
{
	// Single exit point: the former inline computation returned first and made
	// this delegation unreachable, bypassing any compute_cross specialization.
	return detail::compute_cross<T, P>::call(x, y);
}
// normalize

View file

@ -14,6 +14,34 @@ namespace detail
return _mm_cvtss_f32(dot0);
}
};
/// Specialization of compute_cross for float elements that works on __m128 values.
template <precision P>
struct compute_cross<float, P>
{
	/// Computes the cross product of a and b through glm_f32v4_xpd.
	/// @param a Left-hand operand.
	/// @param b Right-hand operand.
	/// @return The first three components of the 4-wide result.
	GLM_FUNC_QUALIFIER static tvec3<float, P> call(tvec3<float, P> const & a, tvec3<float, P> const & b)
	{
		// _mm_set_ps takes its arguments from the highest lane to the lowest,
		// so x ends up in lane 0 and the unused fourth lane is set to zero.
		__m128 const lhs = _mm_set_ps(0.0f, a.z, a.y, a.x);
		__m128 const rhs = _mm_set_ps(0.0f, b.z, b.y, b.x);

		// The 4-component vector is only a carrier for the register value,
		// hence it is constructed without initialization and filled right away.
		tvec4<float, P> packed(uninitialize);
		// NOTE(review): glm_f32v4_xpd is defined outside this view; presumably it
		// yields the cross product in the xyz lanes - confirm against its definition.
		packed.data = glm_f32v4_xpd(lhs, rhs);

		return tvec3<float, P>(packed);
	}
};
/// Specialization of compute_normalize for 4-component float vectors that
/// works directly on the vector's __m128 storage.
template <precision P>
struct compute_normalize<float, P, tvec4>
{
	/// Normalizes v through glm_f32v4_nrm.
	/// @param v Vector to normalize; its 'data' member is passed to the helper as is.
	/// @return A vector whose 'data' member holds the helper's result.
	GLM_FUNC_QUALIFIER static tvec4<float, P> call(tvec4<float, P> const & v)
	{
		// Constructed without initialization because 'data' is overwritten immediately.
		tvec4<float, P> normalized(uninitialize);
		// NOTE(review): glm_f32v4_nrm is defined outside this view; presumably it
		// returns the input scaled to unit length - confirm against its definition.
		normalized.data = glm_f32v4_nrm(v.data);
		return normalized;
	}
};
}//namespace detail
}//namespace glm