This commit is contained in:
sharkautarch 2025-02-24 12:30:31 +08:00 committed by GitHub
commit 46e4158304
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
18 changed files with 3495 additions and 16 deletions

File diff suppressed because it is too large Load diff

View file

@ -1,5 +1,7 @@
#pragma once
#if GLM_SIMD_CONSTEXPR == 1
#include "simd_constexpr/_vectorize.hpp"
#else
namespace glm{
namespace detail
{
@ -228,3 +230,4 @@ namespace detail
};
}//namespace detail
}//namespace glm
#endif

View file

@ -88,7 +88,77 @@ namespace detail
T data[L];
} type;
};
#if ((defined(__clang__) || defined(__GNUC__)) && (GLM_LANG_CXX20_FLAG & GLM_LANG)) && GLM_SIMD_CONSTEXPR
# if GLM_HAS_ALIGNOF
// Generic aligned storage: a plain array of L elements of T wrapped in a
// struct that is over-aligned to the full size of the array (L * sizeof(T)).
// NOTE(review): alignas needs a power-of-two value, which L * sizeof(T) is not
// when L == 3 — presumably that case is always handled by a more specialized
// storage<3, T, true>; confirm.
template<length_t L, typename T>
struct storage<L, T, true>
{
typedef struct alignas(L * sizeof(T)) type {
T data[L];
} type;
};
# endif
// Alignment used by the unaligned ("false") storage specializations below:
// simply the natural alignment of a single element of type T.
template <typename T>
static constexpr size_t requiredAlignment = alignof(T);
// Unaligned storage for 2 components, expressed as a compiler vector type
// (vector_size attribute) rather than an array.
// VType is uint8_t when T is bool and T otherwise.
// `type` is a 2-lane vector of VType whose alignment is lowered to
// requiredAlignment<T>, i.e. alignof(T).
template<typename T>
struct __attribute__((packed,aligned(requiredAlignment<T>))) storage<2, T, false>
{
using VType = std::conditional_t< std::is_same_v<T, bool>, uint8_t, T>;
typedef VType type __attribute__((aligned( requiredAlignment<T> ), vector_size(2*sizeof(VType))));
};
// Unaligned storage for 1 component: a single-lane compiler vector of VType
// (uint8_t when T is bool, T otherwise) aligned to requiredAlignment<T>.
template<typename T>
struct __attribute__((packed,aligned(requiredAlignment<T>))) storage<1, T, false>
{
using VType = std::conditional_t< std::is_same_v<T, bool>, uint8_t, T>;
typedef VType type __attribute__((aligned( requiredAlignment<T> ),vector_size(sizeof(VType))));
};
// Aligned storage for 2 components: a 2-lane compiler vector of VType
// (uint8_t when T is bool, T otherwise) aligned to its own full size,
// 2 * sizeof(VType).
template<typename T>
struct storage<2, T, true>
{
using VType = std::conditional_t< std::is_same_v<T, bool>, uint8_t, T>;
typedef VType type __attribute__((aligned(2*sizeof(VType)),vector_size(2*sizeof(VType))));
};
// Aligned storage for 1 component: a single-lane compiler vector of VType
// (uint8_t when T is bool, T otherwise) aligned to sizeof(VType).
template<typename T>
struct storage<1, T, true>
{
using VType = std::conditional_t< std::is_same_v<T, bool>, uint8_t, T>;
typedef VType type __attribute__((aligned(sizeof(VType)),vector_size(sizeof(VType))));
};
// Unaligned storage for 3 components. Unlike the 1/2/4-component unaligned
// specializations this one is NOT a compiler vector type: it is a packed
// struct holding a plain T[3], aligned to requiredAlignment<T>.
template <typename T>
struct __attribute__((packed,aligned(requiredAlignment<T>))) storage<3, T, false>
{
typedef struct __attribute__((packed,aligned(requiredAlignment<T>))) type {
T data[3];
} type;
};
// Unaligned storage for 4 components: a 4-lane compiler vector of VType
// (uint8_t when T is bool, T otherwise) whose alignment is lowered to
// requiredAlignment<T>, i.e. alignof(T).
template <typename T>
struct __attribute__((packed,aligned(requiredAlignment<T>))) storage<4, T, false>
{
using VType = std::conditional_t< std::is_same_v<T, bool>, uint8_t, T>;
typedef VType type __attribute__((aligned( requiredAlignment<T> ), vector_size(4*sizeof(VType))));
};
# if (!(GLM_ARCH & GLM_ARCH_SIMD_BIT))
// Aligned storage for 4 components, only compiled when GLM_ARCH has no SIMD
// bit set (see the enclosing #if): a 4-lane compiler vector of VType
// (uint8_t when T is bool, T otherwise) aligned to 4 * sizeof(VType).
template<typename T>
struct storage<4, T, true>
{
using VType = std::conditional_t< std::is_same_v<T, bool>, uint8_t, T>;
typedef VType type __attribute__((aligned(4*sizeof(VType)),vector_size(4*sizeof(VType))));
};
// Aligned storage for 3 components, only compiled when GLM_ARCH has no SIMD
// bit set (see the enclosing #if). It is deliberately declared with FOUR
// lanes (vector_size and alignment are both 4 * sizeof(VType)), so an aligned
// 3-component vector occupies the same space as a 4-component one.
template<typename T>
struct storage<3, T, true>
{
using VType = std::conditional_t< std::is_same_v<T, bool>, uint8_t, T>;
typedef VType type __attribute__((aligned(4*sizeof(VType)),vector_size(4*sizeof(VType))));
};
// Explicit aligned 4-component storage for bool: a 4-lane vector of uint8_t
// aligned to 4 bytes.
// NOTE(review): the partial specialization storage<4, T, true> above already
// maps bool to uint8_t and appears to produce the same type — confirm whether
// this full specialization is still needed.
template<>
struct storage<4, bool, true>
{
typedef uint8_t type __attribute__((aligned(4*sizeof(uint8_t)),vector_size(4*sizeof(uint8_t))));
};
# endif
#else
# if GLM_HAS_ALIGNOF
template<length_t L, typename T>
struct storage<L, T, true>
@ -106,6 +176,7 @@ namespace detail
} type;
};
# endif
#endif
# if GLM_ARCH & GLM_ARCH_SSE2_BIT
template<>

View file

@ -294,7 +294,7 @@
// N2235 Generalized Constant Expressions http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2007/n2235.pdf
// N3652 Extended Constant Expressions http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2013/n3652.html
#if (GLM_ARCH & GLM_ARCH_SIMD_BIT) // Compiler SIMD intrinsics don't support constexpr...
#if (GLM_ARCH & GLM_ARCH_SIMD_BIT) && GLM_SIMD_CONSTEXPR == 0 // Compiler SIMD intrinsics don't support constexpr...
# define GLM_HAS_CONSTEXPR 0
#elif (GLM_COMPILER & GLM_COMPILER_CLANG)
# define GLM_HAS_CONSTEXPR __has_feature(cxx_relaxed_constexpr)
@ -880,7 +880,7 @@ namespace detail
# define GLM_FORCE_ALIGNED_GENTYPES
#endif
#if GLM_HAS_ALIGNOF && (GLM_LANG & GLM_LANG_CXXMS_FLAG) && (defined(GLM_FORCE_ALIGNED_GENTYPES) || (GLM_CONFIG_SIMD == GLM_ENABLE))
#if (GLM_HAS_ALIGNOF && (GLM_LANG & GLM_LANG_CXXMS_FLAG) && (defined(GLM_FORCE_ALIGNED_GENTYPES) || (GLM_CONFIG_SIMD == GLM_ENABLE))) || GLM_SIMD_CONSTEXPR
# define GLM_CONFIG_ALIGNED_GENTYPES GLM_ENABLE
#else
# define GLM_CONFIG_ALIGNED_GENTYPES GLM_DISABLE

View file

@ -0,0 +1,92 @@
#pragma once
namespace glm{
namespace detail
{
/// Applies a unary scalar function to every component of a vector.
///
/// @tparam vec Vector class template being operated on.
/// @tparam L   Number of components.
/// @tparam R   Component type of the result.
/// @tparam T   Component type of the input.
/// @tparam Q   Qualifier shared by input and result.
template<template<length_t L, typename T, qualifier Q> class vec, length_t L, typename R, typename T, qualifier Q>
struct functor1
{
	/// @param Func Scalar function invoked once per component.
	/// @param v    Input vector.
	/// @return A vec<L, R, Q> whose i-th component is Func(v[i]).
	GLM_FUNC_QUALIFIER GLM_CONSTEXPR static vec<L, R, Q> call(R (*Func) (T x), vec<L, T, Q> const& v)
	{
		// ret is constructed from v; every component is then overwritten below.
		vec<L, R, Q> ret{v};
		// The index uses length_t so it matches the type of L (and of the
		// vector's length) even when length_t is configured as an unsigned type.
#pragma GCC unroll(4)
		for (length_t i = 0; i < L; ++i) {
			ret[i] = Func(v[i]);
		}
		return ret;
	}
};
/// Applies a binary scalar function component-wise to two vectors.
///
/// @tparam vec Vector class template being operated on.
/// @tparam L   Number of components.
/// @tparam T   Component type of both inputs and of the result.
/// @tparam Q   Qualifier of both inputs and of the result.
template<template<length_t L, typename T, qualifier Q> class vec, length_t L, typename T, qualifier Q>
struct functor2
{
	/// Function-pointer overload (not marked GLM_CONSTEXPR).
	/// @return A vector whose i-th component is Func(a[i], b[i]).
	GLM_FUNC_QUALIFIER static vec<L, T, Q> call(T (*Func) (T x, T y), vec<L, T, Q> a, vec<L, T, Q> const& b)
	{
		// Starts as a copy of a; every component is overwritten below.
		vec<L, T, Q> ret{a};
		// length_t index: same type as L, so no signed/unsigned comparison.
#pragma GCC unroll(4)
		for (length_t i = 0; i < L; ++i) {
			ret[i] = Func(a[i], b[i]);
		}
		return ret;
	}

	/// Generic-callable overload, usable in constant expressions when
	/// GLM_CONSTEXPR expands to constexpr and Func is itself constexpr-callable.
	/// @return A vector whose i-th component is Func(a[i], b[i]).
	template<typename Fct>
	GLM_FUNC_QUALIFIER GLM_CONSTEXPR static vec<L, T, Q> call(Fct Func, vec<L, T, Q> a, vec<L, T, Q> const& b)
	{
		vec<L, T, Q> ret{a};
#pragma GCC unroll(4)
		for (length_t i = 0; i < L; ++i) {
			ret[i] = Func(a[i], b[i]);
		}
		return ret;
	}
};
/// Applies a binary scalar function to each component of a vector paired with
/// one fixed scalar.
///
/// @tparam vec Vector class template being operated on.
/// @tparam L   Number of components.
/// @tparam T   Component type of the vector, of the scalar and of the result.
/// @tparam Q   Qualifier of the vector and of the result.
template<template<length_t L, typename T, qualifier Q> class vec, length_t L, typename T, qualifier Q>
struct functor2_vec_sca{
	/// Function-pointer overload (not marked GLM_CONSTEXPR).
	/// @return A vector whose i-th component is Func(a[i], b).
	GLM_FUNC_QUALIFIER static vec<L, T, Q> call(T (*Func) (T x, T y), vec<L, T, Q> a, T b)
	{
		// Starts as a copy of a; every component is overwritten below.
		vec<L, T, Q> ret{a};
		// length_t index: same type as L, so no signed/unsigned comparison.
#pragma GCC unroll(4)
		for (length_t i = 0; i < L; ++i) {
			ret[i] = Func(a[i], b);
		}
		return ret;
	}

	/// Generic-callable overload, usable in constant expressions when
	/// GLM_CONSTEXPR expands to constexpr and Func is itself constexpr-callable.
	/// @return A vector whose i-th component is Func(a[i], b).
	template<class Fct>
	GLM_FUNC_QUALIFIER GLM_CONSTEXPR static vec<L, T, Q> call(Fct Func, vec<L, T, Q> a, T b)
	{
		vec<L, T, Q> ret{a};
#pragma GCC unroll(4)
		for (length_t i = 0; i < L; ++i) {
			ret[i] = Func(a[i], b);
		}
		return ret;
	}
};
/// Applies a binary function taking (T, int) and returning int component-wise
/// to a T vector and an int vector.
///
/// Unlike the other functors in this file there is no template-template
/// parameter here: `vec` refers to the vec template visible from this scope.
///
/// @tparam L Number of components.
/// @tparam T Component type of the first operand.
/// @tparam Q Qualifier of both operands and of the result.
template<length_t L, typename T, qualifier Q>
struct functor2_vec_int {
	/// Function-pointer overload (not marked GLM_CONSTEXPR).
	/// @return An int vector whose i-th component is Func(a[i], b[i]).
	GLM_FUNC_QUALIFIER static vec<L, int, Q> call(int (*Func) (T x, int y), vec<L, T, Q> const& a, vec<L, int, Q> b)
	{
		// Starts as a copy of b; every component is overwritten below.
		vec<L, int, Q> ret{b};
		// length_t index: same type as L, so no signed/unsigned comparison.
#pragma GCC unroll(4)
		for (length_t i = 0; i < L; ++i) {
			ret[i] = Func(a[i], b[i]);
		}
		return ret;
	}

	/// Generic-callable overload, usable in constant expressions when
	/// GLM_CONSTEXPR expands to constexpr and Func is itself constexpr-callable.
	/// @return An int vector whose i-th component is Func(a[i], b[i]).
	template<class Fct>
	GLM_FUNC_QUALIFIER GLM_CONSTEXPR static vec<L, int, Q> call(Fct Func, vec<L, T, Q> const& a, vec<L, int, Q> b)
	{
		vec<L, int, Q> ret{b};
#pragma GCC unroll(4)
		for (length_t i = 0; i < L; ++i) {
			ret[i] = Func(a[i], b[i]);
		}
		return ret;
	}
};
}//namespace detail
}//namespace glm

View file

@ -0,0 +1,238 @@
#include <iostream>
#include <type_traits>
namespace glm::detail
{
/// Compile-time predicate: true when I does not exceed L.
consteval bool NotEmpty(length_t I, length_t L)
{
	return !(L < I);
}
// Stateless tag type used as the member type of the Row* placeholder structs below.
struct Empty {};
// Placeholder for the second component's names (y / g / t). Each name is an
// Empty member marked [[no_unique_address]]; ElementCollection<1, ...> inherits
// this so the names exist even though it stores no second component.
struct GLM_TRIVIAL RowTwo {
[[no_unique_address]] Empty y; [[no_unique_address]] Empty g; [[no_unique_address]] Empty t;
};
// Placeholder for the third component's names (z / b / p), as Empty members
// marked [[no_unique_address]]. Inherited by the 1- and 2-component
// ElementCollection specializations.
struct GLM_TRIVIAL RowThree {
[[no_unique_address]] Empty z; [[no_unique_address]] Empty b; [[no_unique_address]] Empty p;
};
// Placeholder for the fourth component's names (w / a / q), as Empty members
// marked [[no_unique_address]]. Inherited by the 1-, 2- and 3-component
// ElementCollection specializations.
struct GLM_TRIVIAL RowFour {
[[no_unique_address]] Empty w; [[no_unique_address]] Empty a; [[no_unique_address]] Empty q;
};
// Primary template, declared only: every usable length (1..4) is provided by
// a partial specialization below.
template <length_t L, typename T, qualifier Q>
struct ElementCollection;
#ifdef __clang__
// Clang-only variant of the 4-component element storage.
// An anonymous union overlays three views of the same bytes: an element array
// (elementArr), the named components x/y/z/w with their r,g,b,a and s,t,p,q
// aliases, and the raw storage type (data).
template <qualifier Q, typename T>
struct GLM_TRIVIAL ElementCollection<4, T, Q> {
// Raw storage type selected by length, element type and alignedness of Q.
using data_t = typename detail::storage<4, T, detail::is_aligned<Q>::value>::type;
// Extra static pseudo-component; getOffset() maps it to -1.
// NOTE(review): presumably a "don't care" lane for swizzles — confirm with the swizzle macros.
static constexpr T X = -1ll;
union
{
VDataArray<4, T, Q> elementArr;
struct {
union { T x, r, s; };
union { T y, g, t; };
union { T z, b, p; };
union { T w, a, q; };
};
data_t data;
};
// Translates a pointer to one of baseObj's component members into that
// component's index: the member's byte offset inside E divided by sizeof(T).
// &baseObj->X yields -1. Any other pointer hits static_assert(false) and
// fails to compile.
template <auto member, auto* baseObj>
static consteval int64_t getOffset() {
using E = ElementCollection<4, T, Q>;
if constexpr (member == &baseObj->X) {
return -1ll;
} else if constexpr (member == &baseObj->x || member == &baseObj->r || member == &baseObj->s) {
return (int64_t)(offsetof(E, x)/sizeof(T));
} else if constexpr (member == &baseObj->y || member == &baseObj->g || member == &baseObj->t) {
return (int64_t)(offsetof(E, y)/sizeof(T));
} else if constexpr (member == &baseObj->z || member == &baseObj->b || member == &baseObj->p) {
return (int64_t)(offsetof(E, z)/sizeof(T));
} else if constexpr (member == &baseObj->w || member == &baseObj->a || member == &baseObj->q) {
return (int64_t)(offsetof(E, w)/sizeof(T));
} else {
static_assert(false);
}
}
// Macro defined elsewhere; expanded here inside the struct body.
GLM_SWIZZLE_GEN_VEC_FROM_VEC4(T, Q)
};
// Clang-only variant of the 3-component element storage.
// Inherits RowFour so that w/a/q exist as Empty placeholders, and overlays an
// element array, the named components x/y/z (plus aliases) and the raw storage.
template <qualifier Q, typename T>
struct GLM_TRIVIAL ElementCollection<3, T, Q> : RowFour {
// Raw storage type selected by length, element type and alignedness of Q.
using data_t = typename detail::storage<3, T, detail::is_aligned<Q>::value>::type;
using RowFour::w;
using RowFour::a;
using RowFour::q;
// Length of elementArr: 4 when Q is exactly `aligned`, otherwise 3.
// NOTE(review): this compares against the single enumerator `aligned`, while
// data_t is chosen via detail::is_aligned<Q>::value — confirm both agree for
// every aligned qualifier.
static constexpr length_t data_len = (Q == aligned) ? 4 : 3;
// Extra static pseudo-component; getOffset() maps it to -1.
static constexpr T X = -1ll;
union
{
VDataArray<data_len, T, Q> elementArr;
struct {
union { T x, r, s; };
union { T y, g, t; };
union { T z, b, p; };
};
data_t data;
};
// Translates a pointer to one of baseObj's component members into that
// component's index (byte offset inside E divided by sizeof(T)); &baseObj->X
// yields -1 and any other pointer fails to compile via static_assert(false).
template <auto member, auto* baseObj>
static consteval int64_t getOffset() {
using E = ElementCollection<3, T, Q>;
if constexpr (member == &baseObj->X) {
return -1ll;
} else if constexpr (member == &baseObj->x || member == &baseObj->r || member == &baseObj->s) {
return (int64_t)(offsetof(E, x)/sizeof(T));
} else if constexpr (member == &baseObj->y || member == &baseObj->g || member == &baseObj->t) {
return (int64_t)(offsetof(E, y)/sizeof(T));
} else if constexpr (member == &baseObj->z || member == &baseObj->b || member == &baseObj->p) {
return (int64_t)(offsetof(E, z)/sizeof(T));
} else {
static_assert(false);
}
}
// Macro defined elsewhere; expanded here inside the struct body.
GLM_SWIZZLE_GEN_VEC_FROM_VEC3(T, Q)
};
// Clang-only variant of the 2-component element storage.
// Inherits RowThree and RowFour so the third and fourth component names exist
// as Empty placeholders, and overlays an element array, the named components
// x/y (plus aliases) and the raw storage.
template <qualifier Q, typename T>
struct GLM_TRIVIAL ElementCollection<2, T, Q> : RowThree, RowFour {
// Raw storage type selected by length, element type and alignedness of Q.
using data_t = typename detail::storage<2, T, detail::is_aligned<Q>::value>::type;
using RowThree::z;
using RowThree::b;
using RowThree::p;
using RowFour::w;
using RowFour::a;
using RowFour::q;
// Extra static pseudo-component; getOffset() maps it to -1.
static constexpr T X = -1ll;
union
{
VDataArray<2, T, Q> elementArr;
struct {
union { T x, r, s; };
union { T y, g, t; };
};
data_t data;
};
// Translates a pointer to one of baseObj's component members into that
// component's index (byte offset inside E divided by sizeof(T)); &baseObj->X
// yields -1 and any other pointer fails to compile via static_assert(false).
template <auto member, auto* baseObj>
static consteval int64_t getOffset() {
using E = ElementCollection<2, T, Q>;
if constexpr (member == &baseObj->X) {
return -1ll;
} else if constexpr (member == &baseObj->x || member == &baseObj->r || member == &baseObj->s) {
return (int64_t)(offsetof(E, x)/sizeof(T));
} else if constexpr (member == &baseObj->y || member == &baseObj->g || member == &baseObj->t) {
return (int64_t)(offsetof(E, y)/sizeof(T));
} else {
static_assert(false);
}
}
// Macro defined elsewhere; expanded here inside the struct body.
GLM_SWIZZLE_GEN_VEC_FROM_VEC2(T, Q)
};
// Clang-only variant of the 1-component element storage.
// Inherits RowTwo, RowThree and RowFour so every higher component name exists
// as an Empty placeholder. This specialization has no getOffset() and expands
// no swizzle macro.
template <qualifier Q, typename T>
struct GLM_TRIVIAL ElementCollection<1, T, Q> : RowTwo, RowThree, RowFour {
// Raw storage type selected by length, element type and alignedness of Q.
using data_t = typename detail::storage<1, T, detail::is_aligned<Q>::value>::type;
using RowTwo::y;
using RowTwo::g;
using RowTwo::t;
using RowThree::z;
using RowThree::b;
using RowThree::p;
using RowFour::w;
using RowFour::a;
using RowFour::q;
// Extra static pseudo-component initialised from -1.
static constexpr T X = -1ll;
union
{
VDataArray<1, T, Q> elementArr;
struct {
union { T x, r, s; };
};
data_t data;
};
};
#else
// Primary template declaration for the non-clang branch.
// NOTE(review): the same declaration already appears before the #ifdef, so
// this one looks redundant — confirm.
template <length_t L, typename T, qualifier Q>
struct ElementCollection;
// Non-clang variant of the 4-component element storage: same union layout as
// the clang variant, but without the X pseudo-component and getOffset().
template <qualifier Q, typename T>
struct GLM_TRIVIAL ElementCollection<4, T, Q> {
// Raw storage type selected by length, element type and alignedness of Q.
using data_t = typename detail::storage<4, T, detail::is_aligned<Q>::value>::type;
// Three overlaid views of the same bytes: element array, named components
// (with r,g,b,a / s,t,p,q aliases) and the raw storage.
union
{
VDataArray<4, T, Q> elementArr;
struct {
union { T x, r, s; };
union { T y, g, t; };
union { T z, b, p; };
union { T w, a, q; };
};
data_t data;
};
// Macro defined elsewhere; expanded here inside the struct body.
GLM_SWIZZLE_GEN_VEC_FROM_VEC4(T, Q)
};
// Non-clang variant of the 3-component element storage. Inherits RowFour so
// w/a/q exist as Empty placeholders.
template <qualifier Q, typename T>
struct GLM_TRIVIAL ElementCollection<3, T, Q> : RowFour {
// Raw storage type selected by length, element type and alignedness of Q.
using data_t = typename detail::storage<3, T, detail::is_aligned<Q>::value>::type;
using RowFour::w;
using RowFour::a;
using RowFour::q;
// Length of elementArr: 4 when Q is exactly `aligned`, otherwise 3.
// NOTE(review): compares against the single enumerator `aligned`, while
// data_t is chosen via detail::is_aligned<Q>::value — confirm both agree for
// every aligned qualifier.
static constexpr length_t data_len = (Q == aligned) ? 4 : 3;
// Three overlaid views of the same bytes: element array, named components
// (with aliases) and the raw storage.
union
{
VDataArray<data_len, T, Q> elementArr;
struct {
union { T x, r, s; };
union { T y, g, t; };
union { T z, b, p; };
};
data_t data;
};
// Macro defined elsewhere; expanded here inside the struct body.
GLM_SWIZZLE_GEN_VEC_FROM_VEC3(T, Q)
};
// Non-clang variant of the 2-component element storage. Inherits RowThree and
// RowFour so the third and fourth component names exist as Empty placeholders.
template <qualifier Q, typename T>
struct GLM_TRIVIAL ElementCollection<2, T, Q> : RowThree, RowFour {
// Raw storage type selected by length, element type and alignedness of Q.
using data_t = typename detail::storage<2, T, detail::is_aligned<Q>::value>::type;
using RowThree::z;
using RowThree::b;
using RowThree::p;
using RowFour::w;
using RowFour::a;
using RowFour::q;
// Three overlaid views of the same bytes: element array, named components
// (with aliases) and the raw storage.
union
{
VDataArray<2, T, Q> elementArr;
struct {
union { T x, r, s; };
union { T y, g, t; };
};
data_t data;
};
// Macro defined elsewhere; expanded here inside the struct body.
GLM_SWIZZLE_GEN_VEC_FROM_VEC2(T, Q)
};
// Non-clang variant of the 1-component element storage. Inherits RowTwo,
// RowThree and RowFour so every higher component name exists as an Empty
// placeholder. No swizzle macro is expanded for this length.
template <qualifier Q, typename T>
struct GLM_TRIVIAL ElementCollection<1, T, Q> : RowTwo, RowThree, RowFour {
// Raw storage type selected by length, element type and alignedness of Q.
using data_t = typename detail::storage<1, T, detail::is_aligned<Q>::value>::type;
using RowTwo::y;
using RowTwo::g;
using RowTwo::t;
using RowThree::z;
using RowThree::b;
using RowThree::p;
using RowFour::w;
using RowFour::a;
using RowFour::q;
// Three overlaid views of the same bytes: element array, the single named
// component (x / r / s) and the raw storage.
union
{
VDataArray<1, T, Q> elementArr;
struct {
union { T x, r, s; };
};
data_t data;
};
};
#endif
}

View file

@ -0,0 +1,252 @@
namespace glm::detail
{
/// Helper routines that produce the raw storage value (`data_t`) of a
/// vec<L, T, Q> from scalars or from other vectors, using the GCC/Clang vector
/// extensions (`__builtin_shufflevector`, `__builtin_convertvector`).
///
/// @tparam L Number of components of the vector being constructed.
/// @tparam T Component type of the vector being constructed.
/// @tparam Q Qualifier of the vector being constructed.
template<length_t L, typename T, qualifier Q>
struct SimdHelpers
{
	/// Exposes the first type of a parameter pack as `FirstTx`.
	template <typename Tx0, typename... Tx>
	struct GetFirstType
	{
		using FirstTx = Tx0;
	};
	/// Compiler vector type for a given length / element type / qualifier,
	/// as provided by detail::GccVExt (defined elsewhere).
	template <length_t Lx, typename Tx, qualifier Qx>
	using GccVec = typename detail::GccVExt<Lx, Tx, Qx>::GccV;
	/// Compiler vector type matching this helper's own L, T and Q.
	using gcc_vec_t = GccVec<L, T, Q>;
	/// Storage type actually held by vec<L, T, Q>.
	using data_t = typename detail::storage<L, T, detail::is_aligned<Q>::value>::type;

	/// Converts a compiler vector `v` into `data_t`.
	///
	/// When both have the same size and the target is not an unaligned
	/// 3-component vector, the value is reinterpreted directly. In every other
	/// case the first min(sizeof(v), sizeof(data_t)) bytes are memcpy'd into a
	/// default-initialized data_t; bytes beyond that count are left untouched.
	static inline auto __attribute__((always_inline)) gcc_vec_to_data(auto v) {
		static constexpr auto size = std::min(sizeof(v), sizeof(data_t));
		static constexpr auto biggerSize = std::max(sizeof(v), sizeof(data_t));
		if constexpr (size == biggerSize) {
			if constexpr (L != 3 || (detail::is_aligned<Q>::value)) {
				return reinterpret_cast<data_t>(v);
			} else {
				data_t d;
				std::memcpy(&d, &v, size);
				return d;
			}
		} else {
			data_t d;
			std::memcpy(&d, &v, size);
			return d;
		}
	}

	/// Builds storage with every lane set to `scalar` converted to T.
	static inline auto __attribute__((always_inline)) simd_ctor_scalar(arithmetic auto scalar) {
		// Vector-extension arithmetic with a scalar operand applies the scalar
		// to every lane; subtracting a zero vector yields a broadcast.
		gcc_vec_t v = ( (T)scalar ) - gcc_vec_t{};
		using Tx = decltype(scalar);
		// NOTE(review): explicit (pseudo-)destructor call on an arithmetic
		// value; its purpose is not evident from this file — confirm intent.
		scalar.Tx::~Tx();
		return gcc_vec_to_data(v);
	}

	/// Loads a 3-component vector into the compiler vector type GccVec<3, Tx, Qx>
	/// (lvalue overload).
	template <typename Tx, qualifier Qx>
	static inline auto __attribute__((always_inline)) fetch_vec3_as_vec4(::glm::vec<3, Tx, Qx> const& v) {
		using OtherVec = GccVec<3, Tx, Qx>;
#ifdef __clang__
		//On clang, simply doing memcpy results in better overall codegen
		//Also, this allows clang to avoid spilling registers to the stack, when this function is run on local lvalues
		//The local lvalues thing only matters for clang, because gcc seems to always emit memory load/stores when going from packed vec3 -> vec4/aligned_vec3 :(
		OtherVec o{};
		std::memcpy(&o, &v, sizeof(v));
		return o;
#else
		// View v as { first element, 2-lane vector holding elements 1 and 2 }.
		typedef Tx v2_packed __attribute__((aligned(alignof(Tx)),vector_size(2*sizeof(Tx))));
		struct __attribute__((packed,aligned(alignof(Tx)))) padded {
			Tx data0;
			v2_packed v2;
		};
		auto const& reinterpreted = reinterpret_cast<padded const&>(v);
		OtherVec initialPart{};
		initialPart[0] = v[0];
		// fetched = (don't-care, don't-care, v[1], v[2])
		OtherVec fetched = __builtin_shufflevector(reinterpreted.v2, reinterpreted.v2, -1, -1, 0, 1);
		// Indices 4..7 address `fetched`, so v[1] and v[2] live at 6 and 7.
		// (The previous indices 5 and 6 selected a don't-care lane and v[1],
		// dropping v[2] from the result.)
		initialPart = __builtin_shufflevector(initialPart, fetched, 0, 6, 7, -1 );
		return initialPart;
#endif
	}

	/// Rvalue overload: writes `v` into a union that overlays it with gcc_vec_t
	/// and returns the gcc_vec_t view.
	/// NOTE(review): the union uses this helper's own gcc_vec_t rather than
	/// GccVec<3, Tx, Qx>, so the returned type differs from the lvalue overload
	/// whenever Tx/Qx differ from T/Q — confirm this is intended.
	template <typename Tx, qualifier Qx>
	static inline auto __attribute__((always_inline)) fetch_vec3_as_vec4(::glm::vec<3, Tx, Qx>&& v) {
		union M {
			gcc_vec_t ourType;
			::glm::vec<3, Tx, Qx> other;
		};
		M m {.ourType{}};
		m.other = v;
		return m.ourType;
	}

	/// Converts a vector of the same length AND same byte size as data_t.
	///
	/// - Identical vector type: returns v.data unchanged.
	/// - Unaligned 3-component target: returns v.data when the element types
	///   match, otherwise widens v through fetch_vec3_as_vec4 and converts
	///   lane-wise to T.
	/// - Otherwise: lane-wise conversion of v.data to gcc_vec_t.
	template <length_t Lx, typename Tx, qualifier Qx> requires (Lx == L)
	static inline auto __attribute__((always_inline)) simd_ctor_same_size_conversions(auto&& v) {
		using OtherVec = GccVec<Lx, Tx, Qx>;
		static_assert(sizeof(v) == sizeof(data_t));
		if constexpr (std::is_same_v<::glm::vec<Lx, Tx, Qx>, ::glm::vec<L,T,Q>>) {
			return v.data;
		} else if constexpr (L == 3 && !BIsAlignedQ<Q>()) {
			if constexpr (std::is_same_v<T, Tx>) {
				return v.data;
			} else {
				using Vec4 = GccVec<4, T, Qx>;
				gcc_vec_t converted = __builtin_convertvector(fetch_vec3_as_vec4(v), Vec4);
				return gcc_vec_to_data(converted);
			}
		} else {
			gcc_vec_t converted = __builtin_convertvector(v.data, gcc_vec_t);
			return gcc_vec_to_data(converted);
		}
	}

	/// Same-length construction from an rvalue vector.
	///
	/// Same byte size: defers to simd_ctor_same_size_conversions. Aligned target
	/// built from an unaligned 3-component source: widens via
	/// fetch_vec3_as_vec4, then converts lanes if the element types differ.
	/// Otherwise: memcpy's the overlapping bytes of v.data into the source's
	/// compiler vector type and converts lane-wise to T.
	template <length_t Lx, typename Tx, qualifier Qx> requires (Lx == L)
	static inline auto __attribute__((always_inline)) simd_ctor(::glm::vec<Lx, Tx, Qx>&& v)
	{
		using OtherVec = GccVec<Lx, Tx, Qx>;
		if constexpr (sizeof(v) == sizeof(data_t)) {
			return simd_ctor_same_size_conversions<Lx, Tx, Qx>(v);
		} else if constexpr (BIsAlignedQ<Q>() && !BIsAlignedQ<Qx>() && Lx == 3) {
			auto o = fetch_vec3_as_vec4<Tx,Qx>(v);
			if constexpr (std::is_same_v<T, Tx>) {
				return gcc_vec_to_data(o);
			} else {
				gcc_vec_t converted = __builtin_convertvector(o, gcc_vec_t);
				return gcc_vec_to_data(converted);
			}
		} else {
			OtherVec o;
			static constexpr auto size = std::min(sizeof(v.data), sizeof(o));
			std::memcpy(&o, &(v.data), size);
			//using o_vec_t = decltype(v);
			//v.o_vec_t::~o_vec_t();
			gcc_vec_t converted = __builtin_convertvector(o, gcc_vec_t);
			return gcc_vec_to_data(converted);
		}
	}

	/// Same-length construction from a const lvalue vector; the body mirrors
	/// the rvalue overload above.
	template <length_t Lx, typename Tx, qualifier Qx> requires (Lx == L)
	static inline auto __attribute__((always_inline)) simd_ctor(::glm::vec<Lx, Tx, Qx> const& v)
	{
		using OtherVec = GccVec<Lx, Tx, Qx>;
		if constexpr (sizeof(v) == sizeof(data_t)) {
			return simd_ctor_same_size_conversions<Lx, Tx, Qx>(v);
		} else if constexpr (BIsAlignedQ<Q>() && !BIsAlignedQ<Qx>() && Lx == 3) {
			auto o = fetch_vec3_as_vec4<Tx,Qx>(v);
			if constexpr (std::is_same_v<T, Tx>) {
				return gcc_vec_to_data(o);
			} else {
				gcc_vec_t converted = __builtin_convertvector(o, gcc_vec_t);
				return gcc_vec_to_data(converted);
			}
		} else {
			OtherVec o;
			static constexpr auto size = std::min(sizeof(v.data), sizeof(o));
			std::memcpy(&o, &(v.data), size);
			//using o_vec_t = decltype(v);
			//v.o_vec_t::~o_vec_t();
			gcc_vec_t converted = __builtin_convertvector(o, gcc_vec_t);
			return gcc_vec_to_data(converted);
		}
	}

	/// Construction from a vector of a DIFFERENT length (taken by value).
	///
	/// Shuffle path (target length 1, 2 or 4; source not an "odd" 3-component
	/// case; at least one side unaligned): picks source lanes 0..L-1, using -1
	/// (don't care) for lanes the source does not have, then converts lanes if
	/// the element types differ.
	/// Fallback path: memcpy's the overlapping bytes of v.data into a vector of
	/// the target length but source element type, then converts lane-wise.
	template <length_t Lx, typename Tx, qualifier Qx> requires (Lx != L)
	static inline auto __attribute__((always_inline)) simd_ctor(::glm::vec<Lx, Tx, Qx> v)
	{
		using OtherVec = GccVec<Lx, Tx, Qx>;
		if constexpr ( ((Lx != 3 || L == 3) && (!BIsAlignedQ<Q>() || !BIsAlignedQ<Qx>()))
		               && L != 3 && L > 0 && L <= 4 ) {
			// Shuffle indices into the source; -1 marks a lane it lacks.
			static constexpr int64_t posOne = 0;
			static constexpr int64_t posTwo = Lx > 1 ? 1 : -1;
			static constexpr int64_t posThree = Lx > 2 ? 2 : -1;
			static constexpr int64_t posFour = Lx > 3 ? 3 : -1;
			if constexpr (L == 4) {
				OtherVec o = v.data;
				auto oExt = __builtin_shufflevector(o, o, posOne, posTwo, posThree, posFour);
				if constexpr (std::is_same_v<T, Tx>) {
					return gcc_vec_to_data(oExt);
				} else {
					return gcc_vec_to_data(__builtin_convertvector(oExt, gcc_vec_t));
				}
			} else if constexpr (L == 2) {
				OtherVec o = v.data;
				auto oExt = __builtin_shufflevector(o, o, posOne, posTwo);
				if constexpr (std::is_same_v<T, Tx>) {
					return gcc_vec_to_data(oExt);
				} else {
					return gcc_vec_to_data(__builtin_convertvector(oExt, gcc_vec_t));
				}
			} else if constexpr (L == 1) {
				OtherVec o = v.data;
				auto oExt = __builtin_shufflevector(o, o, posOne);
				if constexpr (std::is_same_v<T, Tx>) {
					return gcc_vec_to_data(oExt);
				} else {
					return gcc_vec_to_data(__builtin_convertvector(oExt, gcc_vec_t));
				}
			} else {
				static_assert(false, "unreachable");
			}
		} else {
			using OurSizeTheirType = GccVec<L, Tx, Qx>;
			static constexpr auto size = std::min(sizeof(OurSizeTheirType), sizeof(v.data));
			OurSizeTheirType oExpanded;
			std::memcpy(&oExpanded, &(v.data), size);
			using o_vec_t = decltype(v);
			// NOTE(review): explicit destructor call on the by-value parameter,
			// which is also destroyed implicitly at scope exit — harmless only
			// if the vec destructor is trivial; confirm intent.
			v.o_vec_t::~o_vec_t();
			gcc_vec_t converted = __builtin_convertvector(oExpanded, gcc_vec_t);
			return gcc_vec_to_data(converted);
		}
	}

	/// True when the number of pack arguments equals the vector length L.
	template<arithmetic... A>
	static consteval bool isLengthOfVector() {
		return sizeof...(A) == L;
	}

	/// L scalars that all satisfy SameTypes: brace-initializes a vector of that
	/// (first) type and converts lane-wise to T if it differs.
	template <arithmetic... A>
	static inline auto __attribute__((always_inline)) simd_ctor_multi_scalars(A... scalars) requires ( isLengthOfVector<A...>() && SameTypes<A...>())
	{
		using OtherType = GetFirstType<A...>::FirstTx;
		using other_vec_t = GccVec<L, OtherType, Q>;
		other_vec_t o {scalars...};
		if constexpr (std::is_same_v<T, OtherType>) {
			return gcc_vec_to_data(o);
		} else {
			return gcc_vec_to_data(__builtin_convertvector(o, gcc_vec_t));
		}
	}

	/// L mixed integral scalars building a floating-point vector: gathers them
	/// as GetCommonType<A...>::Type, then converts lane-wise to T.
	template <arithmetic... A>
	static inline auto __attribute__((always_inline)) simd_ctor_multi_scalars(A... scalars) requires ( isLengthOfVector<A...>() && std::is_floating_point_v<T> && AllIntegralTypes<A...>() && !SameTypes<A...>())
	{
		using OtherType = GetCommonType<A...>::Type;
		using other_vec_t = GccVec<L, OtherType, Q>;
		other_vec_t o {scalars...};
		if constexpr (std::is_same_v<T, OtherType>) {
			return gcc_vec_to_data(o);
		} else {
			return gcc_vec_to_data(__builtin_convertvector(o, gcc_vec_t));
		}
	}

	/// L mixed floating-point scalars building an integral vector: gathers them
	/// as GetCommonType<A...>::Type, then converts lane-wise to T.
	template <arithmetic... A>
	static inline auto __attribute__((always_inline)) simd_ctor_multi_scalars(A... scalars) requires ( isLengthOfVector<A...>() && std::is_integral_v<T> && AllFloatTypes<A...>() && !SameTypes<A...>())
	{
		using OtherType = GetCommonType<A...>::Type;
		using other_vec_t = GccVec<L, OtherType, Q>;
		other_vec_t o {scalars...};
		if constexpr (std::is_same_v<T, OtherType>) {
			return gcc_vec_to_data(o);
		} else {
			return gcc_vec_to_data(__builtin_convertvector(o, gcc_vec_t));
		}
	}

	/// L scalars of differing types that satisfy SameArithmeticTypes (defined
	/// elsewhere): each is static_cast to T and stored lane by lane.
	template <arithmetic... A>
	static inline auto __attribute__((always_inline)) simd_ctor_multi_scalars(A... scalars) requires ( isLengthOfVector<A...>() && !SameTypes<A...>() && SameArithmeticTypes<A...>())
	{
		// Only the first sizeof...(scalars) lanes are written below.
		gcc_vec_t v;
		std::array<T, sizeof...(scalars)> pack{static_cast<T>(scalars)...};
		for (int i = 0; i != sizeof...(scalars); i++ ) {
			v[i] = pack[i];
			// NOTE(review): pseudo-destructor call on an arithmetic element;
			// purpose not evident from this file — confirm intent.
			pack[i].T::~T();
		}
		return gcc_vec_to_data(v);
	}
};
}

File diff suppressed because it is too large Load diff

View file

@ -1,6 +1,6 @@
/// @ref core
/// @file glm/detail/type_vec1.hpp
#if GLM_SIMD_CONSTEXPR == 0
#pragma once
#include "qualifier.hpp"
@ -306,3 +306,4 @@ namespace glm
#ifndef GLM_EXTERNAL_TEMPLATE
#include "type_vec1.inl"
#endif//GLM_EXTERNAL_TEMPLATE
#endif

View file

@ -1,9 +1,9 @@
/// @ref core
/// @file glm/detail/type_vec2.hpp
#pragma once
#include "qualifier.hpp"
#if GLM_SIMD_CONSTEXPR == 0
#if GLM_CONFIG_SWIZZLE == GLM_SWIZZLE_OPERATOR
# include "_swizzle.hpp"
#elif GLM_CONFIG_SWIZZLE == GLM_SWIZZLE_FUNCTION
@ -404,3 +404,4 @@ namespace glm
#ifndef GLM_EXTERNAL_TEMPLATE
#include "type_vec2.inl"
#endif//GLM_EXTERNAL_TEMPLATE
#endif

View file

@ -1,9 +1,9 @@
/// @ref core
/// @file glm/detail/type_vec3.hpp
#pragma once
#include "qualifier.hpp"
#if GLM_SIMD_CONSTEXPR == 0
#if GLM_CONFIG_SWIZZLE == GLM_SWIZZLE_OPERATOR
# include "_swizzle.hpp"
#elif GLM_CONFIG_SWIZZLE == GLM_SWIZZLE_FUNCTION
@ -445,3 +445,5 @@ namespace glm
#ifndef GLM_EXTERNAL_TEMPLATE
#include "type_vec3.inl"
#endif//GLM_EXTERNAL_TEMPLATE
#endif

View file

@ -1,9 +1,9 @@
/// @ref core
/// @file glm/detail/type_vec4.hpp
#pragma once
#include "qualifier.hpp"
#if GLM_SIMD_CONSTEXPR == 0
#if GLM_CONFIG_SWIZZLE == GLM_SWIZZLE_OPERATOR
# include "_swizzle.hpp"
#elif GLM_CONFIG_SWIZZLE == GLM_SWIZZLE_FUNCTION
@ -512,3 +512,5 @@ namespace glm
#ifndef GLM_EXTERNAL_TEMPLATE
#include "type_vec4.inl"
#endif//GLM_EXTERNAL_TEMPLATE
#endif

View file

@ -1,5 +1,5 @@
/// @ref core
#if GLM_SIMD_CONSTEXPR == 0
#include "compute_vector_relational.hpp"
#include "compute_vector_decl.hpp"
@ -1128,3 +1128,5 @@ namespace glm {
}
#endif
#endif

View file

@ -101,6 +101,14 @@
/// included a specific file.
///
#ifndef GLM_SIMD_CONSTEXPR
#define GLM_SIMD_CONSTEXPR 0
#endif
#if GLM_SIMD_CONSTEXPR == 1
# define GLM_FORCE_INTRINSICS 1
#endif
#include "detail/_fixes.hpp"
#include "detail/setup.hpp"
@ -114,9 +122,14 @@
#include <cassert>
#include "fwd.hpp"
#include "vec2.hpp"
#include "vec3.hpp"
#include "vec4.hpp"
#if GLM_SIMD_CONSTEXPR == 0
# include "vec2.hpp"
# include "vec3.hpp"
# include "vec4.hpp"
#else
# include "simd_constexpr/vec.hpp"
#endif
#include "mat2x2.hpp"
#include "mat2x3.hpp"
#include "mat2x4.hpp"

View file

@ -15,9 +15,14 @@
// Dependencies
#include "detail/qualifier.hpp"
#include "detail/setup.hpp"
#include "vec2.hpp"
#include "vec3.hpp"
#include "vec4.hpp"
#if GLM_SIMD_CONSTEXPR == 0
# include "vec2.hpp"
# include "vec3.hpp"
# include "vec4.hpp"
#else
# include"simd_constexpr/vec.hpp"
#endif
#include "mat2x2.hpp"
#include "mat2x3.hpp"
#include "mat2x4.hpp"

View file

@ -373,7 +373,7 @@
#elif defined(GLM_FORCE_SSE)
# define GLM_ARCH (GLM_ARCH_SSE)
# define GLM_FORCE_INTRINSICS
#elif defined(GLM_FORCE_INTRINSICS) && !defined(GLM_FORCE_XYZW_ONLY)
#elif ( (defined(GLM_FORCE_INTRINSICS) && !defined(GLM_FORCE_XYZW_ONLY)) || GLM_SIMD_CONSTEXPR == 1 )
# if defined(__AVX2__)
# define GLM_ARCH (GLM_ARCH_AVX2)
# elif defined(__AVX__)

View file

@ -0,0 +1,27 @@
/// @ref core
/// @file glm/simd_constexpr/vec.hpp
#pragma once
namespace glm
{
	// Single-precision floating-point vectors, default precision.
	using vec1 = vec<1, float, defaultp>;
	using vec2 = vec<2, float, defaultp>;
	using vec3 = vec<3, float, defaultp>;
	using vec4 = vec<4, float, defaultp>;

	// Signed integer vectors, default precision.
	using ivec1 = vec<1, int, defaultp>;
	using ivec2 = vec<2, int, defaultp>;
	using ivec3 = vec<3, int, defaultp>;
	using ivec4 = vec<4, int, defaultp>;

	// Unsigned integer vectors, default precision.
	using uvec1 = vec<1, unsigned int, defaultp>;
	using uvec2 = vec<2, unsigned int, defaultp>;
	using uvec3 = vec<3, unsigned int, defaultp>;
	using uvec4 = vec<4, unsigned int, defaultp>;

	// Boolean vectors, default precision.
	using bvec1 = vec<1, bool, defaultp>;
	using bvec2 = vec<2, bool, defaultp>;
	using bvec3 = vec<3, bool, defaultp>;
	using bvec4 = vec<4, bool, defaultp>;
}
#include "../detail/simd_constexpr/vec.hpp"

View file

@ -0,0 +1,51 @@
#define GLM_SIMD_CONSTEXPR 1
#include <cmath>
#include <glm/glm.hpp>
#include <glm/vec4.hpp>
#include <cstdio>
#include <ctime>
#include <vector>
#include <cstdlib>
#define GLM_FORCE_ALIGNED_GENTYPES 1
#include <glm/detail/qualifier.hpp>
#if GLM_COMPILER & GLM_COMPILER_CLANG
# pragma clang diagnostic push
# pragma clang diagnostic ignored "-Wglobal-constructors"
# pragma clang diagnostic ignored "-Wunused-variable"
#endif
// Smoke test for the GLM_SIMD_CONSTEXPR vector implementation: checks the
// SIMD configuration macros at compile time, then constructs, combines,
// swizzles and blends aligned 4-float vectors and prints the results.
int main()
{
#if defined(__x86_64__) || defined(__aarch64__)
// On these targets the SIMD path is expected to be selected.
static_assert(GLM_ARCH & GLM_ARCH_SIMD_BIT);
static_assert(GLM_CONFIG_SIMD);
static_assert(GLM_ARCH_SIMD_BIT);
#endif
using avec4 = glm::vec<4, float, glm::aligned_highp>;
// Single-scalar construction evaluated as a constant expression.
static constexpr avec4 v{1.0f};//, 1.1f, 1.2f, 1.0f};
// Run-time single-scalar construction; each value is 0.0f or 1.0f.
avec4 v1{static_cast<float>(rand() % 2)};
avec4 v2{static_cast<float>(rand() % 2)};//, static_cast<float>(rand() % 255), static_cast<float>(rand() % 255), static_cast<float>(rand() % 255)};
// Four-scalar construction and copy construction as constant expressions.
static constexpr avec4 v3 = avec4{1.5f,2.0f,3.0f,4.0f};
static constexpr avec4 v4 = v3;
printf("v1 = %f %f %f %f\n", v1[0], v1[1], v1[2], v1[3]);
printf("v2 = %f %f %f %f\n", v2[0], v2[1], v2[2], v2[3]);
// Expression statement with no effect; only checks that .x is accessible.
v1.x;
avec4 vfin = glm::max(v1, v2) + v3;
static_assert(sizeof(vfin)>0);
// Named-member read with implicit float -> double conversion.
double w = v3.w;
printf("vfin = %f %f %f %f\n", vfin[0], vfin[1], vfin[2], vfin[3]);
printf("v3 = %f %f %f %f\n", v3[0], v3[1], v3.z, w);
auto v5 = v3.xyzw();
printf("v3.xyzw() = %f %f %f %f\n", v5.x, v5.y, v5.z, v5.w);
#ifdef __clang__
// Clang-only swizzle using the X pseudo-component. The first printed value is
// the literal -1.0, not v6.x.
// NOTE(review): presumably because the X lane is unspecified — confirm.
auto v6 = v3.Xyzw();
printf("v3.Xyzw() = %f %f %f %f\n", -1.0, v6.y, v6.z, v6.w);
#endif
// blend with a compile-time per-lane mask given as a braced template argument.
auto v7 = v3.blend<{0, 1, 0, 1}>(vfin);
printf("v3.blend<glm::bvec4{0, 1, 0, 1}>(vfin) = %f %f %f %f\n", v7.x, v7.y, v7.z, v7.w);
// Vector addition evaluated as a constant expression.
static constexpr auto v8 = v + v4;
printf("static constexpr auto v8 = v + v4 = %f %f %f %f\n", v8.x, v8.y, v8.z, v8.w);
return 0;
}