From a2403634daa51dcbb5ab5e6f07a696a334099534 Mon Sep 17 00:00:00 2001 From: Amaury Le Leyzour Date: Tue, 12 Nov 2019 14:55:24 -0800 Subject: [PATCH 1/7] glm-aarch64: Add neon's mat4 inverse --- glm/detail/func_matrix_simd.inl | 140 ++++++++++++++++++++++++++--- glm/simd/neon.h | 155 ++++++++++++++++++++++++++++++++ glm/simd/platform.h | 2 +- 3 files changed, 285 insertions(+), 12 deletions(-) create mode 100644 glm/simd/neon.h diff --git a/glm/detail/func_matrix_simd.inl b/glm/detail/func_matrix_simd.inl index d052bf12..f67ac66a 100644 --- a/glm/detail/func_matrix_simd.inl +++ b/glm/detail/func_matrix_simd.inl @@ -103,17 +103,10 @@ namespace glm { auto MulRow = [&](int l) { float32x4_t const SrcA = m2[l].data; -#if GLM_ARCH & GLM_ARCH_ARMV8_BIT - float32x4_t r= vmulq_laneq_f32(m1[0].data, SrcA, 0); - r = vaddq_f32(r, vmulq_laneq_f32(m1[1].data, SrcA, 1)); - r = vaddq_f32(r, vmulq_laneq_f32(m1[2].data, SrcA, 2)); - r = vaddq_f32(r, vmulq_laneq_f32(m1[3].data, SrcA, 3)); -#else - float32x4_t r= vmulq_f32(m1[0].data, vdupq_n_f32(vgetq_lane_f32(SrcA, 0))); - r = vaddq_f32(r, vmulq_f32(m1[1].data, vdupq_n_f32(vgetq_lane_f32(SrcA, 1)))); - r = vaddq_f32(r, vmulq_f32(m1[2].data, vdupq_n_f32(vgetq_lane_f32(SrcA, 2)))); - r = vaddq_f32(r, vmulq_f32(m1[3].data, vdupq_n_f32(vgetq_lane_f32(SrcA, 3)))); -#endif + float32x4_t r = neon::mul_lane(m1[0].data, SrcA, 0); + r = neon::madd_lane(r, m1[1].data, SrcA, 1); + r = neon::madd_lane(r, m1[2].data, SrcA, 2); + r = neon::madd_lane(r, m1[3].data, SrcA, 3); return r; }; @@ -127,5 +120,130 @@ namespace glm { return Result; } #endif // CXX11 + + template + struct detail::compute_inverse<4, 4, float, Q, true> + { + GLM_FUNC_QUALIFIER static mat<4, 4, float, Q> call(mat<4, 4, float, Q> const& m) + { + float32x4_t const& m0 = m[0].data; + float32x4_t const& m1 = m[1].data; + float32x4_t const& m2 = m[2].data; + float32x4_t const& m3 = m[3].data; + + // m[2][2] * m[3][3] - m[3][2] * m[2][3]; + // m[2][2] * m[3][3] - m[3][2] * m[2][3]; + // m[1][2] * m[3][3] - m[3][2] * m[1][3]; + // m[1][2] * m[2][3] - m[2][2] * m[1][3]; + + float32x4_t Fac0; + { + float32x4_t w0 = vcombine_f32(neon::dup_lane(m2, 2), neon::dup_lane(m1, 2)); + float32x4_t w1 = neon::copy_lane(neon::dupq_lane(m3, 3), 3, m2, 3); + float32x4_t w2 = neon::copy_lane(neon::dupq_lane(m3, 2), 3, m2, 2); + float32x4_t w3 = vcombine_f32(neon::dup_lane(m2, 3), neon::dup_lane(m1, 3)); + Fac0 = w0 * w1 - w2 * w3; + } + + // m[2][1] * m[3][3] - m[3][1] * m[2][3]; + // m[2][1] * m[3][3] - m[3][1] * m[2][3]; + // m[1][1] * m[3][3] - m[3][1] * m[1][3]; + // m[1][1] * m[2][3] - m[2][1] * m[1][3]; + + float32x4_t Fac1; + { + float32x4_t w0 = vcombine_f32(neon::dup_lane(m2, 1), neon::dup_lane(m1, 1)); + float32x4_t w1 = neon::copy_lane(neon::dupq_lane(m3, 3), 3, m2, 3); + float32x4_t w2 = neon::copy_lane(neon::dupq_lane(m3, 1), 3, m2, 1); + float32x4_t w3 = vcombine_f32(neon::dup_lane(m2, 3), neon::dup_lane(m1, 3)); + Fac1 = w0 * w1 - w2 * w3; + } + + // m[2][1] * m[3][2] - m[3][1] * m[2][2]; + // m[2][1] * m[3][2] - m[3][1] * m[2][2]; + // m[1][1] * m[3][2] - m[3][1] * m[1][2]; + // m[1][1] * m[2][2] - m[2][1] * m[1][2]; + + float32x4_t Fac2; + { + float32x4_t w0 = vcombine_f32(neon::dup_lane(m2, 1), neon::dup_lane(m1, 1)); + float32x4_t w1 = neon::copy_lane(neon::dupq_lane(m3, 2), 3, m2, 2); + float32x4_t w2 = neon::copy_lane(neon::dupq_lane(m3, 1), 3, m2, 1); + float32x4_t w3 = vcombine_f32(neon::dup_lane(m2, 2), neon::dup_lane(m1, 2)); + Fac2 = w0 * w1 - w2 * w3; + } + + // m[2][0] * m[3][3] - m[3][0] * m[2][3]; + // m[2][0] * m[3][3] - m[3][0] * m[2][3]; + // m[1][0] * m[3][3] - m[3][0] * m[1][3]; + // m[1][0] * m[2][3] - m[2][0] * m[1][3]; + + float32x4_t Fac3; + { + float32x4_t w0 = vcombine_f32(neon::dup_lane(m2, 0), neon::dup_lane(m1, 0)); + float32x4_t w1 = neon::copy_lane(neon::dupq_lane(m3, 3), 3, m2, 3); + float32x4_t w2 = neon::copy_lane(neon::dupq_lane(m3, 0), 3, m2, 0); + float32x4_t w3 = vcombine_f32(neon::dup_lane(m2, 3), neon::dup_lane(m1, 3)); + Fac3 = w0 * w1 - w2 * w3; + } + + // m[2][0] * m[3][2] - m[3][0] * m[2][2]; + // m[2][0] * m[3][2] - m[3][0] * m[2][2]; + // m[1][0] * m[3][2] - m[3][0] * m[1][2]; + // m[1][0] * m[2][2] - m[2][0] * m[1][2]; + + float32x4_t Fac4; + { + float32x4_t w0 = vcombine_f32(neon::dup_lane(m2, 0), neon::dup_lane(m1, 0)); + float32x4_t w1 = neon::copy_lane(neon::dupq_lane(m3, 2), 3, m2, 2); + float32x4_t w2 = neon::copy_lane(neon::dupq_lane(m3, 0), 3, m2, 0); + float32x4_t w3 = vcombine_f32(neon::dup_lane(m2, 2), neon::dup_lane(m1, 2)); + Fac4 = w0 * w1 - w2 * w3; + } + + // m[2][0] * m[3][1] - m[3][0] * m[2][1]; + // m[2][0] * m[3][1] - m[3][0] * m[2][1]; + // m[1][0] * m[3][1] - m[3][0] * m[1][1]; + // m[1][0] * m[2][1] - m[2][0] * m[1][1]; + + float32x4_t Fac5; + { + float32x4_t w0 = vcombine_f32(neon::dup_lane(m2, 0), neon::dup_lane(m1, 0)); + float32x4_t w1 = neon::copy_lane(neon::dupq_lane(m3, 1), 3, m2, 1); + float32x4_t w2 = neon::copy_lane(neon::dupq_lane(m3, 0), 3, m2, 0); + float32x4_t w3 = vcombine_f32(neon::dup_lane(m2, 1), neon::dup_lane(m1, 1)); + Fac5 = w0 * w1 - w2 * w3; + } + + float32x4_t Vec0 = neon::copy_lane(neon::dupq_lane(m0, 0), 0, m1, 0); // (m[1][0], m[0][0], m[0][0], m[0][0]); + float32x4_t Vec1 = neon::copy_lane(neon::dupq_lane(m0, 1), 0, m1, 1); // (m[1][1], m[0][1], m[0][1], m[0][1]); + float32x4_t Vec2 = neon::copy_lane(neon::dupq_lane(m0, 2), 0, m1, 2); // (m[1][2], m[0][2], m[0][2], m[0][2]); + float32x4_t Vec3 = neon::copy_lane(neon::dupq_lane(m0, 3), 0, m1, 3); // (m[1][3], m[0][3], m[0][3], m[0][3]); + + float32x4_t Inv0 = Vec1 * Fac0 - Vec2 * Fac1 + Vec3 * Fac2; + float32x4_t Inv1 = Vec0 * Fac0 - Vec2 * Fac3 + Vec3 * Fac4; + float32x4_t Inv2 = Vec0 * Fac1 - Vec1 * Fac3 + Vec3 * Fac5; + float32x4_t Inv3 = Vec0 * Fac2 - Vec1 * Fac4 + Vec2 * Fac5; + + float32x4_t r0 = float32x4_t{-1, +1, -1, +1} * Inv0; + float32x4_t r1 = float32x4_t{+1, -1, +1, -1} * Inv1; + float32x4_t r2 = float32x4_t{-1, +1, -1, +1} * Inv2; + float32x4_t r3 = float32x4_t{+1, -1, +1, -1} * Inv3; + + float32x4_t det = neon::mul_lane(r0, m0, 0); + det = neon::madd_lane(det, r1, m0, 1); + det = neon::madd_lane(det, r2, m0, 2); + det = neon::madd_lane(det, r3, m0, 3); + + float32x4_t rdet = vdupq_n_f32(1 / vgetq_lane_f32(det, 0)); + + mat<4, 4, float, Q> r; + r[0].data = vmulq_f32(r0, rdet); + r[1].data = vmulq_f32(r1, rdet); + r[2].data = vmulq_f32(r2, rdet); + r[3].data = vmulq_f32(r3, rdet); + return r; + } + }; }//namespace glm #endif diff --git a/glm/simd/neon.h b/glm/simd/neon.h new file mode 100644 index 00000000..51a90c7c --- /dev/null +++ b/glm/simd/neon.h @@ -0,0 +1,155 @@ +/// @ref simd_neon +/// @file glm/simd/neon.h + +#pragma once + +#if GLM_ARCH & GLM_ARCH_NEON_BIT +#include + +namespace glm { + namespace neon { + static float32x4_t dupq_lane(float32x4_t vsrc, int lane) { + switch(lane) { +#if GLM_ARCH & GLM_ARCH_ARMV8_BIT + case 0: return vdupq_laneq_f32(vsrc, 0); + case 1: return vdupq_laneq_f32(vsrc, 1); + case 2: return vdupq_laneq_f32(vsrc, 2); + case 3: return vdupq_laneq_f32(vsrc, 3); +#else + case 0: return vdupq_n_f32(vgetq_lane_f32(vsrc, 0)); + case 1: return vdupq_n_f32(vgetq_lane_f32(vsrc, 1)); + case 2: return vdupq_n_f32(vgetq_lane_f32(vsrc, 2)); + case 3: return vdupq_n_f32(vgetq_lane_f32(vsrc, 3)); +#endif + } + assert(!"Unreachable code executed!"); + return vdupq_n_f32(0.0f); + } + + static float32x2_t dup_lane(float32x4_t vsrc, int lane) { + switch(lane) { +#if GLM_ARCH & GLM_ARCH_ARMV8_BIT + case 0: return vdup_laneq_f32(vsrc, 0); + case 1: return vdup_laneq_f32(vsrc, 1); + case 2: return vdup_laneq_f32(vsrc, 2); + case 3: return vdup_laneq_f32(vsrc, 3); +#else + case 0: return vdup_n_f32(vgetq_lane_f32(vsrc, 0)); + case 1: return vdup_n_f32(vgetq_lane_f32(vsrc, 1)); + case 2: return vdup_n_f32(vgetq_lane_f32(vsrc, 2)); + case 3: return vdup_n_f32(vgetq_lane_f32(vsrc, 3)); +#endif + } + assert(!"Unreachable code executed!"); + return vdup_n_f32(0.0f); + } + + static float32x4_t copy_lane(float32x4_t vdst, int dlane, float32x4_t vsrc, int slane) { +#if GLM_ARCH & GLM_ARCH_ARMV8_BIT + switch(dlane) { + case 0: + switch(slane) { + case 0: return vcopyq_laneq_f32(vdst, 0, vsrc, 0); + case 1: return vcopyq_laneq_f32(vdst, 0, vsrc, 1); + case 2: return vcopyq_laneq_f32(vdst, 0, vsrc, 2); + case 3: return vcopyq_laneq_f32(vdst, 0, vsrc, 3); + } + assert(!"Unreachable code executed!"); + case 1: + switch(slane) { + case 0: return vcopyq_laneq_f32(vdst, 1, vsrc, 0); + case 1: return vcopyq_laneq_f32(vdst, 1, vsrc, 1); + case 2: return vcopyq_laneq_f32(vdst, 1, vsrc, 2); + case 3: return vcopyq_laneq_f32(vdst, 1, vsrc, 3); + } + assert(!"Unreachable code executed!"); + case 2: + switch(slane) { + case 0: return vcopyq_laneq_f32(vdst, 2, vsrc, 0); + case 1: return vcopyq_laneq_f32(vdst, 2, vsrc, 1); + case 2: return vcopyq_laneq_f32(vdst, 2, vsrc, 2); + case 3: return vcopyq_laneq_f32(vdst, 2, vsrc, 3); + } + assert(!"Unreachable code executed!"); + case 3: + switch(slane) { + case 0: return vcopyq_laneq_f32(vdst, 3, vsrc, 0); + case 1: return vcopyq_laneq_f32(vdst, 3, vsrc, 1); + case 2: return vcopyq_laneq_f32(vdst, 3, vsrc, 2); + case 3: return vcopyq_laneq_f32(vdst, 3, vsrc, 3); + } + assert(!"Unreachable code executed!"); + } +#else + + float l; + switch(slane) { + case 0: l = vgetq_lane_f32(vsrc, 0); break; + case 1: l = vgetq_lane_f32(vsrc, 1); break; + case 2: l = vgetq_lane_f32(vsrc, 2); break; + case 3: l = vgetq_lane_f32(vsrc, 3); break; + default: + assert(!"Unreachable code executed!"); + } + switch(dlane) { + case 0: return vsetq_lane_f32(l, vdst, 0); + case 1: return vsetq_lane_f32(l, vdst, 1); + case 2: return vsetq_lane_f32(l, vdst, 2); + case 3: return vsetq_lane_f32(l, vdst, 3); + } +#endif + assert(!"Unreachable code executed!"); + return vdupq_n_f32(0.0f); + } + + static float32x4_t mul_lane(float32x4_t v, float32x4_t vlane, int lane) { +#if GLM_ARCH & GLM_ARCH_ARMV8_BIT + switch(lane) { + case 0: return vmulq_laneq_f32(v, vlane, 0); break; + case 1: return vmulq_laneq_f32(v, vlane, 1); break; + case 2: return vmulq_laneq_f32(v, vlane, 2); break; + case 3: return vmulq_laneq_f32(v, vlane, 3); break; + default: + assert(!"Unreachable code executed!"); + } + assert(!"Unreachable code executed!"); + return vdupq_n_f32(0.0f); +#else + return vmulq_f32(v, dupq_lane(vlane, lane)); +#endif + } + + static float32x4_t madd_lane(float32x4_t acc, float32x4_t v, float32x4_t vlane, int lane) { +#if GLM_ARCH & GLM_ARCH_ARMV8_BIT +#ifdef GLM_CONFIG_FORCE_FMA +# define FMADD_LANE(acc, x, y, L) do { asm volatile ("fmla %0.4s, %1.4s, %2.4s" : "+w"(acc) : "w"(x), "w"(dup_lane(y, L))); } while(0) +#else +# define FMADD_LANE(acc, x, y, L) do { acc = vmlaq_laneq_f32(acc, x, y, L); } while(0) +#endif + + switch(lane) { + case 0: + FMADD_LANE(acc, v, vlane, 0); + return acc; + case 1: + FMADD_LANE(acc, v, vlane, 1); + return acc; + case 2: + FMADD_LANE(acc, v, vlane, 2); + return acc; + case 3: + FMADD_LANE(acc, v, vlane, 3); + return acc; + default: + assert(!"Unreachable code executed!"); + } + assert(!"Unreachable code executed!"); + return vdupq_n_f32(0.0f); +# undef FMADD_LANE +#else + return vaddq_f32(acc, vmulq_f32(v, dupq_lane(vlane, lane))); +#endif + } + } //namespace neon +} // namespace glm +#endif // GLM_ARCH & GLM_ARCH_NEON_BIT diff --git a/glm/simd/platform.h b/glm/simd/platform.h index 24cb411c..ad25cc15 100644 --- a/glm/simd/platform.h +++ b/glm/simd/platform.h @@ -364,7 +364,7 @@ #elif GLM_ARCH & GLM_ARCH_SSE2_BIT # include #elif GLM_ARCH & GLM_ARCH_NEON_BIT -# include +# include "neon.h" #endif//GLM_ARCH #if GLM_ARCH & GLM_ARCH_SSE2_BIT From 77d797bd54c42473bc3c5e06ce7ea2abbbb3b331 Mon Sep 17 00:00:00 2001 From: Amaury Le Leyzour Date: Tue, 12 Nov 2019 15:49:26 -0800 Subject: [PATCH 2/7] respect project's coding style --- glm/simd/neon.h | 78 ++++++++++++++++++++++++------------------------- 1 file changed, 39 insertions(+), 39 deletions(-) diff --git a/glm/simd/neon.h b/glm/simd/neon.h index 51a90c7c..6c38b06c 100644 --- a/glm/simd/neon.h +++ b/glm/simd/neon.h @@ -47,39 +47,39 @@ namespace glm { static float32x4_t copy_lane(float32x4_t vdst, int dlane, float32x4_t vsrc, int slane) { #if GLM_ARCH & GLM_ARCH_ARMV8_BIT switch(dlane) { - case 0: + case 0: switch(slane) { - case 0: return vcopyq_laneq_f32(vdst, 0, vsrc, 0); - case 1: return vcopyq_laneq_f32(vdst, 0, vsrc, 1); - case 2: return vcopyq_laneq_f32(vdst, 0, vsrc, 2); - case 3: return vcopyq_laneq_f32(vdst, 0, vsrc, 3); - } - assert(!"Unreachable code executed!"); - case 1: + case 0: return vcopyq_laneq_f32(vdst, 0, vsrc, 0); + case 1: return vcopyq_laneq_f32(vdst, 0, vsrc, 1); + case 2: return vcopyq_laneq_f32(vdst, 0, vsrc, 2); + case 3: return vcopyq_laneq_f32(vdst, 0, vsrc, 3); + } + assert(!"Unreachable code executed!"); + case 1: switch(slane) { - case 0: return vcopyq_laneq_f32(vdst, 1, vsrc, 0); - case 1: return vcopyq_laneq_f32(vdst, 1, vsrc, 1); - case 2: return vcopyq_laneq_f32(vdst, 1, vsrc, 2); - case 3: return vcopyq_laneq_f32(vdst, 1, vsrc, 3); - } - assert(!"Unreachable code executed!"); - case 2: + case 0: return vcopyq_laneq_f32(vdst, 1, vsrc, 0); + case 1: return vcopyq_laneq_f32(vdst, 1, vsrc, 1); + case 2: return vcopyq_laneq_f32(vdst, 1, vsrc, 2); + case 3: return vcopyq_laneq_f32(vdst, 1, vsrc, 3); + } + assert(!"Unreachable code executed!"); + case 2: switch(slane) { - case 0: return vcopyq_laneq_f32(vdst, 2, vsrc, 0); - case 1: return vcopyq_laneq_f32(vdst, 2, vsrc, 1); - case 2: return vcopyq_laneq_f32(vdst, 2, vsrc, 2); - case 3: return vcopyq_laneq_f32(vdst, 2, vsrc, 3); - } - assert(!"Unreachable code executed!"); - case 3: + case 0: return vcopyq_laneq_f32(vdst, 2, vsrc, 0); + case 1: return vcopyq_laneq_f32(vdst, 2, vsrc, 1); + case 2: return vcopyq_laneq_f32(vdst, 2, vsrc, 2); + case 3: return vcopyq_laneq_f32(vdst, 2, vsrc, 3); + } + assert(!"Unreachable code executed!"); + case 3: switch(slane) { - case 0: return vcopyq_laneq_f32(vdst, 3, vsrc, 0); - case 1: return vcopyq_laneq_f32(vdst, 3, vsrc, 1); - case 2: return vcopyq_laneq_f32(vdst, 3, vsrc, 2); - case 3: return vcopyq_laneq_f32(vdst, 3, vsrc, 3); - } - assert(!"Unreachable code executed!"); - } + case 0: return vcopyq_laneq_f32(vdst, 3, vsrc, 0); + case 1: return vcopyq_laneq_f32(vdst, 3, vsrc, 1); + case 2: return vcopyq_laneq_f32(vdst, 3, vsrc, 2); + case 3: return vcopyq_laneq_f32(vdst, 3, vsrc, 3); + } + assert(!"Unreachable code executed!"); + } #else float l; @@ -104,7 +104,7 @@ namespace glm { static float32x4_t mul_lane(float32x4_t v, float32x4_t vlane, int lane) { #if GLM_ARCH & GLM_ARCH_ARMV8_BIT - switch(lane) { + switch(lane) { case 0: return vmulq_laneq_f32(v, vlane, 0); break; case 1: return vmulq_laneq_f32(v, vlane, 1); break; case 2: return vmulq_laneq_f32(v, vlane, 2); break; @@ -127,19 +127,19 @@ namespace glm { # define FMADD_LANE(acc, x, y, L) do { acc = vmlaq_laneq_f32(acc, x, y, L); } while(0) #endif - switch(lane) { + switch(lane) { case 0: - FMADD_LANE(acc, v, vlane, 0); - return acc; + FMADD_LANE(acc, v, vlane, 0); + return acc; case 1: - FMADD_LANE(acc, v, vlane, 1); - return acc; + FMADD_LANE(acc, v, vlane, 1); + return acc; case 2: - FMADD_LANE(acc, v, vlane, 2); - return acc; + FMADD_LANE(acc, v, vlane, 2); + return acc; case 3: - FMADD_LANE(acc, v, vlane, 3); - return acc; + FMADD_LANE(acc, v, vlane, 3); + return acc; default: assert(!"Unreachable code executed!"); } From 9ce038104c672aca0d16952b55f48fcec0ab5712 Mon Sep 17 00:00:00 2001 From: Amaury Le Leyzour Date: Tue, 19 Nov 2019 17:30:57 -0800 Subject: [PATCH 3/7] Fixing minor compiler issue --- glm/detail/func_common.inl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/glm/detail/func_common.inl b/glm/detail/func_common.inl index 8a7681c4..8c615f11 100644 --- a/glm/detail/func_common.inl +++ b/glm/detail/func_common.inl @@ -287,7 +287,8 @@ namespace detail std::numeric_limits::is_iec559 || (std::numeric_limits::is_signed && std::numeric_limits::is_integer), "'sign' only accept signed inputs"); - return detail::compute_sign<1, genFIType, defaultp, std::numeric_limits::is_iec559, highp>::call(vec<1, genFIType>(x)).x; + return detail::compute_sign<1, genFIType, defaultp, + std::numeric_limits::is_iec559, detailed::is_aligned::value>::call(vec<1, genFIType>(x)).x; } template From 923f7222d10d47bad2046d4f5e2e19d68aaf2e48 Mon Sep 17 00:00:00 2001 From: Amaury Le Leyzour Date: Tue, 19 Nov 2019 18:26:16 -0800 Subject: [PATCH 4/7] Fix Neon issues --- glm/detail/type_vec4_simd.inl | 22 ---------------------- 1 file changed, 22 deletions(-) diff --git a/glm/detail/type_vec4_simd.inl b/glm/detail/type_vec4_simd.inl index 404c991c..29559b53 100644 --- a/glm/detail/type_vec4_simd.inl +++ b/glm/detail/type_vec4_simd.inl @@ -582,28 +582,6 @@ namespace detail { } }; - template - struct compute_vec4_div - { - static vec<4, uint, Q> call(vec<4, uint, Q> const& a, vec<4, uint, Q> const& b) - { - vec<4, uint, Q> Result; - Result.data = vdivq_u32(a.data, b.data); - return Result; - } - }; - - template - struct compute_vec4_div - { - static vec<4, int, Q> call(vec<4, float, Q> const& a, vec<4, int, Q> const& b) - { - vec<4, int, Q> Result; - Result.data = vdivq_s32(a.data, b.data); - return Result; - } - }; - template struct compute_vec4_equal { From c7dbbee6a1530c716e9f56a1a3450cdd472b60c7 Mon Sep 17 00:00:00 2001 From: Amaury Le Leyzour Date: Wed, 20 Nov 2019 09:05:39 -0800 Subject: [PATCH 5/7] Fix typo --- glm/detail/func_common.inl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/glm/detail/func_common.inl b/glm/detail/func_common.inl index 8c615f11..4b5f1441 100644 --- a/glm/detail/func_common.inl +++ b/glm/detail/func_common.inl @@ -288,7 +288,7 @@ namespace detail "'sign' only accept signed inputs"); return detail::compute_sign<1, genFIType, defaultp, - std::numeric_limits::is_iec559, detailed::is_aligned::value>::call(vec<1, genFIType>(x)).x; + std::numeric_limits::is_iec559, detail::is_aligned::value>::call(vec<1, genFIType>(x)).x; } template From 9da0c80e26acd26490b9970ae449505bf5fb53be Mon Sep 17 00:00:00 2001 From: Amaury Le Leyzour Date: Thu, 21 Nov 2019 13:03:22 -0800 Subject: [PATCH 6/7] Yet another fix as this is no longer covered by unit tests --- glm/detail/func_geometric_simd.inl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/glm/detail/func_geometric_simd.inl b/glm/detail/func_geometric_simd.inl index ab658be1..dfe3f4c9 100644 --- a/glm/detail/func_geometric_simd.inl +++ b/glm/detail/func_geometric_simd.inl @@ -155,7 +155,7 @@ namespace detail float32x4_t vd = vrsqrteq_f32(p); vec<4, float, Q> Result; - Result.data = vmulq_f32(v, vd); + Result.data = vmulq_f32(v.data, vd); return Result; } }; From 8828c3f1fd05e173d94417ca4565aa634dabb1c1 Mon Sep 17 00:00:00 2001 From: Christophe Date: Mon, 9 Dec 2019 14:21:08 +0100 Subject: [PATCH 7/7] Fixed GLM_EXT_matrix_clip_space warnings #980 --- glm/ext/matrix_clip_space.inl | 121 ++++++++++++++++++++-------------- readme.md | 1 + 2 files changed, 72 insertions(+), 50 deletions(-) diff --git a/glm/ext/matrix_clip_space.inl b/glm/ext/matrix_clip_space.inl index baf68cca..1b4c7708 100644 --- a/glm/ext/matrix_clip_space.inl +++ b/glm/ext/matrix_clip_space.inl @@ -67,51 +67,56 @@ namespace glm template GLM_FUNC_QUALIFIER mat<4, 4, T, defaultp> orthoZO(T left, T right, T bottom, T top, T zNear, T zFar) { - if(GLM_CONFIG_CLIP_CONTROL & GLM_CLIP_CONTROL_LH_BIT) +# if GLM_CONFIG_CLIP_CONTROL & GLM_CLIP_CONTROL_LH_BIT return orthoLH_ZO(left, right, bottom, top, zNear, zFar); - else +# else return orthoRH_ZO(left, right, bottom, top, zNear, zFar); +# endif } template GLM_FUNC_QUALIFIER mat<4, 4, T, defaultp> orthoNO(T left, T right, T bottom, T top, T zNear, T zFar) { - if(GLM_CONFIG_CLIP_CONTROL & GLM_CLIP_CONTROL_LH_BIT) +# if GLM_CONFIG_CLIP_CONTROL & GLM_CLIP_CONTROL_LH_BIT return orthoLH_NO(left, right, bottom, top, zNear, zFar); - else +# else return orthoRH_NO(left, right, bottom, top, zNear, zFar); +# endif } template GLM_FUNC_QUALIFIER mat<4, 4, T, defaultp> orthoLH(T left, T right, T bottom, T top, T zNear, T zFar) { - if(GLM_CONFIG_CLIP_CONTROL & GLM_CLIP_CONTROL_ZO_BIT) +# if GLM_CONFIG_CLIP_CONTROL & GLM_CLIP_CONTROL_ZO_BIT return orthoLH_ZO(left, right, bottom, top, zNear, zFar); - else +# else return orthoLH_NO(left, right, bottom, top, zNear, zFar); +# endif } template GLM_FUNC_QUALIFIER mat<4, 4, T, defaultp> orthoRH(T left, T right, T bottom, T top, T zNear, T zFar) { - if(GLM_CONFIG_CLIP_CONTROL & GLM_CLIP_CONTROL_ZO_BIT) +# if GLM_CONFIG_CLIP_CONTROL & GLM_CLIP_CONTROL_ZO_BIT return orthoRH_ZO(left, right, bottom, top, zNear, zFar); - else +# else return orthoRH_NO(left, right, bottom, top, zNear, zFar); +# endif } template GLM_FUNC_QUALIFIER mat<4, 4, T, defaultp> ortho(T left, T right, T bottom, T top, T zNear, T zFar) { - if(GLM_CONFIG_CLIP_CONTROL == GLM_CLIP_CONTROL_LH_ZO) +# if GLM_CONFIG_CLIP_CONTROL == GLM_CLIP_CONTROL_LH_ZO return orthoLH_ZO(left, right, bottom, top, zNear, zFar); - else if(GLM_CONFIG_CLIP_CONTROL == GLM_CLIP_CONTROL_LH_NO) +# elif GLM_CONFIG_CLIP_CONTROL == GLM_CLIP_CONTROL_LH_NO return orthoLH_NO(left, right, bottom, top, zNear, zFar); - else if(GLM_CONFIG_CLIP_CONTROL == GLM_CLIP_CONTROL_RH_ZO) +# elif GLM_CONFIG_CLIP_CONTROL == GLM_CLIP_CONTROL_RH_ZO return orthoRH_ZO(left, right, bottom, top, zNear, zFar); - else if(GLM_CONFIG_CLIP_CONTROL == GLM_CLIP_CONTROL_RH_NO) +# elif GLM_CONFIG_CLIP_CONTROL == GLM_CLIP_CONTROL_RH_NO return orthoRH_NO(left, right, bottom, top, zNear, zFar); +# endif } template @@ -173,50 +178,55 @@ namespace glm template GLM_FUNC_QUALIFIER mat<4, 4, T, defaultp> frustumZO(T left, T right, T bottom, T top, T nearVal, T farVal) { - if(GLM_CONFIG_CLIP_CONTROL & GLM_CLIP_CONTROL_LH_BIT) +# if GLM_CONFIG_CLIP_CONTROL & GLM_CLIP_CONTROL_LH_BIT return frustumLH_ZO(left, right, bottom, top, nearVal, farVal); - else +# else return frustumRH_ZO(left, right, bottom, top, nearVal, farVal); +# endif } template GLM_FUNC_QUALIFIER mat<4, 4, T, defaultp> frustumNO(T left, T right, T bottom, T top, T nearVal, T farVal) { - if(GLM_CONFIG_CLIP_CONTROL & GLM_CLIP_CONTROL_LH_BIT) +# if GLM_CONFIG_CLIP_CONTROL & GLM_CLIP_CONTROL_LH_BIT return frustumLH_NO(left, right, bottom, top, nearVal, farVal); - else +# else return frustumRH_NO(left, right, bottom, top, nearVal, farVal); +# endif } template GLM_FUNC_QUALIFIER mat<4, 4, T, defaultp> frustumLH(T left, T right, T bottom, T top, T nearVal, T farVal) { - if(GLM_CONFIG_CLIP_CONTROL & GLM_CLIP_CONTROL_ZO_BIT) +# if GLM_CONFIG_CLIP_CONTROL & GLM_CLIP_CONTROL_ZO_BIT return frustumLH_ZO(left, right, bottom, top, nearVal, farVal); - else +# else return frustumLH_NO(left, right, bottom, top, nearVal, farVal); +# endif } template GLM_FUNC_QUALIFIER mat<4, 4, T, defaultp> frustumRH(T left, T right, T bottom, T top, T nearVal, T farVal) { - if(GLM_CONFIG_CLIP_CONTROL & GLM_CLIP_CONTROL_ZO_BIT) +# if GLM_CONFIG_CLIP_CONTROL & GLM_CLIP_CONTROL_ZO_BIT return frustumRH_ZO(left, right, bottom, top, nearVal, farVal); - else +# else return frustumRH_NO(left, right, bottom, top, nearVal, farVal); +# endif } template GLM_FUNC_QUALIFIER mat<4, 4, T, defaultp> frustum(T left, T right, T bottom, T top, T nearVal, T farVal) { - if(GLM_CONFIG_CLIP_CONTROL == GLM_CLIP_CONTROL_LH_ZO) +# if GLM_CONFIG_CLIP_CONTROL == GLM_CLIP_CONTROL_LH_ZO return frustumLH_ZO(left, right, bottom, top, nearVal, farVal); - else if(GLM_CONFIG_CLIP_CONTROL == GLM_CLIP_CONTROL_LH_NO) +# elif GLM_CONFIG_CLIP_CONTROL == GLM_CLIP_CONTROL_LH_NO return frustumLH_NO(left, right, bottom, top, nearVal, farVal); - else if(GLM_CONFIG_CLIP_CONTROL == GLM_CLIP_CONTROL_RH_ZO) +# elif GLM_CONFIG_CLIP_CONTROL == GLM_CLIP_CONTROL_RH_ZO return frustumRH_ZO(left, right, bottom, top, nearVal, farVal); - else if(GLM_CONFIG_CLIP_CONTROL == GLM_CLIP_CONTROL_RH_NO) +# elif GLM_CONFIG_CLIP_CONTROL == GLM_CLIP_CONTROL_RH_NO return frustumRH_NO(left, right, bottom, top, nearVal, farVal); +# endif } template @@ -286,51 +296,56 @@ namespace glm template GLM_FUNC_QUALIFIER mat<4, 4, T, defaultp> perspectiveZO(T fovy, T aspect, T zNear, T zFar) { - if(GLM_CONFIG_CLIP_CONTROL & GLM_CLIP_CONTROL_LH_BIT) +# if GLM_CONFIG_CLIP_CONTROL & GLM_CLIP_CONTROL_LH_BIT return perspectiveLH_ZO(fovy, aspect, zNear, zFar); - else +# else return perspectiveRH_ZO(fovy, aspect, zNear, zFar); +# endif } template GLM_FUNC_QUALIFIER mat<4, 4, T, defaultp> perspectiveNO(T fovy, T aspect, T zNear, T zFar) { - if(GLM_CONFIG_CLIP_CONTROL & GLM_CLIP_CONTROL_LH_BIT) +# if GLM_CONFIG_CLIP_CONTROL & GLM_CLIP_CONTROL_LH_BIT return perspectiveLH_NO(fovy, aspect, zNear, zFar); - else +# else return perspectiveRH_NO(fovy, aspect, zNear, zFar); +# endif } template GLM_FUNC_QUALIFIER mat<4, 4, T, defaultp> perspectiveLH(T fovy, T aspect, T zNear, T zFar) { - if(GLM_CONFIG_CLIP_CONTROL & GLM_CLIP_CONTROL_ZO_BIT) +# if GLM_CONFIG_CLIP_CONTROL & GLM_CLIP_CONTROL_ZO_BIT return perspectiveLH_ZO(fovy, aspect, zNear, zFar); - else +# else return perspectiveLH_NO(fovy, aspect, zNear, zFar); +# endif } template GLM_FUNC_QUALIFIER mat<4, 4, T, defaultp> perspectiveRH(T fovy, T aspect, T zNear, T zFar) { - if(GLM_CONFIG_CLIP_CONTROL & GLM_CLIP_CONTROL_ZO_BIT) +# if GLM_CONFIG_CLIP_CONTROL & GLM_CLIP_CONTROL_ZO_BIT return perspectiveRH_ZO(fovy, aspect, zNear, zFar); - else +# else return perspectiveRH_NO(fovy, aspect, zNear, zFar); +# endif } template GLM_FUNC_QUALIFIER mat<4, 4, T, defaultp> perspective(T fovy, T aspect, T zNear, T zFar) { - GLM_IF_CONSTEXPR(GLM_CONFIG_CLIP_CONTROL == GLM_CLIP_CONTROL_LH_ZO) +# if GLM_CONFIG_CLIP_CONTROL == GLM_CLIP_CONTROL_LH_ZO return perspectiveLH_ZO(fovy, aspect, zNear, zFar); - else GLM_IF_CONSTEXPR(GLM_CONFIG_CLIP_CONTROL == GLM_CLIP_CONTROL_LH_NO) +# elif GLM_CONFIG_CLIP_CONTROL == GLM_CLIP_CONTROL_LH_NO return perspectiveLH_NO(fovy, aspect, zNear, zFar); - else GLM_IF_CONSTEXPR(GLM_CONFIG_CLIP_CONTROL == GLM_CLIP_CONTROL_RH_ZO) +# elif GLM_CONFIG_CLIP_CONTROL == GLM_CLIP_CONTROL_RH_ZO return perspectiveRH_ZO(fovy, aspect, zNear, zFar); - else GLM_IF_CONSTEXPR(GLM_CONFIG_CLIP_CONTROL == GLM_CLIP_CONTROL_RH_NO) +# elif GLM_CONFIG_CLIP_CONTROL == GLM_CLIP_CONTROL_RH_NO return perspectiveRH_NO(fovy, aspect, zNear, zFar); +# endif } template @@ -416,50 +431,55 @@ namespace glm template GLM_FUNC_QUALIFIER mat<4, 4, T, defaultp> perspectiveFovZO(T fov, T width, T height, T zNear, T zFar) { - if(GLM_CONFIG_CLIP_CONTROL & GLM_CLIP_CONTROL_LH_BIT) +# if GLM_CONFIG_CLIP_CONTROL & GLM_CLIP_CONTROL_LH_BIT return perspectiveFovLH_ZO(fov, width, height, zNear, zFar); - else +# else return perspectiveFovRH_ZO(fov, width, height, zNear, zFar); +# endif } template GLM_FUNC_QUALIFIER mat<4, 4, T, defaultp> perspectiveFovNO(T fov, T width, T height, T zNear, T zFar) { - if(GLM_CONFIG_CLIP_CONTROL & GLM_CLIP_CONTROL_LH_BIT) +# if GLM_CONFIG_CLIP_CONTROL & GLM_CLIP_CONTROL_LH_BIT return perspectiveFovLH_NO(fov, width, height, zNear, zFar); - else +# else return perspectiveFovRH_NO(fov, width, height, zNear, zFar); +# endif } template GLM_FUNC_QUALIFIER mat<4, 4, T, defaultp> perspectiveFovLH(T fov, T width, T height, T zNear, T zFar) { - if(GLM_CONFIG_CLIP_CONTROL & GLM_CLIP_CONTROL_ZO_BIT) +# if GLM_CONFIG_CLIP_CONTROL & GLM_CLIP_CONTROL_ZO_BIT return perspectiveFovLH_ZO(fov, width, height, zNear, zFar); - else +# else return perspectiveFovLH_NO(fov, width, height, zNear, zFar); +# endif } template GLM_FUNC_QUALIFIER mat<4, 4, T, defaultp> perspectiveFovRH(T fov, T width, T height, T zNear, T zFar) { - if(GLM_CONFIG_CLIP_CONTROL & GLM_CLIP_CONTROL_ZO_BIT) +# if GLM_CONFIG_CLIP_CONTROL & GLM_CLIP_CONTROL_ZO_BIT return perspectiveFovRH_ZO(fov, width, height, zNear, zFar); - else +# else return perspectiveFovRH_NO(fov, width, height, zNear, zFar); +# endif } template GLM_FUNC_QUALIFIER mat<4, 4, T, defaultp> perspectiveFov(T fov, T width, T height, T zNear, T zFar) { - if(GLM_CONFIG_CLIP_CONTROL == GLM_CLIP_CONTROL_LH_ZO) +# if GLM_CONFIG_CLIP_CONTROL == GLM_CLIP_CONTROL_LH_ZO return perspectiveFovLH_ZO(fov, width, height, zNear, zFar); - else if(GLM_CONFIG_CLIP_CONTROL == GLM_CLIP_CONTROL_LH_NO) + elif GLM_CONFIG_CLIP_CONTROL == GLM_CLIP_CONTROL_LH_NO return perspectiveFovLH_NO(fov, width, height, zNear, zFar); - else if(GLM_CONFIG_CLIP_CONTROL == GLM_CLIP_CONTROL_RH_ZO) + elif GLM_CONFIG_CLIP_CONTROL == GLM_CLIP_CONTROL_RH_ZO return perspectiveFovRH_ZO(fov, width, height, zNear, zFar); - else if(GLM_CONFIG_CLIP_CONTROL == GLM_CLIP_CONTROL_RH_NO) + elif GLM_CONFIG_CLIP_CONTROL == GLM_CLIP_CONTROL_RH_NO return perspectiveFovRH_NO(fov, width, height, zNear, zFar); +# endif } template @@ -501,10 +521,11 @@ namespace glm template GLM_FUNC_QUALIFIER mat<4, 4, T, defaultp> infinitePerspective(T fovy, T aspect, T zNear) { - if(GLM_CONFIG_CLIP_CONTROL & GLM_CLIP_CONTROL_LH_BIT) +# if GLM_CONFIG_CLIP_CONTROL & GLM_CLIP_CONTROL_LH_BIT return infinitePerspectiveLH(fovy, aspect, zNear); - else +# else return infinitePerspectiveRH(fovy, aspect, zNear); +# endif } // Infinite projection matrix: http://www.terathon.com/gdc07_lengyel.pdf diff --git a/readme.md b/readme.md index 547602ef..bae80988 100644 --- a/readme.md +++ b/readme.md @@ -64,6 +64,7 @@ glm::mat4 camera(float Translate, glm::vec2 const& Rotate) - Fixed equal ULP variation when using negative sign #965 - Fixed for intersection ray/plane and added related tests #953 - Fixed ARM 64bit detection #949 +- Fixed GLM_EXT_matrix_clip_space warnings #980 ### [GLM 0.9.9.6](https://github.com/g-truc/glm/releases/tag/0.9.9.6) - 2019-09-08 #### Features: