From d8d954fca044e67e0982cecc8736fca53aff499e Mon Sep 17 00:00:00 2001 From: Christophe Riccio Date: Sun, 6 Sep 2015 22:02:54 +0200 Subject: [PATCH] - Fixed builtin bitscan always used even when GLM_FORCE_PURE requested #392 --- glm/detail/setup.hpp | 230 +++++++++++++++++++++---------------------- readme.md | 1 + 2 files changed, 116 insertions(+), 115 deletions(-) diff --git a/glm/detail/setup.hpp b/glm/detail/setup.hpp index 830eb700..605fe8e1 100644 --- a/glm/detail/setup.hpp +++ b/glm/detail/setup.hpp @@ -366,6 +366,121 @@ # endif//GLM_MODEL #endif//GLM_MESSAGE +/////////////////////////////////////////////////////////////////////////////////// +// Platform + +// User defines: GLM_FORCE_PURE GLM_FORCE_SSE2 GLM_FORCE_SSE3 GLM_FORCE_AVX GLM_FORCE_AVX2 + +#define GLM_ARCH_PURE 0x0000 +#define GLM_ARCH_ARM 0x0001 +#define GLM_ARCH_X86 0x0002 +#define GLM_ARCH_SSE2 0x0004 +#define GLM_ARCH_SSE3 0x0008 +#define GLM_ARCH_SSE4 0x0010 +#define GLM_ARCH_AVX 0x0020 +#define GLM_ARCH_AVX2 0x0040 + +#if defined(GLM_FORCE_PURE) +# define GLM_ARCH GLM_ARCH_PURE +#elif defined(GLM_FORCE_AVX2) +# define GLM_ARCH (GLM_ARCH_AVX2 | GLM_ARCH_AVX | GLM_ARCH_SSE4 | GLM_ARCH_SSE3 | GLM_ARCH_SSE2) +#elif defined(GLM_FORCE_AVX) +# define GLM_ARCH (GLM_ARCH_AVX | GLM_ARCH_SSE4 | GLM_ARCH_SSE3 | GLM_ARCH_SSE2) +#elif defined(GLM_FORCE_SSE4) +# define GLM_ARCH (GLM_ARCH_SSE4 | GLM_ARCH_SSE3 | GLM_ARCH_SSE2) +#elif defined(GLM_FORCE_SSE3) +# define GLM_ARCH (GLM_ARCH_SSE3 | GLM_ARCH_SSE2) +#elif defined(GLM_FORCE_SSE2) +# define GLM_ARCH (GLM_ARCH_SSE2) +#elif (GLM_COMPILER & (GLM_COMPILER_APPLE_CLANG | GLM_COMPILER_LLVM | GLM_COMPILER_GCC)) || ((GLM_COMPILER & GLM_COMPILER_INTEL) && (GLM_PLATFORM & GLM_PLATFORM_LINUX)) +# if(__AVX2__) +# define GLM_ARCH (GLM_ARCH_AVX2 | GLM_ARCH_AVX | GLM_ARCH_SSE3 | GLM_ARCH_SSE2) +# elif(__AVX__) +# define GLM_ARCH (GLM_ARCH_AVX | GLM_ARCH_SSE3 | GLM_ARCH_SSE2) +# elif(__SSE3__) +# define GLM_ARCH (GLM_ARCH_SSE3 | GLM_ARCH_SSE2) +# elif(__SSE2__) +# define GLM_ARCH (GLM_ARCH_SSE2) +# else +# define GLM_ARCH GLM_ARCH_PURE +# endif +#elif (GLM_COMPILER & GLM_COMPILER_VC) || ((GLM_COMPILER & GLM_COMPILER_INTEL) && (GLM_PLATFORM & GLM_PLATFORM_WINDOWS)) +# if defined(_M_ARM_FP) +# define GLM_ARCH (GLM_ARCH_ARM) +# elif defined(__AVX2__) +# define GLM_ARCH (GLM_ARCH_AVX2 | GLM_ARCH_AVX | GLM_ARCH_SSE4 | GLM_ARCH_SSE3 | GLM_ARCH_SSE2) +# elif defined(__AVX__) +# define GLM_ARCH (GLM_ARCH_AVX | GLM_ARCH_SSE4 | GLM_ARCH_SSE3 | GLM_ARCH_SSE2) +# elif _M_IX86_FP == 2 +# define GLM_ARCH (GLM_ARCH_SSE2) +# else +# define GLM_ARCH (GLM_ARCH_PURE) +# endif +#elif (GLM_COMPILER & GLM_COMPILER_GCC) && (defined(__i386__) || defined(__x86_64__)) +# if defined(__AVX2__) +# define GLM_ARCH (GLM_ARCH_AVX2 | GLM_ARCH_AVX | GLM_ARCH_SSE4 | GLM_ARCH_SSE3 | GLM_ARCH_SSE2) +# elif defined(__AVX__) +# define GLM_ARCH (GLM_ARCH_AVX | GLM_ARCH_SSE4 | GLM_ARCH_SSE3 | GLM_ARCH_SSE2) +# elif defined(__SSE4_1__ ) +# define GLM_ARCH (GLM_ARCH_SSE4 | GLM_ARCH_SSE3 | GLM_ARCH_SSE2) +# elif defined(__SSE3__) +# define GLM_ARCH (GLM_ARCH_SSE3 | GLM_ARCH_SSE2) +# elif defined(__SSE2__) +# define GLM_ARCH (GLM_ARCH_SSE2) +# else +# define GLM_ARCH (GLM_ARCH_PURE) +# endif +#else +# define GLM_ARCH GLM_ARCH_PURE +#endif + +// With MinGW-W64, including intrinsic headers before intrin.h will produce some errors. The problem is +// that windows.h (and maybe other headers) will silently include intrin.h, which of course causes problems. +// To fix, we just explicitly include intrin.h here. +#if defined(__MINGW64__) && (GLM_ARCH != GLM_ARCH_PURE) +# include +#endif + +#if GLM_ARCH & GLM_ARCH_AVX2 +# include +#endif//GLM_ARCH +#if GLM_ARCH & GLM_ARCH_AVX +# include +#endif//GLM_ARCH +#if GLM_ARCH & GLM_ARCH_SSE4 +# include +#endif//GLM_ARCH +#if GLM_ARCH & GLM_ARCH_SSE3 +# include +#endif//GLM_ARCH +#if GLM_ARCH & GLM_ARCH_SSE2 +# include +# if(GLM_COMPILER == GLM_COMPILER_VC2005) // VC2005 is missing some intrinsics, workaround + inline float _mm_cvtss_f32(__m128 A) { return A.m128_f32[0]; } + inline __m128 _mm_castpd_ps(__m128d PD) { union { __m128 ps; __m128d pd; } c; c.pd = PD; return c.ps; } + inline __m128d _mm_castps_pd(__m128 PS) { union { __m128 ps; __m128d pd; } c; c.ps = PS; return c.pd; } + inline __m128i _mm_castps_si128(__m128 PS) { union { __m128 ps; __m128i pi; } c; c.ps = PS; return c.pi; } + inline __m128 _mm_castsi128_ps(__m128i PI) { union { __m128 ps; __m128i pi; } c; c.pi = PI; return c.ps; } +# endif +#endif//GLM_ARCH + +#if defined(GLM_MESSAGES) && !defined(GLM_MESSAGE_ARCH_DISPLAYED) +# define GLM_MESSAGE_ARCH_DISPLAYED +# if(GLM_ARCH == GLM_ARCH_PURE) +# pragma message("GLM: Platform independent code") +# elif(GLM_ARCH & GLM_ARCH_ARM) +# pragma message("GLM: ARM instruction set") +# elif(GLM_ARCH & GLM_ARCH_AVX2) +# pragma message("GLM: AVX2 instruction set") +# elif(GLM_ARCH & GLM_ARCH_AVX) +# pragma message("GLM: AVX instruction set") +# elif(GLM_ARCH & GLM_ARCH_SSE3) +# pragma message("GLM: SSE3 instruction set") +# elif(GLM_ARCH & GLM_ARCH_SSE2) +# pragma message("GLM: SSE2 instruction set") +# endif//GLM_ARCH +#endif//GLM_MESSAGE + /////////////////////////////////////////////////////////////////////////////////// // C++ Version @@ -719,121 +834,6 @@ // Not standard #define GLM_HAS_ANONYMOUS_UNION (GLM_LANG & GLM_LANG_CXXMS_FLAG) -/////////////////////////////////////////////////////////////////////////////////// -// Platform - -// User defines: GLM_FORCE_PURE GLM_FORCE_SSE2 GLM_FORCE_SSE3 GLM_FORCE_AVX GLM_FORCE_AVX2 - -#define GLM_ARCH_PURE 0x0000 -#define GLM_ARCH_ARM 0x0001 -#define GLM_ARCH_X86 0x0002 -#define GLM_ARCH_SSE2 0x0004 -#define GLM_ARCH_SSE3 0x0008 -#define GLM_ARCH_SSE4 0x0010 -#define GLM_ARCH_AVX 0x0020 -#define GLM_ARCH_AVX2 0x0040 - -#if defined(GLM_FORCE_PURE) -# define GLM_ARCH GLM_ARCH_PURE -#elif defined(GLM_FORCE_AVX2) -# define GLM_ARCH (GLM_ARCH_AVX2 | GLM_ARCH_AVX | GLM_ARCH_SSE4 | GLM_ARCH_SSE3 | GLM_ARCH_SSE2) -#elif defined(GLM_FORCE_AVX) -# define GLM_ARCH (GLM_ARCH_AVX | GLM_ARCH_SSE4 | GLM_ARCH_SSE3 | GLM_ARCH_SSE2) -#elif defined(GLM_FORCE_SSE4) -# define GLM_ARCH (GLM_ARCH_SSE4 | GLM_ARCH_SSE3 | GLM_ARCH_SSE2) -#elif defined(GLM_FORCE_SSE3) -# define GLM_ARCH (GLM_ARCH_SSE3 | GLM_ARCH_SSE2) -#elif defined(GLM_FORCE_SSE2) -# define GLM_ARCH (GLM_ARCH_SSE2) -#elif (GLM_COMPILER & (GLM_COMPILER_APPLE_CLANG | GLM_COMPILER_LLVM | GLM_COMPILER_GCC)) || ((GLM_COMPILER & GLM_COMPILER_INTEL) && (GLM_PLATFORM & GLM_PLATFORM_LINUX)) -# if(__AVX2__) -# define GLM_ARCH (GLM_ARCH_AVX2 | GLM_ARCH_AVX | GLM_ARCH_SSE3 | GLM_ARCH_SSE2) -# elif(__AVX__) -# define GLM_ARCH (GLM_ARCH_AVX | GLM_ARCH_SSE3 | GLM_ARCH_SSE2) -# elif(__SSE3__) -# define GLM_ARCH (GLM_ARCH_SSE3 | GLM_ARCH_SSE2) -# elif(__SSE2__) -# define GLM_ARCH (GLM_ARCH_SSE2) -# else -# define GLM_ARCH GLM_ARCH_PURE -# endif -#elif (GLM_COMPILER & GLM_COMPILER_VC) || ((GLM_COMPILER & GLM_COMPILER_INTEL) && (GLM_PLATFORM & GLM_PLATFORM_WINDOWS)) -# if defined(_M_ARM_FP) -# define GLM_ARCH (GLM_ARCH_ARM) -# elif defined(__AVX2__) -# define GLM_ARCH (GLM_ARCH_AVX2 | GLM_ARCH_AVX | GLM_ARCH_SSE4 | GLM_ARCH_SSE3 | GLM_ARCH_SSE2) -# elif defined(__AVX__) -# define GLM_ARCH (GLM_ARCH_AVX | GLM_ARCH_SSE4 | GLM_ARCH_SSE3 | GLM_ARCH_SSE2) -# elif _M_IX86_FP == 2 -# define GLM_ARCH (GLM_ARCH_SSE2) -# else -# define GLM_ARCH (GLM_ARCH_PURE) -# endif -#elif (GLM_COMPILER & GLM_COMPILER_GCC) && (defined(__i386__) || defined(__x86_64__)) -# if defined(__AVX2__) -# define GLM_ARCH (GLM_ARCH_AVX2 | GLM_ARCH_AVX | GLM_ARCH_SSE4 | GLM_ARCH_SSE3 | GLM_ARCH_SSE2) -# elif defined(__AVX__) -# define GLM_ARCH (GLM_ARCH_AVX | GLM_ARCH_SSE4 | GLM_ARCH_SSE3 | GLM_ARCH_SSE2) -# elif defined(__SSE4_1__ ) -# define GLM_ARCH (GLM_ARCH_SSE4 | GLM_ARCH_SSE3 | GLM_ARCH_SSE2) -# elif defined(__SSE3__) -# define GLM_ARCH (GLM_ARCH_SSE3 | GLM_ARCH_SSE2) -# elif defined(__SSE2__) -# define GLM_ARCH (GLM_ARCH_SSE2) -# else -# define GLM_ARCH (GLM_ARCH_PURE) -# endif -#else -# define GLM_ARCH GLM_ARCH_PURE -#endif - -// With MinGW-W64, including intrinsic headers before intrin.h will produce some errors. The problem is -// that windows.h (and maybe other headers) will silently include intrin.h, which of course causes problems. -// To fix, we just explicitly include intrin.h here. -#if defined(__MINGW64__) && (GLM_ARCH != GLM_ARCH_PURE) -# include -#endif - -#if GLM_ARCH & GLM_ARCH_AVX2 -# include -#endif//GLM_ARCH -#if GLM_ARCH & GLM_ARCH_AVX -# include -#endif//GLM_ARCH -#if GLM_ARCH & GLM_ARCH_SSE4 -# include -#endif//GLM_ARCH -#if GLM_ARCH & GLM_ARCH_SSE3 -# include -#endif//GLM_ARCH -#if GLM_ARCH & GLM_ARCH_SSE2 -# include -# if(GLM_COMPILER == GLM_COMPILER_VC2005) // VC2005 is missing some intrinsics, workaround - inline float _mm_cvtss_f32(__m128 A) { return A.m128_f32[0]; } - inline __m128 _mm_castpd_ps(__m128d PD) { union { __m128 ps; __m128d pd; } c; c.pd = PD; return c.ps; } - inline __m128d _mm_castps_pd(__m128 PS) { union { __m128 ps; __m128d pd; } c; c.ps = PS; return c.pd; } - inline __m128i _mm_castps_si128(__m128 PS) { union { __m128 ps; __m128i pi; } c; c.ps = PS; return c.pi; } - inline __m128 _mm_castsi128_ps(__m128i PI) { union { __m128 ps; __m128i pi; } c; c.pi = PI; return c.ps; } -# endif -#endif//GLM_ARCH - -#if defined(GLM_MESSAGES) && !defined(GLM_MESSAGE_ARCH_DISPLAYED) -# define GLM_MESSAGE_ARCH_DISPLAYED -# if(GLM_ARCH == GLM_ARCH_PURE) -# pragma message("GLM: Platform independent code") -# elif(GLM_ARCH & GLM_ARCH_ARM) -# pragma message("GLM: ARM instruction set") -# elif(GLM_ARCH & GLM_ARCH_AVX2) -# pragma message("GLM: AVX2 instruction set") -# elif(GLM_ARCH & GLM_ARCH_AVX) -# pragma message("GLM: AVX instruction set") -# elif(GLM_ARCH & GLM_ARCH_SSE3) -# pragma message("GLM: SSE3 instruction set") -# elif(GLM_ARCH & GLM_ARCH_SSE2) -# pragma message("GLM: SSE2 instruction set") -# endif//GLM_ARCH -#endif//GLM_MESSAGE - /////////////////////////////////////////////////////////////////////////////////// // Static assert diff --git a/readme.md b/readme.md index 0162e3cc..194f0e6a 100644 --- a/readme.md +++ b/readme.md @@ -62,6 +62,7 @@ glm::mat4 camera(float Translate, glm::vec2 const & Rotate) - Fixed non-identity quaternions for equal vectors #234 - Fixed excessive GTX_fast_trigonometry execution time #396 - Fixed Visual Studio 2015 'hides class member' warnings #394 +- Fixed builtin bitscan always used even when GLM_FORCE_PURE requested #392 #### [GLM 0.9.7.0](https://github.com/g-truc/glm/releases/tag/0.9.7.0) - 2015-08-02 ##### Features: