From 7e81213fdddf4df6e8617cb26288dda7820698ff Mon Sep 17 00:00:00 2001 From: Christophe Riccio Date: Sat, 29 Nov 2014 18:47:58 +0100 Subject: [PATCH] Fixed mod function specialization #281 Fixed bitscan detection --- glm/detail/func_common.inl | 16 ++++++--- glm/detail/func_integer.inl | 8 ++--- glm/detail/setup.hpp | 9 +++-- glm/gtc/integer.hpp | 40 +++++++++++++++++++-- glm/gtc/integer.inl | 35 ++++++++++-------- test/core/core_func_common.cpp | 35 ++++++++++++++++++ test/core/core_func_integer.cpp | 55 +++++++++++++++++++++++----- test/gtc/gtc_integer.cpp | 63 +++++++++++++++++++++++++++++++-- 8 files changed, 225 insertions(+), 36 deletions(-) diff --git a/glm/detail/func_common.inl b/glm/detail/func_common.inl index def8b26a..cd04085e 100644 --- a/glm/detail/func_common.inl +++ b/glm/detail/func_common.inl @@ -162,6 +162,16 @@ namespace detail return (x >> Shift) | y; } }; + + template class vecType, typename genType, bool isFloat = true> + struct compute_mod + { + GLM_FUNC_QUALIFIER static vecType call(vecType const & a, genType const & b) + { + GLM_STATIC_ASSERT(std::numeric_limits::is_iec559, "'mod' only accept floating-point inputs. 
Include for integer inputs."); + return a - b * floor(a / b); + } + }; }//namespace detail // abs @@ -334,15 +344,13 @@ namespace detail template class vecType> GLM_FUNC_QUALIFIER vecType mod(vecType const & x, T y) { - GLM_STATIC_ASSERT(std::numeric_limits::is_iec559, "'mod' only accept floating-point inputs"); - return x - y * floor(x / y); + return detail::compute_mod::is_iec559>::call(x, y); } template class vecType> GLM_FUNC_QUALIFIER vecType mod(vecType const & x, vecType const & y) { - GLM_STATIC_ASSERT(std::numeric_limits::is_iec559, "'mod' only accept floating-point inputs"); - return x - y * floor(x / y); + return detail::compute_mod, std::numeric_limits::is_iec559>::call(x, y); } // modf diff --git a/glm/detail/func_integer.inl b/glm/detail/func_integer.inl index a4f6847e..a57db01a 100644 --- a/glm/detail/func_integer.inl +++ b/glm/detail/func_integer.inl @@ -104,7 +104,7 @@ namespace detail } }; -# if(GLM_ARCH != GLM_ARCH_PURE) && ((GLM_COMPILER & GLM_COMPILER_VC) || ((GLM_COMPILER & (GLM_COMPILER_LLVM | GLM_COMPILER_INTEL)) && (GLM_PLATFORM & GLM_PLATFORM_WINDOWS))) +# if GLM_HAS_BITSCAN_WINDOWS template struct compute_findLSB { @@ -126,7 +126,7 @@ namespace detail return IsNotNull ? 
int(Result) : -1; } }; -# endif//GLM_ARCH != GLM_ARCH_PURE +# endif//GLM_HAS_BITSCAN_WINDOWS template class vecType, bool EXEC = true> struct compute_findMSB_step_vec @@ -162,7 +162,7 @@ namespace detail } }; -# if(GLM_ARCH != GLM_ARCH_PURE) && ((GLM_COMPILER & GLM_COMPILER_VC) || ((GLM_COMPILER & (GLM_COMPILER_LLVM | GLM_COMPILER_INTEL)) && (GLM_PLATFORM & GLM_PLATFORM_WINDOWS))) +# if GLM_HAS_BITSCAN_WINDOWS template GLM_FUNC_QUALIFIER int compute_findMSB_32(genIUType Value) { @@ -196,7 +196,7 @@ namespace detail return detail::functor1::call(compute_findMSB_64, x); } }; -# endif//GLM_ARCH != GLM_ARCH_PURE +# endif//GLM_HAS_BITSCAN_WINDOWS }//namespace detail // uaddCarry diff --git a/glm/detail/setup.hpp b/glm/detail/setup.hpp index 5c905e42..8a856260 100644 --- a/glm/detail/setup.hpp +++ b/glm/detail/setup.hpp @@ -525,14 +525,19 @@ (GLM_LANG & GLM_LANG_CXX11_FLAG) || \ ((GLM_LANG & GLM_LANG_CXX0X_FLAG) && (GLM_COMPILER & GLM_COMPILER_GCC) && (GLM_COMPILER >= GLM_COMPILER_GCC49))) -#define GLM_HAS_TRIVIAL_QUERIES (\ +#define GLM_HAS_TRIVIAL_QUERIES ( \ ((GLM_LANG & GLM_LANG_CXX11_FLAG) && !(GLM_COMPILER & GLM_COMPILER_GCC)) || \ ((GLM_COMPILER & GLM_COMPILER_VC) && (GLM_COMPILER >= GLM_COMPILER_VC2013))) -#define GLM_HAS_MAKE_SIGNED (\ +#define GLM_HAS_MAKE_SIGNED ( \ (GLM_LANG & GLM_LANG_CXX11_FLAG) || \ ((GLM_COMPILER & GLM_COMPILER_VC) && (GLM_COMPILER >= GLM_COMPILER_VC2013))) +#define GLM_HAS_BITSCAN_WINDOWS ( \ + (GLM_ARCH != GLM_ARCH_PURE) && \ + (GLM_PLATFORM & GLM_PLATFORM_WINDOWS) && \ + (GLM_COMPILER & (GLM_COMPILER_VC | GLM_COMPILER_LLVM | GLM_COMPILER_INTEL))) + // OpenMP #ifdef _OPENMP # if GLM_COMPILER & GLM_COMPILER_GCC diff --git a/glm/gtc/integer.hpp b/glm/gtc/integer.hpp index b3dda8ab..edcbb9b8 100644 --- a/glm/gtc/integer.hpp +++ b/glm/gtc/integer.hpp @@ -45,6 +45,7 @@ // Dependencies #include "../detail/setup.hpp" #include "../detail/precision.hpp" +#include "../detail/func_common.hpp" #include "../detail/func_integer.hpp" #include 
"../detail/func_exponential.hpp" #include @@ -58,11 +59,46 @@ namespace glm /// @addtogroup gtc_integer /// @{ - /// Returns the log2 of x. Can be reliably using to compute mipmap count from the texture size. - /// From GLM_GTC_integer extension. + /// Returns the log2 of x for integer values. Can be reliably used to compute the mipmap count from the texture size. + /// @see gtc_integer template GLM_FUNC_DECL genIUType log2(genIUType x); + /// Modulus. Returns x % y + /// for each component in x using the integer value y. + /// + /// @tparam genIUType Integer scalar or vector types. + /// + /// @see gtc_integer + /// @see GLSL mod man page + /// @see GLSL 4.20.8 specification, section 8.3 Common Functions + template + GLM_FUNC_DECL genIUType mod(genIUType x, genIUType y); + + /// Modulus. Returns x % y + /// for each component in x using the integer value y. + /// + /// @tparam T Integer scalar types. + /// @tparam vecType vector types. + /// + /// @see gtc_integer + /// @see GLSL mod man page + /// @see GLSL 4.20.8 specification, section 8.3 Common Functions + template class vecType> + GLM_FUNC_DECL vecType mod(vecType const & x, T y); + + /// Modulus. Returns x % y + /// for each component in x using the corresponding integer component of y. + /// + /// @tparam T Integer scalar types. + /// @tparam vecType vector types. 
+ /// + /// @see gtc_integer + /// @see GLSL mod man page + /// @see GLSL 4.20.8 specification, section 8.3 Common Functions + template class vecType> + GLM_FUNC_DECL vecType mod(vecType const & x, vecType const & y); + /// @} } //namespace glm diff --git a/glm/gtc/integer.inl b/glm/gtc/integer.inl index 9e3c24be..5a23f0b2 100644 --- a/glm/gtc/integer.inl +++ b/glm/gtc/integer.inl @@ -44,24 +44,31 @@ namespace detail } }; -# if(GLM_ARCH != GLM_ARCH_PURE) && (GLM_COMPILER & (GLM_COMPILER_VC | GLM_COMPILER_APPLE_CLANG | GLM_COMPILER_LLVM)) - - template - struct compute_log2 - { - GLM_FUNC_QUALIFIER static tvec4 call(tvec4 const & vec) +# if GLM_HAS_BITSCAN_WINDOWS + template + struct compute_log2 { - tvec4 Result(glm::uninitialize); + GLM_FUNC_QUALIFIER static tvec4 call(tvec4 const & vec) + { + tvec4 Result(glm::uninitialize); - _BitScanReverse(reinterpret_cast(&Result.x), vec.x); - _BitScanReverse(reinterpret_cast(&Result.y), vec.y); - _BitScanReverse(reinterpret_cast(&Result.z), vec.z); - _BitScanReverse(reinterpret_cast(&Result.w), vec.w); + _BitScanReverse(reinterpret_cast(&Result.x), vec.x); + _BitScanReverse(reinterpret_cast(&Result.y), vec.y); + _BitScanReverse(reinterpret_cast(&Result.z), vec.z); + _BitScanReverse(reinterpret_cast(&Result.w), vec.w); - return Result; + return Result; + } + }; +# endif//GLM_HAS_BITSCAN_WINDOWS + + template class vecType, typename genType> + struct compute_mod + { + GLM_FUNC_QUALIFIER static vecType call(vecType const & a, genType const & b) + { + return a % b; } }; - -# endif//GLM_ARCH != GLM_ARCH_PURE }//namespace detail }//namespace glm diff --git a/test/core/core_func_common.cpp b/test/core/core_func_common.cpp index 349b73fa..5bef95d7 100644 --- a/test/core/core_func_common.cpp +++ b/test/core/core_func_common.cpp @@ -162,6 +162,40 @@ namespace modf_ } }//namespace modf +namespace mod_ +{ + int test() + { + int Error(0); + + { + float A(3.0); + float B(2.0f); + float C = glm::mod(A, B); + + Error += glm::abs(C - 1.0f) < 
0.00001f ? 0 : 1; + } + + { + glm::vec4 A(3.0); + float B(2.0f); + glm::vec4 C = glm::mod(A, B); + + Error += glm::all(glm::epsilonEqual(C, glm::vec4(1.0f), 0.00001f)) ? 0 : 1; + } + + { + glm::vec4 A(3.0); + glm::vec4 B(2.0f); + glm::vec4 C = glm::mod(A, B); + + Error += glm::all(glm::epsilonEqual(C, glm::vec4(1.0f), 0.00001f)) ? 0 : 1; + } + + return Error; + } +}//namespace mod_ + namespace floatBitsToInt { int test() @@ -1109,6 +1143,7 @@ int main() Error += sign::test(); Error += floor_::test(); + Error += mod_::test(); Error += modf_::test(); Error += floatBitsToInt::test(); Error += floatBitsToUint::test(); diff --git a/test/core/core_func_integer.cpp b/test/core/core_func_integer.cpp index eac947f9..739d3347 100644 --- a/test/core/core_func_integer.cpp +++ b/test/core/core_func_integer.cpp @@ -578,6 +578,7 @@ namespace findMSB genType Return; }; +# if GLM_HAS_BITSCAN_WINDOWS template GLM_FUNC_QUALIFIER int findMSB_intrinsic(genIUType Value) { @@ -590,6 +591,20 @@ namespace findMSB _BitScanReverse(&Result, Value); return int(Result); } +# endif//GLM_HAS_BITSCAN_WINDOWS + +# if GLM_ARCH & GLM_ARCH_AVX + template + GLM_FUNC_QUALIFIER int findMSB_avx(genIUType Value) + { + GLM_STATIC_ASSERT(std::numeric_limits::is_integer, "'findMSB' only accept integer values"); + + if(Value == 0) + return -1; + + return int(_tzcnt_u32(Value)); + } +# endif template GLM_FUNC_QUALIFIER int findMSB_095(genIUType Value) @@ -698,7 +713,7 @@ namespace findMSB }; int Error(0); - std::size_t const Count(1000000); + std::size_t const Count(10000000); std::clock_t Timestamps0 = std::clock(); @@ -738,12 +753,14 @@ namespace findMSB std::clock_t Timestamps4 = std::clock(); - for(std::size_t k = 0; k < Count; ++k) - for(std::size_t i = 0; i < sizeof(Data) / sizeof(type); ++i) - { - int Result = findMSB_intrinsic(Data[i].Value); - Error += Data[i].Return == Result ? 
0 : 1; - } +# if GLM_HAS_BITSCAN_WINDOWS + for(std::size_t k = 0; k < Count; ++k) + for(std::size_t i = 0; i < sizeof(Data) / sizeof(type); ++i) + { + int Result = findMSB_intrinsic(Data[i].Value); + Error += Data[i].Return == Result ? 0 : 1; + } +# endif//GLM_HAS_BITSCAN_WINDOWS std::clock_t Timestamps5 = std::clock(); @@ -756,13 +773,31 @@ namespace findMSB std::clock_t Timestamps6 = std::clock(); +# if GLM_ARCH & GLM_ARCH_AVX + for(std::size_t k = 0; k < Count; ++k) + for(std::size_t i = 0; i < sizeof(Data) / sizeof(type); ++i) + { + int Result = findMSB_avx(Data[i].Value); + Error += Data[i].Return == Result ? 0 : 1; + } +# endif + + std::clock_t Timestamps7 = std::clock(); + std::printf("glm::findMSB: %d clocks\n", static_cast(Timestamps1 - Timestamps0)); std::printf("findMSB - nlz1: %d clocks\n", static_cast(Timestamps2 - Timestamps1)); std::printf("findMSB - nlz2: %d clocks\n", static_cast(Timestamps3 - Timestamps2)); std::printf("findMSB - 0.9.5: %d clocks\n", static_cast(Timestamps4 - Timestamps3)); - std::printf("findMSB - intrinsics: %d clocks\n", static_cast(Timestamps5 - Timestamps4)); + +# if GLM_HAS_BITSCAN_WINDOWS + std::printf("findMSB - intrinsics: %d clocks\n", static_cast(Timestamps5 - Timestamps4)); +# endif//GLM_HAS_BITSCAN_WINDOWS std::printf("findMSB - pop: %d clocks\n", static_cast(Timestamps6 - Timestamps5)); +# if GLM_ARCH & GLM_ARCH_AVX + std::printf("findMSB - avx tzcnt: %d clocks\n", static_cast(Timestamps7 - Timestamps6)); +# endif + return Error; } @@ -888,6 +923,8 @@ namespace findMSB for(std::size_t i = 0; i < sizeof(Data) / sizeof(type); ++i) { int Result0 = findMSB_intrinsic(Data[i].Value); + //unsigned int A = _lzcnt_u32(Data[i].Value); + //unsigned int B = _tzcnt_u32(Data[i].Value); Error += Data[i].Return == Result0 ? 
0 : 1; } @@ -1527,6 +1564,8 @@ int main() Error += ::bitfieldInsert::test(); Error += ::bitfieldExtract::test(); + Error += ::findMSB::perf(); + # ifdef GLM_TEST_ENABLE_PERF Error += ::bitCount::perf(); Error += ::bitfieldReverse::perf(); diff --git a/test/gtc/gtc_integer.cpp b/test/gtc/gtc_integer.cpp index 9fc1aa61..f671c17f 100644 --- a/test/gtc/gtc_integer.cpp +++ b/test/gtc/gtc_integer.cpp @@ -30,6 +30,7 @@ /////////////////////////////////////////////////////////////////////////////////// #define GLM_FORCE_INLINE +#include #include #include #include @@ -102,7 +103,7 @@ namespace log2_ printf("glm::log2: %d clocks\n", End - Begin); } -# if(GLM_ARCH != GLM_ARCH_PURE) && (GLM_COMPILER & (GLM_COMPILER_VC | GLM_COMPILER_APPLE_CLANG | GLM_COMPILER_LLVM)) +# if GLM_HAS_BITSCAN_WINDOWS { std::vector Result; Result.resize(Count); @@ -163,7 +164,7 @@ namespace log2_ printf("glm::log2 reinterpret: %d clocks\n", End - Begin); } -# endif//GLM_ARCH != GLM_ARCH_PURE +# endif//GLM_HAS_BITSCAN_WINDOWS { std::vector Result; @@ -197,12 +198,70 @@ namespace log2_ } }//namespace log2_ +namespace mod_ +{ + int test() + { + int Error(0); + + { + float A(3.0); + float B(2.0f); + float C = glm::mod(A, B); + + Error += glm::abs(C - 1.0f) < 0.00001f ? 0 : 1; + } + + { + glm::vec4 A(3.0); + float B(2.0f); + glm::vec4 C = glm::mod(A, B); + + Error += glm::all(glm::epsilonEqual(C, glm::vec4(1.0f), 0.00001f)) ? 0 : 1; + } + + { + glm::vec4 A(3.0); + glm::vec4 B(2.0f); + glm::vec4 C = glm::mod(A, B); + + Error += glm::all(glm::epsilonEqual(C, glm::vec4(1.0f), 0.00001f)) ? 0 : 1; + } + + { + int A(3); + int B(2); + int C = glm::mod(A, B); + + Error += C == 1 ? 0 : 1; + } + + { + glm::ivec4 A(3); + int B(2); + glm::ivec4 C = glm::mod(A, B); + + Error += glm::all(glm::equal(C, glm::ivec4(1))) ? 0 : 1; + } + + { + glm::ivec4 A(3); + glm::ivec4 B(2); + glm::ivec4 C = glm::mod(A, B); + + Error += glm::all(glm::equal(C, glm::ivec4(1))) ? 
0 : 1; + } + + return Error; + } +}//namespace mod_ int main() { int Error(0); Error += ::log2_::test(); + Error += ::mod_::test(); # ifdef GLM_TEST_ENABLE_PERF Error += ::log2_::perf();