From 377d5a40469d5ed81b903c76319a9a382c039633 Mon Sep 17 00:00:00 2001 From: Christophe Riccio Date: Sun, 2 Nov 2014 23:48:02 +0100 Subject: [PATCH] Optimized bitCount function --- glm/detail/func_integer.hpp | 13 ++- glm/detail/func_integer.inl | 43 ++++++--- glm/detail/type_int.hpp | 104 ++++++++++++++++++++++ readme.txt | 2 +- test/core/core_func_integer.cpp | 80 ++++++++++++----- test/core/core_func_integer_bit_count.cpp | 15 ++-- 6 files changed, 217 insertions(+), 40 deletions(-) diff --git a/glm/detail/func_integer.hpp b/glm/detail/func_integer.hpp index a3f45c3f..55552149 100644 --- a/glm/detail/func_integer.hpp +++ b/glm/detail/func_integer.hpp @@ -157,7 +157,18 @@ namespace glm /// @see GLSL 4.20.8 specification, section 8.8 Integer Functions template class vecType> GLM_FUNC_DECL vecType bitfieldReverse(vecType const & v); - + + /// Returns the number of bits set to 1 in the binary representation of value. + /// + /// @tparam T Signed or unsigned integer scalar or vector types. + /// + /// @see GLSL bitCount man page + /// @see GLSL 4.20.8 specification, section 8.8 Integer Functions + /// + /// @todo Clarify the declaration to specify that scalars are supported. + template + GLM_FUNC_DECL int bitCount(genType v); + /// Returns the number of bits set to 1 in the binary representation of value. /// /// @tparam T Signed or unsigned integer scalar or vector types. 
diff --git a/glm/detail/func_integer.inl b/glm/detail/func_integer.inl index 13908e8b..e76b2384 100644 --- a/glm/detail/func_integer.inl +++ b/glm/detail/func_integer.inl @@ -66,6 +66,26 @@ namespace detail return (v & Mask) << Shift | (v & (~Mask)) >> Shift; } }; + + template + struct compute_bitfieldBitCountStep + { + template class vecType> + GLM_FUNC_QUALIFIER static vecType call(vecType const & v, T, T) + { + return v; + } + }; + + template <> + struct compute_bitfieldBitCountStep + { + template class vecType> + GLM_FUNC_QUALIFIER static vecType call(vecType const & v, T Mask, T Shift) + { + return (v & Mask) + ((v >> Shift) & Mask); + } + }; }//namespace detail // uaddCarry @@ -207,21 +227,24 @@ namespace detail } // bitCount - template - GLM_FUNC_QUALIFIER int bitCount(genIUType x) + template + GLM_FUNC_QUALIFIER int bitCount(genType x) { - return bitCount(tvec1(x)).x; + return bitCount(glm::tvec1(x)).x; } - template class vecType> + template class vecType> GLM_FUNC_QUALIFIER vecType bitCount(vecType const & v) { - GLM_STATIC_ASSERT(std::numeric_limits::is_integer, "'bitCount' only accept integer values"); - - vecType Count(0); - for(T i = 0, n = static_cast(sizeof(T) * 8); i < n; ++i) - Count += vecType((v >> i) & static_cast(1)); - return Count; + typedef glm::detail::make_unsigned::type U; + vecType x(*reinterpret_cast const *>(&v)); + x = detail::compute_bitfieldBitCountStep= 2>::call(x, U(0x5555555555555555ull), static_cast( 1)); + x = detail::compute_bitfieldBitCountStep= 4>::call(x, U(0x3333333333333333ull), static_cast( 2)); + x = detail::compute_bitfieldBitCountStep= 8>::call(x, U(0x0F0F0F0F0F0F0F0Full), static_cast( 4)); + x = detail::compute_bitfieldBitCountStep= 16>::call(x, U(0x00FF00FF00FF00FFull), static_cast( 8)); + x = detail::compute_bitfieldBitCountStep= 32>::call(x, U(0x0000FFFF0000FFFFull), static_cast(16)); + x = detail::compute_bitfieldBitCountStep= 64>::call(x, U(0x00000000FFFFFFFFull), static_cast(32)); + return vecType(x); } // 
findLSB diff --git a/glm/detail/type_int.hpp b/glm/detail/type_int.hpp index fcf99a3f..d217c747 100644 --- a/glm/detail/type_int.hpp +++ b/glm/detail/type_int.hpp @@ -83,6 +83,110 @@ namespace detail typedef unsigned int lowp_uint_t; typedef unsigned int mediump_uint_t; typedef unsigned int highp_uint_t; + + template + struct make_signed + {}; + + template <> + struct make_signed + { + typedef int8 type; + }; + + template <> + struct make_signed + { + typedef int8 type; + }; + + template <> + struct make_signed + { + typedef int16 type; + }; + + template <> + struct make_signed + { + typedef int16 type; + }; + + template <> + struct make_signed + { + typedef int32 type; + }; + + template <> + struct make_signed + { + typedef int32 type; + }; + + template <> + struct make_signed + { + typedef int64 type; + }; + + template <> + struct make_signed + { + typedef int64 type; + }; + + template + struct make_unsigned + {}; + + template <> + struct make_unsigned + { + typedef uint8 type; + }; + + template <> + struct make_unsigned + { + typedef uint8 type; + }; + + template <> + struct make_unsigned + { + typedef uint16 type; + }; + + template <> + struct make_unsigned + { + typedef uint16 type; + }; + + template <> + struct make_unsigned + { + typedef uint32 type; + }; + + template <> + struct make_unsigned + { + typedef uint32 type; + }; + + template <> + struct make_unsigned + { + typedef uint64 type; + }; + + template <> + struct make_unsigned + { + typedef uint64 type; + }; }//namespace detail typedef detail::int8 int8; diff --git a/readme.txt b/readme.txt index d9a90e99..a2eee497 100644 --- a/readme.txt +++ b/readme.txt @@ -80,7 +80,7 @@ GLM 0.9.6.0: 2014-XX-XX - Added GTC_bitfield extension, promoted GTX_bit - Added GTC_integer extension, promoted GTX_bit - Fixed bad matrix-vector multiple performance with Cuda #257, #258 -- Optimized bitfieldReverse function +- Optimized bitfieldReverse and bitCount functions 
================================================================================ GLM 0.9.5.4: 2014-06-21 diff --git a/test/core/core_func_integer.cpp b/test/core/core_func_integer.cpp index 8a765c5e..5493c4bf 100644 --- a/test/core/core_func_integer.cpp +++ b/test/core/core_func_integer.cpp @@ -858,7 +858,7 @@ namespace findMSB int Error(0); Error += perf_950(); - Error += perf_ops(); + //Error += perf_ops(); return Error; } @@ -1190,17 +1190,44 @@ namespace bitCount return Count; } - template - inline int bitCount_bits(T v) + template + struct compute_bitfieldBitCountStep { - GLM_STATIC_ASSERT(std::numeric_limits::is_integer, "'bitCount' only accept integer values"); - - int Count(0); - for(T i = 0, n = static_cast(sizeof(T) * 8); i < n; ++i) + template class vecType> + GLM_FUNC_QUALIFIER static vecType call(vecType const & v, T, T) { - Count += static_cast((v >> i) & static_cast(1)); + return v; } - return Count; + }; + + template <> + struct compute_bitfieldBitCountStep + { + template class vecType> + GLM_FUNC_QUALIFIER static vecType call(vecType const & v, T Mask, T Shift) + { + return (v & Mask) + ((v >> Shift) & Mask); + } + }; + + template class vecType> + GLM_FUNC_QUALIFIER vecType bitCount_bitfield(vecType const & v) + { + typedef glm::detail::make_unsigned::type U; + vecType x(*reinterpret_cast const *>(&v)); + x = compute_bitfieldBitCountStep= 2>::call(x, U(0x5555555555555555ull), static_cast( 1)); + x = compute_bitfieldBitCountStep= 4>::call(x, U(0x3333333333333333ull), static_cast( 2)); + x = compute_bitfieldBitCountStep= 8>::call(x, U(0x0F0F0F0F0F0F0F0Full), static_cast( 4)); + x = compute_bitfieldBitCountStep= 16>::call(x, U(0x00FF00FF00FF00FFull), static_cast( 8)); + x = compute_bitfieldBitCountStep= 32>::call(x, U(0x0000FFFF0000FFFFull), static_cast(16)); + x = compute_bitfieldBitCountStep= 64>::call(x, U(0x00000000FFFFFFFFull), static_cast(32)); + return vecType(x); + } + + template + GLM_FUNC_QUALIFIER int bitCount_bitfield(genType x) + { + 
return bitCount_bitfield(glm::tvec1(x)).x; } int perf() @@ -1249,15 +1276,18 @@ namespace bitCount std::clock_t TimestampsE = std::clock(); - std::clock_t TimeIf = TimestampsB - TimestampsA; - std::clock_t TimeVec = TimestampsC - TimestampsB; - std::clock_t TimeDefault = TimestampsD - TimestampsC; - std::clock_t TimeVec4 = TimestampsE - TimestampsD; + { + for(std::size_t i = 0, n = v.size(); i < n; ++i) + v[i] = bitCount_bitfield(static_cast(i)); + } - std::printf("bitCount - TimeIf %d\n", static_cast(TimeIf)); - std::printf("bitCount - TimeVec %d\n", static_cast(TimeVec)); - std::printf("bitCount - TimeDefault %d\n", static_cast(TimeDefault)); - std::printf("bitCount - TimeVec4 %d\n", static_cast(TimeVec4)); + std::clock_t TimestampsF = std::clock(); + + std::printf("bitCount - TimeIf %d\n", static_cast(TimestampsB - TimestampsA)); + std::printf("bitCount - TimeVec %d\n", static_cast(TimestampsC - TimestampsB)); + std::printf("bitCount - TimeDefault %d\n", static_cast(TimestampsD - TimestampsC)); + std::printf("bitCount - TimeVec4 %d\n", static_cast(TimestampsE - TimestampsD)); + std::printf("bitCount - bitfield %d\n", static_cast(TimestampsF - TimestampsE)); return Error; } @@ -1268,8 +1298,16 @@ namespace bitCount for(std::size_t i = 0, n = sizeof(DataI32) / sizeof(type); i < n; ++i) { - int Result = glm::bitCount(DataI32[i].Value); - Error += DataI32[i].Return == Result ? 0 : 1; + int ResultA = glm::bitCount(DataI32[i].Value); + int ResultB = bitCount_if(DataI32[i].Value); + int ResultC = bitCount_vec(DataI32[i].Value); + int ResultE = bitCount_bitfield(DataI32[i].Value); + + Error += DataI32[i].Return == ResultA ? 0 : 1; + Error += DataI32[i].Return == ResultB ? 0 : 1; + Error += DataI32[i].Return == ResultC ? 0 : 1; + Error += DataI32[i].Return == ResultE ? 
0 : 1; + assert(!Error); } @@ -1281,6 +1319,8 @@ int main() { int Error = 0; + Error += ::bitCount::test(); + Error += ::bitCount::perf(); Error += ::bitfieldReverse::test(); Error += ::bitfieldReverse::perf(); Error += ::findMSB::test(); @@ -1292,8 +1332,6 @@ int main() Error += ::usubBorrow::test(); Error += ::bitfieldInsert::test(); Error += ::bitfieldExtract::test(); - Error += ::bitCount::test(); - Error += ::bitCount::perf(); return Error; } diff --git a/test/core/core_func_integer_bit_count.cpp b/test/core/core_func_integer_bit_count.cpp index 370af34e..cc21b275 100644 --- a/test/core/core_func_integer_bit_count.cpp +++ b/test/core/core_func_integer_bit_count.cpp @@ -10,13 +10,14 @@ unsigned rotatel(unsigned x, int n) { return (x << n) | (x >> (32 - n)); } -int pop0(unsigned x) { - x = (x & 0x55555555) + ((x >> 1) & 0x55555555); - x = (x & 0x33333333) + ((x >> 2) & 0x33333333); - x = (x & 0x0F0F0F0F) + ((x >> 4) & 0x0F0F0F0F); - x = (x & 0x00FF00FF) + ((x >> 8) & 0x00FF00FF); - x = (x & 0x0000FFFF) + ((x >>16) & 0x0000FFFF); - return x; +int pop0(unsigned x) +{ + x = (x & 0x55555555) + ((x >> 1) & 0x55555555); + x = (x & 0x33333333) + ((x >> 2) & 0x33333333); + x = (x & 0x0F0F0F0F) + ((x >> 4) & 0x0F0F0F0F); + x = (x & 0x00FF00FF) + ((x >> 8) & 0x00FF00FF); + x = (x & 0x0000FFFF) + ((x >>16) & 0x0000FFFF); + return x; } int pop1(unsigned x) {