diff --git a/glm/detail/func_integer.hpp b/glm/detail/func_integer.hpp
index a3f45c3f..55552149 100644
--- a/glm/detail/func_integer.hpp
+++ b/glm/detail/func_integer.hpp
@@ -157,7 +157,18 @@ namespace glm
/// @see GLSL 4.20.8 specification, section 8.8 Integer Functions
template class vecType>
GLM_FUNC_DECL vecType bitfieldReverse(vecType const & v);
-
+
+ /// Returns the number of bits set to 1 in the binary representation of value.
+ ///
+ /// @tparam genType Signed or unsigned integer scalar or vector types.
+ ///
+ /// @see GLSL bitCount man page
+ /// @see GLSL 4.20.8 specification, section 8.8 Integer Functions
+ ///
+ /// @todo Clarify the declaration to specify that scalars are supported.
+ template
+ GLM_FUNC_DECL int bitCount(genType v);
+
/// Returns the number of bits set to 1 in the binary representation of value.
///
/// @tparam T Signed or unsigned integer scalar or vector types.
diff --git a/glm/detail/func_integer.inl b/glm/detail/func_integer.inl
index 13908e8b..e76b2384 100644
--- a/glm/detail/func_integer.inl
+++ b/glm/detail/func_integer.inl
@@ -66,6 +66,26 @@ namespace detail
return (v & Mask) << Shift | (v & (~Mask)) >> Shift;
}
};
+
+ template
+ struct compute_bitfieldBitCountStep
+ {
+ template class vecType>
+ GLM_FUNC_QUALIFIER static vecType call(vecType const & v, T, T)
+ {
+ return v;
+ }
+ };
+
+ template <>
+ struct compute_bitfieldBitCountStep
+ {
+ template class vecType>
+ GLM_FUNC_QUALIFIER static vecType call(vecType const & v, T Mask, T Shift)
+ {
+ return (v & Mask) + ((v >> Shift) & Mask);
+ }
+ };
}//namespace detail
// uaddCarry
@@ -207,21 +227,24 @@ namespace detail
}
// bitCount
- template
- GLM_FUNC_QUALIFIER int bitCount(genIUType x)
+ template
+ GLM_FUNC_QUALIFIER int bitCount(genType x)
{
- return bitCount(tvec1(x)).x;
+ return bitCount(glm::tvec1(x)).x;
}
- template class vecType>
+ template class vecType>
GLM_FUNC_QUALIFIER vecType bitCount(vecType const & v)
{
- GLM_STATIC_ASSERT(std::numeric_limits::is_integer, "'bitCount' only accept integer values");
-
- vecType Count(0);
- for(T i = 0, n = static_cast(sizeof(T) * 8); i < n; ++i)
- Count += vecType((v >> i) & static_cast(1));
- return Count;
+ typedef glm::detail::make_unsigned::type U;
+ vecType x(*reinterpret_cast const *>(&v));
+ x = detail::compute_bitfieldBitCountStep= 2>::call(x, U(0x5555555555555555ull), static_cast( 1));
+ x = detail::compute_bitfieldBitCountStep= 4>::call(x, U(0x3333333333333333ull), static_cast( 2));
+ x = detail::compute_bitfieldBitCountStep= 8>::call(x, U(0x0F0F0F0F0F0F0F0Full), static_cast( 4));
+ x = detail::compute_bitfieldBitCountStep= 16>::call(x, U(0x00FF00FF00FF00FFull), static_cast( 8));
+ x = detail::compute_bitfieldBitCountStep= 32>::call(x, U(0x0000FFFF0000FFFFull), static_cast(16));
+ x = detail::compute_bitfieldBitCountStep= 64>::call(x, U(0x00000000FFFFFFFFull), static_cast(32));
+ return vecType(x);
}
// findLSB
diff --git a/glm/detail/type_int.hpp b/glm/detail/type_int.hpp
index fcf99a3f..d217c747 100644
--- a/glm/detail/type_int.hpp
+++ b/glm/detail/type_int.hpp
@@ -83,6 +83,110 @@ namespace detail
typedef unsigned int lowp_uint_t;
typedef unsigned int mediump_uint_t;
typedef unsigned int highp_uint_t;
+
+ template
+ struct make_signed
+ {};
+
+ template <>
+ struct make_signed
+ {
+ typedef int8 type;
+ };
+
+ template <>
+ struct make_signed
+ {
+ typedef int8 type;
+ };
+
+ template <>
+ struct make_signed
+ {
+ typedef int16 type;
+ };
+
+ template <>
+ struct make_signed
+ {
+ typedef int16 type;
+ };
+
+ template <>
+ struct make_signed
+ {
+ typedef int32 type;
+ };
+
+ template <>
+ struct make_signed
+ {
+ typedef int32 type;
+ };
+
+ template <>
+ struct make_signed
+ {
+ typedef int64 type;
+ };
+
+ template <>
+ struct make_signed
+ {
+ typedef int64 type;
+ };
+
+ template
+ struct make_unsigned
+ {};
+
+ template <>
+ struct make_unsigned
+ {
+ typedef uint8 type;
+ };
+
+ template <>
+ struct make_unsigned
+ {
+ typedef uint8 type;
+ };
+
+ template <>
+ struct make_unsigned
+ {
+ typedef uint16 type;
+ };
+
+ template <>
+ struct make_unsigned
+ {
+ typedef uint16 type;
+ };
+
+ template <>
+ struct make_unsigned
+ {
+ typedef uint32 type;
+ };
+
+ template <>
+ struct make_unsigned
+ {
+ typedef uint32 type;
+ };
+
+ template <>
+ struct make_unsigned
+ {
+ typedef uint64 type;
+ };
+
+ template <>
+ struct make_unsigned
+ {
+ typedef uint64 type;
+ };
}//namespace detail
typedef detail::int8 int8;
diff --git a/readme.txt b/readme.txt
index d9a90e99..a2eee497 100644
--- a/readme.txt
+++ b/readme.txt
@@ -80,7 +80,7 @@ GLM 0.9.6.0: 2014-XX-XX
- Added GTC_bitfield extension, promoted GTX_bit
- Added GTC_integer extension, promoted GTX_bit
- Fixed bad matrix-vector multiple performance with Cuda #257, #258
-- Optimized bitfieldReverse function
+- Optimized bitfieldReverse and bitCount functions
================================================================================
GLM 0.9.5.4: 2014-06-21
diff --git a/test/core/core_func_integer.cpp b/test/core/core_func_integer.cpp
index 8a765c5e..5493c4bf 100644
--- a/test/core/core_func_integer.cpp
+++ b/test/core/core_func_integer.cpp
@@ -858,7 +858,7 @@ namespace findMSB
int Error(0);
Error += perf_950();
- Error += perf_ops();
+ //Error += perf_ops();
return Error;
}
@@ -1190,17 +1190,44 @@ namespace bitCount
return Count;
}
- template
- inline int bitCount_bits(T v)
+ template
+ struct compute_bitfieldBitCountStep
{
- GLM_STATIC_ASSERT(std::numeric_limits::is_integer, "'bitCount' only accept integer values");
-
- int Count(0);
- for(T i = 0, n = static_cast(sizeof(T) * 8); i < n; ++i)
+ template class vecType>
+ GLM_FUNC_QUALIFIER static vecType call(vecType const & v, T, T)
{
- Count += static_cast((v >> i) & static_cast(1));
+ return v;
}
- return Count;
+ };
+
+ template <>
+ struct compute_bitfieldBitCountStep
+ {
+ template class vecType>
+ GLM_FUNC_QUALIFIER static vecType call(vecType const & v, T Mask, T Shift)
+ {
+ return (v & Mask) + ((v >> Shift) & Mask);
+ }
+ };
+
+ template class vecType>
+ GLM_FUNC_QUALIFIER vecType bitCount_bitfield(vecType const & v)
+ {
+ typedef glm::detail::make_unsigned::type U;
+ vecType x(*reinterpret_cast const *>(&v));
+ x = compute_bitfieldBitCountStep= 2>::call(x, U(0x5555555555555555ull), static_cast( 1));
+ x = compute_bitfieldBitCountStep= 4>::call(x, U(0x3333333333333333ull), static_cast( 2));
+ x = compute_bitfieldBitCountStep= 8>::call(x, U(0x0F0F0F0F0F0F0F0Full), static_cast( 4));
+ x = compute_bitfieldBitCountStep= 16>::call(x, U(0x00FF00FF00FF00FFull), static_cast( 8));
+ x = compute_bitfieldBitCountStep= 32>::call(x, U(0x0000FFFF0000FFFFull), static_cast(16));
+ x = compute_bitfieldBitCountStep= 64>::call(x, U(0x00000000FFFFFFFFull), static_cast(32));
+ return vecType(x);
+ }
+
+ template
+ GLM_FUNC_QUALIFIER int bitCount_bitfield(genType x)
+ {
+ return bitCount_bitfield(glm::tvec1(x)).x;
}
int perf()
@@ -1249,15 +1276,18 @@ namespace bitCount
std::clock_t TimestampsE = std::clock();
- std::clock_t TimeIf = TimestampsB - TimestampsA;
- std::clock_t TimeVec = TimestampsC - TimestampsB;
- std::clock_t TimeDefault = TimestampsD - TimestampsC;
- std::clock_t TimeVec4 = TimestampsE - TimestampsD;
+ {
+ for(std::size_t i = 0, n = v.size(); i < n; ++i)
+ v[i] = bitCount_bitfield(static_cast(i));
+ }
- std::printf("bitCount - TimeIf %d\n", static_cast(TimeIf));
- std::printf("bitCount - TimeVec %d\n", static_cast(TimeVec));
- std::printf("bitCount - TimeDefault %d\n", static_cast(TimeDefault));
- std::printf("bitCount - TimeVec4 %d\n", static_cast(TimeVec4));
+ std::clock_t TimestampsF = std::clock();
+
+ std::printf("bitCount - TimeIf %d\n", static_cast(TimestampsB - TimestampsA));
+ std::printf("bitCount - TimeVec %d\n", static_cast(TimestampsC - TimestampsB));
+ std::printf("bitCount - TimeDefault %d\n", static_cast(TimestampsD - TimestampsC));
+ std::printf("bitCount - TimeVec4 %d\n", static_cast(TimestampsE - TimestampsD));
+ std::printf("bitCount - bitfield %d\n", static_cast(TimestampsF - TimestampsE));
return Error;
}
@@ -1268,8 +1298,16 @@ namespace bitCount
for(std::size_t i = 0, n = sizeof(DataI32) / sizeof(type); i < n; ++i)
{
- int Result = glm::bitCount(DataI32[i].Value);
- Error += DataI32[i].Return == Result ? 0 : 1;
+ int ResultA = glm::bitCount(DataI32[i].Value);
+ int ResultB = bitCount_if(DataI32[i].Value);
+ int ResultC = bitCount_vec(DataI32[i].Value);
+ int ResultE = bitCount_bitfield(DataI32[i].Value);
+
+ Error += DataI32[i].Return == ResultA ? 0 : 1;
+ Error += DataI32[i].Return == ResultB ? 0 : 1;
+ Error += DataI32[i].Return == ResultC ? 0 : 1;
+ Error += DataI32[i].Return == ResultE ? 0 : 1;
+
assert(!Error);
}
@@ -1281,6 +1319,8 @@ int main()
{
int Error = 0;
+ Error += ::bitCount::test();
+ Error += ::bitCount::perf();
Error += ::bitfieldReverse::test();
Error += ::bitfieldReverse::perf();
Error += ::findMSB::test();
@@ -1292,8 +1332,6 @@ int main()
Error += ::usubBorrow::test();
Error += ::bitfieldInsert::test();
Error += ::bitfieldExtract::test();
- Error += ::bitCount::test();
- Error += ::bitCount::perf();
return Error;
}
diff --git a/test/core/core_func_integer_bit_count.cpp b/test/core/core_func_integer_bit_count.cpp
index 370af34e..cc21b275 100644
--- a/test/core/core_func_integer_bit_count.cpp
+++ b/test/core/core_func_integer_bit_count.cpp
@@ -10,13 +10,14 @@ unsigned rotatel(unsigned x, int n) {
return (x << n) | (x >> (32 - n));
}
-int pop0(unsigned x) {
- x = (x & 0x55555555) + ((x >> 1) & 0x55555555);
- x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
- x = (x & 0x0F0F0F0F) + ((x >> 4) & 0x0F0F0F0F);
- x = (x & 0x00FF00FF) + ((x >> 8) & 0x00FF00FF);
- x = (x & 0x0000FFFF) + ((x >>16) & 0x0000FFFF);
- return x;
+int pop0(unsigned x)
+{
+ x = (x & 0x55555555) + ((x >> 1) & 0x55555555);
+ x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
+ x = (x & 0x0F0F0F0F) + ((x >> 4) & 0x0F0F0F0F);
+ x = (x & 0x00FF00FF) + ((x >> 8) & 0x00FF00FF);
+ x = (x & 0x0000FFFF) + ((x >>16) & 0x0000FFFF);
+ return x;
}
int pop1(unsigned x) {