From bd7125c50bc5c6d487839ac3a52a41218d3f2df6 Mon Sep 17 00:00:00 2001
From: Christophe Riccio
Date: Sun, 10 Feb 2013 19:25:11 +0100
Subject: [PATCH] Added bit interleave for 3 and 4 integers

---
Review notes (patch commentary, ignored when the patch is applied):
- The line structure was rebuilt from a whitespace-collapsed copy. The header
  names of the '#include' lines that are context, moved or removed were lost
  and are left blank: restore them from the upstream commit before applying.
  The 'index' blob ids are those of the original commit.
- glm/gtx/bit.inl: the 4 integers interleave shifted by 32/16/8/4, which loses
  the upper byte of every input; it now shifts by 24/12/6/3.
- glm/core/intrinsic_integer.inl: both SSE2 drafts read an undeclared 'y' and
  truncated their 64-bit masks through _mm_set1_epi32. They are rewritten with
  an explicit lane convention that still has to be confirmed.
- test/gtc/gtc_random.cpp: <array> and the std::array arguments were rebuilt
  from the way the arrays are used (assumed <int, 8> and <int, 8 * 8>, to be
  confirmed); the color counters are now reset for every candidate grid.
- test/gtx/gtx_bit.cpp: the results are now checked and the 'while(true);'
  that kept main() from returning is gone.

 glm/core/intrinsic_integer.inl | 93 ++++++++++++++++++++++++++++++++
 glm/gtx/bit.inl                | 67 +++++++++++++++++++++++
 test/gtc/gtc_random.cpp        | 47 ++++++++++++++++
 test/gtx/gtx_bit.cpp           | 51 ++++++++++++++++--
 4 files changed, 255 insertions(+), 3 deletions(-)

diff --git a/glm/core/intrinsic_integer.inl b/glm/core/intrinsic_integer.inl
index 047fe9af..6ada7432 100644
--- a/glm/core/intrinsic_integer.inl
+++ b/glm/core/intrinsic_integer.inl
@@ -136,5 +136,98 @@ namespace detail
 
 		return Reg1;
 	}
+
+	// Interleaves the bits of three 32-bit integers stored in the 32-bit lanes
+	// 0 (x), 1 (y) and 2 (z) of the argument; lane 3 is ignored. The 64-bit
+	// value x' | (y' << 1) | (z' << 2), where ' is the bit spreading, is returned
+	// in the low quadword and the high quadword is zeroed.
+	// NOTE(review): the first draft read an undeclared 'y' and built its 64-bit
+	// masks with _mm_set1_epi32, which truncates them. The lane layout above is
+	// the convention picked while fixing that - confirm before relying on it.
+	inline __m128i _mm_bit_interleave3_si128(__m128i x)
+	{
+		// 64-bit masks, given as (high, low) dwords for both quadwords
+		__m128i const Mask4 = _mm_set_epi32(int(0xFFFF0000), int(0x0000FFFF), int(0xFFFF0000), int(0x0000FFFF));
+		__m128i const Mask3 = _mm_set_epi32(int(0x00FF0000), int(0xFF0000FF), int(0x00FF0000), int(0xFF0000FF));
+		__m128i const Mask2 = _mm_set_epi32(int(0xF00F00F0), int(0x0F00F00F), int(0xF00F00F0), int(0x0F00F00F));
+		__m128i const Mask1 = _mm_set_epi32(int(0x30C30C30), int(0xC30C30C3), int(0x30C30C30), int(0xC30C30C3));
+		__m128i const Mask0 = _mm_set_epi32(int(0x92492492), int(0x49249249), int(0x92492492), int(0x49249249));
+		__m128i const Zero = _mm_setzero_si128();
+
+		// Zero-extend the 32-bit lanes to 64 bits: RegXY = [x, y], RegZ = [z, lane 3]
+		__m128i RegXY = _mm_unpacklo_epi32(x, Zero);
+		__m128i RegZ = _mm_unpackhi_epi32(x, Zero);
+
+		//REG = ((REG << 32) | REG) & glm::uint64(0xFFFF00000000FFFF);
+		RegXY = _mm_and_si128(_mm_or_si128(_mm_slli_epi64(RegXY, 32), RegXY), Mask4);
+		RegZ = _mm_and_si128(_mm_or_si128(_mm_slli_epi64(RegZ, 32), RegZ), Mask4);
+
+		//REG = ((REG << 16) | REG) & glm::uint64(0x00FF0000FF0000FF);
+		RegXY = _mm_and_si128(_mm_or_si128(_mm_slli_epi64(RegXY, 16), RegXY), Mask3);
+		RegZ = _mm_and_si128(_mm_or_si128(_mm_slli_epi64(RegZ, 16), RegZ), Mask3);
+
+		//REG = ((REG << 8) | REG) & glm::uint64(0xF00F00F00F00F00F);
+		RegXY = _mm_and_si128(_mm_or_si128(_mm_slli_epi64(RegXY, 8), RegXY), Mask2);
+		RegZ = _mm_and_si128(_mm_or_si128(_mm_slli_epi64(RegZ, 8), RegZ), Mask2);
+
+		//REG = ((REG << 4) | REG) & glm::uint64(0x30C30C30C30C30C3);
+		RegXY = _mm_and_si128(_mm_or_si128(_mm_slli_epi64(RegXY, 4), RegXY), Mask1);
+		RegZ = _mm_and_si128(_mm_or_si128(_mm_slli_epi64(RegZ, 4), RegZ), Mask1);
+
+		//REG = ((REG << 2) | REG) & glm::uint64(0x9249249249249249);
+		RegXY = _mm_and_si128(_mm_or_si128(_mm_slli_epi64(RegXY, 2), RegXY), Mask0);
+		RegZ = _mm_and_si128(_mm_or_si128(_mm_slli_epi64(RegZ, 2), RegZ), Mask0);
+
+		//return REG1 | (REG2 << 1) | (REG3 << 2);
+		__m128i const RegY = _mm_slli_epi64(_mm_srli_si128(RegXY, 8), 1);
+		__m128i const Result = _mm_or_si128(_mm_or_si128(RegXY, RegY), _mm_slli_epi64(RegZ, 2));
+
+		// Keep the low quadword only
+		return _mm_move_epi64(Result);
+	}
+
+	// Interleaves the bits of four 16-bit integers stored in the low halves of
+	// the 32-bit lanes 0 (x), 1 (y), 2 (z) and 3 (w) of the argument. The 64-bit
+	// value x' | (y' << 1) | (z' << 2) | (w' << 3), where ' is the bit spreading,
+	// is returned in the low quadword and the high quadword is zeroed.
+	// NOTE(review): the first draft was a copy of the 3 integers version (wrong
+	// masks and shifts, undeclared 'y'). The lane layout above is the convention
+	// picked while fixing that - confirm before relying on it.
+	inline __m128i _mm_bit_interleave4_si128(__m128i x)
+	{
+		// These 64-bit masks repeat every 32 bits, so _mm_set1_epi32 is exact
+		__m128i const Mask3 = _mm_set1_epi32(0x000000FF);
+		__m128i const Mask2 = _mm_set1_epi32(0x000F000F);
+		__m128i const Mask1 = _mm_set1_epi32(0x03030303);
+		__m128i const Mask0 = _mm_set1_epi32(0x11111111);
+		__m128i const Zero = _mm_setzero_si128();
+
+		// Zero-extend the 32-bit lanes to 64 bits: RegXY = [x, y], RegZW = [z, w]
+		__m128i RegXY = _mm_unpacklo_epi32(x, Zero);
+		__m128i RegZW = _mm_unpackhi_epi32(x, Zero);
+
+		//REG = ((REG << 24) | REG) & glm::uint64(0x000000FF000000FF);
+		RegXY = _mm_and_si128(_mm_or_si128(_mm_slli_epi64(RegXY, 24), RegXY), Mask3);
+		RegZW = _mm_and_si128(_mm_or_si128(_mm_slli_epi64(RegZW, 24), RegZW), Mask3);
+
+		//REG = ((REG << 12) | REG) & glm::uint64(0x000F000F000F000F);
+		RegXY = _mm_and_si128(_mm_or_si128(_mm_slli_epi64(RegXY, 12), RegXY), Mask2);
+		RegZW = _mm_and_si128(_mm_or_si128(_mm_slli_epi64(RegZW, 12), RegZW), Mask2);
+
+		//REG = ((REG << 6) | REG) & glm::uint64(0x0303030303030303);
+		RegXY = _mm_and_si128(_mm_or_si128(_mm_slli_epi64(RegXY, 6), RegXY), Mask1);
+		RegZW = _mm_and_si128(_mm_or_si128(_mm_slli_epi64(RegZW, 6), RegZW), Mask1);
+
+		//REG = ((REG << 3) | REG) & glm::uint64(0x1111111111111111);
+		RegXY = _mm_and_si128(_mm_or_si128(_mm_slli_epi64(RegXY, 3), RegXY), Mask0);
+		RegZW = _mm_and_si128(_mm_or_si128(_mm_slli_epi64(RegZW, 3), RegZW), Mask0);
+
+		//return REG1 | (REG2 << 1) | (REG3 << 2) | (REG4 << 3);
+		__m128i const RegYW = _mm_srli_si128(_mm_or_si128(_mm_slli_epi64(RegXY, 1), _mm_slli_epi64(RegZW, 3)), 8);
+		__m128i const RegXZ = _mm_or_si128(RegXY, _mm_slli_epi64(RegZW, 2));
+
+		// Keep the low quadword only
+		return _mm_move_epi64(_mm_or_si128(RegXZ, RegYW));
+	}
 }//namespace detail
 }//namespace glms
diff --git a/glm/gtx/bit.inl b/glm/gtx/bit.inl
index 825a20a8..03adc063 100644
--- a/glm/gtx/bit.inl
+++ b/glm/gtx/bit.inl
@@ -671,6 +671,73 @@ namespace glm
 
 			return REG1 | (REG2 << 1);
 		}
+
+		// Interleaves the bits of x, y and z: bit i of x, y and z is moved to the
+		// bits 3 * i, 3 * i + 1 and 3 * i + 2 of the result. Bits whose destination
+		// does not fit in the 64-bit result are dropped.
+		inline glm::uint64 bitfieldInterleave(glm::uint32 x, glm::uint32 y, glm::uint32 z)
+		{
+			glm::uint64 REG1(x);
+			glm::uint64 REG2(y);
+			glm::uint64 REG3(z);
+
+			REG1 = ((REG1 << 32) | REG1) & glm::uint64(0xFFFF00000000FFFF);
+			REG2 = ((REG2 << 32) | REG2) & glm::uint64(0xFFFF00000000FFFF);
+			REG3 = ((REG3 << 32) | REG3) & glm::uint64(0xFFFF00000000FFFF);
+
+			REG1 = ((REG1 << 16) | REG1) & glm::uint64(0x00FF0000FF0000FF);
+			REG2 = ((REG2 << 16) | REG2) & glm::uint64(0x00FF0000FF0000FF);
+			REG3 = ((REG3 << 16) | REG3) & glm::uint64(0x00FF0000FF0000FF);
+
+			REG1 = ((REG1 << 8) | REG1) & glm::uint64(0xF00F00F00F00F00F);
+			REG2 = ((REG2 << 8) | REG2) & glm::uint64(0xF00F00F00F00F00F);
+			REG3 = ((REG3 << 8) | REG3) & glm::uint64(0xF00F00F00F00F00F);
+
+			REG1 = ((REG1 << 4) | REG1) & glm::uint64(0x30C30C30C30C30C3);
+			REG2 = ((REG2 << 4) | REG2) & glm::uint64(0x30C30C30C30C30C3);
+			REG3 = ((REG3 << 4) | REG3) & glm::uint64(0x30C30C30C30C30C3);
+
+			REG1 = ((REG1 << 2) | REG1) & glm::uint64(0x9249249249249249);
+			REG2 = ((REG2 << 2) | REG2) & glm::uint64(0x9249249249249249);
+			REG3 = ((REG3 << 2) | REG3) & glm::uint64(0x9249249249249249);
+
+			return REG1 | (REG2 << 1) | (REG3 << 2);
+		}
+
+		// Interleaves the bits of x, y, z and w: bit i of x, y, z and w is moved to
+		// the bits 4 * i, 4 * i + 1, 4 * i + 2 and 4 * i + 3 of the 64-bit result.
+		inline glm::uint64 bitfieldInterleave(glm::uint16 x, glm::uint16 y, glm::uint16 z, glm::uint16 w)
+		{
+			glm::uint64 REG1(x);
+			glm::uint64 REG2(y);
+			glm::uint64 REG3(z);
+			glm::uint64 REG4(w);
+
+			// Each step moves the upper half of every group of bits to its final
+			// place: bit i must reach bit 4 * i, so a group starting at bit i moves
+			// by 3 * i, hence the shifts 3 * 8, 3 * 4, 3 * 2 and 3 * 1.
+			REG1 = ((REG1 << 24) | REG1) & glm::uint64(0x000000FF000000FF);
+			REG2 = ((REG2 << 24) | REG2) & glm::uint64(0x000000FF000000FF);
+			REG3 = ((REG3 << 24) | REG3) & glm::uint64(0x000000FF000000FF);
+			REG4 = ((REG4 << 24) | REG4) & glm::uint64(0x000000FF000000FF);
+
+			REG1 = ((REG1 << 12) | REG1) & glm::uint64(0x000F000F000F000F);
+			REG2 = ((REG2 << 12) | REG2) & glm::uint64(0x000F000F000F000F);
+			REG3 = ((REG3 << 12) | REG3) & glm::uint64(0x000F000F000F000F);
+			REG4 = ((REG4 << 12) | REG4) & glm::uint64(0x000F000F000F000F);
+
+			REG1 = ((REG1 << 6) | REG1) & glm::uint64(0x0303030303030303);
+			REG2 = ((REG2 << 6) | REG2) & glm::uint64(0x0303030303030303);
+			REG3 = ((REG3 << 6) | REG3) & glm::uint64(0x0303030303030303);
+			REG4 = ((REG4 << 6) | REG4) & glm::uint64(0x0303030303030303);
+
+			REG1 = ((REG1 << 3) | REG1) & glm::uint64(0x1111111111111111);
+			REG2 = ((REG2 << 3) | REG2) & glm::uint64(0x1111111111111111);
+			REG3 = ((REG3 << 3) | REG3) & glm::uint64(0x1111111111111111);
+			REG4 = ((REG4 << 3) | REG4) & glm::uint64(0x1111111111111111);
+
+			return REG1 | (REG2 << 1) | (REG3 << 2) | (REG4 << 3);
+		}
 	}//namespace detail
 
 	inline int16 bitfieldInterleave(int8 x, int8 y)
diff --git a/test/gtc/gtc_random.cpp b/test/gtc/gtc_random.cpp
index d43ffbf8..b7772098 100644
--- a/test/gtc/gtc_random.cpp
+++ b/test/gtc/gtc_random.cpp
@@ -11,6 +11,9 @@
 #include
 #include
 #include
+#if(GLM_LANG & GLM_LANG_CXX0X)
+#	include <array>
+#endif
 
 int test_linearRand()
 {
@@ -136,6 +139,50 @@ int test_ballRand()
 	return Error;
 }
 
+#if(GLM_LANG & GLM_LANG_CXX0X)
+// Draws random grids of 8 * 8 cells until every one of the 8 colors is used 8 times
+int test_grid()
+{
+	int Error = 0;
+
+	typedef std::array<int, 8> colors;
+	typedef std::array<int, 8 * 8> grid;
+
+	grid Grid;
+	colors Colors;
+
+	grid GridBest;
+	colors ColorsBest;
+
+	while(true)
+	{
+		// Restart the count for every candidate grid
+		Colors.fill(0);
+
+		for(std::size_t i = 0; i < Grid.size(); ++i)
+			Grid[i] = int(glm::linearRand(0.0, 8.0 * 8.0 * 8.0 - 1.0) / 64.0);
+
+		for(std::size_t i = 0; i < Grid.size(); ++i)
+			++Colors[Grid[i]];
+
+		bool Exit = true;
+		for(std::size_t i = 0; i < Colors.size(); ++i)
+		{
+			if(Colors[i] == 8)
+				continue;
+
+			Exit = false;
+			break;
+		}
+
+		if(Exit == true)
+			break;
+	}
+
+	return Error;
+}
+#endif
+
 int main()
 {
 	int Error = 0;
diff --git a/test/gtx/gtx_bit.cpp b/test/gtx/gtx_bit.cpp
index 205ec704..5c3912a4 100644
--- a/test/gtx/gtx_bit.cpp
+++ b/test/gtx/gtx_bit.cpp
@@ -7,6 +7,8 @@
 // File    : test/gtx/bit.cpp
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 
+#include
+
 #include
 #include
 #include
@@ -19,8 +21,6 @@
 #include
 #include
 
-#include
-
 enum result
 {
 	SUCCESS,
@@ -479,6 +479,17 @@ namespace bitfieldInterleave
 			std::cout << "sseUnalignedBitfieldInterleave Time " << Time << " clocks" << std::endl;
 		}
 
+		{
+			std::clock_t LastTime = std::clock();
+
+			for(std::size_t i = 0; i < Data.size(); ++i)
+				Data[i] = glm::detail::bitfieldInterleave(Param[i].x, Param[i].y, Param[i].x);
+
+			std::clock_t Time = std::clock() - LastTime;
+
+			std::cout << "glm::detail::bitfieldInterleave Time " << Time << " clocks" << std::endl;
+		}
+
 #		if(GLM_ARCH != GLM_ARCH_PURE)
 		{
 			// SIMD
@@ -505,12 +516,46 @@ namespace bitfieldInterleave
 	}
 }
 
+namespace bitfieldInterleave3
+{
+	int test()
+	{
+		int Error(0);
+
+		// Every bit of x that fits in the result lands on a multiple of 3
+		glm::uint64 Result = glm::detail::bitfieldInterleave(0xFFFFFFFF, 0x00000000, 0x00000000);
+		Error += Result == glm::uint64(0x9249249249249249) ? 0 : 1;
+
+		return Error;
+	}
+}
+
+namespace bitfieldInterleave4
+{
+	int test()
+	{
+		int Error(0);
+
+		// Each component fills one bit out of four, offset by its position
+		glm::uint64 ResultX = glm::detail::bitfieldInterleave(glm::uint16(0xFFFF), glm::uint16(0), glm::uint16(0), glm::uint16(0));
+		Error += ResultX == glm::uint64(0x1111111111111111) ? 0 : 1;
+
+		glm::uint64 ResultW = glm::detail::bitfieldInterleave(glm::uint16(0), glm::uint16(0), glm::uint16(0), glm::uint16(0xFFFF));
+		Error += ResultW == glm::uint64(0x8888888888888888) ? 0 : 1;
+
+		return Error;
+	}
+}
+
 int main()
 {
-	int Error = 0;
+	int Error(0);
+
+	Error += ::bitfieldInterleave3::test();
+	Error += ::bitfieldInterleave4::test();
 	Error += ::bitfieldInterleave::test();
 	Error += ::extractField::test();
 	Error += ::bitRevert::test();
 
 	return Error;
 }