From c038e80e9f908b56eb6baf6833e56ad29790fc40 Mon Sep 17 00:00:00 2001 From: Christophe Riccio Date: Mon, 9 Sep 2013 15:16:14 +0200 Subject: [PATCH] Optimized GTC packing functions --- glm/core/func_packing.inl | 14 +- glm/gtc/packing.inl | 329 +++++++++++++------------------------- 2 files changed, 117 insertions(+), 226 deletions(-) diff --git a/glm/core/func_packing.inl b/glm/core/func_packing.inl index 394ce450..4193f5f3 100644 --- a/glm/core/func_packing.inl +++ b/glm/core/func_packing.inl @@ -49,30 +49,32 @@ namespace glm GLM_FUNC_QUALIFIER vec2 unpackSnorm2x16(uint32 const & p) { vec2 Unpack(*reinterpret_cast(const_cast(&p))); - return clamp(Unpack * 1.0f / 32767.0f, -1.0f, 1.0f); + return clamp( + Unpack * 3.0518509475997192297128208258309e-5f, //1.0f / 32767.0f, + -1.0f, 1.0f); } GLM_FUNC_QUALIFIER uint32 packUnorm4x8(vec4 const & v) { - glm::u8vec4 Topack(round(clamp(v, 0.0f, 1.0f) * 255.0f)); + u8vec4 Topack(round(clamp(v, 0.0f, 1.0f) * 255.0f)); return *reinterpret_cast(&Topack); } GLM_FUNC_QUALIFIER vec4 unpackUnorm4x8(uint32 const & p) { - glm::vec4 Unpack(*reinterpret_cast(const_cast(&p))); - return Unpack * 0.003921568627451f; // 1 / 255 + vec4 Unpack(*reinterpret_cast(const_cast(&p))); + return Unpack * float(0.0039215686274509803921568627451); // 1 / 255 } GLM_FUNC_QUALIFIER uint32 packSnorm4x8(vec4 const & v) { - glm::i8vec4 Topack(round(clamp(v ,-1.0f, 1.0f) * 127.0f)); + i8vec4 Topack(round(clamp(v ,-1.0f, 1.0f) * 127.0f)); return *reinterpret_cast(&Topack); } GLM_FUNC_QUALIFIER glm::vec4 unpackSnorm4x8(uint32 const & p) { - glm::vec4 Unpack(*reinterpret_cast(const_cast(&p))); + vec4 Unpack(*reinterpret_cast(const_cast(&p))); return clamp( Unpack * 0.0078740157480315f, // 1.0f / 127.0f -1.0f, 1.0f); diff --git a/glm/gtc/packing.inl b/glm/gtc/packing.inl index 6cacf1ec..d7b98391 100644 --- a/glm/gtc/packing.inl +++ b/glm/gtc/packing.inl @@ -42,18 +42,18 @@ namespace detail // 0x7f800000 => 01111111 10000000 00000000 00000000 // 0x00008000 => 00000000 00000000 10000000 00000000 return - ((f >> 16) & 0x8000) | // sign - ((((f & 0x7f800000) - 0x38000000) >> 13) & 0x7c00) | // exponential - ((f >> 13) & 0x03ff); // Mantissa + ((f >> 16) & 0x8000) | // sign + ((((f & 0x7f800000) - 0x38000000) >> 13) & 0x7c00) | // exponential + ((f >> 13) & 0x03ff); // Mantissa } - + glm::uint32 float2packed11(glm::uint32 const & f) { // 10 bits => EE EEEFFFFF // 11 bits => EEE EEFFFFFF // Half bits => SEEEEEFF FFFFFFFF // Float bits => SEEEEEEE EFFFFFFF FFFFFFFF FFFFFFFF - + // 0x000007c0 => 00000000 00000000 00000111 11000000 // 0x00007c00 => 00000000 00000000 01111100 00000000 // 0x000003ff => 00000000 00000000 00000011 11111111 @@ -61,17 +61,17 @@ namespace detail // 0x7f800000 => 01111111 10000000 00000000 00000000 // 0x00008000 => 00000000 00000000 10000000 00000000 return - ((((f & 0x7f800000) - 0x38000000) >> 17) & 0x07c0) | // exponential - ((f >> 17) & 0x003f); // Mantissa + ((((f & 0x7f800000) - 0x38000000) >> 17) & 0x07c0) | // exponential + ((f >> 17) & 0x003f); // Mantissa } - - glm::uint32 packed11ToFloat(glm::uint32 const & p) + + glm::uint32 packed11ToFloat(glm::uint32 const & p) { // 10 bits => EE EEEFFFFF // 11 bits => EEE EEFFFFFF // Half bits => SEEEEEFF FFFFFFFF // Float bits => SEEEEEEE EFFFFFFF FFFFFFFF FFFFFFFF - + // 0x000007c0 => 00000000 00000000 00000111 11000000 // 0x00007c00 => 00000000 00000000 01111100 00000000 // 0x000003ff => 00000000 00000000 00000011 11111111 @@ -79,8 +79,8 @@ namespace detail // 0x7f800000 => 01111111 10000000 00000000 00000000 // 0x00008000 => 00000000 00000000 10000000 00000000 return - ((((p & 0x07c0) << 17) + 0x38000000) & 0x7f800000) | // exponential - ((p & 0x003f) << 17); // Mantissa + ((((p & 0x07c0) << 17) + 0x38000000) & 0x7f800000) | // exponential + ((p & 0x003f) << 17); // Mantissa } glm::uint32 float2packed10(glm::uint32 const & f) @@ -89,7 +89,7 @@ namespace detail // 11 bits => EEE EEFFFFFF // Half bits => SEEEEEFF FFFFFFFF // Float bits => SEEEEEEE EFFFFFFF FFFFFFFF FFFFFFFF - + // 0x0000001F => 00000000 00000000 00000000 00011111 // 0x0000003F => 00000000 00000000 00000000 00111111 // 0x000003E0 => 00000000 00000000 00000011 11100000 @@ -100,17 +100,17 @@ namespace detail // 0x7f800000 => 01111111 10000000 00000000 00000000 // 0x00008000 => 00000000 00000000 10000000 00000000 return - ((((f & 0x7f800000) - 0x38000000) >> 18) & 0x03E0) | // exponential - ((f >> 18) & 0x001f); // Mantissa + ((((f & 0x7f800000) - 0x38000000) >> 18) & 0x03E0) | // exponential + ((f >> 18) & 0x001f); // Mantissa } - + glm::uint32 packed10ToFloat(glm::uint32 const & p) { // 10 bits => EE EEEFFFFF // 11 bits => EEE EEFFFFFF // Half bits => SEEEEEFF FFFFFFFF // Float bits => SEEEEEEE EFFFFFFF FFFFFFFF FFFFFFFF - + // 0x0000001F => 00000000 00000000 00000000 00011111 // 0x0000003F => 00000000 00000000 00000000 00111111 // 0x000003E0 => 00000000 00000000 00000011 11100000 @@ -121,21 +121,21 @@ namespace detail // 0x7f800000 => 01111111 10000000 00000000 00000000 // 0x00008000 => 00000000 00000000 10000000 00000000 return - ((((p & 0x03E0) << 18) + 0x38000000) & 0x7f800000) | // exponential - ((p & 0x001f) << 18); // Mantissa + ((((p & 0x03E0) << 18) + 0x38000000) & 0x7f800000) | // exponential + ((p & 0x001f) << 18); // Mantissa } glm::uint half2float(glm::uint const & h) { return ((h & 0x8000) << 16) | ((( h & 0x7c00) + 0x1C000) << 13) | ((h & 0x03FF) << 13); } - - union uif - { - glm::uint i; - float f; - }; - + + union uif + { + glm::uint i; + float f; + }; + glm::uint floatTo11bit(float x) { if(x == 0.0f) @@ -144,12 +144,12 @@ namespace detail return ~0; else if(glm::isinf(x)) return 0x1f << 6; - + uif Union; Union.f = x; return float2packed11(Union.i); } - + float packed11bitToFloat(glm::uint x) { if(x == 0) @@ -158,7 +158,7 @@ namespace detail return ~0;//NaN else if(x == (0x1f << 6)) return ~0;//Inf - + uif Union; Union.i = packed11ToFloat(x); return Union.f; @@ -172,7 +172,7 @@ namespace detail return ~0; else if(glm::isinf(x)) return 0x1f << 5; - + uif Union; Union.f = x; return float2packed10(Union.i); @@ -186,12 +186,12 @@ namespace detail return ~0;//NaN else if(x == (0x1f << 5)) return ~0;//Inf - + uif Union; Union.i = packed10ToFloat(x); return Union.f; } - + glm::uint f11_f11_f10(float x, float y, float z) { return ((floatTo11bit(x) & ((1 << 11) - 1)) << 0) | ((floatTo11bit(y) & ((1 << 11) - 1)) << 11) | ((floatTo10bit(z) & ((1 << 10) - 1)) << 22); @@ -221,253 +221,142 @@ namespace detail uint32 pack; }; - union unorm4x16 - { - struct - { - uint16 x; - uint16 y; - uint16 z; - uint16 w; - } data; - uint64 pack; - }; - - union snorm4x16 - { - struct - { - int16 x; - int16 y; - int16 z; - int16 w; - } data; - uint64 pack; - }; - - union snorm1x16 - { - int16 data; - uint16 pack; - }; - - union half1x16 - { - hdata data; - uint16 pack; - }; - - union half4x16 - { - struct - { - hdata x; - hdata y; - hdata z; - hdata w; - } data; - uint64 pack; - }; - - union unorm1x8 - { - uint8 data; - uint8 pack; - }; - - union unorm2x8 - { - struct - { - uint8 x; - uint8 y; - } data; - uint16 pack; - }; - - union snorm1x8 - { - int8 data; - uint8 pack; - }; - - union snorm2x8 - { - struct - { - int8 x; - int8 y; - } data; - uint16 pack; - }; }//namespace detail GLM_FUNC_QUALIFIER uint8 packUnorm1x8(float const & v) { - int8 Scaled(int8(round(clamp(v, -1.0f, 1.0f) * 255.0f))); - detail::unorm1x8 Packing; - Packing.data = Scaled; - return Packing.pack; + return static_cast(round(clamp(v, 0.0f, 1.0f) * 255.0f)); } - GLM_FUNC_QUALIFIER float unpackUnorm1x8(uint8 p) + GLM_FUNC_QUALIFIER float unpackUnorm1x8(uint8 const & p) { - detail::unorm1x8 Packing; - Packing.pack = p; - float Unpacked(Packing.data); - return Unpacked * float(0.0039215686274509803921568627451); + float Unpack(static_cast(*const_cast(&p))); + return Unpack * float(0.0039215686274509803921568627451); // 1 / 255 } GLM_FUNC_QUALIFIER uint16 packUnorm2x8(vec2 const & v) { - i8vec2 Scaled(round(clamp(v ,-1.0f, 1.0f) * 255.0f)); - detail::unorm2x8 Packing; - Packing.data.x = Scaled.x; - Packing.data.y = Scaled.y; - return Packing.pack; + u8vec2 Topack(round(clamp(v, 0.0f, 1.0f) * 255.0f)); + return *reinterpret_cast(&Topack); } - GLM_FUNC_QUALIFIER vec2 unpackUnorm2x8(uint16 p) + GLM_FUNC_QUALIFIER vec2 unpackUnorm2x8(uint16 const & p) { - detail::unorm2x8 Packing; - Packing.pack = p; - vec2 Unpacked(Packing.data.x, Packing.data.y); - return Unpacked * float(0.0039215686274509803921568627451); + vec2 Unpack(*reinterpret_cast(const_cast(&p))); + return Unpack * float(0.0039215686274509803921568627451); // 1 / 255 } GLM_FUNC_QUALIFIER uint8 packSnorm1x8(float const & v) { - glm::int8 Scaled(int8(round(clamp(v ,-1.0f, 1.0f) * 127.0f))); - detail::snorm1x8 Packing; - Packing.data = Scaled; - return Packing.pack; + int8 Topack(static_cast(round(clamp(v ,-1.0f, 1.0f) * 127.0f))); + return *reinterpret_cast(&Topack); } - GLM_FUNC_QUALIFIER float unpackSnorm1x8(uint8 p) + GLM_FUNC_QUALIFIER float unpackSnorm1x8(uint8 const & p) { - detail::snorm1x8 Packing; - Packing.pack = p; - float Unpacked(Packing.data); - return clamp(Unpacked * float(0.00787401574803149606299212598425), -1.0f, 1.0f); + float Unpack(static_cast(*const_cast(&p))); + return clamp( + Unpack * 0.00787401574803149606299212598425f, // 1.0f / 127.0f + -1.0f, 1.0f); } GLM_FUNC_QUALIFIER uint16 packSnorm2x8(vec2 const & v) { - glm::i8vec2 Scaled(round(clamp(v ,-1.0f, 1.0f) * 127.0f)); - detail::snorm2x8 Packing; - Packing.data.x = Scaled.x; - Packing.data.y = Scaled.y; - return Packing.pack; + i8vec2 Topack(round(clamp(v ,-1.0f, 1.0f) * 127.0f)); + return *reinterpret_cast(&Topack); } - GLM_FUNC_QUALIFIER vec2 unpackSnorm2x8(uint16 p) + GLM_FUNC_QUALIFIER vec2 unpackSnorm2x8(uint16 const & p) { - detail::snorm2x8 Packing; - Packing.pack = p; - vec2 Unpacked(Packing.data.x, Packing.data.y); - return clamp(Unpacked * float(0.00787401574803149606299212598425), -1.0f, 1.0f); + vec2 Unpack(*reinterpret_cast(const_cast(&p))); + return clamp( + Unpack * 0.00787401574803149606299212598425f, // 1.0f / 127.0f + -1.0f, 1.0f); } - GLM_FUNC_QUALIFIER uint16 packUnorm1x16(float s) + GLM_FUNC_QUALIFIER uint16 packUnorm1x16(float const & s) { - return uint16(round(clamp(s, 0.0f, 1.0f) * 65535.0f)); + return static_cast(round(clamp(s, 0.0f, 1.0f) * 65535.0f)); } - GLM_FUNC_QUALIFIER float unpackUnorm1x16(uint16 p) + GLM_FUNC_QUALIFIER float unpackUnorm1x16(uint16 const & p) { - return float(p) * 1.0f / 65535.0f; + float Unpack = static_cast(*const_cast(&p)); + return Unpack * 1.5259021896696421759365224689097e-5f; // 1.0 / 65535.0 } GLM_FUNC_QUALIFIER uint64 packUnorm4x16(vec4 const & v) { - i16vec4 Scaled(round(clamp(v, 0.0f, 1.0f) * 65535.0f)); - detail::unorm4x16 Packing; - Packing.data.x = Scaled[0]; - Packing.data.y = Scaled[1]; - Packing.data.z = Scaled[2]; - Packing.data.w = Scaled[3]; - return Packing.pack; + u16vec4 Topack(round(clamp(v , 0.0f, 1.0f) * 65535.0f)); + return *reinterpret_cast(&Topack); } GLM_FUNC_QUALIFIER vec4 unpackUnorm4x16(uint64 const & p) { - detail::unorm4x16 Packing; - vec4 Result( - float(Packing.data.x), - float(Packing.data.y), - float(Packing.data.z), - float(Packing.data.w)); - Result *= float(1.5259021896696421759365224689097e-5); // 1.0 / 65535.0 - return Result; + vec4 Unpack(*reinterpret_cast(const_cast(&p))); + return Unpack * 1.5259021896696421759365224689097e-5f; // 1.0 / 65535.0 } - GLM_FUNC_QUALIFIER uint16 packSnorm1x16(float v) + GLM_FUNC_QUALIFIER uint16 packSnorm1x16(float const & v) { - float Scaled = clamp(v ,-1.0f, 1.0f) * 32767.0f; - detail::snorm1x16 Packing; - Packing.data = detail::int16(Scaled); - return Packing.pack; + int16 Topack = static_cast(round(clamp(v ,-1.0f, 1.0f) * 32767.0f)); + return *reinterpret_cast(&Topack); } - GLM_FUNC_QUALIFIER float unpackSnorm1x16(uint16 p) + GLM_FUNC_QUALIFIER float unpackSnorm1x16(uint16 const & p) { - detail::snorm1x16 Packing; - Packing.pack = p; - return clamp(float(Packing.data) * float(3.0518509475997192297128208258309e-5), -1.0f, 1.0f); //1.0f / 32767.0f + float Unpack = static_cast(*const_cast(&p)); + return clamp( + Unpack * 3.0518509475997192297128208258309e-5f, //1.0f / 32767.0f, + -1.0f, 1.0f); } GLM_FUNC_QUALIFIER uint64 packSnorm4x16(vec4 const & v) { - i16vec4 Scaled(clamp(v ,-1.0f, 1.0f) * 32767.0f); - detail::snorm4x16 Packing; - Packing.data.x = Scaled.x; - Packing.data.y = Scaled.y; - Packing.data.z = Scaled.z; - Packing.data.w = Scaled.w; - return Packing.pack; + i16vec4 Topack = static_cast(round(clamp(v ,-1.0f, 1.0f) * 32767.0f)); + return *reinterpret_cast(&Topack); } GLM_FUNC_QUALIFIER vec4 unpackSnorm4x16(uint64 const & p) { - detail::snorm4x16 Packing; - Packing.pack = p; - vec4 Unpacked(Packing.data.x, Packing.data.y, Packing.data.z, Packing.data.w); - return clamp(Unpacked * float(3.0518509475997192297128208258309e-5), -1.0f, 1.0f); //1.0f / 32767.0f + vec4 Unpack(*reinterpret_cast(const_cast(&p))); + return clamp( + Unpack * 3.0518509475997192297128208258309e-5f, //1.0f / 32767.0f, + -1.0f, 1.0f); } GLM_FUNC_DECL uint16 packHalf1x16(float const & v) { - detail::half1x16 Packing; - Packing.data = detail::toFloat16(v); - return Packing.pack; + int16 Topack = detail::toFloat16(v); + return *reinterpret_cast(&Topack); } GLM_FUNC_DECL float unpackHalf1x16(uint16 const & v) { - detail::half1x16 Packing; - Packing.pack = v; - return detail::toFloat32(Packing.data); + int16 Unpack(*reinterpret_cast(const_cast(&v))); + return detail::toFloat32(Unpack); } GLM_FUNC_DECL uint64 packHalf4x16(glm::vec4 const & v) { - detail::half4x16 Packing; - Packing.data.x = detail::toFloat16(v.x); - Packing.data.y = detail::toFloat16(v.y); - Packing.data.z = detail::toFloat16(v.z); - Packing.data.w = detail::toFloat16(v.w); - return Packing.pack; + i16vec4 Unpack( + detail::toFloat16(v.x), + detail::toFloat16(v.y), + detail::toFloat16(v.z), + detail::toFloat16(v.w)); + + return *reinterpret_cast(&Unpack); } GLM_FUNC_DECL glm::vec4 unpackHalf4x16(uint64 const & v) { - detail::half4x16 Packing; - Packing.pack = v; - return glm::vec4( - detail::toFloat32(Packing.data.x), - detail::toFloat32(Packing.data.y), - detail::toFloat32(Packing.data.z), - detail::toFloat32(Packing.data.w)); + i16vec4 Unpack = *reinterpret_cast(const_cast(&v)); + + return vec4( + detail::toFloat32(Unpack.x), + detail::toFloat32(Unpack.y), + detail::toFloat32(Unpack.z), + detail::toFloat32(Unpack.w)); } GLM_FUNC_QUALIFIER uint32 packI3x10_1x2(ivec4 const & v) @@ -557,19 +446,19 @@ namespace detail } GLM_FUNC_QUALIFIER uint32 packF2x11_1x10(vec3 const & v) - { - return - ((detail::floatTo11bit(v.x) & ((1 << 11) - 1)) << 0) | - ((detail::floatTo11bit(v.y) & ((1 << 11) - 1)) << 11) | - ((detail::floatTo10bit(v.z) & ((1 << 10) - 1)) << 22); - } - + { + return + ((detail::floatTo11bit(v.x) & ((1 << 11) - 1)) << 0) | + ((detail::floatTo11bit(v.y) & ((1 << 11) - 1)) << 11) | + ((detail::floatTo10bit(v.z) & ((1 << 10) - 1)) << 22); + } + GLM_FUNC_QUALIFIER vec3 unpackF2x11_1x10(uint32 const & v) - { - return vec3( - detail::packed11bitToFloat(v >> 0), - detail::packed11bitToFloat(v >> 11), - detail::packed10bitToFloat(v >> 22)); - } + { + return vec3( + detail::packed11bitToFloat(v >> 0), + detail::packed11bitToFloat(v >> 11), + detail::packed10bitToFloat(v >> 22)); + } }//namespace glm