bitmath: Finish up optimizations

This patch adds support for other compilers and systems,
including MSVC, the Intel C compiler, etc.

Signed-off-by: Erik de Castro Lopo <erikd@mega-nerd.com>
This commit is contained in:
Cristian Rodríguez
2012-05-08 23:58:19 -04:00
committed by Erik de Castro Lopo
parent 605f920816
commit 387b72731d
3 changed files with 116 additions and 106 deletions

View File

@@ -34,28 +34,126 @@
#include "FLAC/ordinals.h"
/* for CHAR_BIT */
#include <limits.h>
#if defined(__GNUC__)
#if defined(_MSC_VER) && (_MSC_VER >= 1400)
#include <intrin.h> /* for _BitScanReverse* */
#endif
/* Portable software count-leading-zeros for a 32-bit word.
 *
 * Returns the number of zero bits above the most significant set bit of
 * `word` when it is viewed as a 32-bit quantity, or 32 when `word` is 0.
 * `word` must fit in 32 bits; larger values would index past the table.
 *
 * Declared `static inline` so that every translation unit including this
 * header gets its own definition: a plain `inline` definition provides no
 * external symbol in C99/C11, so a non-inlined call would fail to link.
 *
 * Will never be emitted for MSVC, GCC, Intel compilers.
 */
static inline unsigned int FLAC__clz_soft_uint32(unsigned int word)
{
	/* byte_to_unary_table[b] = number of leading zero bits in the byte b
	 * (8 for b == 0). */
	static const unsigned char byte_to_unary_table[] = {
		8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
		3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
		2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
		2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	};

	/* Find the highest non-zero byte, look up its leading zeros, and add
	 * 8 for every all-zero byte above it. */
	if (word > 0xffffff)
		return byte_to_unary_table[word >> 24];
	if (word == 0)
		return 32;
	if (word > 0xffff)
		return byte_to_unary_table[word >> 16] + 8;
	if (word > 0xff)
		return byte_to_unary_table[word >> 8] + 16;
	return byte_to_unary_table[word] + 24;
}
/* Counts the leading zero bits of the 32-bit value `v`, using a compiler
 * intrinsic where one is available and FLAC__clz_soft_uint32() otherwise.
 *
 * The intrinsic paths give no defined result for v == 0, so callers must
 * pass a non-zero value.
 */
static inline unsigned int FLAC__clz_uint32(FLAC__uint32 v)
{
/* Never used with input 0 */
#if defined(__INTEL_COMPILER)
	/* Bit-scan-reverse yields the index of the highest set bit;
	 * XOR with 31 turns that index into a leading-zero count. */
	return _bit_scan_reverse(v) ^ 31U;
#elif defined(__GNUC__) && (__GNUC__ >= 4 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4))
	/* This will translate either to (bsr ^ 31U), clz, ctlz, cntlz, lzcnt
	 * depending on the -march= setting, or to a software routine on exotic
	 * machines. */
	return __builtin_clz(v);
#elif defined(_MSC_VER) && (_MSC_VER >= 1400)
	{
		/* _BitScanReverse writes the bit index through an
		 * `unsigned long *`, so the local must have that exact type. */
		unsigned long idx;
		_BitScanReverse(&idx, v);
		return (unsigned int)(idx ^ 31U);
	}
#else
	return FLAC__clz_soft_uint32(v);
#endif
}
/* An example of what FLAC__bitmath_ilog2() computes:
*
* ilog2( 0) = undefined
* ilog2( 1) = 0
* ilog2( 2) = 1
* ilog2( 3) = 1
* ilog2( 4) = 2
* ilog2( 5) = 2
* ilog2( 6) = 2
* ilog2( 7) = 2
* ilog2( 8) = 3
* ilog2( 9) = 3
* ilog2(10) = 3
* ilog2(11) = 3
* ilog2(12) = 3
* ilog2(13) = 3
* ilog2(14) = 3
* ilog2(15) = 3
* ilog2(16) = 4
* ilog2(17) = 4
* ilog2(18) = 4
*/
/* Returns floor(log2(v)), i.e. the index of the highest set bit of `v`.
 *
 * ilog2(0) is mathematically undefined; this implementation returns 0 for
 * it so that FLAC__clz_uint32() is never called with a zero argument.
 */
static inline unsigned FLAC__bitmath_ilog2(FLAC__uint32 v)
{
	if (v == 0)
		return 0;
	/* (bit width - 1) - leading zeros == index of the top set bit.
	 * Uses the portable CHAR_BIT and the FLAC__clz_uint32() wrapper rather
	 * than GCC-only __CHAR_BIT__ / __builtin_clz. */
	return sizeof(FLAC__uint32) * CHAR_BIT - 1 - FLAC__clz_uint32(v);
}
#ifdef FLAC__INTEGER_ONLY_LIBRARY /*Unused otherwise */
/* 64-bit counterpart of FLAC__bitmath_ilog2(): returns floor(log2(v)), i.e.
 * the index (0..63) of the highest set bit of `v`.  Returns 0 for v == 0.
 *
 * All three implementations below return the same value for the same input.
 */
static inline unsigned FLAC__bitmath_ilog2_wide(FLAC__uint64 v)
{
	if (v == 0)
		return 0;
#if defined(__GNUC__) && (__GNUC__ >= 4 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4))
	return sizeof(FLAC__uint64) * CHAR_BIT - 1 - __builtin_clzll(v);
/* Sorry, only supported in win64/Itanium.. */
#elif (defined(_MSC_VER) && (_MSC_VER >= 1400)) && (defined(_M_IA64) || defined(_WIN64))
	{
		/* _BitScanReverse64 stores the index of the highest set bit, which
		 * already is ilog2(v); it writes through an `unsigned long *`. */
		unsigned long idx;
		_BitScanReverse64(&idx, v);
		return (unsigned)idx;
	}
#else
	/* Compilers without a suitable intrinsic use the fastest portable way,
	   that is, de Bruijn sequences
	   (http://supertech.csail.mit.edu/papers/debruijn.pdf)
	   (C) Timothy B. Terriberry (tterribe@xiph.org) 2001-2009 LGPL (v2 or later).
	*/
	{
		static const unsigned char DEBRUIJN_IDX64[64]={
			0, 1, 2, 7, 3,13, 8,19, 4,25,14,28, 9,34,20,40,
			5,17,26,38,15,46,29,48,10,31,35,54,21,50,41,57,
			63, 6,12,18,24,27,33,39,16,37,45,47,30,53,49,56,
			62,11,23,32,36,44,52,55,61,22,43,51,60,42,59,58
		};
		/* Smear the top set bit into every lower position ... */
		v |= v >> 1;
		v |= v >> 2;
		v |= v >> 4;
		v |= v >> 8;
		v |= v >> 16;
		v |= v >> 32;
		/* ... then reduce to the single power of two 2^ilog2(v). */
		v = (v >> 1) + 1;
		/* Multiplying by the de Bruijn constant puts a unique 6-bit
		 * pattern in the top bits; the table maps it back to the exponent. */
		return DEBRUIJN_IDX64[((v * 0x218A392CD3D5DBF) >> 58) & 0x3F];
	}
#endif
}
#endif
/* Prototype only; the definition is not part of this header.
 * NOTE(review): presumably the signed-argument counterpart of
 * FLAC__bitmath_ilog2() -- confirm against the implementation. */
unsigned FLAC__bitmath_silog2(int v);