diff --git a/src/libFLAC/bitwriter.c b/src/libFLAC/bitwriter.c index e994c5ea..09aef2a0 100644 --- a/src/libFLAC/bitwriter.c +++ b/src/libFLAC/bitwriter.c @@ -556,60 +556,93 @@ FLAC__bool FLAC__bitwriter_write_rice_signed_block(FLAC__BitWriter *bw, const FL msbits = uval >> parameter; - /* slightly pessimistic size check but faster than "<= bw->words + (bw->bits+msbits+lsbits+FLAC__BITS_PER_WORD-1)/FLAC__BITS_PER_WORD" */ - /* OPT: pessimism may cause flurry of false calls to grow_ which eat up all savings before it */ - if(bw->capacity <= bw->words + bw->bits + msbits + 1/*lsbits always fit in 1 bwword*/ && !bitwriter_grow_(bw, msbits+lsbits)) - return false; - - if(msbits) { - /* first part gets to word alignment */ - if(bw->bits) { - left = FLAC__BITS_PER_WORD - bw->bits; - if(msbits < left) { - bw->accum <<= msbits; - bw->bits += msbits; - goto break1; - } - else { - bw->accum <<= left; - msbits -= left; - bw->buffer[bw->words++] = SWAP_BE_WORD_TO_HOST(bw->accum); - bw->bits = 0; - } - } - /* do whole words */ - while(msbits >= FLAC__BITS_PER_WORD) { - bw->buffer[bw->words++] = 0; - msbits -= FLAC__BITS_PER_WORD; - } - /* do any leftovers */ - if(msbits > 0) { - bw->accum = 0; - bw->bits = msbits; - } - } -break1: - uval |= mask1; /* set stop bit */ - uval &= mask2; /* mask off unused top bits */ - - left = FLAC__BITS_PER_WORD - bw->bits; - if(lsbits < left) { +#if 1 /* OPT: can remove this special case if it doesn't make up for the extra compare */ + if(bw->bits + msbits + lsbits <= FLAC__BITS_PER_WORD) { /* i.e. if the whole thing fits in the current bwword */ + bw->bits = bw->bits + msbits + lsbits; + uval |= mask1; /* set stop bit */ + uval &= mask2; /* mask off unused top bits */ + /* NOT: bw->accum <<= msbits + lsbits because msbits+lsbits could be 32, then the shift would be a NOP */ + bw->accum <<= msbits; bw->accum <<= lsbits; bw->accum |= uval; - bw->bits += lsbits; + if(bw->bits == FLAC__BITS_PER_WORD) { + bw->buffer[bw->words++] = SWAP_BE_WORD_TO_HOST(bw->accum); + bw->bits = 0; + /* burying the capacity check down here means we have to grow the buffer a little if there are more vals to do */ + if(bw->capacity <= bw->words && nvals > 1 && !bitwriter_grow_(bw, 1)) { + FLAC__ASSERT(bw->capacity == bw->words); + return false; + } + } } else { - /* if bw->bits == 0, left==FLAC__BITS_PER_WORD which will always - * be > lsbits (because of previous assertions) so it would have - * triggered the (lsbitsbits); - FLAC__ASSERT(left < FLAC__BITS_PER_WORD); - bw->accum <<= left; - bw->accum |= uval >> (bw->bits = lsbits - left); - bw->buffer[bw->words++] = SWAP_BE_WORD_TO_HOST(bw->accum); - bw->accum = uval; +#elif 0 /*@@@@@@ OPT: try this version with MSVC6 to see if better, not much difference for gcc-4 */ + if(bw->bits + msbits + lsbits < FLAC__BITS_PER_WORD) { /* i.e. if the whole thing fits in the current bwword */ + bw->bits = bw->bits + msbits + lsbits; + uval |= mask1; /* set stop bit */ + uval &= mask2; /* mask off unused top bits */ + bw->accum <<= msbits + lsbits; + bw->accum |= uval; } + else { +#endif + /* slightly pessimistic size check but faster than "<= bw->words + (bw->bits+msbits+lsbits+FLAC__BITS_PER_WORD-1)/FLAC__BITS_PER_WORD" */ + /* OPT: pessimism may cause flurry of false calls to grow_ which eat up all savings before it */ + if(bw->capacity <= bw->words + bw->bits + msbits + 1/*lsbits always fit in 1 bwword*/ && !bitwriter_grow_(bw, msbits+lsbits)) + return false; + + if(msbits) { + /* first part gets to word alignment */ + if(bw->bits) { + left = FLAC__BITS_PER_WORD - bw->bits; + if(msbits < left) { + bw->accum <<= msbits; + bw->bits += msbits; + goto break1; + } + else { + bw->accum <<= left; + msbits -= left; + bw->buffer[bw->words++] = SWAP_BE_WORD_TO_HOST(bw->accum); + bw->bits = 0; + } + } + /* do whole words */ + while(msbits >= FLAC__BITS_PER_WORD) { + bw->buffer[bw->words++] = 0; + msbits -= FLAC__BITS_PER_WORD; + } + /* do any leftovers */ + if(msbits > 0) { + bw->accum = 0; + bw->bits = msbits; + } + } +break1: + uval |= mask1; /* set stop bit */ + uval &= mask2; /* mask off unused top bits */ + + left = FLAC__BITS_PER_WORD - bw->bits; + if(lsbits < left) { + bw->accum <<= lsbits; + bw->accum |= uval; + bw->bits += lsbits; + } + else { + /* if bw->bits == 0, left==FLAC__BITS_PER_WORD which will always + * be > lsbits (because of previous assertions) so it would have + * triggered the (lsbitsbits); + FLAC__ASSERT(left < FLAC__BITS_PER_WORD); + bw->accum <<= left; + bw->accum |= uval >> (bw->bits = lsbits - left); + bw->buffer[bw->words++] = SWAP_BE_WORD_TO_HOST(bw->accum); + bw->accum = uval; + } +#if 1 + } +#endif vals++; nvals--; }