Miroslav's speed optimization patch

This commit is contained in:
Josh Coalson
2003-02-27 06:12:55 +00:00
parent 94f81b0a9a
commit 13ad04bbb5
5 changed files with 158 additions and 285 deletions

View File

@@ -99,15 +99,16 @@ static FLAC__INLINE FLAC__int32 linear_dither(unsigned source_bps, unsigned targ
return output >> scalebits;
}
unsigned FLAC__plugin_common__pack_pcm_signed_little_endian(FLAC__byte *data, FLAC__int32 *input, unsigned wide_samples, unsigned channels, unsigned source_bps, unsigned target_bps)
unsigned FLAC__plugin_common__pack_pcm_signed_little_endian(FLAC__byte *data, const FLAC__int32 * const input[], unsigned wide_samples, unsigned channels, unsigned source_bps, unsigned target_bps)
{
static dither_state dither[FLAC_PLUGIN__MAX_SUPPORTED_CHANNELS];
FLAC__byte * const start = data;
FLAC__int32 sample;
unsigned samples = wide_samples * channels;
const FLAC__int32 *input_;
unsigned samples, channel;
const unsigned bytes_per_sample = target_bps / 8;
unsigned inc = bytes_per_sample * channels;
FLAC__ASSERT(FLAC_PLUGIN__MAX_SUPPORTED_CHANNELS == 2);
FLAC__ASSERT(channels > 0 && channels <= FLAC_PLUGIN__MAX_SUPPORTED_CHANNELS);
FLAC__ASSERT(source_bps < 32);
FLAC__ASSERT(target_bps <= 24);
@@ -116,47 +117,57 @@ unsigned FLAC__plugin_common__pack_pcm_signed_little_endian(FLAC__byte *data, FL
FLAC__ASSERT((target_bps & 7) == 0);
if(source_bps != target_bps) {
const FLAC__int32 MIN = -(1L << source_bps);
const FLAC__int32 MIN = -(1L << (source_bps - 1));
const FLAC__int32 MAX = ~MIN; /*(1L << (source_bps-1)) - 1 */
const unsigned dither_twiggle = channels - 1;
unsigned dither_source = 0;
while(samples--) {
sample = linear_dither(source_bps, target_bps, *input++, &dither[dither_source], MIN, MAX);
dither_source ^= dither_twiggle;
for(channel = 0; channel < channels; channel++) {
samples = wide_samples;
data = start + bytes_per_sample * channel;
input_ = input[channel];
switch(target_bps) {
case 8:
data[0] = sample ^ 0x80;
break;
case 24:
data[2] = (FLAC__byte)(sample >> 16);
/* fall through */
case 16:
data[1] = (FLAC__byte)(sample >> 8);
data[0] = (FLAC__byte)sample;
while(samples--) {
sample = linear_dither(source_bps, target_bps, *input_++, &dither[channel], MIN, MAX);
switch(target_bps) {
case 8:
data[0] = sample ^ 0x80;
break;
case 24:
data[2] = (FLAC__byte)(sample >> 16);
/* fall through */
case 16:
data[1] = (FLAC__byte)(sample >> 8);
data[0] = (FLAC__byte)sample;
}
data += inc;
}
data += bytes_per_sample;
}
}
else {
while(samples--) {
sample = *input++;
for(channel = 0; channel < channels; channel++) {
samples = wide_samples;
data = start + bytes_per_sample * channel;
input_ = input[channel];
switch(target_bps) {
case 8:
data[0] = sample ^ 0x80;
break;
case 24:
data[2] = (FLAC__byte)(sample >> 16);
/* fall through */
case 16:
data[1] = (FLAC__byte)(sample >> 8);
data[0] = (FLAC__byte)sample;
while(samples--) {
sample = *input_++;
switch(target_bps) {
case 8:
data[0] = sample ^ 0x80;
break;
case 24:
data[2] = (FLAC__byte)(sample >> 16);
/* fall through */
case 16:
data[1] = (FLAC__byte)(sample >> 8);
data[0] = (FLAC__byte)sample;
}
data += inc;
}
data += bytes_per_sample;
}
}

View File

@@ -22,6 +22,6 @@
#include "defs.h" /* provides FLAC_PLUGIN__MAX_SUPPORTED_CHANNELS for the caller */
#include "FLAC/ordinals.h"
unsigned FLAC__plugin_common__pack_pcm_signed_little_endian(FLAC__byte *data, FLAC__int32 *input, unsigned wide_samples, unsigned channels, unsigned source_bps, unsigned target_bps);
unsigned FLAC__plugin_common__pack_pcm_signed_little_endian(FLAC__byte *data, const FLAC__int32 * const input[], unsigned wide_samples, unsigned channels, unsigned source_bps, unsigned target_bps);
#endif

View File

@@ -208,6 +208,7 @@ void FLAC__plugin_common__init_dither_context(DitherContext *d, int bits, int sh
if (shapingtype < 0) shapingtype = 0;
if (shapingtype > 3) shapingtype = 3;
d->ShapingType = (NoiseShaping)shapingtype;
index = bits - 11 - shapingtype;
if (index < 0) index = 0;
if (index > 9) index = 9;
@@ -219,6 +220,7 @@ void FLAC__plugin_common__init_dither_context(DitherContext *d, int bits, int sh
d->Mask = ((FLAC__uint64)-1) << (32 - bits);
d->Add = 0.5 * ((1L << (32 - bits)) - 1);
d->Dither = 0.01f*default_dither[index] / (((FLAC__int64)1) << bits);
d->LastHistoryIndex = 0;
}
/*
@@ -286,7 +288,7 @@ static FLAC__INLINE FLAC__int64 dither_output_(DitherContext *d, FLAC__bool do_d
#endif
int FLAC__plugin_common__apply_gain(FLAC__byte *data_out, FLAC__int32 *input, unsigned wide_samples, unsigned channels, const unsigned source_bps, const unsigned target_bps, const float scale, const FLAC__bool hard_limit, FLAC__bool do_dithering, NoiseShaping noise_shaping, DitherContext *dither_context)
int FLAC__plugin_common__apply_gain(FLAC__byte *data_out, const FLAC__int32 * const input[], unsigned wide_samples, unsigned channels, const unsigned source_bps, const unsigned target_bps, const float scale, const FLAC__bool hard_limit, FLAC__bool do_dithering, DitherContext *dither_context)
{
static const FLAC__int32 conv_factors_[33] = {
-1, /* 0 bits-per-sample (not supported) */
@@ -369,16 +371,15 @@ int FLAC__plugin_common__apply_gain(FLAC__byte *data_out, FLAC__int32 *input, un
const double multi_scale = scale / (double)(1u << (source_bps-1));
FLAC__byte * const start = data_out;
const unsigned samples = wide_samples * channels;
#ifdef FLAC__PLUGIN_COMMON__DONT_UNROLL
const unsigned dither_twiggle = channels - 1;
unsigned dither_source = 0;
#endif
unsigned i;
int coeff;
unsigned i, channel;
const FLAC__int32 *input_;
double sample;
const unsigned bytes_per_sample = target_bps / 8;
unsigned inc = bytes_per_sample * channels, last_history_index = dither_context->LastHistoryIndex;
NoiseShaping noise_shaping = dither_context->ShapingType;
FLAC__int64 val64;
FLAC__int32 val32;
FLAC__ASSERT(FLAC_PLUGIN__MAX_SUPPORTED_CHANNELS == 2);
FLAC__ASSERT(channels > 0 && channels <= FLAC_PLUGIN__MAX_SUPPORTED_CHANNELS);
FLAC__ASSERT(source_bps >= 4);
FLAC__ASSERT(target_bps >= 4);
@@ -386,67 +387,11 @@ int FLAC__plugin_common__apply_gain(FLAC__byte *data_out, FLAC__int32 *input, un
FLAC__ASSERT(target_bps < 32);
FLAC__ASSERT((target_bps & 7) == 0);
#ifdef FLAC__PLUGIN_COMMON__DONT_UNROLL
/*
* This flavor handles 1 or 2 channels with the same code
*/
coeff = 0;
for(i = 0; i < samples; i++, coeff++) {
sample = (double)input[i] * multi_scale;
if(hard_limit) {
/* hard 6dB limiting */
if(sample < -0.5)
sample = tanh((sample + 0.5) / (1-0.5)) * (1-0.5) - 0.5;
else if(sample > 0.5)
sample = tanh((sample - 0.5) / (1-0.5)) * (1-0.5) + 0.5;
}
sample *= 2147483647.f;
{
FLAC__int64 val64;
FLAC__int32 val32;
if(coeff >= (32<<dither_twiggle))
coeff = 0;
/* 'coeff>>dither_twiggle' is the same as 'coeff/channels' */
val64 = dither_output_(dither_context, do_dithering, noise_shaping, coeff>>dither_twiggle, sample, dither_source) / conv_factor;
dither_source ^= dither_twiggle;
val32 = (FLAC__int32)val64;
if(val64 >= -hard_clip_factor)
val32 = (FLAC__int32)(-(hard_clip_factor+1));
else if(val64 < hard_clip_factor)
val32 = (FLAC__int32)hard_clip_factor;
switch(target_bps) {
case 8:
data_out[0] = val32 ^ 0x80;
break;
case 24:
data_out[2] = (FLAC__byte)(val32 >> 16);
/* fall through */
case 16:
data_out[1] = (FLAC__byte)(val32 >> 8);
data_out[0] = (FLAC__byte)val32;
}
}
data_out += target_bps/8;
}
#else
/*
* This flavor has optimized versions for 1 or 2 channels
*/
if(channels == 2) {
FLAC__int64 val64;
FLAC__int32 val32;
coeff = 0;
for(i = 0; i < samples; ) {
sample = (double)input[i] * multi_scale;
for(channel = 0; channel < channels; channel++) {
data_out = start + bytes_per_sample * channel;
input_ = input[channel];
for(i = 0; i < wide_samples; i++, data_out += inc) {
sample = (double)input_[i] * multi_scale;
if(hard_limit) {
/* hard 6dB limiting */
@@ -457,7 +402,7 @@ int FLAC__plugin_common__apply_gain(FLAC__byte *data_out, FLAC__int32 *input, un
}
sample *= 2147483647.f;
val64 = dither_output_(dither_context, do_dithering, noise_shaping, coeff, sample, 0) / conv_factor;
val64 = dither_output_(dither_context, do_dithering, noise_shaping, (i + last_history_index) % 32, sample, channel) / conv_factor;
val32 = (FLAC__int32)val64;
if(val64 >= -hard_clip_factor)
@@ -476,94 +421,9 @@ int FLAC__plugin_common__apply_gain(FLAC__byte *data_out, FLAC__int32 *input, un
data_out[1] = (FLAC__byte)(val32 >> 8);
data_out[0] = (FLAC__byte)val32;
}
data_out += target_bps/8;
i++;
sample = (double)input[i] * multi_scale;
if(hard_limit) {
/* hard 6dB limiting */
if(sample < -0.5)
sample = tanh((sample + 0.5) / (1-0.5)) * (1-0.5) - 0.5;
else if(sample > 0.5)
sample = tanh((sample - 0.5) / (1-0.5)) * (1-0.5) + 0.5;
}
sample *= 2147483647.f;
val64 = dither_output_(dither_context, do_dithering, noise_shaping, coeff, sample, 1) / conv_factor;
val32 = (FLAC__int32)val64;
if(val64 >= -hard_clip_factor)
val32 = (FLAC__int32)(-(hard_clip_factor+1));
else if(val64 < hard_clip_factor)
val32 = (FLAC__int32)hard_clip_factor;
switch(target_bps) {
case 8:
data_out[0] = val32 ^ 0x80;
break;
case 24:
data_out[2] = (FLAC__byte)(val32 >> 16);
/* fall through */
case 16:
data_out[1] = (FLAC__byte)(val32 >> 8);
data_out[0] = (FLAC__byte)val32;
}
data_out += target_bps/8;
i++;
coeff++;
if(coeff >= 32)
coeff = 0;
}
}
else {
FLAC__int64 val64;
FLAC__int32 val32;
coeff = 0;
for(i = 0; i < samples; i++, coeff++) {
if(coeff >= 32)
coeff = 0;
sample = (double)input[i] * multi_scale;
if(hard_limit) {
/* hard 6dB limiting */
if(sample < -0.5)
sample = tanh((sample + 0.5) / (1-0.5)) * (1-0.5) - 0.5;
else if(sample > 0.5)
sample = tanh((sample - 0.5) / (1-0.5)) * (1-0.5) + 0.5;
}
sample *= 2147483647.f;
val64 = dither_output_(dither_context, do_dithering, noise_shaping, coeff, sample, 0) / conv_factor;
val32 = (FLAC__int32)val64;
if(val64 >= -hard_clip_factor)
val32 = (FLAC__int32)(-(hard_clip_factor+1));
else if(val64 < hard_clip_factor)
val32 = (FLAC__int32)hard_clip_factor;
switch(target_bps) {
case 8:
data_out[0] = val32 ^ 0x80;
break;
case 24:
data_out[2] = (FLAC__byte)(val32 >> 16);
/* fall through */
case 16:
data_out[1] = (FLAC__byte)(val32 >> 8);
data_out[0] = (FLAC__byte)val32;
}
data_out += target_bps/8;
}
}
#endif
dither_context->LastHistoryIndex = (last_history_index + wide_samples) % 32;
return data_out - start;
}

View File

@@ -22,6 +22,13 @@
#include "defs.h"
#include "FLAC/ordinals.h"
typedef enum {
NOISE_SHAPING_NONE = 0,
NOISE_SHAPING_LOW = 1,
NOISE_SHAPING_MEDUIM = 2,
NOISE_SHAPING_HIGH = 3
} NoiseShaping;
typedef struct {
const float* FilterCoeff;
FLAC__uint64 Mask;
@@ -30,18 +37,13 @@ typedef struct {
float ErrorHistory [FLAC_PLUGIN__MAX_SUPPORTED_CHANNELS] [16]; /* 16th order Noise shaping */
float DitherHistory [FLAC_PLUGIN__MAX_SUPPORTED_CHANNELS] [16];
int LastRandomNumber [FLAC_PLUGIN__MAX_SUPPORTED_CHANNELS];
unsigned LastHistoryIndex;
NoiseShaping ShapingType;
} DitherContext;
typedef enum {
NOISE_SHAPING_NONE = 0,
NOISE_SHAPING_LOW = 1,
NOISE_SHAPING_MEDUIM = 2,
NOISE_SHAPING_HIGH = 3
} NoiseShaping;
void FLAC__plugin_common__init_dither_context(DitherContext *dither, int bits, int shapingtype);
/* scale = (float) pow(10., (double)replaygain * 0.05); */
int FLAC__plugin_common__apply_gain(FLAC__byte *data_out, FLAC__int32 *input, unsigned wide_samples, unsigned channels, const unsigned source_bps, const unsigned target_bps, const float scale, const FLAC__bool hard_limit, FLAC__bool do_dithering, NoiseShaping noise_shaping, DitherContext *dither_context);
int FLAC__plugin_common__apply_gain(FLAC__byte *data_out, const FLAC__int32 * const input[], unsigned wide_samples, unsigned channels, const unsigned source_bps, const unsigned target_bps, const float scale, const FLAC__bool hard_limit, FLAC__bool do_dithering, DitherContext *dither_context);
#endif