From 29a28338c3c0aaba2e5b074a6e757a6cc3f0959e Mon Sep 17 00:00:00 2001 From: Martijn van Beurden Date: Sun, 10 Aug 2014 10:59:29 +0200 Subject: [PATCH] Add partial_tukey and punchout_tukey apodization functions Adds two new apodization functions that seem to perform better than the apodization functions currently in the codebase and fixes three existing windows as well. Its important to note that this patch only affects the encoder stage that evaluates various possible predictors. Audio encoded with these new windows will still decode with existing legacy decoders. = Theory = These functions are used to window the audio data at the predictor stage. These news functions enable the use of only part of the signal to generate a predictor. This helps because short transients can introduce noise into the predictor. The predictor becomes very good at prediciting one part of the signal, instead of mediocre for the whole block. Signed-off-by: Erik de Castro Lopo --- doc/html/documentation_tools_flac.html | 5 +- include/FLAC/stream_encoder.h | 25 ++++++- man/flac.1 | 6 +- man/flac.sgml | 4 +- src/libFLAC/include/private/window.h | 2 + .../include/protected/stream_encoder.h | 7 ++ src/libFLAC/stream_encoder.c | 48 +++++++++++++ src/libFLAC/window.c | 67 +++++++++++++++++-- 8 files changed, 152 insertions(+), 12 deletions(-) diff --git a/doc/html/documentation_tools_flac.html b/doc/html/documentation_tools_flac.html index 6fa18dad..9f85a259 100644 --- a/doc/html/documentation_tools_flac.html +++ b/doc/html/documentation_tools_flac.html @@ -852,10 +852,11 @@ -A "function", --apodization="function" - Window audio data with given the apodization function. The functions are: bartlett, bartlett_hann, blackman, blackman_harris_4term_92db, connes, flattop, gauss(STDDEV), hamming, hann, kaiser_bessel, nuttall, rectangle, triangle, tukey(P), welch.
+ Window audio data with given the apodization function. The functions are: bartlett, bartlett_hann, blackman, blackman_harris_4term_92db, connes, flattop, gauss(STDDEV), hamming, hann, kaiser_bessel, nuttall, rectangle, triangle, tukey(P), partial_tukey(n[/ov[/P]]), punchout_tukey(n[/ov[/P]]), welch.
For gauss(STDDEV), STDDEV is the standard deviation (0<STDDEV<=0.5).
For tukey(P), P specifies the fraction of the window that is tapered (0<=P<=1; P=0 corresponds to "rectangle" and P=1 corresponds to "hann").
- Please note that for both P as well as STDDEV, the use of a point or comma as decimal separator is locale-dependent.
+ For partial_tukey(n) and punchout_tukey(n), n apodization functions are added that span different parts of each block. Values of 2 to 6 seem to yield sane results. If necessary, an overlap can be specified, as can be the taper parameter, for example partial_tukey(2/0.2) or partial_tukey(2/0.2/0.5). ov should be smaller than 1 and can be negative.
+ Please note that P, STDDEV and ov are locale specific, so a comma as decimal separator might be required instead of a dot.
More than one -A option (up to 32) may be used. Any function that is specified erroneously is silently dropped. The encoder chooses suitable defaults in the absence of any -A options; any -A option specified replaces the default(s).
When more than one function is specified, then for every subframe the encoder will try each of them separately and choose the window that results in the smallest compressed subframe. Multiple functions can greatly increase the encoding time.
diff --git a/include/FLAC/stream_encoder.h b/include/FLAC/stream_encoder.h index 6f7796bb..dbfee63b 100644 --- a/include/FLAC/stream_encoder.h +++ b/include/FLAC/stream_encoder.h @@ -920,7 +920,8 @@ FLAC_API FLAC__bool FLAC__stream_encoder_set_loose_mid_side_stereo(FLAC__StreamE * The available functions are \c bartlett, \c bartlett_hann, * \c blackman, \c blackman_harris_4term_92db, \c connes, \c flattop, * \c gauss(STDDEV), \c hamming, \c hann, \c kaiser_bessel, \c nuttall, - * \c rectangle, \c triangle, \c tukey(P), \c welch. + * \c rectangle, \c triangle, \c tukey(P), \c partial_tukey(n[/ov[/P]]), + * \c punchout_tukey(n[/ov[/P]]), \c welch. * * For \c gauss(STDDEV), STDDEV specifies the standard deviation * (0 function, =function - Window audio data with given the apodization function. The functions are: bartlett, bartlett_hann, blackman, blackman_harris_4term_92db, connes, flattop, gauss(STDDEV), hamming, hann, kaiser_bessel, nuttall, rectangle, triangle, tukey(P), welch. + Window audio data with given the apodization function. The functions are: bartlett, bartlett_hann, blackman, blackman_harris_4term_92db, connes, flattop, gauss(STDDEV), hamming, hann, kaiser_bessel, nuttall, rectangle, triangle, tukey(P), partial_tukey(n[/ov[/P]]), punchout_tukey(n[/ov[/P]]), welch. For gauss(STDDEV), STDDEV is the standard deviation (0<STDDEV<=0.5). For tukey(P), P specifies the fraction of the window that is tapered (0<=P<=1; P=0 corresponds to "rectangle" and P=1 corresponds to "hann"). + For partial_tukey(n) and punchout_tukey(n), n apodization functions are added that span different parts of each block. Values of 2 to 6 seem to yield sane results. If necessary, an overlap can be specified, as can be the taper parameter, for example partial_tukey(2/0.2) or partial_tukey(2/0.2/0.5). ov should be smaller than 1 and can be negative. + Please note that P, STDDEV and ov are locale specific, so a comma as decimal separator might be required instead of a dot. More than one -A option (up to 32) may be used. Any function that is specified erroneously is silently dropped. The encoder chooses suitable defaults in the absence of any -A options; any -A option specified replaces the default(s). When more than one function is specified, then for every subframe the encoder will try each of them separately and choose the window that results in the smallest compressed subframe. Multiple functions can greatly increase the encoding time. diff --git a/src/libFLAC/include/private/window.h b/src/libFLAC/include/private/window.h index e712b4af..acf50860 100644 --- a/src/libFLAC/include/private/window.h +++ b/src/libFLAC/include/private/window.h @@ -65,6 +65,8 @@ void FLAC__window_nuttall(FLAC__real *window, const FLAC__int32 L); void FLAC__window_rectangle(FLAC__real *window, const FLAC__int32 L); void FLAC__window_triangle(FLAC__real *window, const FLAC__int32 L); void FLAC__window_tukey(FLAC__real *window, const FLAC__int32 L, const FLAC__real p); +void FLAC__window_partial_tukey(FLAC__real *window, const FLAC__int32 L, const FLAC__real p, const FLAC__real start, const FLAC__real end); +void FLAC__window_punchout_tukey(FLAC__real *window, const FLAC__int32 L, const FLAC__real p, const FLAC__real start, const FLAC__real end); void FLAC__window_welch(FLAC__real *window, const FLAC__int32 L); #endif /* !defined FLAC__INTEGER_ONLY_LIBRARY */ diff --git a/src/libFLAC/include/protected/stream_encoder.h b/src/libFLAC/include/protected/stream_encoder.h index 011adc08..38796cbb 100644 --- a/src/libFLAC/include/protected/stream_encoder.h +++ b/src/libFLAC/include/protected/stream_encoder.h @@ -59,6 +59,8 @@ typedef enum { FLAC__APODIZATION_RECTANGLE, FLAC__APODIZATION_TRIANGLE, FLAC__APODIZATION_TUKEY, + FLAC__APODIZATION_PARTIAL_TUKEY, + FLAC__APODIZATION_PUNCHOUT_TUKEY, FLAC__APODIZATION_WELCH } FLAC__ApodizationFunction; @@ -71,6 +73,11 @@ typedef struct { struct { FLAC__real p; } tukey; + struct { + FLAC__real p; + FLAC__real start; + FLAC__real end; + } multiple_tukey; } parameters; } FLAC__ApodizationSpecification; diff --git a/src/libFLAC/stream_encoder.c b/src/libFLAC/stream_encoder.c index 3e33336d..6f46d784 100644 --- a/src/libFLAC/stream_encoder.c +++ b/src/libFLAC/stream_encoder.c @@ -1664,6 +1664,48 @@ FLAC_API FLAC__bool FLAC__stream_encoder_set_apodization(FLAC__StreamEncoder *en encoder->protected_->apodizations[encoder->protected_->num_apodizations++].type = FLAC__APODIZATION_TUKEY; } } + else if(n>15 && 0 == strncmp("partial_tukey(" , specification, 14)) { + FLAC__int32 tukey_parts = (FLAC__int32)strtod(specification+14, 0); + const char *si_1 = strchr(specification, '/'); + FLAC__real overlap = si_1?flac_min((FLAC__real)strtod(si_1+1, 0),0.99f):0.1f; + FLAC__real overlap_units = 1.0f/(1.0f - overlap) - 1.0f; + const char *si_2 = strchr((si_1?(si_1+1):specification), '/'); + FLAC__real tukey_p = si_2?(FLAC__real)strtod(si_2+1, 0):0.2f; + + if (tukey_parts <= 1) { + encoder->protected_->apodizations[encoder->protected_->num_apodizations].parameters.tukey.p = tukey_p; + encoder->protected_->apodizations[encoder->protected_->num_apodizations++].type = FLAC__APODIZATION_TUKEY; + }else if (encoder->protected_->num_apodizations + tukey_parts < 32){ + FLAC__int32 m; + for(m = 0; m < tukey_parts; m++){ + encoder->protected_->apodizations[encoder->protected_->num_apodizations].parameters.multiple_tukey.p = tukey_p; + encoder->protected_->apodizations[encoder->protected_->num_apodizations].parameters.multiple_tukey.start = m/(tukey_parts+overlap_units); + encoder->protected_->apodizations[encoder->protected_->num_apodizations].parameters.multiple_tukey.end = (m+1+overlap_units)/(tukey_parts+overlap_units); + encoder->protected_->apodizations[encoder->protected_->num_apodizations++].type = FLAC__APODIZATION_PARTIAL_TUKEY; + } + } + } + else if(n>16 && 0 == strncmp("punchout_tukey(" , specification, 15)) { + FLAC__int32 tukey_parts = (FLAC__int32)strtod(specification+15, 0); + const char *si_1 = strchr(specification, '/'); + FLAC__real overlap = si_1?flac_min((FLAC__real)strtod(si_1+1, 0),0.99f):0.2f; + FLAC__real overlap_units = 1.0f/(1.0f - overlap) - 1.0f; + const char *si_2 = strchr((si_1?(si_1+1):specification), '/'); + FLAC__real tukey_p = si_2?(FLAC__real)strtod(si_2+1, 0):0.2f; + + if (tukey_parts <= 1) { + encoder->protected_->apodizations[encoder->protected_->num_apodizations].parameters.tukey.p = tukey_p; + encoder->protected_->apodizations[encoder->protected_->num_apodizations++].type = FLAC__APODIZATION_TUKEY; + }else if (encoder->protected_->num_apodizations + tukey_parts < 32){ + FLAC__int32 m; + for(m = 0; m < tukey_parts; m++){ + encoder->protected_->apodizations[encoder->protected_->num_apodizations].parameters.multiple_tukey.p = tukey_p; + encoder->protected_->apodizations[encoder->protected_->num_apodizations].parameters.multiple_tukey.start = m/(tukey_parts+overlap_units); + encoder->protected_->apodizations[encoder->protected_->num_apodizations].parameters.multiple_tukey.end = (m+1+overlap_units)/(tukey_parts+overlap_units); + encoder->protected_->apodizations[encoder->protected_->num_apodizations++].type = FLAC__APODIZATION_PUNCHOUT_TUKEY; + } + } + } else if(n==5 && 0 == strncmp("welch" , specification, n)) encoder->protected_->apodizations[encoder->protected_->num_apodizations++].type = FLAC__APODIZATION_WELCH; if (encoder->protected_->num_apodizations == 32) @@ -2443,6 +2485,12 @@ FLAC__bool resize_buffers_(FLAC__StreamEncoder *encoder, unsigned new_blocksize) case FLAC__APODIZATION_TUKEY: FLAC__window_tukey(encoder->private_->window[i], new_blocksize, encoder->protected_->apodizations[i].parameters.tukey.p); break; + case FLAC__APODIZATION_PARTIAL_TUKEY: + FLAC__window_partial_tukey(encoder->private_->window[i], new_blocksize, encoder->protected_->apodizations[i].parameters.multiple_tukey.p, encoder->protected_->apodizations[i].parameters.multiple_tukey.start, encoder->protected_->apodizations[i].parameters.multiple_tukey.end); + break; + case FLAC__APODIZATION_PUNCHOUT_TUKEY: + FLAC__window_punchout_tukey(encoder->private_->window[i], new_blocksize, encoder->protected_->apodizations[i].parameters.multiple_tukey.p, encoder->protected_->apodizations[i].parameters.multiple_tukey.start, encoder->protected_->apodizations[i].parameters.multiple_tukey.end); + break; case FLAC__APODIZATION_WELCH: FLAC__window_welch(encoder->private_->window[i], new_blocksize); break; diff --git a/src/libFLAC/window.c b/src/libFLAC/window.c index b873368c..6acf66aa 100644 --- a/src/libFLAC/window.c +++ b/src/libFLAC/window.c @@ -58,7 +58,7 @@ void FLAC__window_bartlett(FLAC__real *window, const FLAC__int32 L) for (n = 0; n <= L/2-1; n++) window[n] = 2.0f * n / (float)N; for (; n <= N; n++) - window[n] = 2.0f - 2.0f * (N-n) / (float)N; + window[n] = 2.0f - 2.0f * n / (float)N; } } @@ -68,7 +68,7 @@ void FLAC__window_bartlett_hann(FLAC__real *window, const FLAC__int32 L) FLAC__int32 n; for (n = 0; n < L; n++) - window[n] = (FLAC__real)(0.62f - 0.48f * fabs((float)n/(float)N+0.5f) + 0.38f * cos(2.0f * M_PI * ((float)n/(float)N+0.5f))); + window[n] = (FLAC__real)(0.62f - 0.48f * fabs((float)n/(float)N-0.5f) - 0.38f * cos(2.0f * M_PI * ((float)n/(float)N))); } void FLAC__window_blackman(FLAC__real *window, const FLAC__int32 L) @@ -173,16 +173,16 @@ void FLAC__window_triangle(FLAC__real *window, const FLAC__int32 L) FLAC__int32 n; if (L & 1) { - for (n = 1; n <= L+1/2; n++) + for (n = 1; n <= (L+1)/2; n++) window[n-1] = 2.0f * n / ((float)L + 1.0f); for (; n <= L; n++) - window[n-1] = - (float)(2 * (L - n + 1)) / ((float)L + 1.0f); + window[n-1] = (float)(2 * (L - n + 1)) / ((float)L + 1.0f); } else { for (n = 1; n <= L/2; n++) - window[n-1] = 2.0f * n / (float)L; + window[n-1] = 2.0f * n / ((float)L + 1.0f); for (; n <= L; n++) - window[n-1] = ((float)(2 * (L - n)) + 1.0f) / (float)L; + window[n-1] = (float)(2 * (L - n + 1)) / ((float)L + 1.0f); } } @@ -207,6 +207,61 @@ void FLAC__window_tukey(FLAC__real *window, const FLAC__int32 L, const FLAC__rea } } +void FLAC__window_partial_tukey(FLAC__real *window, const FLAC__int32 L, const FLAC__real p, const FLAC__real start, const FLAC__real end) +{ + const FLAC__int32 start_n = (FLAC__int32)(start * L); + const FLAC__int32 end_n = (FLAC__int32)(end * L); + const FLAC__int32 N = end_n - start_n; + FLAC__int32 Np, n, i; + + if (p <= 0.0) + FLAC__window_partial_tukey(window, L, 0.01, start, end); + else if (p >= 1.0) + FLAC__window_partial_tukey(window, L, 1, start, end); + + Np = (FLAC__int32)(p / 2.0f * N) - 1; + + for (n = 0; n < start_n; n++) + window[n] = 0.0f; + for (i = 1; n < (start_n+Np); n++, i++) + window[n] = (FLAC__real)(0.5f - 0.5f * cos(M_PI * i / Np)); + for (; n < (end_n-Np); n++) + window[n] = 1.0f; + for (i = Np; n < end_n; n++, i--) + window[n] = (FLAC__real)(0.5f - 0.5f * cos(M_PI * i / Np)); + for (; n < L; n++) + window[n] = 0.0f; +} +void FLAC__window_punchout_tukey(FLAC__real *window, const FLAC__int32 L, const FLAC__real p, const FLAC__real start, const FLAC__real end) +{ + const FLAC__int32 start_n = (FLAC__int32)(start * L); + const FLAC__int32 end_n = (FLAC__int32)(end * L); + FLAC__int32 Ns, Ne, n, i; + + if (p <= 0.0) + FLAC__window_partial_tukey(window, L, 0.01, start, end); + else if (p >= 1.0) + FLAC__window_partial_tukey(window, L, 1, start, end); + + Ns = (FLAC__int32)(p / 2.0f * start_n); + Ne = (FLAC__int32)(p / 2.0f * (L - end_n)); + + for (n = 0, i = 1; n < Ns; n++, i++) + window[n] = (FLAC__real)(0.5f - 0.5f * cos(M_PI * i / Ns)); + for (; n < start_n-Ns; n++) + window[n] = 1.0f; + for (i = Ns; n < start_n; n++, i--) + window[n] = (FLAC__real)(0.5f - 0.5f * cos(M_PI * i / Ns)); + for (; n < end_n; n++) + window[n] = 0.0f; + for (i = 1; n < end_n+Ne; n++, i++) + window[n] = (FLAC__real)(0.5f - 0.5f * cos(M_PI * i / Ne)); + for (; n < L - (Ne); n++) + window[n] = 1.0f; + for (i = Ne; n < L; n++, i--) + window[n] = (FLAC__real)(0.5f - 0.5f * cos(M_PI * i / Ne)); +} + void FLAC__window_welch(FLAC__real *window, const FLAC__int32 L) { const FLAC__int32 N = L - 1;