From 70b078cfd5f9d4b0692c33f018cac3c652b14f90 Mon Sep 17 00:00:00 2001
From: Erik de Castro Lopo <erikd@mega-nerd.com>
Date: Fri, 21 Mar 2014 19:25:55 +1100
Subject: [PATCH] Attempt to fix differences between x86 FPU and SSE
 calculations.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The x86 FPU holds intermediate results in larger registers than what
the SSE unit uses, resulting in slightly different encodings of audio
data. Attempt to fix this by modifying libFLAC/lpc.c to store calculation
results in a FLAC__real before adding it to a sum.

At the moment this works, but I could easily imagine a new version of
the compiler optimising this store to the FLAC__real away leaving us
in the same situation we have now.

Patch-from: Oliver Stöneberg on sourceforge.net
Closes: https://sourceforge.net/p/flac/bugs/409/
---
 src/libFLAC/lpc.c | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/src/libFLAC/lpc.c b/src/libFLAC/lpc.c
index 22aab4a4..de56f52d 100644
--- a/src/libFLAC/lpc.c
+++ b/src/libFLAC/lpc.c
@@ -99,7 +99,7 @@ void FLAC__lpc_compute_autocorrelation(const FLAC__real data[], unsigned data_le
 	 * this version tends to run faster because of better data locality
 	 * ('data_len' is usually much larger than 'lag')
 	 */
-	FLAC__real d;
+	FLAC__real d, tmp;
 	unsigned sample, coeff;
 	const unsigned limit = data_len - lag;
 
@@ -110,13 +110,17 @@ void FLAC__lpc_compute_autocorrelation(const FLAC__real data[], unsigned data_le
 		autoc[coeff] = 0.0;
 	for(sample = 0; sample <= limit; sample++) {
 		d = data[sample];
-		for(coeff = 0; coeff < lag; coeff++)
-			autoc[coeff] += d * data[sample+coeff];
+		for(coeff = 0; coeff < lag; coeff++) {
+			tmp = d * data[sample+coeff];
+			autoc[coeff] += tmp;
+		}
 	}
 	for(; sample < data_len; sample++) {
 		d = data[sample];
-		for(coeff = 0; coeff < data_len - sample; coeff++)
-			autoc[coeff] += d * data[sample+coeff];
+		for(coeff = 0; coeff < data_len - sample; coeff++) {
+			tmp = d * data[sample+coeff];
+			autoc[coeff] += tmp;
+		}
 	}
 }