From 8e4a45ac863a63e4f6256be487e71e852f22fbba Mon Sep 17 00:00:00 2001
From: Erik de Castro Lopo <erikd@mega-nerd.com>
Date: Tue, 7 Jan 2014 21:25:03 +1100
Subject: [PATCH] libFLAC/ia32/lpc_asm.nasm : Match calls and returns.

According to Agner Fog, "...you must make sure that all calls
are matched with returns. Never jump out of a subroutine without
a return and never use a return as an indirect jump."

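(see paragraph 3.15 in microarchitecture.pdf and
examples 3.5a and 3.5b in optimizing_assembly.pdf)

The old "call .get_eipN / pop reg" sequences read EIP by popping the
return address, which left a call with no matching return. Call a small
helper instead that loads the return address from [esp] and returns.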

Patch-from: lvqcl <lvqcl.mail@gmail.com>
---
 src/libFLAC/ia32/lpc_asm.nasm | 24 ++++++++++++++++--------
 1 file changed, 16 insertions(+), 8 deletions(-)

diff --git a/src/libFLAC/ia32/lpc_asm.nasm b/src/libFLAC/ia32/lpc_asm.nasm
index 66d07e2d..2b7a8019 100644
--- a/src/libFLAC/ia32/lpc_asm.nasm
+++ b/src/libFLAC/ia32/lpc_asm.nasm
@@ -115,9 +115,8 @@ cident FLAC__lpc_compute_autocorrelation_asm_ia32
 	lea	edx, [eax + eax*2]
 	neg	edx
 	lea	edx, [eax + edx*4 + .jumper1_0 - .get_eip1]
-	call	.get_eip1
+	call	.mov_eip_to_ebx
 .get_eip1:
-	pop	ebx
 	add	edx, ebx
 	inc	edx				; compensate for the shorter opcode on the last iteration
 	inc	edx				; compensate for the shorter opcode on the last iteration
@@ -128,6 +127,10 @@ cident FLAC__lpc_compute_autocorrelation_asm_ia32
 .loop1_start:
 	jmp	edx
 
+.mov_eip_to_ebx:				; helper: ebx = return address of the calling 'call' (i.e. the address of the label right after it)
+	mov ebx, [esp]				; on entry [esp] is the return address pushed by 'call'; read it without popping
+	ret					; real return, so the call is matched with a ret
+
 	fld	st0				; ST = d d
 	fmul	dword [esi + (32*4)]		; ST = d*data[sample+32] d		WATCHOUT: not a byte displacement here!
 	fadd	dword [edi + (32*4)]		; ST = autoc[32]+d*data[sample+32] d	WATCHOUT: not a byte displacement here!
@@ -285,9 +288,8 @@ cident FLAC__lpc_compute_autocorrelation_asm_ia32
 	lea	edx, [eax + eax*2]
 	neg	edx
 	lea	edx, [eax + edx*4 + .jumper2_0 - .get_eip2]
-	call	.get_eip2
+	call	.mov_eip_to_ebx
 .get_eip2:
-	pop	ebx
 	add	edx, ebx
 	inc	edx				; compensate for the shorter opcode on the last iteration
 	inc	edx				; compensate for the shorter opcode on the last iteration
@@ -919,13 +921,16 @@ cident FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32
 
 	jmp	.end
 
+.mov_eip_to_eax:				; helper: eax = return address of the calling 'call' (address of the instruction after it)
+	mov eax, [esp]				; top of stack on entry is the return address pushed by 'call'; load it, leave the stack untouched
+	ret					; return normally so the call/ret pair stays matched
+
 .i_32:
 	sub	edi, esi
 	neg	eax
 	lea	edx, [eax + eax * 8 + .jumper_0 - .get_eip0]
-	call	.get_eip0
+	call	.mov_eip_to_eax
 .get_eip0:
-	pop	eax
 	add	edx, eax
 	inc	edx
 	mov	eax, [esp + 28]			; eax = qlp_coeff[]
@@ -1323,13 +1328,16 @@ cident FLAC__lpc_restore_signal_asm_ia32
 
 	jmp	.end
 
+.mov_eip_to_eax:				; helper: eax = return address of the calling 'call' (address of the instruction after it)
+	mov eax, [esp]				; top of stack on entry is the return address pushed by 'call'; load it, leave the stack untouched
+	ret					; return normally so the call/ret pair stays matched
+
 .x87_32:
 	sub	esi, edi
 	neg	eax
 	lea	edx, [eax + eax * 8 + .jumper_0 - .get_eip0]
-	call	.get_eip0
+	call	.mov_eip_to_eax
 .get_eip0:
-	pop	eax
 	add	edx, eax
 	inc	edx				; compensate for the shorter opcode on the last iteration
 	mov	eax, [esp + 28]			; eax = qlp_coeff[]