lpc_asm.nasm : More 'mov cl' -> 'mov ecx' fixes.

According to Agner Fog in optimizing_assembly.pdf: "... write to a partial register may result in false dependencies between instructions, so it is better to avoid it."

Patch-from: lvqcl <lvqcl.mail@gmail.com>
2025-12-16 18:54:26 +00:00 · 2014-01-18 07:31:41 +11:00
parent 7e9278934e
commit c2747bec1c
1 changed files with 10 additions and 8 deletions
--- a/src/libFLAC/ia32/lpc_asm.nasm
+++ b/src/libFLAC/ia32/lpc_asm.nasm
@@ -872,7 +872,7 @@ cident FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32
 	mov	ecx, [esp + 28]
 	mov	edx, [ecx]			; edx = qlp_coeff[0]
 	mov	eax, [esi - 4]			; eax = data[-1]
-	mov	cl, [esp + 36]			; cl = lp_quantization
+	mov	ecx, [esp + 36]			; cl = lp_quantization
 	ALIGN	16
 .i_1_loop_i:
 	imul	eax, edx
@@ -910,7 +910,7 @@ cident FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32
 	inc	ecx
 	jnz	short .i_32more_loop_j

-	mov	cl, [esp + 36]
+	mov	ecx, [esp + 36]
 	sar	ebp, cl
 	neg	ebp
 	add	ebp, [esi]
@@ -1037,7 +1037,7 @@ cident FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32
 	add	ebp, ecx
 .jumper_0:

-	mov	cl, [esp + 36]
+	mov	ecx, [esp + 36]
 	sar	ebp, cl
 	neg	ebp
 	add	ebp, [esi]
@@ -1282,7 +1282,7 @@ cident FLAC__lpc_restore_signal_asm_ia32
 	mov	ecx, [esp + 28]
 	mov	edx, [ecx]
 	mov	eax, [edi - 4]
-	mov	cl, [esp + 36]
+	mov	ecx, [esp + 36]
 	ALIGN	16
 .x87_1_loop_i:
 	imul	eax, edx
@@ -1318,7 +1318,7 @@ cident FLAC__lpc_restore_signal_asm_ia32
 	inc	ecx
 	jnz	short .x87_32more_loop_j

-	mov	cl, [esp + 36]
+	mov	ecx, [esp + 36]
 	sar	ebp, cl
 	add	ebp, [esi]
 	mov	[edi], ebp
@@ -1444,7 +1444,7 @@ cident FLAC__lpc_restore_signal_asm_ia32
 	add	ebp, ecx			; sum += qlp_coeff[ 0] * data[i- 1]
 .jumper_0:

-	mov	cl, [esp + 36]
+	mov	ecx, [esp + 36]
 	sar	ebp, cl				; ebp = (sum >> lp_quantization)
 	add	ebp, [esi + edi]		; ebp = residual[i] + (sum >> lp_quantization)
 	mov	[edi], ebp			; data[i] = residual[i] + (sum >> lp_quantization)
@@ -1633,6 +1633,7 @@ cident FLAC__lpc_compute_residual_from_qlp_coefficients_wide_asm_ia32

 	;ASSERT(order > 0)
 	;ASSERT(order <= 32)
+	;ASSERT(lp_quantization <= 31)

 	push	ebp
 	push	ebx
@@ -1653,7 +1654,7 @@ cident FLAC__lpc_compute_residual_from_qlp_coefficients_wide_asm_ia32
 	mov	ecx, [esp + 28]			; ecx = qlp_coeff[]
 	mov	ebp, [ecx]				; ebp = qlp_coeff[0]
 	mov	eax, [edi - 4]			; eax = data[-1]
-	mov	cl, [esp + 36]			; cl = lp_quantization
+	mov	ecx, [esp + 36]			; cl = lp_quantization
 	ALIGN	16
 .i_1_loop_i:
 	imul	ebp					; edx:eax = qlp_coeff[0] * (FLAC__int64)data[i-1]
@@ -1913,6 +1914,7 @@ cident FLAC__lpc_restore_signal_wide_asm_ia32

 	;ASSERT(order > 0)
 	;ASSERT(order <= 32)
+	;ASSERT(lp_quantization <= 31)

 	push	ebp
 	push	ebx
@@ -1933,7 +1935,7 @@ cident FLAC__lpc_restore_signal_wide_asm_ia32
 	mov	ecx, [esp + 28]			; ecx = qlp_coeff[]
 	mov	ebp, [ecx]				; ebp = qlp_coeff[0]
 	mov	eax, [edi - 4]			; eax = data[-1]
-	mov	cl, [esp + 36]			; cl = lp_quantization
+	mov	ecx, [esp + 36]			; cl = lp_quantization
 	ALIGN	16
 .x87_1_loop_i:
 	imul	ebp					; edx:eax = qlp_coeff[0] * (FLAC__int64)data[i-1]