lpc_asm.nasm: Replace more 'mov cl, [mem]' loads with 'mov ecx, [mem]'.

According to Agner Fog's "Optimizing subroutines in assembly language" (optimizing_assembly.pdf):

  "... write to a partial register may result in false dependencies
   between instructions, so it is better to avoid it."

Patch-from: lvqcl <lvqcl.mail@gmail.com>
This commit is contained in:
Erik de Castro Lopo
2014-01-18 07:31:41 +11:00
parent 7e9278934e
commit c2747bec1c

View File

@@ -872,7 +872,7 @@ cident FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32
mov ecx, [esp + 28]
mov edx, [ecx] ; edx = qlp_coeff[0]
mov eax, [esi - 4] ; eax = data[-1]
mov cl, [esp + 36] ; cl = lp_quantization
mov ecx, [esp + 36] ; cl = lp_quantization
ALIGN 16
.i_1_loop_i:
imul eax, edx
@@ -910,7 +910,7 @@ cident FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32
inc ecx
jnz short .i_32more_loop_j
mov cl, [esp + 36]
mov ecx, [esp + 36]
sar ebp, cl
neg ebp
add ebp, [esi]
@@ -1037,7 +1037,7 @@ cident FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32
add ebp, ecx
.jumper_0:
mov cl, [esp + 36]
mov ecx, [esp + 36]
sar ebp, cl
neg ebp
add ebp, [esi]
@@ -1282,7 +1282,7 @@ cident FLAC__lpc_restore_signal_asm_ia32
mov ecx, [esp + 28]
mov edx, [ecx]
mov eax, [edi - 4]
mov cl, [esp + 36]
mov ecx, [esp + 36]
ALIGN 16
.x87_1_loop_i:
imul eax, edx
@@ -1318,7 +1318,7 @@ cident FLAC__lpc_restore_signal_asm_ia32
inc ecx
jnz short .x87_32more_loop_j
mov cl, [esp + 36]
mov ecx, [esp + 36]
sar ebp, cl
add ebp, [esi]
mov [edi], ebp
@@ -1444,7 +1444,7 @@ cident FLAC__lpc_restore_signal_asm_ia32
add ebp, ecx ; sum += qlp_coeff[ 0] * data[i- 1]
.jumper_0:
mov cl, [esp + 36]
mov ecx, [esp + 36]
sar ebp, cl ; ebp = (sum >> lp_quantization)
add ebp, [esi + edi] ; ebp = residual[i] + (sum >> lp_quantization)
mov [edi], ebp ; data[i] = residual[i] + (sum >> lp_quantization)
@@ -1633,6 +1633,7 @@ cident FLAC__lpc_compute_residual_from_qlp_coefficients_wide_asm_ia32
;ASSERT(order > 0)
;ASSERT(order <= 32)
;ASSERT(lp_quantization <= 31)
push ebp
push ebx
@@ -1653,7 +1654,7 @@ cident FLAC__lpc_compute_residual_from_qlp_coefficients_wide_asm_ia32
mov ecx, [esp + 28] ; ecx = qlp_coeff[]
mov ebp, [ecx] ; ebp = qlp_coeff[0]
mov eax, [edi - 4] ; eax = data[-1]
mov cl, [esp + 36] ; cl = lp_quantization
mov ecx, [esp + 36] ; cl = lp_quantization
ALIGN 16
.i_1_loop_i:
imul ebp ; edx:eax = qlp_coeff[0] * (FLAC__int64)data[i-1]
@@ -1913,6 +1914,7 @@ cident FLAC__lpc_restore_signal_wide_asm_ia32
;ASSERT(order > 0)
;ASSERT(order <= 32)
;ASSERT(lp_quantization <= 31)
push ebp
push ebx
@@ -1933,7 +1935,7 @@ cident FLAC__lpc_restore_signal_wide_asm_ia32
mov ecx, [esp + 28] ; ecx = qlp_coeff[]
mov ebp, [ecx] ; ebp = qlp_coeff[0]
mov eax, [edi - 4] ; eax = data[-1]
mov cl, [esp + 36] ; cl = lp_quantization
mov ecx, [esp + 36] ; cl = lp_quantization
ALIGN 16
.x87_1_loop_i:
imul ebp ; edx:eax = qlp_coeff[0] * (FLAC__int64)data[i-1]