more SSE2

This commit is contained in:
chudov
2008-10-14 01:37:43 +00:00
parent f1f8e8e308
commit 3878d2527d
3 changed files with 17 additions and 24 deletions

View File

@@ -30,19 +30,16 @@ proc Adapt
cmp r8d, byte 0 ; nDirection cmp r8d, byte 0 ; nDirection
jle short AdaptSub jle short AdaptSub
AdaptAddLoop: AdaptAddLoop:
movq mm0, [rcx] movdqu xmm0, [rcx]
paddw mm0, [rdx] movdqu xmm1, [rdx]
movq [rcx], mm0 paddw xmm0, xmm1
movq mm1, [rcx + 8] movdqu [rcx], xmm0
paddw mm1, [rdx + 8] movdqu xmm2, [rcx + 16]
movq [rcx + 8], mm1 movdqu xmm3, [rdx + 16]
movq mm2, [rcx + 16] paddw xmm2, xmm3
paddw mm2, [rdx + 16] movdqu [rcx + 16], xmm2
movq [rcx + 16], mm2
movq mm3, [rcx + 24]
paddw mm3, [rdx + 24]
movq [rcx + 24], mm3
add rcx, byte 32 add rcx, byte 32
add rdx, byte 32 add rdx, byte 32
dec r9d dec r9d
@@ -70,18 +67,14 @@ AdaptAddLoop:
AdaptSub: je short AdaptDone AdaptSub: je short AdaptDone
AdaptSubLoop: AdaptSubLoop:
movq mm0, [rcx] movdqu xmm0, [rcx]
psubw mm0, [rdx] movdqu xmm1, [rdx]
movq [rcx], mm0 psubw xmm0, xmm1
movq mm1, [rcx + 8] movdqu [rcx], xmm0
psubw mm1, [rdx + 8] movdqu xmm2, [rcx + 16]
movq [rcx + 8], mm1 movdqu xmm3, [rdx + 16]
movq mm2, [rcx + 16] psubw xmm2, xmm3
psubw mm2, [rdx + 16] movdqu [rcx + 16], xmm2
movq [rcx + 16], mm2
movq mm3, [rcx + 24]
psubw mm3, [rdx + 24]
movq [rcx + 24], mm3
add rcx, byte 32 add rcx, byte 32
add rdx, byte 32 add rdx, byte 32
dec r9d dec r9d