optimized

This commit is contained in:
chudov
2008-10-14 01:45:15 +00:00
parent 3878d2527d
commit 8b7ab762d5

View File

@@ -31,15 +31,18 @@ proc Adapt
jle short AdaptSub jle short AdaptSub
mov r8, rdx
; and edx, 0xfffffff0
and r8b, 0xf
AdaptAddLoop: AdaptAddLoop:
movdqu xmm0, [rcx] movdqa xmm0, [rcx]
movdqu xmm1, [rdx] movdqu xmm1, [rdx]
paddw xmm0, xmm1 paddw xmm0, xmm1
movdqu [rcx], xmm0 movdqa [rcx], xmm0
movdqu xmm2, [rcx + 16] movdqa xmm2, [rcx + 16]
movdqu xmm3, [rdx + 16] movdqu xmm3, [rdx + 16]
paddw xmm2, xmm3 paddw xmm2, xmm3
movdqu [rcx + 16], xmm2 movdqa [rcx + 16], xmm2
add rcx, byte 32 add rcx, byte 32
add rdx, byte 32 add rdx, byte 32
dec r9d dec r9d
@@ -67,14 +70,14 @@ AdaptAddLoop:
AdaptSub: je short AdaptDone AdaptSub: je short AdaptDone
AdaptSubLoop: AdaptSubLoop:
movdqu xmm0, [rcx] movdqa xmm0, [rcx]
movdqu xmm1, [rdx] movdqu xmm1, [rdx]
psubw xmm0, xmm1 psubw xmm0, xmm1
movdqu [rcx], xmm0 movdqa [rcx], xmm0
movdqu xmm2, [rcx + 16] movdqa xmm2, [rcx + 16]
movdqu xmm3, [rdx + 16] movdqu xmm3, [rdx + 16]
psubw xmm2, xmm3 psubw xmm2, xmm3
movdqu [rcx + 16], xmm2 movdqa [rcx + 16], xmm2
add rcx, byte 32 add rcx, byte 32
add rdx, byte 32 add rdx, byte 32
dec r9d dec r9d