From 3878d2527dce22a8c66eddc16d8afa5fa3e6fac0 Mon Sep 17 00:00:00 2001 From: chudov Date: Tue, 14 Oct 2008 01:37:43 +0000 Subject: [PATCH] more SSE2 --- MAC_SDK/Source/MACLib/Assembly/Assembly.obj | Bin 836 -> 836 bytes MAC_SDK/Source/MACLib/Assembly/Assembly64.nas | 41 ++++++++---------- MAC_SDK/Source/MACLib/Assembly/Assembly64.obj | Bin 841 -> 825 bytes 3 files changed, 17 insertions(+), 24 deletions(-) diff --git a/MAC_SDK/Source/MACLib/Assembly/Assembly.obj b/MAC_SDK/Source/MACLib/Assembly/Assembly.obj index 85de600eb0a99cb06288d03520c17b3d4163f1d9..65a09fdff51cc9a8d098ef24ffa92c003cd0356a 100644 GIT binary patch delta 15 WcmX@Yc7%=1hmnz?^7BSEJ7xeRPy|f? delta 15 WcmX@Yc7%=1hmn!t*3OM=cFX`JbOdw& diff --git a/MAC_SDK/Source/MACLib/Assembly/Assembly64.nas b/MAC_SDK/Source/MACLib/Assembly/Assembly64.nas index bfe5017..751cc55 100644 --- a/MAC_SDK/Source/MACLib/Assembly/Assembly64.nas +++ b/MAC_SDK/Source/MACLib/Assembly/Assembly64.nas @@ -30,19 +30,16 @@ proc Adapt cmp r8d, byte 0 ; nDirection jle short AdaptSub + AdaptAddLoop: - movq mm0, [rcx] - paddw mm0, [rdx] - movq [rcx], mm0 - movq mm1, [rcx + 8] - paddw mm1, [rdx + 8] - movq [rcx + 8], mm1 - movq mm2, [rcx + 16] - paddw mm2, [rdx + 16] - movq [rcx + 16], mm2 - movq mm3, [rcx + 24] - paddw mm3, [rdx + 24] - movq [rcx + 24], mm3 + movdqu xmm0, [rcx] + movdqu xmm1, [rdx] + paddw xmm0, xmm1 + movdqu [rcx], xmm0 + movdqu xmm2, [rcx + 16] + movdqu xmm3, [rdx + 16] + paddw xmm2, xmm3 + movdqu [rcx + 16], xmm2 add rcx, byte 32 add rdx, byte 32 dec r9d @@ -70,18 +67,14 @@ AdaptAddLoop: AdaptSub: je short AdaptDone AdaptSubLoop: - movq mm0, [rcx] - psubw mm0, [rdx] - movq [rcx], mm0 - movq mm1, [rcx + 8] - psubw mm1, [rdx + 8] - movq [rcx + 8], mm1 - movq mm2, [rcx + 16] - psubw mm2, [rdx + 16] - movq [rcx + 16], mm2 - movq mm3, [rcx + 24] - psubw mm3, [rdx + 24] - movq [rcx + 24], mm3 + movdqu xmm0, [rcx] + movdqu xmm1, [rdx] + psubw xmm0, xmm1 + movdqu [rcx], xmm0 + movdqu xmm2, [rcx + 16] + movdqu xmm3, [rdx + 16] + psubw xmm2, xmm3 + movdqu [rcx + 16], xmm2 add rcx, byte 32 add rdx, byte 32 dec r9d diff --git a/MAC_SDK/Source/MACLib/Assembly/Assembly64.obj b/MAC_SDK/Source/MACLib/Assembly/Assembly64.obj index 5993c5db5be1c318d18367e58813060435e985d4..2b37dcd5f27a2c8c4947a66d4a72796dd1030221 100644 GIT binary patch delta 303 zcmX@fwv&xBrHzr{$LG%;ofA2wIei%!7;G3A7&<5FDw%kF=FeyR%%9Jd#{c)=Xa0H+ zJ5b;=e}0re8vozRpZV(p1w5J$DtI&>QgHl#vh)If`QeEZ#Z60$5E_4CX#9ywqvXUN z*BN~$UtmnEk7Hn9U}RumU}gXTK?Vkf1QZcD1_p*nC?XmR3=Hd_B1|An47L~|feZ`` nhoNei8C>#9+%oeLbJQ4-?TTe!U~m_L3PB9Y0|`w&#MBM|V*^Q9 delta 320 zcmdnVc9M-VrHzq+`{`$onG-psIb#_a7;G3A7-mk?RkDua&u8TS%fw&L$e-`Y!T;Bb zgTLOBgFin|fd6lh0Dpa;0Dpd@1pnVC3I6&>36JK33Lede6deDbEIl@{L2BXyHk%R~ zkP$!OM*KuF;wO?3Kd~AiGV#?_#@NXR7!&LB7#J8B85kIt89+dgfq|g_MMRE)fng1b zhz0`#!#=186G#(-Erv)S0|Ucls2XMlm;4g9%)G=LGe%^)Vi_11!iAth5Q9LD37@=) GsT}~6IYNm5