mirror of
https://github.com/aaru-dps/libaaruformat.git
synced 2025-12-16 11:14:39 +00:00
Add files that were wrongly ignored by .gitignore.
This commit is contained in:
271
3rdparty/lzma-21.03beta/Asm/x86/7zAsm.asm
vendored
Normal file
271
3rdparty/lzma-21.03beta/Asm/x86/7zAsm.asm
vendored
Normal file
@@ -0,0 +1,271 @@
|
||||
; 7zAsm.asm -- ASM macros
|
||||
; 2021-07-13 : Igor Pavlov : Public domain
|
||||
|
||||
; ---------------------------------------------------------------------------
; Build-configuration flags.
; Each IS_* symbol is a numeric 0/1 mirror of an "is this symbol defined"
; test, so later code can write `if (IS_xxx gt 0)` expressions.
;   x64      : defined here when the symbol RAX is defined
;              (NOTE(review): presumably true only under a 64-bit assembler,
;              and x64 may also be supplied by the build -- confirm)
;   IS_X64   : 1 when x64 is defined, else 0
;   IS_LINUX : 1 when ABI_LINUX is defined, else 0
;   IS_CDECL : defined ONLY when x64 is not defined; 1 when ABI_CDECL is
;              defined, else 0
; ---------------------------------------------------------------------------
ifdef RAX
|
||||
x64 equ 1
|
||||
endif
|
||||
|
||||
ifdef x64
|
||||
IS_X64 equ 1
|
||||
else
|
||||
IS_X64 equ 0
|
||||
endif
|
||||
|
||||
ifdef ABI_LINUX
|
||||
IS_LINUX equ 1
|
||||
else
|
||||
IS_LINUX equ 0
|
||||
endif
|
||||
|
||||
; IS_CDECL does not exist in x64 builds: any expression that names it has to
; sit in a branch that x64 builds never evaluate.
ifndef x64
|
||||
; Use ABI_CDECL for x86 (32-bit) only
|
||||
; if ABI_CDECL is not defined, we use fastcall abi
|
||||
ifdef ABI_CDECL
|
||||
IS_CDECL equ 1
|
||||
else
|
||||
IS_CDECL equ 0
|
||||
endif
|
||||
endif
|
||||
|
||||
|
||||
; MY_ASM_START: file preamble, invoked once by each source after it includes
; this file.
;   x64 defined : switches to the code section with `.code`.
;   otherwise   : selects the 386 instruction set and the flat memory model,
;                 then opens the paragraph-aligned public code segment
;                 _TEXT$00.
MY_ASM_START macro
|
||||
ifdef x64
|
||||
.code
|
||||
else
|
||||
.386
|
||||
.model flat
|
||||
_TEXT$00 SEGMENT PARA PUBLIC 'CODE'
|
||||
endif
|
||||
endm
|
||||
|
||||
; MY_PROC name, numParams
; Opens a procedure. Must be paired with MY_ENDP, which reads the two symbols
; assigned here.
;   name      : undecorated procedure name
;   numParams : number of parameters; stored in proc_numParams
; proc_name receives the symbol actually emitted:
;   x64 build or Linux build : name
;   32-bit cdecl             : _name
;   otherwise                : @name@N with N = numParams * 4
; The entry point is aligned to 16 bytes.
MY_PROC macro name:req, numParams:req
|
||||
align 16
|
||||
proc_numParams = numParams
|
||||
; NOTE(review): IS_CDECL is only defined when x64 is not defined, so this
; chain presumably relies on later elseif tests not being evaluated once an
; earlier branch is taken -- confirm before reordering the tests.
if (IS_X64 gt 0)
|
||||
proc_name equ name
|
||||
elseif (IS_LINUX gt 0)
|
||||
proc_name equ name
|
||||
elseif (IS_CDECL gt 0)
|
||||
proc_name equ @CatStr(_,name)
|
||||
else
|
||||
proc_name equ @CatStr(@,name,@, %numParams * 4)
|
||||
endif
|
||||
proc_name PROC
|
||||
endm
|
||||
|
||||
; MY_ENDP: closes the procedure opened by the most recent MY_PROC.
; Emits the return instruction followed by `proc_name ENDP`.
;   x64 build, or 32-bit cdecl, or fewer than 3 parameters : plain `ret`
;   otherwise : `ret (proc_numParams - 2) * 4`, releasing every parameter
;               after the first two
; The tests are nested, not combined into one expression, because IS_CDECL
; is not defined in x64 builds and must only be named in a branch that x64
; builds skip.
MY_ENDP macro
  if (IS_X64 gt 0)
        ret
  else
    if (IS_CDECL gt 0)
        ret
    else
      if (proc_numParams LT 3)
        ret
      else
        ret (proc_numParams - 2) * 4
      endif
    endif
  endif
  proc_name ENDP
endm
|
||||
|
||||
|
||||
; ---------------------------------------------------------------------------
; Register-size constants and width-neutral register aliases.
; REG_SIZE is 8 when x64 is defined and 4 otherwise; REG_LOGAR_SIZE is the
; matching shift count (3 or 2).
; Alias scheme:
;   xN    32-bit register N
;   xN_W  low 16 bits of register N (no alias is defined for N = 4)
;   xN_L  low 8 bits (N = 0..3 always; N = 5..7 only when x64 is defined)
;   xN_H  bits 8..15 (N = 0..3 only)
;   rN    the 64-bit register when x64 is defined, otherwise the same as xN
; Register numbers: 0=AX 1=CX 2=DX 3=BX 4=SP 5=BP 6=SI 7=DI.
; ---------------------------------------------------------------------------
ifdef x64
|
||||
REG_SIZE equ 8
|
||||
REG_LOGAR_SIZE equ 3
|
||||
else
|
||||
REG_SIZE equ 4
|
||||
REG_LOGAR_SIZE equ 2
|
||||
endif
|
||||
|
||||
|
||||
x0 equ EAX
|
||||
x1 equ ECX
|
||||
x2 equ EDX
|
||||
x3 equ EBX
|
||||
x4 equ ESP
|
||||
x5 equ EBP
|
||||
x6 equ ESI
|
||||
x7 equ EDI
|
||||
|
||||
|
||||
x0_W equ AX
|
||||
x1_W equ CX
|
||||
x2_W equ DX
|
||||
x3_W equ BX
|
||||
|
||||
|
||||
x5_W equ BP
|
||||
x6_W equ SI
|
||||
x7_W equ DI
|
||||
|
||||
|
||||
x0_L equ AL
|
||||
x1_L equ CL
|
||||
x2_L equ DL
|
||||
x3_L equ BL
|
||||
|
||||
|
||||
x0_H equ AH
|
||||
x1_H equ CH
|
||||
x2_H equ DH
|
||||
x3_H equ BH
|
||||
|
||||
|
||||
; x64 only: byte views of BP/SI/DI, 64-bit rN names, and x8..x15 as the
; 32-bit views r8d..r15d. Otherwise rN is simply another name for xN.
ifdef x64
|
||||
x5_L equ BPL
|
||||
x6_L equ SIL
|
||||
x7_L equ DIL
|
||||
|
||||
|
||||
r0 equ RAX
|
||||
r1 equ RCX
|
||||
r2 equ RDX
|
||||
r3 equ RBX
|
||||
r4 equ RSP
|
||||
r5 equ RBP
|
||||
r6 equ RSI
|
||||
r7 equ RDI
|
||||
x8 equ r8d
|
||||
x9 equ r9d
|
||||
x10 equ r10d
|
||||
x11 equ r11d
|
||||
x12 equ r12d
|
||||
x13 equ r13d
|
||||
x14 equ r14d
|
||||
x15 equ r15d
|
||||
else
|
||||
r0 equ x0
|
||||
r1 equ x1
|
||||
r2 equ x2
|
||||
r3 equ x3
|
||||
r4 equ x4
|
||||
r5 equ x5
|
||||
r6 equ x6
|
||||
r7 equ x7
|
||||
endif
|
||||
|
||||
|
||||
ifdef x64
|
||||
ifdef ABI_LINUX
|
||||
|
||||
; MY_PUSH_2_REGS: pushes r3 then r5 (RBX, RBP).
; Only defined when both x64 and ABI_LINUX are defined.
; Undo with MY_POP_2_REGS, which pops in the reverse order.
MY_PUSH_2_REGS macro
|
||||
push r3
|
||||
push r5
|
||||
endm
|
||||
|
||||
; MY_POP_2_REGS: pops r5 then r3, the reverse of MY_PUSH_2_REGS.
; Only defined when both x64 and ABI_LINUX are defined.
MY_POP_2_REGS macro
|
||||
pop r5
|
||||
pop r3
|
||||
endm
|
||||
|
||||
endif
|
||||
endif
|
||||
|
||||
|
||||
; MY_PUSH_4_REGS: pushes r3, r5, r6, r7 in that order
; (BX, BP, SI, DI at native register width).
; Moves the stack pointer down by 4 * REG_SIZE; code that addresses stack
; parameters relative to r4 afterwards has to account for that.
; Undo with MY_POP_4_REGS.
MY_PUSH_4_REGS macro
|
||||
push r3
|
||||
push r5
|
||||
push r6
|
||||
push r7
|
||||
endm
|
||||
|
||||
; MY_POP_4_REGS: pops r7, r6, r5, r3 -- the exact reverse of MY_PUSH_4_REGS.
MY_POP_4_REGS macro
|
||||
pop r7
|
||||
pop r6
|
||||
pop r5
|
||||
pop r3
|
||||
endm
|
||||
|
||||
|
||||
; for fastcall and for WIN-x64
|
||||
REG_PARAM_0_x equ x1
|
||||
REG_PARAM_0 equ r1
|
||||
REG_PARAM_1_x equ x2
|
||||
REG_PARAM_1 equ r2
|
||||
|
||||
ifndef x64
|
||||
; for x86-fastcall
|
||||
|
||||
REG_ABI_PARAM_0_x equ REG_PARAM_0_x
|
||||
REG_ABI_PARAM_0 equ REG_PARAM_0
|
||||
REG_ABI_PARAM_1_x equ REG_PARAM_1_x
|
||||
REG_ABI_PARAM_1 equ REG_PARAM_1
|
||||
|
||||
else
|
||||
; x64
|
||||
|
||||
if (IS_LINUX eq 0)
|
||||
|
||||
; for WIN-x64:
|
||||
REG_PARAM_2_x equ x8
|
||||
REG_PARAM_2 equ r8
|
||||
REG_PARAM_3 equ r9
|
||||
|
||||
REG_ABI_PARAM_0_x equ REG_PARAM_0_x
|
||||
REG_ABI_PARAM_0 equ REG_PARAM_0
|
||||
REG_ABI_PARAM_1_x equ REG_PARAM_1_x
|
||||
REG_ABI_PARAM_1 equ REG_PARAM_1
|
||||
REG_ABI_PARAM_2_x equ REG_PARAM_2_x
|
||||
REG_ABI_PARAM_2 equ REG_PARAM_2
|
||||
REG_ABI_PARAM_3 equ REG_PARAM_3
|
||||
|
||||
else
|
||||
; for LINUX-x64:
|
||||
REG_LINUX_PARAM_0_x equ x7
|
||||
REG_LINUX_PARAM_0 equ r7
|
||||
REG_LINUX_PARAM_1_x equ x6
|
||||
REG_LINUX_PARAM_1 equ r6
|
||||
REG_LINUX_PARAM_2 equ r2
|
||||
REG_LINUX_PARAM_3 equ r1
|
||||
REG_LINUX_PARAM_4_x equ x8
|
||||
REG_LINUX_PARAM_4 equ r8
|
||||
REG_LINUX_PARAM_5 equ r9
|
||||
|
||||
REG_ABI_PARAM_0_x equ REG_LINUX_PARAM_0_x
|
||||
REG_ABI_PARAM_0 equ REG_LINUX_PARAM_0
|
||||
REG_ABI_PARAM_1_x equ REG_LINUX_PARAM_1_x
|
||||
REG_ABI_PARAM_1 equ REG_LINUX_PARAM_1
|
||||
REG_ABI_PARAM_2 equ REG_LINUX_PARAM_2
|
||||
REG_ABI_PARAM_3 equ REG_LINUX_PARAM_3
|
||||
REG_ABI_PARAM_4_x equ REG_LINUX_PARAM_4_x
|
||||
REG_ABI_PARAM_4 equ REG_LINUX_PARAM_4
|
||||
REG_ABI_PARAM_5 equ REG_LINUX_PARAM_5
|
||||
|
||||
; MY_ABI_LINUX_TO_WIN_2 (x64 Linux builds only)
; Copies the first two Linux parameter registers into the registers named
; REG_PARAM_0 / REG_PARAM_1:  r1 <- r7,  r2 <- r6.
; Overwrites r2 and r1; any value still needed from them (they are the Linux
; parameter 2 and 3 registers) must be read before this macro runs.
MY_ABI_LINUX_TO_WIN_2 macro
|
||||
mov r2, r6
|
||||
mov r1, r7
|
||||
endm
|
||||
|
||||
; MY_ABI_LINUX_TO_WIN_3 (x64 Linux builds only)
; Moves three parameters:  r8 <- r2,  r2 <- r6,  r1 <- r7.
; r2 is copied to r8 first because the second move overwrites r2.
MY_ABI_LINUX_TO_WIN_3 macro
|
||||
mov r8, r2
|
||||
mov r2, r6
|
||||
mov r1, r7
|
||||
endm
|
||||
|
||||
; MY_ABI_LINUX_TO_WIN_4 (x64 Linux builds only)
; Moves four parameters:  r9 <- r1,  r8 <- r2,  r2 <- r6,  r1 <- r7.
; r1 and r2 are read into r9 / r8 before they are overwritten by the last
; two moves, so the statement order must not change.
MY_ABI_LINUX_TO_WIN_4 macro
|
||||
mov r9, r1
|
||||
mov r8, r2
|
||||
mov r2, r6
|
||||
mov r1, r7
|
||||
endm
|
||||
|
||||
endif ; IS_LINUX
|
||||
|
||||
|
||||
; MY_PUSH_PRESERVED_ABI_REGS (x64 builds only)
; Saves the registers this code treats as preserved across a call:
;   Linux build : r3, r5 (MY_PUSH_2_REGS), then r12..r15  -> 6 pushes
;   otherwise   : r3, r5, r6, r7 (MY_PUSH_4_REGS), then r12..r15 -> 8 pushes
; Undo with MY_POP_PRESERVED_ABI_REGS.
MY_PUSH_PRESERVED_ABI_REGS macro
|
||||
if (IS_LINUX gt 0)
|
||||
MY_PUSH_2_REGS
|
||||
else
|
||||
MY_PUSH_4_REGS
|
||||
endif
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
endm
|
||||
|
||||
|
||||
; MY_POP_PRESERVED_ABI_REGS (x64 builds only)
; Restores, in reverse order, everything MY_PUSH_PRESERVED_ABI_REGS saved:
; r15..r12 first, then the 2-register (Linux) or 4-register group.
MY_POP_PRESERVED_ABI_REGS macro
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
if (IS_LINUX gt 0)
|
||||
MY_POP_2_REGS
|
||||
else
|
||||
MY_POP_4_REGS
|
||||
endif
|
||||
endm
|
||||
|
||||
endif ; x64
|
||||
180
3rdparty/lzma-21.03beta/Asm/x86/7zCrcOpt.asm
vendored
Normal file
180
3rdparty/lzma-21.03beta/Asm/x86/7zCrcOpt.asm
vendored
Normal file
@@ -0,0 +1,180 @@
|
||||
; 7zCrcOpt.asm -- CRC32 calculation : optimized version
|
||||
; 2021-02-07 : Igor Pavlov : Public domain
|
||||
|
||||
include 7zAsm.asm
|
||||
|
||||
MY_ASM_START
|
||||
|
||||
; ---------------------------------------------------------------------------
; Register roles and parameter locations for the CRC procedures below.
;   rD (r2) : data pointer (byte loads go through [rD])
;   rN (r7) : byte counter, later reused as the aligned end pointer
;   rT (r5) : base address of the lookup tables
;   x0      : running CRC value
; num_VAR / table_VAR name where the size and table parameters live:
;   x64     : registers r8 and r9
;   32-bit  : stack slots addressed from r4
; NOTE(review): the `REG_SIZE * 5` base offset presumably counts the return
; address plus the four registers pushed by MY_PUSH_4_REGS -- confirm if the
; prologue's push count ever changes.
; ---------------------------------------------------------------------------
rD equ r2
|
||||
rN equ r7
|
||||
rT equ r5
|
||||
|
||||
ifdef x64
|
||||
num_VAR equ r8
|
||||
table_VAR equ r9
|
||||
else
|
||||
if (IS_CDECL gt 0)
|
||||
crc_OFFS equ (REG_SIZE * 5)
|
||||
data_OFFS equ (REG_SIZE + crc_OFFS)
|
||||
size_OFFS equ (REG_SIZE + data_OFFS)
|
||||
else
|
||||
size_OFFS equ (REG_SIZE * 5)
|
||||
endif
|
||||
table_OFFS equ (REG_SIZE + size_OFFS)
|
||||
num_VAR equ [r4 + size_OFFS]
|
||||
table_VAR equ [r4 + table_OFFS]
|
||||
endif
|
||||
|
||||
; SRCDAT is an intentionally unfinished expression ending in `4 *`:
; `[SRCDAT k]` expands to `[rD + rN * 1 + 4 * k]`, the k-th dword from the
; current position rD + rN.
SRCDAT equ rD + rN * 1 + 4 *
|
||||
|
||||
; CRC op, dest, src, t
; Emits `op dest, DWORD PTR [rT + src * 4 + 0400h * t]`: applies op to dest
; using dword entry number src of the t-th 0400h-byte (256-dword) slice that
; starts at rT.
;   op   : instruction mnemonic (the file uses xor and mov)
;   dest : 32-bit destination register
;   src  : native-width register holding the entry index
;   t    : slice number, a constant
CRC macro op:req, dest:req, src:req, t:req
|
||||
op dest, DWORD PTR [rT + src * 4 + 0400h * t]
|
||||
endm
|
||||
|
||||
; CRC_XOR dest, src, t
; dest ^= dword entry `src` of table slice `t` (each slice is 0400h bytes,
; slices start at rT). Expands to the same single instruction that
; `CRC xor, dest, src, t` produces.
CRC_XOR macro dest:req, src:req, t:req
        xor     dest, DWORD PTR [rT + src * 4 + 0400h * t]
endm
|
||||
|
||||
; CRC_MOV dest, src, t
; dest = dword entry `src` of table slice `t` (each slice is 0400h bytes,
; slices start at rT). Expands to the same single instruction that
; `CRC mov, dest, src, t` produces.
CRC_MOV macro dest:req, src:req, t:req
        mov     dest, DWORD PTR [rT + src * 4 + 0400h * t]
endm
|
||||
|
||||
; CRC1b: folds one data byte into the CRC held in x0.
;   x6 = byte at [rD];  rD += 1
;   x6 ^= low byte of x0
;   x0 = (x0 >> 8) ^ slice 0 entry [x6]
;   rN -= 1
; Clobbers x3 and x6.
; The flags on exit are those of `dec rN`; the prologue's alignment loop
; branches on them directly after the macro, so nothing that alters flags
; may be appended after the `dec`.
CRC1b macro
|
||||
movzx x6, BYTE PTR [rD]
|
||||
inc rD
|
||||
movzx x3, x0_L
|
||||
xor x6, x3
|
||||
shr x0, 8
|
||||
CRC xor, x0, r6, 0
|
||||
dec rN
|
||||
endm
|
||||
|
||||
; MY_PROLOG crc_end   (CRC procedures)
;   crc_end : label of the byte-wise tail loop emitted by MY_EPILOG
; Steps:
;   1. Save registers and load the parameters: x0 = crc, rD = data,
;      rN = size, rT = table (sources depend on the ABI, see below).
;   2. size == 0                     -> jump to crc_end.
;   3. Process single bytes with CRC1b until rD is a multiple of 8 or rN
;      reaches zero.
;   4. Fewer than 16 bytes remaining -> jump to crc_end.
;   5. Set up the main loop: num_VAR = end pointer (rD + rN),
;      rN = (end - 8) rounded down to a multiple of 8, rD = rD - rN.
;      From here rD is a negative offset, rD + rN is the current position,
;      and the main loops stop when `add rD, 8` reaches zero.
;   6. x0 ^= first dword at the current position.
MY_PROLOG macro crc_end:req
|
||||
|
||||
ifdef x64
|
||||
if (IS_LINUX gt 0)
|
||||
MY_PUSH_2_REGS
|
||||
; Order matters here: x7 is read before rN (= r7) is written, and r2 is read
; into rN before rD (= r2) is written.
mov x0, REG_ABI_PARAM_0_x ; x0 = x7
|
||||
mov rT, REG_ABI_PARAM_3 ; r5 = r1
|
||||
mov rN, REG_ABI_PARAM_2 ; r7 = r2
|
||||
mov rD, REG_ABI_PARAM_1 ; r2 = r6
|
||||
else
|
||||
MY_PUSH_4_REGS
|
||||
mov x0, REG_ABI_PARAM_0_x ; x0 = x1
|
||||
mov rT, REG_ABI_PARAM_3 ; r5 = r9
|
||||
mov rN, REG_ABI_PARAM_2 ; r7 = r8
|
||||
; mov rD, REG_ABI_PARAM_1 ; r2 = r2
|
||||
endif
|
||||
else
|
||||
MY_PUSH_4_REGS
|
||||
if (IS_CDECL gt 0)
|
||||
mov x0, [r4 + crc_OFFS]
|
||||
mov rD, [r4 + data_OFFS]
|
||||
else
|
||||
mov x0, REG_ABI_PARAM_0_x
|
||||
endif
|
||||
mov rN, num_VAR
|
||||
mov rT, table_VAR
|
||||
endif
|
||||
|
||||
test rN, rN
|
||||
jz crc_end
|
||||
@@:
|
||||
test rD, 7
|
||||
jz @F
|
||||
CRC1b
|
||||
; flags come from the `dec rN` that ends CRC1b: loop while bytes remain
jnz @B
|
||||
@@:
|
||||
cmp rN, 16
|
||||
jb crc_end
|
||||
add rN, rD
|
||||
mov num_VAR, rN
|
||||
sub rN, 8
|
||||
and rN, NOT 7
|
||||
sub rD, rN
|
||||
xor x0, [SRCDAT 0]
|
||||
endm
|
||||
|
||||
; MY_EPILOG crc_end   (CRC procedures)
;   crc_end : name for the tail-loop label defined here; MY_PROLOG jumps to
;             it for short inputs
; Runs after a main loop has finished (rD == 0, so the current position is
; rN):
;   - xors x0 with the dword at the current position once more; both main
;     loops leave that dword already xored into x0
;   - rD = current position, rN = num_VAR (end pointer) - rD = bytes left
;   - crc_end: processes the remaining bytes one at a time with CRC1b
;   - restores the registers saved by MY_PROLOG
; The result is left in x0.
MY_EPILOG macro crc_end:req
|
||||
xor x0, [SRCDAT 0]
|
||||
mov rD, rN
|
||||
mov rN, num_VAR
|
||||
sub rN, rD
|
||||
crc_end:
|
||||
test rN, rN
|
||||
jz @F
|
||||
CRC1b
|
||||
jmp crc_end
|
||||
@@:
|
||||
; must mirror the push choice made in MY_PROLOG
if (IS_X64 gt 0) and (IS_LINUX gt 0)
|
||||
MY_POP_2_REGS
|
||||
else
|
||||
MY_POP_4_REGS
|
||||
endif
|
||||
endm
|
||||
|
||||
; CrcUpdateT8 (4 parameters: crc, data, size, table -- in that order, as
; named by the *_OFFS symbols above)
; Table-driven CRC update that consumes 8 bytes per main-loop iteration and
; uses table slices 0..7. The result is left in x0.
; Loop invariants on entry to main_loop_8:
;   x0 = CRC state already xored with the dword at [SRCDAT 0]
;   x1 = dword at [SRCDAT 1]
; Clobbers x1, x3, x6 in addition to the registers set up by MY_PROLOG.
MY_PROC CrcUpdateT8, 4
|
||||
MY_PROLOG crc_end_8
|
||||
mov x1, [SRCDAT 1]
|
||||
align 16
|
||||
main_loop_8:
|
||||
; x6 accumulates the next state, seeded with the first dword of the NEXT
; 8-byte group
mov x6, [SRCDAT 2]
|
||||
; the four bytes of x1 index slices 3, 2, 1, 0
movzx x3, x1_L
|
||||
CRC_XOR x6, r3, 3
|
||||
movzx x3, x1_H
|
||||
CRC_XOR x6, r3, 2
|
||||
shr x1, 16
|
||||
movzx x3, x1_L
|
||||
movzx x1, x1_H
|
||||
CRC_XOR x6, r3, 1
|
||||
movzx x3, x0_L
|
||||
CRC_XOR x6, r1, 0
|
||||
|
||||
; x1 is free now: preload the second dword of the next group
mov x1, [SRCDAT 3]
|
||||
; the four bytes of x0 index slices 7, 6, 5, 4
CRC_XOR x6, r3, 7
|
||||
movzx x3, x0_H
|
||||
shr x0, 16
|
||||
CRC_XOR x6, r3, 6
|
||||
movzx x3, x0_L
|
||||
CRC_XOR x6, r3, 5
|
||||
movzx x3, x0_H
|
||||
CRC_MOV x0, r3, 4
|
||||
xor x0, x6
|
||||
; rD is a negative offset that counts up to zero
add rD, 8
|
||||
jnz main_loop_8
|
||||
|
||||
MY_EPILOG crc_end_8
|
||||
MY_ENDP
|
||||
|
||||
; CrcUpdateT4 (4 parameters: crc, data, size, table -- in that order, as
; named by the *_OFFS symbols above)
; Table-driven CRC update that uses table slices 0..3. Each main-loop
; iteration performs two 4-byte steps (8 bytes in total), alternating the
; state between x0 and x1. The result is left in x0.
; Loop invariant on entry to main_loop_4: x0 = CRC state already xored with
; the dword at [SRCDAT 0].
; Clobbers x1, x3, x6 in addition to the registers set up by MY_PROLOG.
MY_PROC CrcUpdateT4, 4
|
||||
MY_PROLOG crc_end_4
|
||||
align 16
|
||||
main_loop_4:
|
||||
; step 1: split x0 into four byte indices (x1, x3, x0, x6 = bytes 0, 1, 2, 3)
movzx x1, x0_L
|
||||
movzx x3, x0_H
|
||||
shr x0, 16
|
||||
movzx x6, x0_H
|
||||
and x0, 0FFh
|
||||
CRC_MOV x1, r1, 3
|
||||
xor x1, [SRCDAT 1]
|
||||
CRC_XOR x1, r3, 2
|
||||
CRC_XOR x1, r6, 0
|
||||
CRC_XOR x1, r0, 1
|
||||
|
||||
; step 2: same pattern with the roles of x0 and x1 swapped
movzx x0, x1_L
|
||||
movzx x3, x1_H
|
||||
shr x1, 16
|
||||
movzx x6, x1_H
|
||||
and x1, 0FFh
|
||||
CRC_MOV x0, r0, 3
|
||||
xor x0, [SRCDAT 2]
|
||||
CRC_XOR x0, r3, 2
|
||||
CRC_XOR x0, r6, 0
|
||||
CRC_XOR x0, r1, 1
|
||||
; rD is a negative offset that counts up to zero
add rD, 8
|
||||
jnz main_loop_4
|
||||
|
||||
MY_EPILOG crc_end_4
|
||||
MY_ENDP
|
||||
|
||||
end
|
||||
734
3rdparty/lzma-21.03beta/Asm/x86/AesOpt.asm
vendored
Normal file
734
3rdparty/lzma-21.03beta/Asm/x86/AesOpt.asm
vendored
Normal file
@@ -0,0 +1,734 @@
|
||||
; AesOpt.asm -- AES optimized code for x86 AES hardware instructions
|
||||
; 2021-03-10 : Igor Pavlov : Public domain
|
||||
|
||||
include 7zAsm.asm
|
||||
|
||||
ifdef ymm0
|
||||
use_vaes_256 equ 1
|
||||
ECHO "++ VAES 256"
|
||||
else
|
||||
ECHO "-- NO VAES 256"
|
||||
endif
|
||||
|
||||
ifdef x64
|
||||
ECHO "x86-64"
|
||||
else
|
||||
ECHO "x86"
|
||||
if (IS_CDECL gt 0)
|
||||
ECHO "ABI : CDECL"
|
||||
else
|
||||
ECHO "ABI : no CDECL : FASTCALL"
|
||||
endif
|
||||
endif
|
||||
|
||||
if (IS_LINUX gt 0)
|
||||
ECHO "ABI : LINUX"
|
||||
else
|
||||
ECHO "ABI : WINDOWS"
|
||||
endif
|
||||
|
||||
MY_ASM_START
|
||||
|
||||
ifndef x64
|
||||
.686
|
||||
.xmm
|
||||
endif
|
||||
|
||||
|
||||
; MY_ALIGN EQU ALIGN(64)
|
||||
MY_ALIGN EQU
|
||||
|
||||
SEG_ALIGN EQU MY_ALIGN
|
||||
|
||||
MY_SEG_PROC macro name:req, numParams:req
|
||||
; seg_name equ @CatStr(_TEXT$, name)
|
||||
; seg_name SEGMENT SEG_ALIGN 'CODE'
|
||||
MY_PROC name, numParams
|
||||
endm
|
||||
|
||||
MY_SEG_ENDP macro
|
||||
; seg_name ENDS
|
||||
endm
|
||||
|
||||
|
||||
NUM_AES_KEYS_MAX equ 15
|
||||
|
||||
; the number of push operators in function PROLOG
|
||||
if (IS_LINUX eq 0) or (IS_X64 eq 0)
|
||||
num_regs_push equ 2
|
||||
stack_param_offset equ (REG_SIZE * (1 + num_regs_push))
|
||||
endif
|
||||
|
||||
ifdef x64
|
||||
num_param equ REG_ABI_PARAM_2
|
||||
else
|
||||
if (IS_CDECL gt 0)
|
||||
; size_t size
|
||||
; void * data
|
||||
; UInt32 * aes
|
||||
; ret-ip <- (r4)
|
||||
aes_OFFS equ (stack_param_offset)
|
||||
data_OFFS equ (REG_SIZE + aes_OFFS)
|
||||
size_OFFS equ (REG_SIZE + data_OFFS)
|
||||
num_param equ [r4 + size_OFFS]
|
||||
else
|
||||
num_param equ [r4 + stack_param_offset]
|
||||
endif
|
||||
endif
|
||||
|
||||
keys equ REG_PARAM_0 ; r1
|
||||
rD equ REG_PARAM_1 ; r2
|
||||
rN equ r0
|
||||
|
||||
koffs_x equ x7
|
||||
koffs_r equ r7
|
||||
|
||||
ksize_x equ x6
|
||||
ksize_r equ r6
|
||||
|
||||
keys2 equ r3
|
||||
|
||||
state equ xmm0
|
||||
key equ xmm0
|
||||
key_ymm equ ymm0
|
||||
key_ymm_n equ 0
|
||||
|
||||
ifdef x64
|
||||
ways = 11
|
||||
else
|
||||
ways = 4
|
||||
endif
|
||||
|
||||
ways_start_reg equ 1
|
||||
|
||||
iv equ @CatStr(xmm, %(ways_start_reg + ways))
|
||||
iv_ymm equ @CatStr(ymm, %(ways_start_reg + ways))
|
||||
|
||||
|
||||
; WOP op, op2
; Emits `op xmmK, op2` once for each of the `ways` working registers,
; K = ways_start_reg .. ways_start_reg + ways - 1.
;   op  : an instruction mnemonic, or a macro that takes the register as its
;         first argument (the file passes WRITE_TO_DATA, XOR_WITH_DATA and
;         INIT_CTR this way)
;   op2 : second operand, may be omitted
; The assembly-time counter i runs 0 .. ways-1 during the expansion, so an
; operand such as [rD + i * 16] addresses a different block per repetition.
; i equals `ways` afterwards.
WOP macro op, op2
|
||||
i = 0
|
||||
rept ways
|
||||
op @CatStr(xmm, %(ways_start_reg + i)), op2
|
||||
i = i + 1
|
||||
endm
|
||||
endm
|
||||
|
||||
|
||||
ifndef ABI_LINUX
|
||||
ifdef x64
|
||||
|
||||
; we use 32 bytes of home space in stack in WIN64-x64
|
||||
NUM_HOME_MM_REGS equ (32 / 16)
|
||||
; we preserve xmm registers starting from xmm6 in WIN64-x64
|
||||
MM_START_SAVE_REG equ 6
|
||||
|
||||
; SAVE_XMM num_used_mm_regs   (defined only for x64 without ABI_LINUX)
;   num_used_mm_regs : the procedure uses xmm0 .. xmm(num_used_mm_regs - 1)
; Stores xmm6 and above, up to the highest register used.
;   - the first NUM_HOME_MM_REGS registers go to
;     [r4 + stack_param_offset + i * 16]
;   - for any further registers r4 is lowered by stack_offset first and they
;     are stored from [r4] upwards
; Assigns the assembly-time variables num_save_mm_regs, num_save_mm_regs2
; and stack_offset. RESTORE_XMM reads them, so it always undoes the most
; recent SAVE_XMM expansion.
; movdqa requires 16-byte aligned addresses; the
; `stack_param_offset mod 16` term in stack_offset is what keeps the lowered
; r4 aligned.
SAVE_XMM macro num_used_mm_regs:req
|
||||
num_save_mm_regs = num_used_mm_regs - MM_START_SAVE_REG
|
||||
if num_save_mm_regs GT 0
|
||||
num_save_mm_regs2 = num_save_mm_regs - NUM_HOME_MM_REGS
|
||||
; RSP is (16*x + 8) after entering the function in WIN64-x64
|
||||
stack_offset = 16 * num_save_mm_regs2 + (stack_param_offset mod 16)
|
||||
|
||||
i = 0
|
||||
rept num_save_mm_regs
|
||||
|
||||
; emitted once, right before the first register that no longer fits in the
; first NUM_HOME_MM_REGS slots
if i eq NUM_HOME_MM_REGS
|
||||
sub r4, stack_offset
|
||||
endif
|
||||
|
||||
if i lt NUM_HOME_MM_REGS
|
||||
movdqa [r4 + stack_param_offset + i * 16], @CatStr(xmm, %(MM_START_SAVE_REG + i))
|
||||
else
|
||||
movdqa [r4 + (i - NUM_HOME_MM_REGS) * 16], @CatStr(xmm, %(MM_START_SAVE_REG + i))
|
||||
endif
|
||||
|
||||
i = i + 1
|
||||
endm
|
||||
endif
|
||||
endm
|
||||
|
||||
; RESTORE_XMM num_used_mm_regs   (defined only for x64 without ABI_LINUX)
; Reloads the registers stored by the most recent SAVE_XMM and releases the
; stack space it reserved.
; NOTE: the num_used_mm_regs argument is required but never referenced in
; the body. Everything is driven by num_save_mm_regs, num_save_mm_regs2 and
; stack_offset as left behind by SAVE_XMM, so the value passed by the caller
; has no effect.
; Order: registers stored below the original r4 are reloaded first, r4 is
; raised by stack_offset, then the registers stored at
; [r4 + stack_param_offset + ...] are reloaded.
RESTORE_XMM macro num_used_mm_regs:req
|
||||
if num_save_mm_regs GT 0
|
||||
i = 0
|
||||
if num_save_mm_regs2 GT 0
|
||||
rept num_save_mm_regs2
|
||||
movdqa @CatStr(xmm, %(MM_START_SAVE_REG + NUM_HOME_MM_REGS + i)), [r4 + i * 16]
|
||||
i = i + 1
|
||||
endm
|
||||
add r4, stack_offset
|
||||
endif
|
||||
|
||||
; i is num_save_mm_regs2 here if the block above ran, otherwise 0
num_low_regs = num_save_mm_regs - i
|
||||
i = 0
|
||||
rept num_low_regs
|
||||
movdqa @CatStr(xmm, %(MM_START_SAVE_REG + i)), [r4 + stack_param_offset + i * 16]
|
||||
i = i + 1
|
||||
endm
|
||||
endif
|
||||
endm
|
||||
|
||||
endif ; x64
|
||||
endif ; ABI_LINUX
|
||||
|
||||
|
||||
; MY_PROLOG num_used_mm_regs   (AES procedures)
;   num_used_mm_regs : forwarded to SAVE_XMM (x64 builds without ABI_LINUX)
; On exit:
;   keys (r1) = first parameter, rD (r2) = second parameter,
;   rN (r0)   = third parameter; the loops below subtract 1 from it per
;               16-byte block
;   ksize_x   = 32 * dword at [keys + 16]
;               NOTE(review): that dword looks like a round-key count in
;               32-byte units -- confirm against the C-side key layout
; Pushes r6 and r7 except on x64 Linux builds; the two pushes are what
; num_regs_push (= 2) accounts for in stack_param_offset.
MY_PROLOG macro num_used_mm_regs:req
|
||||
; num_regs_push: must be equal to the number of push operators
|
||||
; push r3
|
||||
; push r5
|
||||
if (IS_LINUX eq 0) or (IS_X64 eq 0)
|
||||
push r6
|
||||
push r7
|
||||
endif
|
||||
|
||||
; Must stay ahead of everything below: on x64 Linux num_param is r2, which
; MY_ABI_LINUX_TO_WIN_2 overwrites; on 32-bit builds it is an r4-relative
; slot, and SAVE_XMM may move r4.
mov rN, num_param ; don't move it; num_param can use stack pointer (r4)
|
||||
|
||||
if (IS_X64 eq 0)
|
||||
if (IS_CDECL gt 0)
|
||||
mov rD, [r4 + data_OFFS]
|
||||
mov keys, [r4 + aes_OFFS]
|
||||
endif
|
||||
elseif (IS_LINUX gt 0)
|
||||
MY_ABI_LINUX_TO_WIN_2
|
||||
endif
|
||||
|
||||
|
||||
ifndef ABI_LINUX
|
||||
ifdef x64
|
||||
SAVE_XMM num_used_mm_regs
|
||||
endif
|
||||
endif
|
||||
|
||||
mov ksize_x, [keys + 16]
|
||||
shl ksize_x, 5
|
||||
endm
|
||||
|
||||
|
||||
; MY_EPILOG   (AES procedures)
; Undoes MY_PROLOG and ends the procedure: restores the saved xmm registers
; (x64 builds without ABI_LINUX), pops r7 and r6 where MY_PROLOG pushed
; them, then expands MY_ENDP, which emits the return and closes the PROC.
MY_EPILOG macro
|
||||
ifndef ABI_LINUX
|
||||
ifdef x64
|
||||
; RESTORE_XMM does not read its argument; it restores whatever the most
; recent SAVE_XMM recorded.
RESTORE_XMM num_save_mm_regs
|
||||
endif
|
||||
endif
|
||||
|
||||
if (IS_LINUX eq 0) or (IS_X64 eq 0)
|
||||
pop r7
|
||||
pop r6
|
||||
endif
|
||||
; pop r5
|
||||
; pop r3
|
||||
MY_ENDP
|
||||
endm
|
||||
|
||||
|
||||
; OP_KEY op, offs
; Emits `op state, [keys + offs]`: applies op to the single-block state
; register (xmm0) with the 16-byte operand at keys + offs.
OP_KEY macro op:req, offs:req
|
||||
op state, [keys + offs]
|
||||
endm
|
||||
|
||||
|
||||
; WOP_KEY op, offs
; Loads the 16 bytes at [keys + offs] into `key` and applies
; `op xmmK, key` to every working register through WOP.
; `key` and `state` are both xmm0, so this macro destroys `state`.
WOP_KEY macro op:req, offs:req
|
||||
movdqa key, [keys + offs]
|
||||
WOP op, key
|
||||
endm
|
||||
|
||||
|
||||
; ---------- AES-CBC Decode ----------
|
||||
|
||||
|
||||
XOR_WITH_DATA macro reg, _ppp_
|
||||
pxor reg, [rD + i * 16]
|
||||
endm
|
||||
|
||||
WRITE_TO_DATA macro reg, _ppp_
|
||||
movdqa [rD + i * 16], reg
|
||||
endm
|
||||
|
||||
|
||||
; state0 equ @CatStr(xmm, %(ways_start_reg))
|
||||
|
||||
key0 equ @CatStr(xmm, %(ways_start_reg + ways + 1))
|
||||
key0_ymm equ @CatStr(ymm, %(ways_start_reg + ways + 1))
|
||||
|
||||
key_last equ @CatStr(xmm, %(ways_start_reg + ways + 2))
|
||||
key_last_ymm equ @CatStr(ymm, %(ways_start_reg + ways + 2))
|
||||
key_last_ymm_n equ (ways_start_reg + ways + 2)
|
||||
|
||||
NUM_CBC_REGS equ (ways_start_reg + ways + 3)
|
||||
|
||||
|
||||
; AesCbc_Decode_HW (3 parameters; after MY_PROLOG: keys = r1, rD = r2,
; rN = r0 counts 16-byte blocks)
; CBC decryption in place over the data at rD.
; Memory layout read through `keys`, as used by the code below:
;   [keys]        IV; the updated IV is stored back here at the end
;   [keys + 16]   dword read by MY_PROLOG to compute ksize
;   [keys + 32..] 16-byte round keys; after `add keys, 32` the key at
;                 [keys + ksize_r] is applied first and the key at [keys]
;                 last
; Structure: a main loop that handles `ways` blocks per iteration, then a
; tail loop that handles one block per iteration.
; The `::` labels are additional entry points used by AesCbc_Decode_HW_256.
MY_SEG_PROC AesCbc_Decode_HW, 3
|
||||
|
||||
AesCbc_Decode_HW_start::
|
||||
MY_PROLOG NUM_CBC_REGS
|
||||
|
||||
AesCbc_Decode_HW_start_2::
|
||||
movdqa iv, [keys]
|
||||
add keys, 32
|
||||
|
||||
movdqa key0, [keys + 1 * ksize_r]
|
||||
movdqa key_last, [keys]
|
||||
sub ksize_x, 16
|
||||
|
||||
jmp check2
|
||||
align 16
|
||||
nextBlocks2:
|
||||
; load `ways` ciphertext blocks into the working registers
WOP movdqa, [rD + i * 16]
|
||||
mov koffs_x, ksize_x
|
||||
; WOP_KEY pxor, ksize_r + 16
|
||||
WOP pxor, key0
|
||||
; align 16
|
||||
@@:
|
||||
; one aesdec round per key, walking koffs_r down in steps of 16 until zero
WOP_KEY aesdec, 1 * koffs_r
|
||||
sub koffs_r, 16
|
||||
jnz @B
|
||||
; WOP_KEY aesdeclast, 0
|
||||
WOP aesdeclast, key_last
|
||||
|
||||
; CBC chaining: block 0 is xored with iv, block i with ciphertext block i-1,
; which is still unmodified in memory at this point
pxor @CatStr(xmm, %(ways_start_reg)), iv
|
||||
i = 1
|
||||
rept ways - 1
|
||||
pxor @CatStr(xmm, %(ways_start_reg + i)), [rD + i * 16 - 16]
|
||||
i = i + 1
|
||||
endm
|
||||
; the last ciphertext block becomes the next iv; it has to be read before
; WRITE_TO_DATA overwrites it
movdqa iv, [rD + ways * 16 - 16]
|
||||
WOP WRITE_TO_DATA
|
||||
|
||||
add rD, ways * 16
|
||||
AesCbc_Decode_HW_start_3::
|
||||
check2:
|
||||
; carry is set when fewer than `ways` blocks remain
sub rN, ways
|
||||
jnc nextBlocks2
|
||||
add rN, ways
|
||||
|
||||
; the tail loop consumes two round keys per pass, so ksize is lowered again
sub ksize_x, 16
|
||||
|
||||
jmp check
|
||||
nextBlock:
|
||||
movdqa state, [rD]
|
||||
mov koffs_x, ksize_x
|
||||
; OP_KEY pxor, 1 * ksize_r + 32
|
||||
pxor state, key0
|
||||
; movdqa state0, [rD]
|
||||
; movdqa state, key0
|
||||
; pxor state, state0
|
||||
@@:
|
||||
OP_KEY aesdec, 1 * koffs_r + 16
|
||||
OP_KEY aesdec, 1 * koffs_r
|
||||
sub koffs_r, 32
|
||||
jnz @B
|
||||
OP_KEY aesdec, 16
|
||||
; OP_KEY aesdeclast, 0
|
||||
aesdeclast state, key_last
|
||||
|
||||
pxor state, iv
|
||||
movdqa iv, [rD]
|
||||
; movdqa iv, state0
|
||||
movdqa [rD], state
|
||||
|
||||
add rD, 16
|
||||
check:
|
||||
sub rN, 1
|
||||
jnc nextBlock
|
||||
|
||||
; keys was advanced by 32 above, so this stores iv where it was loaded from
movdqa [keys - 32], iv
|
||||
MY_EPILOG
|
||||
|
||||
|
||||
|
||||
|
||||
; ---------- AVX ----------
|
||||
|
||||
|
||||
AVX__WOP_n macro op
|
||||
i = 0
|
||||
rept ways
|
||||
op (ways_start_reg + i)
|
||||
i = i + 1
|
||||
endm
|
||||
endm
|
||||
|
||||
AVX__WOP macro op
|
||||
i = 0
|
||||
rept ways
|
||||
op @CatStr(ymm, %(ways_start_reg + i))
|
||||
i = i + 1
|
||||
endm
|
||||
endm
|
||||
|
||||
|
||||
AVX__WOP_KEY macro op:req, offs:req
|
||||
vmovdqa key_ymm, ymmword ptr [keys2 + offs]
|
||||
AVX__WOP_n op
|
||||
endm
|
||||
|
||||
|
||||
AVX__CBC_START macro reg
|
||||
; vpxor reg, key_ymm, ymmword ptr [rD + 32 * i]
|
||||
vpxor reg, key0_ymm, ymmword ptr [rD + 32 * i]
|
||||
endm
|
||||
|
||||
AVX__CBC_END macro reg
|
||||
if i eq 0
|
||||
vpxor reg, reg, iv_ymm
|
||||
else
|
||||
vpxor reg, reg, ymmword ptr [rD + i * 32 - 16]
|
||||
endif
|
||||
endm
|
||||
|
||||
|
||||
AVX__WRITE_TO_DATA macro reg
|
||||
vmovdqu ymmword ptr [rD + 32 * i], reg
|
||||
endm
|
||||
|
||||
AVX__XOR_WITH_DATA macro reg
|
||||
vpxor reg, reg, ymmword ptr [rD + 32 * i]
|
||||
endm
|
||||
|
||||
AVX__CTR_START macro reg
|
||||
vpaddq iv_ymm, iv_ymm, one_ymm
|
||||
; vpxor reg, iv_ymm, key_ymm
|
||||
vpxor reg, iv_ymm, key0_ymm
|
||||
endm
|
||||
|
||||
|
||||
; MY_VAES_INSTR_2 cmd, dest, a1, a2
; Emits one 5-byte instruction as raw `db` bytes in place of a mnemonic.
;   cmd          : opcode byte
;   dest, a1, a2 : register NUMBERS in the range 0..15 (not register names)
; Bytes, in order:
;   0C4h
;   2 + 40h + 20h * (1 if a2 < 8 else 0) + 80h * (1 if dest < 8 else 0)
;   5 + 8 * (bitwise-not of a1, low 4 bits)
;   cmd
;   0C0h + 8 * (dest and 7) + (a2 and 7)
; NOTE(review): this matches the 3-byte VEX form (opcode map 0F38, 66
; prefix, 256-bit vector length, register-direct ModRM), i.e.
; `<op> ymm_dest, ymm_a1, ymm_a2` -- confirm against the Intel SDM encoding
; tables before changing any constant.
MY_VAES_INSTR_2 macro cmd, dest, a1, a2
|
||||
db 0c4H
|
||||
db 2 + 040H + 020h * (1 - (a2) / 8) + 080h * (1 - (dest) / 8)
|
||||
db 5 + 8 * ((not (a1)) and 15)
|
||||
db cmd
|
||||
db 0c0H + 8 * ((dest) and 7) + ((a2) and 7)
|
||||
endm
|
||||
|
||||
; MY_VAES_INSTR cmd, dest, a
; Two-operand convenience form of MY_VAES_INSTR_2: dest is passed as both
; the destination and the first source register number.
MY_VAES_INSTR macro cmd, dest, a
|
||||
MY_VAES_INSTR_2 cmd, dest, dest, a
|
||||
endm
|
||||
|
||||
; Byte-encoded AES round instructions.
; Each macro takes two register NUMBERS: dest (which also serves as the
; first source) and a. They differ only in the opcode byte handed to
; MY_VAES_INSTR_2:
;   0DCh MY_vaesenc      0DDh MY_vaesenclast
;   0DEh MY_vaesdec      0DFh MY_vaesdeclast
MY_vaesenc macro dest, a
        MY_VAES_INSTR_2 0dcH, dest, dest, a
endm

MY_vaesenclast macro dest, a
        MY_VAES_INSTR_2 0ddH, dest, dest, a
endm

MY_vaesdec macro dest, a
        MY_VAES_INSTR_2 0deH, dest, dest, a
endm

MY_vaesdeclast macro dest, a
        MY_VAES_INSTR_2 0dfH, dest, dest, a
endm
|
||||
|
||||
|
||||
AVX__VAES_DEC macro reg
|
||||
MY_vaesdec reg, key_ymm_n
|
||||
endm
|
||||
|
||||
AVX__VAES_DEC_LAST_key_last macro reg
|
||||
; MY_vaesdeclast reg, key_ymm_n
|
||||
MY_vaesdeclast reg, key_last_ymm_n
|
||||
endm
|
||||
|
||||
AVX__VAES_ENC macro reg
|
||||
MY_vaesenc reg, key_ymm_n
|
||||
endm
|
||||
|
||||
AVX__VAES_ENC_LAST macro reg
|
||||
MY_vaesenclast reg, key_ymm_n
|
||||
endm
|
||||
|
||||
AVX__vinserti128_TO_HIGH macro dest, src
|
||||
vinserti128 dest, dest, src, 1
|
||||
endm
|
||||
|
||||
|
||||
MY_PROC AesCbc_Decode_HW_256, 3
|
||||
ifdef use_vaes_256
|
||||
MY_PROLOG NUM_CBC_REGS
|
||||
|
||||
cmp rN, ways * 2
|
||||
jb AesCbc_Decode_HW_start_2
|
||||
|
||||
vmovdqa iv, xmmword ptr [keys]
|
||||
add keys, 32
|
||||
|
||||
vbroadcasti128 key0_ymm, xmmword ptr [keys + 1 * ksize_r]
|
||||
vbroadcasti128 key_last_ymm, xmmword ptr [keys]
|
||||
sub ksize_x, 16
|
||||
mov koffs_x, ksize_x
|
||||
add ksize_x, ksize_x
|
||||
|
||||
AVX_STACK_SUB = ((NUM_AES_KEYS_MAX + 1 - 2) * 32)
|
||||
push keys2
|
||||
sub r4, AVX_STACK_SUB
|
||||
; sub r4, 32
|
||||
; sub r4, ksize_r
|
||||
; lea keys2, [r4 + 32]
|
||||
mov keys2, r4
|
||||
and keys2, -32
|
||||
broad:
|
||||
vbroadcasti128 key_ymm, xmmword ptr [keys + 1 * koffs_r]
|
||||
vmovdqa ymmword ptr [keys2 + koffs_r * 2], key_ymm
|
||||
sub koffs_r, 16
|
||||
; jnc broad
|
||||
jnz broad
|
||||
|
||||
sub rN, ways * 2
|
||||
|
||||
align 16
|
||||
avx_cbcdec_nextBlock2:
|
||||
mov koffs_x, ksize_x
|
||||
; AVX__WOP_KEY AVX__CBC_START, 1 * koffs_r + 32
|
||||
AVX__WOP AVX__CBC_START
|
||||
@@:
|
||||
AVX__WOP_KEY AVX__VAES_DEC, 1 * koffs_r
|
||||
sub koffs_r, 32
|
||||
jnz @B
|
||||
; AVX__WOP_KEY AVX__VAES_DEC_LAST, 0
|
||||
AVX__WOP_n AVX__VAES_DEC_LAST_key_last
|
||||
|
||||
AVX__vinserti128_TO_HIGH iv_ymm, xmmword ptr [rD]
|
||||
AVX__WOP AVX__CBC_END
|
||||
|
||||
vmovdqa iv, xmmword ptr [rD + ways * 32 - 16]
|
||||
AVX__WOP AVX__WRITE_TO_DATA
|
||||
|
||||
add rD, ways * 32
|
||||
sub rN, ways * 2
|
||||
jnc avx_cbcdec_nextBlock2
|
||||
add rN, ways * 2
|
||||
|
||||
shr ksize_x, 1
|
||||
|
||||
; lea r4, [r4 + 1 * ksize_r + 32]
|
||||
add r4, AVX_STACK_SUB
|
||||
pop keys2
|
||||
|
||||
vzeroupper
|
||||
jmp AesCbc_Decode_HW_start_3
|
||||
else
|
||||
jmp AesCbc_Decode_HW_start
|
||||
endif
|
||||
MY_ENDP
|
||||
MY_SEG_ENDP
|
||||
|
||||
|
||||
|
||||
|
||||
; ---------- AES-CBC Encode ----------
|
||||
|
||||
e0 equ xmm1
|
||||
|
||||
CENC_START_KEY equ 2
|
||||
CENC_NUM_REG_KEYS equ (3 * 2)
|
||||
; last_key equ @CatStr(xmm, %(CENC_START_KEY + CENC_NUM_REG_KEYS))
|
||||
|
||||
; AesCbc_Encode_HW (3 parameters; after MY_PROLOG: keys = r1, rD = r2,
; rN = r0 counts 16-byte blocks)
; CBC encryption in place over the data at rD, one block per iteration
; (each block depends on the previous ciphertext block).
; Register use:
;   state (xmm0) : chaining value, i.e. the IV and then each ciphertext block
;   e0 (xmm1)    : current plaintext block
;   xmm2 .. xmm7 : the first CENC_NUM_REG_KEYS (6) round keys, kept in
;                  registers for the whole call
; The remaining round keys are read from memory on every block.
MY_SEG_PROC AesCbc_Encode_HW, 3
|
||||
MY_PROLOG (CENC_START_KEY + CENC_NUM_REG_KEYS + 0)
|
||||
|
||||
movdqa state, [keys]
|
||||
add keys, 32
|
||||
|
||||
i = 0
|
||||
rept CENC_NUM_REG_KEYS
|
||||
movdqa @CatStr(xmm, %(CENC_START_KEY + i)), [keys + i * 16]
|
||||
i = i + 1
|
||||
endm
|
||||
|
||||
; keys now moves forward by ksize_r and ksize_r becomes the negative offset,
; relative to the new keys, of the first round key that is NOT held in a
; register
add keys, ksize_r
|
||||
neg ksize_r
|
||||
add ksize_r, (16 * CENC_NUM_REG_KEYS)
|
||||
; movdqa last_key, [keys]
|
||||
jmp check_e
|
||||
|
||||
align 16
|
||||
nextBlock_e:
|
||||
movdqa e0, [rD]
|
||||
mov koffs_r, ksize_r
|
||||
; state = previous ciphertext ^ plaintext ^ first round key
pxor e0, @CatStr(xmm, %(CENC_START_KEY))
|
||||
pxor state, e0
|
||||
|
||||
; rounds that use the register-resident keys
i = 1
|
||||
rept (CENC_NUM_REG_KEYS - 1)
|
||||
aesenc state, @CatStr(xmm, %(CENC_START_KEY + i))
|
||||
i = i + 1
|
||||
endm
|
||||
|
||||
; rounds that use keys from memory, two per pass, until koffs_r reaches zero
@@:
|
||||
OP_KEY aesenc, 1 * koffs_r
|
||||
OP_KEY aesenc, 1 * koffs_r + 16
|
||||
add koffs_r, 32
|
||||
jnz @B
|
||||
OP_KEY aesenclast, 0
|
||||
; aesenclast state, last_key
|
||||
|
||||
movdqa [rD], state
|
||||
add rD, 16
|
||||
check_e:
|
||||
sub rN, 1
|
||||
jnc nextBlock_e
|
||||
|
||||
; movdqa [keys - 32], state
|
||||
; The adjustments made to keys and ksize_r above cancel out in this address,
; so the final chaining value is stored where the IV was loaded from.
movdqa [keys + 1 * ksize_r - (16 * CENC_NUM_REG_KEYS) - 32], state
|
||||
MY_EPILOG
|
||||
MY_SEG_ENDP
|
||||
|
||||
|
||||
|
||||
; ---------- AES-CTR ----------
|
||||
|
||||
ifdef x64
|
||||
; ways = 11
|
||||
endif
|
||||
|
||||
|
||||
one equ @CatStr(xmm, %(ways_start_reg + ways + 1))
|
||||
one_ymm equ @CatStr(ymm, %(ways_start_reg + ways + 1))
|
||||
key0 equ @CatStr(xmm, %(ways_start_reg + ways + 2))
|
||||
key0_ymm equ @CatStr(ymm, %(ways_start_reg + ways + 2))
|
||||
NUM_CTR_REGS equ (ways_start_reg + ways + 3)
|
||||
|
||||
INIT_CTR macro reg, _ppp_
|
||||
paddq iv, one
|
||||
movdqa reg, iv
|
||||
endm
|
||||
|
||||
|
||||
MY_SEG_PROC AesCtr_Code_HW, 3
|
||||
Ctr_start::
|
||||
MY_PROLOG NUM_CTR_REGS
|
||||
|
||||
Ctr_start_2::
|
||||
movdqa iv, [keys]
|
||||
add keys, 32
|
||||
movdqa key0, [keys]
|
||||
|
||||
add keys, ksize_r
|
||||
neg ksize_r
|
||||
add ksize_r, 16
|
||||
|
||||
Ctr_start_3::
|
||||
mov koffs_x, 1
|
||||
movd one, koffs_x
|
||||
jmp check2_c
|
||||
|
||||
align 16
|
||||
nextBlocks2_c:
|
||||
WOP INIT_CTR, 0
|
||||
mov koffs_r, ksize_r
|
||||
; WOP_KEY pxor, 1 * koffs_r -16
|
||||
WOP pxor, key0
|
||||
@@:
|
||||
WOP_KEY aesenc, 1 * koffs_r
|
||||
add koffs_r, 16
|
||||
jnz @B
|
||||
WOP_KEY aesenclast, 0
|
||||
|
||||
WOP XOR_WITH_DATA
|
||||
WOP WRITE_TO_DATA
|
||||
add rD, ways * 16
|
||||
check2_c:
|
||||
sub rN, ways
|
||||
jnc nextBlocks2_c
|
||||
add rN, ways
|
||||
|
||||
sub keys, 16
|
||||
add ksize_r, 16
|
||||
|
||||
jmp check_c
|
||||
|
||||
; align 16
|
||||
nextBlock_c:
|
||||
paddq iv, one
|
||||
; movdqa state, [keys + 1 * koffs_r - 16]
|
||||
movdqa state, key0
|
||||
mov koffs_r, ksize_r
|
||||
pxor state, iv
|
||||
|
||||
@@:
|
||||
OP_KEY aesenc, 1 * koffs_r
|
||||
OP_KEY aesenc, 1 * koffs_r + 16
|
||||
add koffs_r, 32
|
||||
jnz @B
|
||||
OP_KEY aesenc, 0
|
||||
OP_KEY aesenclast, 16
|
||||
|
||||
pxor state, [rD]
|
||||
movdqa [rD], state
|
||||
add rD, 16
|
||||
check_c:
|
||||
sub rN, 1
|
||||
jnc nextBlock_c
|
||||
|
||||
; movdqa [keys - 32], iv
|
||||
movdqa [keys + 1 * ksize_r - 16 - 32], iv
|
||||
MY_EPILOG
|
||||
|
||||
|
||||
MY_PROC AesCtr_Code_HW_256, 3
|
||||
ifdef use_vaes_256
|
||||
MY_PROLOG NUM_CTR_REGS
|
||||
|
||||
cmp rN, ways * 2
|
||||
jb Ctr_start_2
|
||||
|
||||
vbroadcasti128 iv_ymm, xmmword ptr [keys]
|
||||
add keys, 32
|
||||
vbroadcasti128 key0_ymm, xmmword ptr [keys]
|
||||
mov koffs_x, 1
|
||||
vmovd one, koffs_x
|
||||
vpsubq iv_ymm, iv_ymm, one_ymm
|
||||
vpaddq one, one, one
|
||||
AVX__vinserti128_TO_HIGH one_ymm, one
|
||||
|
||||
add keys, ksize_r
|
||||
sub ksize_x, 16
|
||||
neg ksize_r
|
||||
mov koffs_r, ksize_r
|
||||
add ksize_r, ksize_r
|
||||
|
||||
AVX_STACK_SUB = ((NUM_AES_KEYS_MAX + 1 - 1) * 32)
|
||||
push keys2
|
||||
lea keys2, [r4 - 32]
|
||||
sub r4, AVX_STACK_SUB
|
||||
and keys2, -32
|
||||
vbroadcasti128 key_ymm, xmmword ptr [keys]
|
||||
vmovdqa ymmword ptr [keys2], key_ymm
|
||||
@@:
|
||||
vbroadcasti128 key_ymm, xmmword ptr [keys + 1 * koffs_r]
|
||||
vmovdqa ymmword ptr [keys2 + koffs_r * 2], key_ymm
|
||||
add koffs_r, 16
|
||||
jnz @B
|
||||
|
||||
sub rN, ways * 2
|
||||
|
||||
align 16
|
||||
avx_ctr_nextBlock2:
|
||||
mov koffs_r, ksize_r
|
||||
AVX__WOP AVX__CTR_START
|
||||
; AVX__WOP_KEY AVX__CTR_START, 1 * koffs_r - 32
|
||||
@@:
|
||||
AVX__WOP_KEY AVX__VAES_ENC, 1 * koffs_r
|
||||
add koffs_r, 32
|
||||
jnz @B
|
||||
AVX__WOP_KEY AVX__VAES_ENC_LAST, 0
|
||||
|
||||
AVX__WOP AVX__XOR_WITH_DATA
|
||||
AVX__WOP AVX__WRITE_TO_DATA
|
||||
|
||||
add rD, ways * 32
|
||||
sub rN, ways * 2
|
||||
jnc avx_ctr_nextBlock2
|
||||
add rN, ways * 2
|
||||
|
||||
vextracti128 iv, iv_ymm, 1
|
||||
sar ksize_r, 1
|
||||
|
||||
add r4, AVX_STACK_SUB
|
||||
pop keys2
|
||||
|
||||
vzeroupper
|
||||
jmp Ctr_start_3
|
||||
else
|
||||
jmp Ctr_start
|
||||
endif
|
||||
MY_ENDP
|
||||
MY_SEG_ENDP
|
||||
|
||||
end
|
||||
513
3rdparty/lzma-21.03beta/Asm/x86/LzFindOpt.asm
vendored
Normal file
513
3rdparty/lzma-21.03beta/Asm/x86/LzFindOpt.asm
vendored
Normal file
@@ -0,0 +1,513 @@
|
||||
; LzFindOpt.asm -- ASM version of GetMatchesSpecN_2() function
|
||||
; 2021-07-13: Igor Pavlov : Public domain
|
||||
;
|
||||
|
||||
ifndef x64
|
||||
; x64=1
|
||||
; .err <x64_IS_REQUIRED>
|
||||
endif
|
||||
|
||||
include 7zAsm.asm
|
||||
|
||||
MY_ASM_START
|
||||
|
||||
_TEXT$LZFINDOPT SEGMENT ALIGN(64) 'CODE'
|
||||
|
||||
; MY_ALIGN <boundary>
; Thin wrapper around the assembler's ALIGN directive; the argument is required.
MY_ALIGN macro boundary:req
        align   boundary
endm
|
||||
|
||||
; MY_ALIGN_32
; Aligns the current location to a 32-byte boundary.
MY_ALIGN_32 macro
        align   32
endm
|
||||
|
||||
; MY_ALIGN_64
; Aligns the current location to a 64-byte boundary.
MY_ALIGN_64 macro
        align   64
endm
|
||||
|
||||
|
||||
; Register aliases used by GetMatchesSpecN_2.
; Suffix convention visible below: name_x is the xN (32-bit) view, the bare name or
; name_r is the rN (full-width) view of the same register number.
; t0 / t1 are scratch registers (register 0 and register 3).
t0_L equ x0_L
|
||||
t0_x equ x0
|
||||
t0 equ r0
|
||||
t1_x equ x3
|
||||
t1 equ r3
|
||||
|
||||
; cp shares register 3 with t1, so using t1 destroys cp (and vice versa)
cp_x equ t1_x
|
||||
cp_r equ t1
|
||||
m equ x5
|
||||
m_r equ r5
|
||||
len_x equ x6
|
||||
len equ r6
|
||||
diff_x equ x7
|
||||
diff equ r7
|
||||
len0 equ r10
|
||||
len1_x equ x11
|
||||
len1 equ r11
|
||||
maxLen_x equ x12
|
||||
maxLen equ r12
|
||||
d equ r13
|
||||
ptr0 equ r14
|
||||
ptr1 equ r15
|
||||
|
||||
; Second set of names for the same registers: each alias below overlaps the
; register named on its right, so the two values cannot be live at the same time.
d_lim equ m_r
|
||||
cycSize equ len_x
|
||||
hash_lim equ len0
|
||||
delta1_x equ len1_x
|
||||
delta1_r equ len1
|
||||
delta_x equ maxLen_x
|
||||
delta_r equ maxLen
|
||||
hash equ ptr0
|
||||
src equ ptr1
|
||||
|
||||
|
||||
|
||||
; Registers that hold lenLimit / pos / cur / son inside GetMatchesSpecN_2.
; Linux build: fixed registers (the procedure moves the ABI arguments into them).
; Other builds: the ABI argument registers are used where they arrive.
if (IS_LINUX gt 0)
|
||||
|
||||
; r1 r2 r8 r9 : win32
|
||||
; r7 r6 r2 r1 r8 r9 : linux
|
||||
|
||||
lenLimit equ r8
|
||||
lenLimit_x equ x8
|
||||
; pos_r equ r2
|
||||
pos equ x2
|
||||
cur equ r1
|
||||
son equ r9
|
||||
|
||||
else
|
||||
|
||||
lenLimit equ REG_ABI_PARAM_2
|
||||
lenLimit_x equ REG_ABI_PARAM_2_x
|
||||
pos equ REG_ABI_PARAM_1_x
|
||||
cur equ REG_ABI_PARAM_0
|
||||
son equ REG_ABI_PARAM_3
|
||||
|
||||
endif
|
||||
|
||||
|
||||
; Byte offsets of the stack-passed arguments, relative to r0 (GetMatchesSpecN_2 copies
; the entry-time stack pointer, taken after MY_PUSH_PRESERVED_ABI_REGS, into r0).
; NOTE(review): the constants (6 + 1) and (8 + 1 + 4) presumably count pushed registers,
; the return address and, on Windows, the 4 shadow slots - confirm against 7zAsm.asm.
if (IS_LINUX gt 0)
|
||||
maxLen_OFFS equ (REG_SIZE * (6 + 1))
|
||||
else
|
||||
cutValue_OFFS equ (REG_SIZE * (8 + 1 + 4))
|
||||
d_OFFS equ (REG_SIZE + cutValue_OFFS)
|
||||
maxLen_OFFS equ (REG_SIZE + d_OFFS)
|
||||
endif
|
||||
; every following argument sits one REG_SIZE slot above the previous one
hash_OFFS equ (REG_SIZE + maxLen_OFFS)
|
||||
limit_OFFS equ (REG_SIZE + hash_OFFS)
|
||||
size_OFFS equ (REG_SIZE + limit_OFFS)
|
||||
cycPos_OFFS equ (REG_SIZE + size_OFFS)
|
||||
cycSize_OFFS equ (REG_SIZE + cycPos_OFFS)
|
||||
posRes_OFFS equ (REG_SIZE + cycSize_OFFS)
|
||||
|
||||
; Memory operands for the same arguments. cutValue and d are stack arguments only in
; the non-Linux build; the Linux branch is intentionally empty.
if (IS_LINUX gt 0)
|
||||
else
|
||||
cutValue_PAR equ [r0 + cutValue_OFFS]
|
||||
d_PAR equ [r0 + d_OFFS]
|
||||
endif
|
||||
maxLen_PAR equ [r0 + maxLen_OFFS]
|
||||
hash_PAR equ [r0 + hash_OFFS]
|
||||
limit_PAR equ [r0 + limit_OFFS]
|
||||
size_PAR equ [r0 + size_OFFS]
|
||||
cycPos_PAR equ [r0 + cycPos_OFFS]
|
||||
cycSize_PAR equ [r0 + cycSize_OFFS]
|
||||
posRes_PAR equ [r0 + posRes_OFFS]
|
||||
|
||||
|
||||
; Local frame of GetMatchesSpecN_2, addressed from the realigned stack pointer (r4).
; Slots 0 and 1 each hold two 32-bit values; slots 2..7 are 64-bit.
cutValue_VAR equ DWORD PTR [r4 + 8 * 0]
|
||||
cutValueCur_VAR equ DWORD PTR [r4 + 8 * 0 + 4]
|
||||
cycPos_VAR equ DWORD PTR [r4 + 8 * 1 + 0]
|
||||
cycSize_VAR equ DWORD PTR [r4 + 8 * 1 + 4]
|
||||
hash_VAR equ QWORD PTR [r4 + 8 * 2]
|
||||
limit_VAR equ QWORD PTR [r4 + 8 * 3]
|
||||
size_VAR equ QWORD PTR [r4 + 8 * 4]
|
||||
distances equ QWORD PTR [r4 + 8 * 5]
|
||||
maxLen_VAR equ QWORD PTR [r4 + 8 * 6]
|
||||
|
||||
; slot 7 keeps the stack pointer value from before the frame was carved out
Old_RSP equ QWORD PTR [r4 + 8 * 7]
|
||||
; 8 slots of 8 bytes
LOCAL_SIZE equ 8 * 8
|
||||
|
||||
; COPY_VAR_32 <dstMem>, <srcMem>
; Memory-to-memory copy of a 32-bit value, staged through x3.
; Clobbers: x3.
COPY_VAR_32 macro dstMem, srcMem
        mov     x3, srcMem
        mov     dstMem, x3
endm
|
||||
|
||||
; COPY_VAR_64 <dstMem>, <srcMem>
; Memory-to-memory copy of a full-width value, staged through r3.
; Clobbers: r3.
COPY_VAR_64 macro dstMem, srcMem
        mov     r3, srcMem
        mov     dstMem, r3
endm
|
||||
|
||||
|
||||
; MY_ALIGN_64
|
||||
; GetMatchesSpecN_2 (declared with 13 parameters).
; Walks the 32-bit delta array [hash, hash_lim) and, for each entry, descends the
; two-children-per-node tree stored in son, appending results at d.
; Result: r0 = d (0 when the fin_error path is taken); the final pos is stored through
; the pointer read from the posRes stack argument.
; Register roles are given by the equates that precede this procedure.
MY_PROC GetMatchesSpecN_2, 13
|
||||
MY_PUSH_PRESERVED_ABI_REGS
|
||||
; r0 = stack pointer after the pushes (base for the *_PAR operands);
; carve LOCAL_SIZE bytes below it, align the frame to 64, remember the old RSP
mov r0, RSP
|
||||
lea r3, [r0 - LOCAL_SIZE]
|
||||
and r3, -64
|
||||
mov RSP, r3
|
||||
mov Old_RSP, r0
|
||||
|
||||
; Linux: shuffle the six register arguments into the registers named by the equates.
; The order matters: each move reads a register before a later move overwrites it.
if (IS_LINUX gt 0)
|
||||
mov d, REG_ABI_PARAM_5 ; r13 = r9
|
||||
mov cutValue_VAR, REG_ABI_PARAM_4_x ; = r8
|
||||
mov son, REG_ABI_PARAM_3 ; r9 = r1
|
||||
mov r8, REG_ABI_PARAM_2 ; r8 = r2
|
||||
mov pos, REG_ABI_PARAM_1_x ; r2 = x6
|
||||
mov r1, REG_ABI_PARAM_0 ; r1 = r7
|
||||
else
|
||||
COPY_VAR_32 cutValue_VAR, cutValue_PAR
|
||||
mov d, d_PAR
|
||||
endif
|
||||
|
||||
COPY_VAR_64 limit_VAR, limit_PAR
|
||||
|
||||
mov hash_lim, size_PAR
|
||||
mov size_VAR, hash_lim
|
||||
|
||||
mov cp_x, cycPos_PAR
|
||||
mov hash, hash_PAR
|
||||
|
||||
mov cycSize, cycSize_PAR
|
||||
mov cycSize_VAR, cycSize
|
||||
|
||||
; we want cur in (rcx). So we change the cur and lenLimit variables
|
||||
; lenLimit_x = 1 - (lenLimit - cur), computed in 32 bits
sub lenLimit, cur
|
||||
neg lenLimit_x
|
||||
inc lenLimit_x
|
||||
|
||||
; maxLen_VAR = (32-bit maxLen argument) - lenLimit
mov t0_x, maxLen_PAR
|
||||
sub t0, lenLimit
|
||||
mov maxLen_VAR, t0
|
||||
|
||||
jmp main_loop
|
||||
|
||||
MY_ALIGN_64
|
||||
fill_empty:
|
||||
; ptr1[0] = ptr1[1] = kEmptyHashValue; (single 8-byte store of zero)
|
||||
mov QWORD PTR [ptr1], 0
|
||||
inc pos
|
||||
inc cp_x
|
||||
; the slot reserved at d - 4 for this position gets 0
mov DWORD PTR [d - 4], 0
|
||||
cmp d, limit_VAR
|
||||
jae fin
|
||||
cmp hash, hash_lim
|
||||
je fin
|
||||
|
||||
; MY_ALIGN_64
|
||||
main_loop:
|
||||
; UInt32 delta = *hash++;
|
||||
mov diff_x, [hash] ; delta
|
||||
add hash, 4
|
||||
; mov cycPos_VAR, cp_x
|
||||
|
||||
inc cur
|
||||
; reserve one 32-bit slot at d; it is filled in later through [... - 4]
add d, 4
|
||||
mov m, pos
|
||||
sub m, diff_x; ; matchPos
|
||||
|
||||
; CLzRef *ptr1 = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2;
|
||||
lea ptr1, [son + 8 * cp_r]
|
||||
; mov cycSize, cycSize_VAR
|
||||
cmp pos, cycSize
|
||||
jb directMode ; if (pos < cycSize_VAR)
|
||||
|
||||
; CYC MODE
|
||||
|
||||
cmp diff_x, cycSize
|
||||
jae fill_empty ; if (delta >= cycSize_VAR)
|
||||
|
||||
xor t0_x, t0_x
|
||||
mov cycPos_VAR, cp_x
|
||||
sub cp_x, diff_x
|
||||
; jae prepare_for_tree_loop
|
||||
; add cp_x, cycSize
|
||||
; branch-free wrap: the carry of the sub above selects cycSize or 0
cmovb t0_x, cycSize
|
||||
add cp_x, t0_x ; cp_x += (cycPos < delta ? cycSize : 0)
|
||||
jmp prepare_for_tree_loop
|
||||
|
||||
|
||||
directMode:
|
||||
cmp diff_x, pos
|
||||
je fill_empty ; if (delta == pos)
|
||||
jae fin_error ; if (delta >= pos)
|
||||
|
||||
mov cycPos_VAR, cp_x
|
||||
mov cp_x, m
|
||||
|
||||
prepare_for_tree_loop:
|
||||
mov len0, lenLimit
|
||||
mov hash_VAR, hash
|
||||
; CLzRef *ptr0 = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2 + 1;
|
||||
lea ptr0, [ptr1 + 4]
|
||||
; UInt32 *_distances = ++d;
|
||||
mov distances, d
|
||||
|
||||
; len0 = len1 = -lenLimit
neg len0
|
||||
mov len1, len0
|
||||
|
||||
mov t0_x, cutValue_VAR
|
||||
mov maxLen, maxLen_VAR
|
||||
mov cutValueCur_VAR, t0_x
|
||||
|
||||
MY_ALIGN_32
|
||||
tree_loop:
|
||||
; diff = cur - delta, so [diff + len] and [cur + len] address the two byte strings
neg diff
|
||||
mov len, len0
|
||||
cmp len1, len0
|
||||
cmovb len, len1 ; len = (len1 < len0 ? len1 : len0);
|
||||
add diff, cur
|
||||
|
||||
mov t0_x, [son + cp_r * 8] ; prefetch
|
||||
movzx t0_x, BYTE PTR [diff + 1 * len]
|
||||
; from here cp_r is the address of the node pair, no longer an index
lea cp_r, [son + cp_r * 8]
|
||||
cmp [cur + 1 * len], t0_L
|
||||
je matched_1
|
||||
|
||||
jb left_0
|
||||
|
||||
mov [ptr1], m
|
||||
mov m, [cp_r + 4]
|
||||
lea ptr1, [cp_r + 4]
|
||||
sub diff, cur ; FIX32
|
||||
jmp next_node
|
||||
|
||||
MY_ALIGN_32
|
||||
left_0:
|
||||
mov [ptr0], m
|
||||
mov m, [cp_r]
|
||||
mov ptr0, cp_r
|
||||
sub diff, cur ; FIX32
|
||||
; jmp next_node
|
||||
|
||||
; ------------ NEXT NODE ------------
|
||||
; MY_ALIGN_32
|
||||
next_node:
|
||||
; cycSize shares its register with len, so it is reloaded here
mov cycSize, cycSize_VAR
|
||||
dec cutValueCur_VAR
|
||||
je finish_tree
|
||||
|
||||
add diff_x, pos ; prev_match = pos + diff
|
||||
cmp m, diff_x
|
||||
jae fin_error ; if (new_match >= prev_match)
|
||||
|
||||
mov diff_x, pos
|
||||
sub diff_x, m ; delta = pos - new_match
|
||||
cmp pos, cycSize
|
||||
jae cyc_mode_2 ; if (pos >= cycSize)
|
||||
|
||||
mov cp_x, m
|
||||
test m, m
|
||||
jne tree_loop ; if (m != 0)
|
||||
|
||||
finish_tree:
|
||||
; *ptr0 = *ptr1 = kEmptyHashValue;
|
||||
mov DWORD PTR [ptr0], 0
|
||||
mov DWORD PTR [ptr1], 0
|
||||
|
||||
inc pos
|
||||
|
||||
; _distances[-1] = (UInt32)(d - _distances);
|
||||
mov t0, distances
|
||||
mov t1, d
|
||||
sub t1, t0
|
||||
; byte difference -> count of 32-bit items
shr t1_x, 2
|
||||
mov [t0 - 4], t1_x
|
||||
|
||||
cmp d, limit_VAR
|
||||
jae fin ; if (d >= limit)
|
||||
|
||||
; reload the state whose registers were reused inside the tree walk
mov cp_x, cycPos_VAR
|
||||
mov hash, hash_VAR
|
||||
mov hash_lim, size_VAR
|
||||
inc cp_x
|
||||
cmp hash, hash_lim
|
||||
jne main_loop ; if (hash != size)
|
||||
jmp fin
|
||||
|
||||
|
||||
MY_ALIGN_32
|
||||
cyc_mode_2:
|
||||
cmp diff_x, cycSize
|
||||
jae finish_tree ; if (delta >= cycSize)
|
||||
|
||||
mov cp_x, cycPos_VAR
|
||||
xor t0_x, t0_x
|
||||
sub cp_x, diff_x ; cp_x = cycPos - delta
|
||||
cmovb t0_x, cycSize
|
||||
add cp_x, t0_x ; cp_x += (cycPos < delta ? cycSize : 0)
|
||||
jmp tree_loop
|
||||
|
||||
|
||||
MY_ALIGN_32
|
||||
matched_1:
|
||||
|
||||
; len counts up from a negative value; ZF from the inc means the limit was reached
inc len
|
||||
; cmp len_x, lenLimit_x
|
||||
je short lenLimit_reach
|
||||
movzx t0_x, BYTE PTR [diff + 1 * len]
|
||||
cmp [cur + 1 * len], t0_L
|
||||
jne mismatch
|
||||
|
||||
|
||||
MY_ALIGN_32
|
||||
match_loop:
|
||||
; while (++len != lenLimit) (len[diff] != len[0]) ;
|
||||
|
||||
inc len
|
||||
; cmp len_x, lenLimit_x
|
||||
je short lenLimit_reach
|
||||
movzx t0_x, BYTE PTR [diff + 1 * len]
|
||||
cmp BYTE PTR [cur + 1 * len], t0_L
|
||||
je match_loop
|
||||
|
||||
mismatch:
|
||||
; flags still come from the byte compare that ended the match
jb left_2
|
||||
|
||||
mov [ptr1], m
|
||||
mov m, [cp_r + 4]
|
||||
lea ptr1, [cp_r + 4]
|
||||
mov len1, len
|
||||
|
||||
jmp max_update
|
||||
|
||||
MY_ALIGN_32
|
||||
left_2:
|
||||
mov [ptr0], m
|
||||
mov m, [cp_r]
|
||||
mov ptr0, cp_r
|
||||
mov len0, len
|
||||
|
||||
max_update:
|
||||
sub diff, cur ; restore diff
|
||||
|
||||
cmp maxLen, len
|
||||
jae next_node
|
||||
|
||||
; new longest match: append the pair (len + lenLimit, NOT diff_x) at d
mov maxLen, len
|
||||
add len, lenLimit
|
||||
mov [d], len_x
|
||||
mov t0_x, diff_x
|
||||
not t0_x
|
||||
mov [d + 4], t0_x
|
||||
add d, 8
|
||||
|
||||
jmp next_node
|
||||
|
||||
|
||||
|
||||
MY_ALIGN_32
|
||||
lenLimit_reach:
|
||||
|
||||
; delta = cur - diff; delta1 = delta - 1
mov delta_r, cur
|
||||
sub delta_r, diff
|
||||
lea delta1_r, [delta_r - 1]
|
||||
|
||||
; *ptr1 = node[0]; *ptr0 = node[1]  (cp_r is the node address here)
mov t0_x, [cp_r]
|
||||
mov [ptr1], t0_x
|
||||
mov t0_x, [cp_r + 4]
|
||||
mov [ptr0], t0_x
|
||||
|
||||
mov [d], lenLimit_x
|
||||
mov [d + 4], delta1_x
|
||||
add d, 8
|
||||
|
||||
; _distances[-1] = (UInt32)(d - _distances);
|
||||
mov t0, distances
|
||||
mov t1, d
|
||||
sub t1, t0
|
||||
shr t1_x, 2
|
||||
mov [t0 - 4], t1_x
|
||||
|
||||
mov hash, hash_VAR
|
||||
mov hash_lim, size_VAR
|
||||
|
||||
inc pos
|
||||
mov cp_x, cycPos_VAR
|
||||
inc cp_x
|
||||
|
||||
mov d_lim, limit_VAR
|
||||
mov cycSize, cycSize_VAR
|
||||
; if (hash == size || *hash != delta || lenLimit[diff] != lenLimit[0] || d >= limit)
|
||||
; break;
|
||||
cmp hash, hash_lim
|
||||
je fin
|
||||
cmp d, d_lim
|
||||
jae fin
|
||||
cmp delta_x, [hash]
|
||||
jne main_loop
|
||||
movzx t0_x, BYTE PTR [diff]
|
||||
cmp [cur], t0_L
|
||||
jne main_loop
|
||||
|
||||
; jmp main_loop ; bypass for debug
|
||||
|
||||
; set up the long_loop copy: cp_r = destination node, src = source node,
; len = end of the son buffer (son + cycSize * 8)
mov cycPos_VAR, cp_x
|
||||
shl len, 3 ; cycSize * 8
|
||||
sub diff, cur ; restore diff
|
||||
xor t0_x, t0_x
|
||||
cmp cp_x, delta_x ; cmp (cycPos_VAR, delta)
|
||||
lea cp_r, [son + 8 * cp_r] ; dest
|
||||
lea src, [cp_r + 8 * diff]
|
||||
cmovb t0, len ; t0 = (cycPos_VAR < delta ? cycSize * 8 : 0)
|
||||
add src, t0
|
||||
add len, son ; len = son + cycSize * 8
|
||||
|
||||
|
||||
MY_ALIGN_32
|
||||
long_loop:
|
||||
add hash, 4
|
||||
|
||||
; *(UInt64 *)(void *)ptr = ((const UInt64 *)(const void *)ptr)[diff];
|
||||
|
||||
mov t0, [src]
|
||||
add src, 8
|
||||
mov [cp_r], t0
|
||||
add cp_r, 8
|
||||
cmp src, len
|
||||
cmove src, son ; if end of (son) buffer is reached, we wrap to begin
|
||||
|
||||
; append three 32-bit values: 2, lenLimit_x, delta1_x
mov DWORD PTR [d], 2
|
||||
mov [d + 4], lenLimit_x
|
||||
mov [d + 8], delta1_x
|
||||
add d, 12
|
||||
|
||||
inc cur
|
||||
|
||||
cmp hash, hash_lim
|
||||
je long_footer
|
||||
cmp delta_x, [hash]
|
||||
jne long_footer
|
||||
movzx t0_x, BYTE PTR [diff + cur]
|
||||
cmp [cur], t0_L
|
||||
jne long_footer
|
||||
cmp d, d_lim
|
||||
jb long_loop
|
||||
|
||||
long_footer:
|
||||
; turn the destination address back into an index and advance pos by the
; number of nodes copied (cp_x - cycPos_VAR)
sub cp_r, son
|
||||
shr cp_r, 3
|
||||
add pos, cp_x
|
||||
sub pos, cycPos_VAR
|
||||
mov cycSize, cycSize_VAR
|
||||
|
||||
cmp d, d_lim
|
||||
jae fin
|
||||
cmp hash, hash_lim
|
||||
jne main_loop
|
||||
jmp fin
|
||||
|
||||
|
||||
|
||||
fin_error:
|
||||
; error result: d = 0
xor d, d
|
||||
|
||||
fin:
|
||||
; restore the pre-frame stack pointer, then *posRes = pos and r0 = d
mov RSP, Old_RSP
|
||||
mov t0, [r4 + posRes_OFFS]
|
||||
mov [t0], pos
|
||||
mov r0, d
|
||||
|
||||
MY_POP_PRESERVED_ABI_REGS
|
||||
MY_ENDP
|
||||
|
||||
_TEXT$LZFINDOPT ENDS
|
||||
|
||||
end
|
||||
1303
3rdparty/lzma-21.03beta/Asm/x86/LzmaDecOpt.asm
vendored
Normal file
1303
3rdparty/lzma-21.03beta/Asm/x86/LzmaDecOpt.asm
vendored
Normal file
File diff suppressed because it is too large
Load Diff
263
3rdparty/lzma-21.03beta/Asm/x86/Sha256Opt.asm
vendored
Normal file
263
3rdparty/lzma-21.03beta/Asm/x86/Sha256Opt.asm
vendored
Normal file
@@ -0,0 +1,263 @@
|
||||
; Sha256Opt.asm -- SHA-256 optimized code for SHA-256 x86 hardware instructions
|
||||
; 2021-03-10 : Igor Pavlov : Public domain
|
||||
|
||||
include 7zAsm.asm
|
||||
|
||||
MY_ASM_START
|
||||
|
||||
; .data
|
||||
; public K
|
||||
|
||||
; we can use external SHA256_K_ARRAY defined in Sha256.c
|
||||
; but we must guarantee that SHA256_K_ARRAY is aligned for 16-bytes
|
||||
|
||||
COMMENT @
|
||||
ifdef x64
|
||||
K_CONST equ SHA256_K_ARRAY
|
||||
else
|
||||
K_CONST equ _SHA256_K_ARRAY
|
||||
endif
|
||||
EXTRN K_CONST:xmmword
|
||||
@
|
||||
|
||||
; Read-only data for the SHA-256 routine.
CONST SEGMENT
|
||||
|
||||
; 16-byte shuffle mask: reverses the byte order inside each 32-bit word
; (3,2,1,0 / 7,6,5,4 / ...); used as the pshufb control in LOAD_W.
align 16
|
||||
Reverse_Endian_Mask db 3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12
|
||||
|
||||
; COMMENT @
|
||||
; K_CONST: 16 rows of 4 dwords = the 64 SHA-256 round constants, 16-byte aligned
; because RND4 reads them with movdqa.
align 16
|
||||
K_CONST \
|
||||
DD 0428a2f98H, 071374491H, 0b5c0fbcfH, 0e9b5dba5H
|
||||
DD 03956c25bH, 059f111f1H, 0923f82a4H, 0ab1c5ed5H
|
||||
DD 0d807aa98H, 012835b01H, 0243185beH, 0550c7dc3H
|
||||
DD 072be5d74H, 080deb1feH, 09bdc06a7H, 0c19bf174H
|
||||
DD 0e49b69c1H, 0efbe4786H, 00fc19dc6H, 0240ca1ccH
|
||||
DD 02de92c6fH, 04a7484aaH, 05cb0a9dcH, 076f988daH
|
||||
DD 0983e5152H, 0a831c66dH, 0b00327c8H, 0bf597fc7H
|
||||
DD 0c6e00bf3H, 0d5a79147H, 006ca6351H, 014292967H
|
||||
DD 027b70a85H, 02e1b2138H, 04d2c6dfcH, 053380d13H
|
||||
DD 0650a7354H, 0766a0abbH, 081c2c92eH, 092722c85H
|
||||
DD 0a2bfe8a1H, 0a81a664bH, 0c24b8b70H, 0c76c51a3H
|
||||
DD 0d192e819H, 0d6990624H, 0f40e3585H, 0106aa070H
|
||||
DD 019a4c116H, 01e376c08H, 02748774cH, 034b0bcb5H
|
||||
DD 0391c0cb3H, 04ed8aa4aH, 05b9cca4fH, 0682e6ff3H
|
||||
DD 0748f82eeH, 078a5636fH, 084c87814H, 08cc70208H
|
||||
DD 090befffaH, 0a4506cebH, 0bef9a3f7H, 0c67178f2H
|
||||
; @
|
||||
|
||||
CONST ENDS
|
||||
|
||||
; _TEXT$SHA256OPT SEGMENT 'CODE'
|
||||
|
||||
; 32-bit build: enable the .686 instruction set and XMM instructions.
ifndef x64
|
||||
.686
|
||||
.xmm
|
||||
endif
|
||||
|
||||
; rNum = block counter register.
; LOCAL_SIZE = bytes of stack scratch used by MY_PROLOG / MY_EPILOG:
;   x64 non-Linux: 2 x 16 bytes (xmm8 and xmm9 are saved there)
;   x86:           1 x 16 bytes (the state0_save slot at [r4])
;   x64 Linux:     not defined (the prolog/epilog emit nothing in that case)
ifdef x64
|
||||
rNum equ REG_ABI_PARAM_2
|
||||
if (IS_LINUX eq 0)
|
||||
LOCAL_SIZE equ (16 * 2)
|
||||
endif
|
||||
else
|
||||
rNum equ r0
|
||||
LOCAL_SIZE equ (16 * 1)
|
||||
endif
|
||||
|
||||
; state pointer and data pointer arrive in the first two ABI argument registers
rState equ REG_ABI_PARAM_0
|
||||
rData equ REG_ABI_PARAM_1
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
; MY_SHA_INSTR <opByte>, <regN>, <rmN>
; Emits a 4-byte register-to-register instruction by hand:
;   0F 38 <opByte> <ModRM>, with ModRM = 0C0h + regN * 8 + rmN
; regN / rmN are xmm register numbers (numeric expressions), not register names.
MY_SHA_INSTR macro opByte, regN, rmN
        db      0fH, 038H, opByte, (0c0H + regN * 8 + rmN)
endm
|
||||
|
||||
; Third opcode byte (after 0F 38) passed to MY_SHA_INSTR for each SHA-256 instruction.
cmd_sha256rnds2 equ 0cbH
|
||||
cmd_sha256msg1 equ 0ccH
|
||||
cmd_sha256msg2 equ 0cdH
|
||||
|
||||
; MY_sha256rnds2 <regN>, <rmN>
; Hand-encoded sha256rnds2; both arguments are xmm register numbers.
MY_sha256rnds2 macro regN, rmN
        MY_SHA_INSTR cmd_sha256rnds2, regN, rmN
endm
|
||||
|
||||
; MY_sha256msg1 <regN>, <rmN>
; Hand-encoded sha256msg1; both arguments are xmm register numbers.
MY_sha256msg1 macro regN, rmN
        MY_SHA_INSTR cmd_sha256msg1, regN, rmN
endm
|
||||
|
||||
; MY_sha256msg2 <regN>, <rmN>
; Hand-encoded sha256msg2; both arguments are xmm register numbers.
MY_sha256msg2 macro regN, rmN
        MY_SHA_INSTR cmd_sha256msg2, regN, rmN
endm
|
||||
|
||||
; MY_PROLOG (SHA-256): entry sequence for Sha256_UpdateBlocks_HW.
;   x64 non-Linux: saves xmm6..xmm9 (xmm6/xmm7 above the return address, xmm8/xmm9 in
;                  LOCAL_SIZE bytes of newly reserved stack).
;   x64 Linux:     emits nothing.
;   x86:           loads the stack arguments, then builds a 16-byte aligned frame with
;                  r5 holding the pre-alignment stack pointer.
MY_PROLOG macro
|
||||
ifdef x64
|
||||
if (IS_LINUX eq 0)
|
||||
; NOTE(review): [r4 + 8] .. [r4 + 8 + 31] is presumably the caller-provided shadow
; space of the Windows x64 ABI - confirm; movdqa needs these addresses 16-aligned
movdqa [r4 + 8], xmm6
|
||||
movdqa [r4 + 8 + 16], xmm7
|
||||
; the extra 8 keeps [r4] 16-byte aligned for the movdqa stores below
sub r4, LOCAL_SIZE + 8
|
||||
movdqa [r4 ], xmm8
|
||||
movdqa [r4 + 16], xmm9
|
||||
endif
|
||||
else ; x86
|
||||
if (IS_CDECL gt 0)
|
||||
; cdecl: all three arguments are on the stack above the return address
mov rState, [r4 + REG_SIZE * 1]
|
||||
mov rData, [r4 + REG_SIZE * 2]
|
||||
mov rNum, [r4 + REG_SIZE * 3]
|
||||
else ; fastcall
|
||||
; fastcall: only the third argument is on the stack
mov rNum, [r4 + REG_SIZE * 1]
|
||||
endif
|
||||
push r5
|
||||
mov r5, r4
|
||||
and r4, -16
|
||||
sub r4, LOCAL_SIZE
|
||||
endif
|
||||
endm
|
||||
|
||||
; MY_EPILOG (SHA-256): exact mirror of MY_PROLOG, followed by MY_ENDP (which emits the
; return and closes the procedure).
MY_EPILOG macro
|
||||
ifdef x64
|
||||
if (IS_LINUX eq 0)
|
||||
; restore xmm8/xmm9 from the local area, release it, then restore xmm6/xmm7
movdqa xmm8, [r4]
|
||||
movdqa xmm9, [r4 + 16]
|
||||
add r4, LOCAL_SIZE + 8
|
||||
movdqa xmm6, [r4 + 8]
|
||||
movdqa xmm7, [r4 + 8 + 16]
|
||||
endif
|
||||
else ; x86
|
||||
; r5 holds the stack pointer from before the alignment in MY_PROLOG
mov r4, r5
|
||||
pop r5
|
||||
endif
|
||||
MY_ENDP
|
||||
endm
|
||||
|
||||
|
||||
; xmm register assignment for the SHA-256 routine.
; msg and tmp are the same register (xmm0); a value in one does not survive a write
; to the other.
msg equ xmm0
|
||||
tmp equ xmm0
|
||||
; register NUMBERS (not names): passed to the hand-encoded MY_sha256* macros
state0_N equ 2
|
||||
state1_N equ 3
|
||||
; first of the four message registers: xmm(w_regs) .. xmm(w_regs + 3)
w_regs equ 4
|
||||
|
||||
|
||||
state1_save equ xmm1
|
||||
state0 equ @CatStr(xmm, %state0_N)
|
||||
state1 equ @CatStr(xmm, %state1_N)
|
||||
|
||||
|
||||
; x64 has xmm8/xmm9 available; x86 keeps state0_save on the stack and shares xmm0 for
; the mask, which is why Sha256_UpdateBlocks_HW reloads the mask per block on x86.
ifdef x64
|
||||
state0_save equ xmm8
|
||||
mask2 equ xmm9
|
||||
else
|
||||
state0_save equ [r4]
|
||||
mask2 equ xmm0
|
||||
endif
|
||||
|
||||
; LOAD_MASK: mask2 = Reverse_Endian_Mask (aligned 16-byte load).
LOAD_MASK macro
|
||||
movdqa mask2, XMMWORD PTR Reverse_Endian_Mask
|
||||
endm
|
||||
|
||||
; LOAD_W <wIdx>
; Loads 16 message bytes from [rData + 16 * wIdx] (unaligned load) into register
; xmm(w_regs + wIdx) and shuffles them with mask2.
; Requires mask2 to be loaded (LOAD_MASK) beforehand.
LOAD_W macro wIdx:req
        movdqu  @CatStr(xmm, %(w_regs + wIdx)), [rData + (16 * (wIdx))]
        pshufb  @CatStr(xmm, %(w_regs + wIdx)), mask2
endm
|
||||
|
||||
|
||||
; Scheduling offsets used by RND4: pre1 selects the step window in which sha256msg1 is
; issued, pre2 the window for the palignr / paddd / sha256msg2 group.
; Constraint on the two values:
; pre1 <= 4 && pre2 >= 1 && pre1 > pre2 && (pre1 - pre2) <= 1
|
||||
pre1 equ 3
|
||||
pre2 equ 2
|
||||
|
||||
|
||||
|
||||
; RND4 <k>: step k (0..15) of one block; each step issues two sha256rnds2 instructions.
; msg = K_CONST[4k .. 4k+3] + xmm(w_regs + (k mod 4)); the second sha256rnds2 uses the
; upper half of msg, moved down by pshufd 0Eh.
; The two conditional sections interleave the message-schedule instructions for later
; steps; they are emitted only while k is inside the windows set by pre1 / pre2.
; Clobbers: msg/tmp (xmm0), state0, state1 and the message registers.
RND4 macro k
|
||||
movdqa msg, xmmword ptr [K_CONST + (k) * 16]
|
||||
paddd msg, @CatStr(xmm, %(w_regs + ((k + 0) mod 4)))
|
||||
; the sha256rnds2 instruction reads its message words implicitly from xmm0 (= msg)
MY_sha256rnds2 state0_N, state1_N
|
||||
pshufd msg, msg, 0eH
|
||||
|
||||
if (k GE (4 - pre1)) AND (k LT (16 - pre1))
|
||||
; w4[0] = msg1(w4[-4], w4[-3])
|
||||
MY_sha256msg1 (w_regs + ((k + pre1) mod 4)), (w_regs + ((k + pre1 - 3) mod 4))
|
||||
endif
|
||||
|
||||
MY_sha256rnds2 state1_N, state0_N
|
||||
|
||||
if (k GE (4 - pre2)) AND (k LT (16 - pre2))
|
||||
; tmp is xmm0 as well: msg is dead at this point, so it can be reused
movdqa tmp, @CatStr(xmm, %(w_regs + ((k + pre2 - 1) mod 4)))
|
||||
palignr tmp, @CatStr(xmm, %(w_regs + ((k + pre2 - 2) mod 4))), 4
|
||||
paddd @CatStr(xmm, %(w_regs + ((k + pre2) mod 4))), tmp
|
||||
; w4[0] = msg2(w4[0], w4[-1])
|
||||
MY_sha256msg2 %(w_regs + ((k + pre2) mod 4)), %(w_regs + ((k + pre2 - 1) mod 4))
|
||||
endif
|
||||
endm
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
; REVERSE_STATE: regroups the eight state words held in state0 / state1.
; In:  state0 = dcba, state1 = hgfe   (lane order as written in the comments below)
; Out: state0 = cdgh, state1 = abef
; The procedure applies it once after loading the state and once before storing it.
; Clobbers: tmp (xmm0).
REVERSE_STATE macro
|
||||
; state0 ; dcba
|
||||
; state1 ; hgfe
|
||||
pshufd tmp, state0, 01bH ; abcd
|
||||
pshufd state0, state1, 01bH ; efgh
|
||||
movdqa state1, state0 ; efgh
|
||||
punpcklqdq state0, tmp ; cdgh
|
||||
punpckhqdq state1, tmp ; abef
|
||||
endm
|
||||
|
||||
|
||||
; Sha256_UpdateBlocks_HW (declared with 3 parameters).
; In:   rState = pointer to the 32-byte state (two 16-byte halves, unaligned access)
;       rData  = pointer to the input, consumed in 64-byte blocks
;       rNum   = number of blocks; 0 returns immediately without touching the state
; Out:  the state at [rState] is updated in place.
; Uses: xmm0..xmm7 in every build, plus xmm8/xmm9 on x64 (saved by MY_PROLOG on
;       non-Linux x64); state0_save lives at [r4] on x86.
MY_PROC Sha256_UpdateBlocks_HW, 3
        MY_PROLOG

        ; zero-count guard; same test/jz idiom as the zero checks in the XzCrc64 prologs
        test    rNum, rNum
        jz      end_c

        movdqu  state0, [rState]        ; dcba
        movdqu  state1, [rState + 16]   ; hgfe

        REVERSE_STATE

        ; x64: mask2 has its own register, so it is loaded once outside the loop
        ifdef x64
        LOAD_MASK
        endif

        align 16
nextBlock:
        ; keep the incoming state for the feed-forward addition after the 16 steps
        movdqa  state0_save, state0
        movdqa  state1_save, state1

        ; x86: mask2 shares xmm0 with msg/tmp and is overwritten by RND4, so it has
        ; to be reloaded for every block
        ifndef x64
        LOAD_MASK
        endif

        LOAD_W 0
        LOAD_W 1
        LOAD_W 2
        LOAD_W 3

        ; 16 unrolled steps, k = 0..15
        k = 0
        rept 16
          RND4 k
          k = k + 1
        endm

        paddd   state0, state0_save
        paddd   state1, state1_save

        add     rData, 64
        sub     rNum, 1
        jnz     nextBlock

        REVERSE_STATE

        movdqu  [rState], state0
        movdqu  [rState + 16], state1

end_c:
; MY_EPILOG also emits MY_ENDP (return + end of procedure)
MY_EPILOG
|
||||
|
||||
; _TEXT$SHA256OPT ENDS
|
||||
|
||||
end
|
||||
239
3rdparty/lzma-21.03beta/Asm/x86/XzCrc64Opt.asm
vendored
Normal file
239
3rdparty/lzma-21.03beta/Asm/x86/XzCrc64Opt.asm
vendored
Normal file
@@ -0,0 +1,239 @@
|
||||
; XzCrc64Opt.asm -- CRC64 calculation : optimized version
|
||||
; 2021-02-06 : Igor Pavlov : Public domain
|
||||
|
||||
include 7zAsm.asm
|
||||
|
||||
MY_ASM_START
|
||||
|
||||
ifdef x64
|
||||
|
||||
; Register roles for the x64 CRC64 code:
;   rD = data pointer (in the main loop: a negative offset that counts up to 0)
;   rN = byte count   (in the main loop: the aligned end address)
;   rT = lookup table base
;   num_VAR = saved end-of-data address (a register in this build)
rD equ r9
|
||||
rN equ r10
|
||||
rT equ r5
|
||||
num_VAR equ r8
|
||||
|
||||
; 4 data bytes at (rD + rN)
SRCDAT4 equ dword ptr [rD + rN * 1]
|
||||
|
||||
; CRC_XOR <acc>, <idx>, <tbl>
; acc ^= 64-bit entry number <idx> of sub-table <tbl>; sub-tables are 0800h bytes apart
; (256 entries of 8 bytes) starting at rT.
CRC_XOR macro acc:req, idx:req, tbl:req
        xor     acc, QWORD PTR [rT + idx * 8 + 0800h * tbl]
endm
|
||||
|
||||
; CRC1b (x64): folds one data byte into the CRC held in r0.
;   index = [rD] xor (low byte of r0); r0 = (r0 >> 8) xor table0[index]
; Advances rD by 1 and decrements rN; the flags left by the final dec are used by the
; caller (MY_PROLOG follows the macro with jnz).
; Clobbers: x3, x6.
CRC1b macro
|
||||
movzx x6, BYTE PTR [rD]
|
||||
inc rD
|
||||
movzx x3, x0_L
|
||||
xor x6, x3
|
||||
shr r0, 8
|
||||
CRC_XOR r0, r6, 0
|
||||
dec rN
|
||||
endm
|
||||
|
||||
; MY_PROLOG <crc_end> (x64): common entry code for the CRC64 procedure.
; Saves registers, loads the four arguments, processes single bytes until rD is 4-byte
; aligned, and prepares the biased pointers for the 4-bytes-per-step main loop.
; Jumps to <crc_end> (the label defined by MY_EPILOG) when the main loop must be skipped.
MY_PROLOG macro crc_end:req
|
||||
ifdef ABI_LINUX
|
||||
MY_PUSH_2_REGS
|
||||
else
|
||||
MY_PUSH_4_REGS
|
||||
endif
|
||||
; argument order: (crc value, data, size, table)
mov r0, REG_ABI_PARAM_0
|
||||
mov rN, REG_ABI_PARAM_2
|
||||
mov rT, REG_ABI_PARAM_3
|
||||
mov rD, REG_ABI_PARAM_1
|
||||
test rN, rN
|
||||
jz crc_end
|
||||
; byte-at-a-time until rD is a multiple of 4 or the data runs out
@@:
|
||||
test rD, 3
|
||||
jz @F
|
||||
CRC1b
|
||||
; flags here come from "dec rN" at the end of CRC1b
jnz @B
|
||||
@@:
|
||||
; fewer than 8 bytes left: let the byte loop in MY_EPILOG handle them
cmp rN, 8
|
||||
jb crc_end
|
||||
; rN = end address (kept in num_VAR); then rN = (end - 4) rounded down to 4, and
; rD = data - rN, a negative offset, so SRCDAT4 = [rD + rN] still reads at data
add rN, rD
|
||||
mov num_VAR, rN
|
||||
sub rN, 4
|
||||
and rN, NOT 3
|
||||
sub rD, rN
|
||||
; pre-xor the first 4 data bytes into the CRC
mov x1, SRCDAT4
|
||||
xor r0, r1
|
||||
; from now on SRCDAT4 addresses the dword after the one being processed
add rN, 4
|
||||
endm
|
||||
|
||||
; MY_EPILOG <crc_end> (x64): common exit code for the CRC64 procedure.
; Reached from the main loop with rD = 0. Cancels the look-ahead xor, converts the
; pointers back to (rD = current address, rN = bytes left), handles the remaining
; bytes one at a time and restores the saved registers. Defines the label <crc_end>.
MY_EPILOG macro crc_end:req
|
||||
; the main loop has already xor-ed the next dword into r0: xor it again to undo
sub rN, 4
|
||||
mov x1, SRCDAT4
|
||||
xor r0, r1
|
||||
; rD = current address, rN = saved end address - rD = bytes still to process
mov rD, rN
|
||||
mov rN, num_VAR
|
||||
sub rN, rD
|
||||
crc_end:
|
||||
test rN, rN
|
||||
jz @F
|
||||
CRC1b
|
||||
jmp crc_end
|
||||
@@:
|
||||
ifdef ABI_LINUX
|
||||
MY_POP_2_REGS
|
||||
else
|
||||
MY_POP_4_REGS
|
||||
endif
|
||||
endm
|
||||
|
||||
; XzCrc64UpdateT4 (x64, declared with 4 parameters): CRC64 update using four lookup
; sub-tables, 4 data bytes per main-loop iteration.
; Arguments as loaded by MY_PROLOG: (crc value, data, size, table). Result in r0.
MY_PROC XzCrc64UpdateT4, 4
|
||||
MY_PROLOG crc_end_4
|
||||
align 16
|
||||
main_loop_4:
|
||||
; r1 = next 4 data bytes (zero-extended); the current 4 are already xor-ed into r0
mov x1, SRCDAT4
|
||||
; split the low 32 bits of r0 into four table indices, shifting r0 right by 32 overall
movzx x2, x0_L
|
||||
movzx x3, x0_H
|
||||
shr r0, 16
|
||||
movzx x6, x0_L
|
||||
movzx x7, x0_H
|
||||
shr r0, 16
|
||||
; two independent xor chains (r1 and r0), merged afterwards
CRC_XOR r1, r2, 3
|
||||
CRC_XOR r0, r3, 2
|
||||
CRC_XOR r1, r6, 1
|
||||
CRC_XOR r0, r7, 0
|
||||
xor r0, r1
|
||||
|
||||
; rD is a negative offset; the loop ends when it reaches 0
add rD, 4
|
||||
jnz main_loop_4
|
||||
|
||||
MY_EPILOG crc_end_4
|
||||
MY_ENDP
|
||||
|
||||
else
|
||||
; x86 (32-bit)
|
||||
|
||||
; Register roles for the x86 (32-bit) CRC64 code: rD = data pointer / negative offset,
; rN = byte count / aligned end address, rT = lookup table base.
rD equ r1
|
||||
rN equ r7
|
||||
rT equ r5
|
||||
|
||||
; Offset of the 64-bit crc argument from r4 inside the procedure.
; NOTE(review): 5 slots presumably = 4 registers pushed by MY_PUSH_4_REGS + the return
; address - confirm against 7zAsm.asm.
crc_OFFS equ (REG_SIZE * 5)
|
||||
|
||||
if (IS_CDECL gt 0) or (IS_LINUX gt 0)
|
||||
; cdecl or (GNU fastcall) stack:
|
||||
; (UInt32 *) table
|
||||
; size_t size
|
||||
; void * data
|
||||
; (UInt64) crc
|
||||
; ret-ip <-(r4)
|
||||
; the crc argument is 8 bytes wide; the other arguments take one slot each
data_OFFS equ (8 + crc_OFFS)
|
||||
size_OFFS equ (REG_SIZE + data_OFFS)
|
||||
table_OFFS equ (REG_SIZE + size_OFFS)
|
||||
; the size argument slot is reused to store the end-of-data address
num_VAR equ [r4 + size_OFFS]
|
||||
table_VAR equ [r4 + table_OFFS]
|
||||
else
|
||||
; Windows fastcall:
|
||||
; r1 = data, r2 = size
|
||||
; stack:
|
||||
; (UInt32 *) table
|
||||
; (UInt64) crc
|
||||
; ret-ip <-(r4)
|
||||
table_OFFS equ (8 + crc_OFFS)
|
||||
table_VAR equ [r4 + table_OFFS]
|
||||
; no size slot on the stack here: the table slot is reused once rT has been loaded
num_VAR equ table_VAR
|
||||
endif
|
||||
|
||||
; 4 data bytes at (rD + rN)
SRCDAT4 equ dword ptr [rD + rN * 1]
|
||||
|
||||
; CRC <opLo>, <opHi>, <accLo>, <accHi>, <idx>, <tbl>
; Applies instruction <opLo> to accLo with the low dword, and <opHi> to accHi with the
; high dword, of 64-bit entry <idx> in sub-table <tbl> (sub-tables are 0800h bytes
; apart, starting at rT).
CRC macro opLo:req, opHi:req, accLo:req, accHi:req, idx:req, tbl:req
        opLo    accLo, DWORD PTR [rT + idx * 8 + 0800h * tbl]
        opHi    accHi, DWORD PTR [rT + idx * 8 + 0800h * tbl + 4]
endm
|
||||
|
||||
; CRC_XOR <accLo>, <accHi>, <idx>, <tbl>
; (accHi:accLo) ^= 64-bit entry <idx> of sub-table <tbl>, done as two 32-bit xors.
CRC_XOR macro accLo:req, accHi:req, idx:req, tbl:req
        xor     accLo, DWORD PTR [rT + idx * 8 + 0800h * tbl]
        xor     accHi, DWORD PTR [rT + idx * 8 + 0800h * tbl + 4]
endm
|
||||
|
||||
|
||||
; CRC1b (x86): folds one data byte into the 64-bit CRC held in r2:r0 (high:low).
;   index = [rD] xor (low byte of r0); crc = (crc >> 8) xor table0[index]
; Advances rD by 1 and decrements rN; the flags left by the final dec are used by the
; caller (MY_PROLOG follows the macro with jnz).
; Clobbers: x3, x6.
CRC1b macro
|
||||
movzx x6, BYTE PTR [rD]
|
||||
inc rD
|
||||
movzx x3, x0_L
|
||||
xor x6, x3
|
||||
; 64-bit right shift by 8 across the register pair
shrd r0, r2, 8
|
||||
shr r2, 8
|
||||
CRC_XOR r0, r2, r6, 0
|
||||
dec rN
|
||||
endm
|
||||
|
||||
; MY_PROLOG <crc_end> (x86): common entry code for the CRC64 procedure.
; Saves four registers, loads the arguments (crc into r2:r0), processes single bytes
; until rD is 4-byte aligned, and prepares the biased pointers for the main loop.
; Jumps to <crc_end> (the label defined by MY_EPILOG) when the main loop must be skipped.
MY_PROLOG macro crc_end:req
|
||||
MY_PUSH_4_REGS
|
||||
|
||||
if (IS_CDECL gt 0) or (IS_LINUX gt 0)
|
||||
; NOTE(review): this adjusts the parameter count that MY_ENDP uses to size its
; "ret N"; presumably compensates for data/size being on the stack here - confirm
proc_numParams = proc_numParams + 2 ; for ABI_LINUX
|
||||
mov rN, [r4 + size_OFFS]
|
||||
mov rD, [r4 + data_OFFS]
|
||||
else
|
||||
; Windows fastcall: size arrives in r2; rD (= r1) already holds data
mov rN, r2
|
||||
endif
|
||||
|
||||
; 64-bit crc argument: low half -> x0, high half -> x2
mov x0, [r4 + crc_OFFS]
|
||||
mov x2, [r4 + crc_OFFS + 4]
|
||||
mov rT, table_VAR
|
||||
test rN, rN
|
||||
jz crc_end
|
||||
; byte-at-a-time until rD is a multiple of 4 or the data runs out
@@:
|
||||
test rD, 3
|
||||
jz @F
|
||||
CRC1b
|
||||
; flags here come from "dec rN" at the end of CRC1b
jnz @B
|
||||
@@:
|
||||
; fewer than 8 bytes left: let the byte loop in MY_EPILOG handle them
cmp rN, 8
|
||||
jb crc_end
|
||||
add rN, rD
|
||||
|
||||
; end-of-data address goes to a stack slot (rT was loaded above, so in the
; Windows fastcall layout the table slot is free to be overwritten)
mov num_VAR, rN
|
||||
|
||||
; rN = (end - 4) rounded down to 4; rD = data - rN (negative offset)
sub rN, 4
|
||||
and rN, NOT 3
|
||||
sub rD, rN
|
||||
; pre-xor the first 4 data bytes into the low half of the CRC
xor r0, SRCDAT4
|
||||
add rN, 4
|
||||
endm
|
||||
|
||||
; MY_EPILOG <crc_end> (x86): common exit code for the CRC64 procedure.
; Reached from the main loop with rD = 0. Cancels the look-ahead xor, converts the
; pointers back to (rD = current address, rN = bytes left), handles the remaining
; bytes one at a time and restores the saved registers. Defines the label <crc_end>.
MY_EPILOG macro crc_end:req
|
||||
; undo the xor of the dword that the main loop folded in ahead of time
sub rN, 4
|
||||
xor r0, SRCDAT4
|
||||
|
||||
; rD = current address, rN = saved end address - rD = bytes still to process
mov rD, rN
|
||||
mov rN, num_VAR
|
||||
sub rN, rD
|
||||
crc_end:
|
||||
test rN, rN
|
||||
jz @F
|
||||
CRC1b
|
||||
jmp crc_end
|
||||
@@:
|
||||
MY_POP_4_REGS
|
||||
endm
|
||||
|
||||
; XzCrc64UpdateT4 (x86, declared with 5 parameters - the 64-bit crc counts as two):
; CRC64 update using four lookup sub-tables, 4 data bytes per main-loop iteration.
; The running CRC is kept in r2:r0 (high:low); r3 and r6 are scratch.
MY_PROC XzCrc64UpdateT4, 5
|
||||
MY_PROLOG crc_end_4
|
||||
; first table index for the loop; later iterations compute it at the loop bottom
movzx x6, x0_L
|
||||
align 16
|
||||
main_loop_4:
|
||||
; r3 = next 4 data bytes xor old high half: the low half of (crc >> 32) with the
; look-ahead data already folded in
mov r3, SRCDAT4
|
||||
xor r3, r2
|
||||
|
||||
; first lookup: xor into the low half, but MOV into r2 (new high half starts here)
CRC xor, mov, r3, r2, r6, 3
|
||||
movzx x6, x0_H
|
||||
shr r0, 16
|
||||
CRC_XOR r3, r2, r6, 2
|
||||
|
||||
movzx x6, x0_L
|
||||
; r0 is reduced to its last remaining byte and used directly as the index
movzx x0, x0_H
|
||||
CRC_XOR r3, r2, r6, 1
|
||||
CRC_XOR r3, r2, r0, 0
|
||||
; prepare the next iteration's first index, then move the low half back to r0
movzx x6, x3_L
|
||||
mov r0, r3
|
||||
|
||||
; rD is a negative offset; the loop ends when it reaches 0
add rD, 4
|
||||
jnz main_loop_4
|
||||
|
||||
MY_EPILOG crc_end_4
|
||||
MY_ENDP
|
||||
|
||||
endif ; ! x64
|
||||
|
||||
end
|
||||
Reference in New Issue
Block a user