; Sha1Opt.asm -- SHA-1 optimized code for SHA-1 x86 hardware instructions
; 2021-03-10 : Igor Pavlov : Public domain

include 7zAsm.asm

MY_ASM_START


CONST   SEGMENT

align 16
Reverse_Endian_Mask db 15,14,13,12, 11,10,9,8, 7,6,5,4, 3,2,1,0
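; Note: Reverse_Endian_Mask is the pshufb control used by LOAD_W below; it
; reverses all 16 bytes of each loaded message chunk, converting the four
; big-endian 32-bit words of a SHA-1 block into the lane order expected by
; the SHA instructions (W[0] ends up in the most significant dword).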

CONST   ENDS

; _TEXT$SHA1OPT SEGMENT 'CODE'

ifndef x64
    .686
    .xmm
endif

ifdef x64
        rNum    equ REG_ABI_PARAM_2
    if (IS_LINUX eq 0)
        LOCAL_SIZE equ (16 * 2)
    endif
else
        rNum    equ r0
        LOCAL_SIZE equ (16 * 1)
endif

rState equ REG_ABI_PARAM_0
rData  equ REG_ABI_PARAM_1

MY_sha1rnds4 macro a1, a2, imm
        db 0fH, 03aH, 0ccH, (0c0H + a1 * 8 + a2), imm
endm
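; Note: MY_sha1rnds4 emits the raw encoding of "sha1rnds4 xmmA1, xmmA2, imm"
; (NP 0F 3A CC /r ib), so the file assembles even with assemblers that lack
; the SHA-1 mnemonics; a1 is the ModRM reg field (destination) and a2 the
; r/m field (source). Illustrative expansion (not part of the original source):
;       MY_sha1rnds4 2, 0, 1    ; same bytes as:  sha1rnds4 xmm2, xmm0, 1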

MY_SHA_INSTR macro cmd, a1, a2
        db 0fH, 038H, cmd, (0c0H + a1 * 8 + a2)
endm

cmd_sha1nexte   equ 0c8H
cmd_sha1msg1    equ 0c9H
cmd_sha1msg2    equ 0caH

MY_sha1nexte macro a1, a2
        MY_SHA_INSTR  cmd_sha1nexte, a1, a2
endm

MY_sha1msg1 macro a1, a2
        MY_SHA_INSTR  cmd_sha1msg1, a1, a2
endm

MY_sha1msg2 macro a1, a2
        MY_SHA_INSTR  cmd_sha1msg2, a1, a2
endm
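; Note: the two-operand SHA-1 instructions share the NP 0F 38 /r encoding and
; differ only in the opcode byte defined above (C8h = sha1nexte,
; C9h = sha1msg1, CAh = sha1msg2). Illustrative expansion (not part of the
; original source):
;       MY_sha1msg1 4, 5        ; same bytes as:  sha1msg1 xmm4, xmm5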

MY_PROLOG macro
    ifdef x64
      if (IS_LINUX eq 0)
        movdqa  [r4 + 8], xmm6
        movdqa  [r4 + 8 + 16], xmm7
        sub     r4, LOCAL_SIZE + 8
        movdqa  [r4     ], xmm8
        movdqa  [r4 + 16], xmm9
      endif
    else ; x86
      if (IS_CDECL gt 0)
        mov     rState, [r4 + REG_SIZE * 1]
        mov     rData,  [r4 + REG_SIZE * 2]
        mov     rNum,   [r4 + REG_SIZE * 3]
      else ; fastcall
        mov     rNum,   [r4 + REG_SIZE * 1]
      endif
        push    r5
        mov     r5, r4
        and     r4, -16
        sub     r4, LOCAL_SIZE
    endif
endm
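; Prolog notes (r4 is the stack pointer in 7zAsm.asm naming):
;  - Win64: xmm6-xmm9 are non-volatile in the Microsoft x64 ABI, so xmm6/xmm7
;    are saved to the caller's home space and xmm8/xmm9 to LOCAL_SIZE bytes of
;    newly allocated, 16-byte aligned stack. The System V x86-64 ABI has no
;    non-volatile xmm registers, so the Linux build saves nothing.
;  - x86: cdecl reads all three arguments from the stack, fastcall only rNum;
;    the old stack pointer is kept in r5 and r4 is aligned to 16 bytes so the
;    movdqa spill of abcd_save (defined below) is valid.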

MY_EPILOG macro
    ifdef x64
      if (IS_LINUX eq 0)
        movdqa  xmm8, [r4]
        movdqa  xmm9, [r4 + 16]
        add     r4, LOCAL_SIZE + 8
        movdqa  xmm6, [r4 + 8]
        movdqa  xmm7, [r4 + 8 + 16]
      endif
    else ; x86
        mov     r4, r5
        pop     r5
    endif
    MY_ENDP
endm


e0_N       equ 0
e1_N       equ 1
abcd_N     equ 2
e0_save_N  equ 3
w_regs     equ 4

e0      equ @CatStr(xmm, %e0_N)
e1      equ @CatStr(xmm, %e1_N)
abcd    equ @CatStr(xmm, %abcd_N)
e0_save equ @CatStr(xmm, %e0_save_N)

ifdef x64
        abcd_save    equ  xmm8
        mask2        equ  xmm9
else
        abcd_save    equ  [r4]
        mask2        equ  e1
endif
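; Register budget: in x64 mode xmm8/xmm9 are free, so the saved abcd and the
; byte-swap mask stay in registers and LOAD_MASK runs once, before the block
; loop. In 32-bit mode only xmm0-xmm7 exist: abcd_save is spilled to the
; aligned stack slot [r4], mask2 aliases e1, and LOAD_MASK is repeated at the
; start of every block.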

LOAD_MASK macro
        movdqa  mask2, XMMWORD PTR Reverse_Endian_Mask
endm

LOAD_W macro k:req
        movdqu  @CatStr(xmm, %(w_regs + k)), [rData + (16 * (k))]
        pshufb  @CatStr(xmm, %(w_regs + k)), mask2
endm
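; LOAD_W k loads 16 message bytes into xmm(w_regs + k) and byte-swaps them.
; Illustrative expansion for k = 1 (w_regs = 4, so xmm5):
;       movdqu  xmm5, [rData + 16]
;       pshufb  xmm5, mask2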


; pre2 can be 2 or 3 (recommended)
pre2 equ 3
pre1 equ (pre2 + 1)

NUM_ROUNDS4 equ 20

RND4 macro k
        movdqa  @CatStr(xmm, %(e0_N + ((k + 1) mod 2))), abcd
        MY_sha1rnds4 abcd_N, (e0_N + (k mod 2)), k / 5

        nextM = (w_regs + ((k + 1) mod 4))

    if (k EQ NUM_ROUNDS4 - 1)
        nextM = e0_save_N
    endif

        MY_sha1nexte (e0_N + ((k + 1) mod 2)), nextM

    if (k GE (4 - pre2)) AND (k LT (NUM_ROUNDS4 - pre2))
        pxor @CatStr(xmm, %(w_regs + ((k + pre2) mod 4))), @CatStr(xmm, %(w_regs + ((k + pre2 - 2) mod 4)))
    endif

    if (k GE (4 - pre1)) AND (k LT (NUM_ROUNDS4 - pre1))
        MY_sha1msg1 (w_regs + ((k + pre1) mod 4)), (w_regs + ((k + pre1 - 3) mod 4))
    endif

    if (k GE (4 - pre2)) AND (k LT (NUM_ROUNDS4 - pre2))
        MY_sha1msg2 (w_regs + ((k + pre2) mod 4)), (w_regs + ((k + pre2 - 1) mod 4))
    endif
endm
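; RND4 k performs four SHA-1 rounds: sha1rnds4 updates abcd, with k / 5
; selecting the round function and constant (0..3 across the 20 iterations,
; i.e. 80 rounds), and sha1nexte folds the next e value into the next message
; quadruple (or into e0_save on the last iteration, which gives the e
; feed-forward). The pxor / sha1msg1 / sha1msg2 lines run the message schedule
; pre2 and pre1 iterations ahead, so each W quadruple is ready before the
; rounds that consume it.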


REVERSE_STATE macro
                               ; abcd   ; dcba
                               ; e0     ; 000e
        pshufd  abcd, abcd, 01bH        ; abcd
        pshufd    e0,   e0, 01bH        ; e000
endm
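; pshufd with 1Bh reverses the four 32-bit lanes, switching between the memory
; order of the state array (d,c,b,a viewed high-to-low in the register) and
; the a,b,c,d lane order used by the SHA instructions; applying the macro a
; second time converts back before the state is stored.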
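
; Sha1_UpdateBlocks_HW processes rNum 64-byte blocks from rData and updates
; the 20-byte SHA-1 state at rState. Assumed C prototype, shown only for
; reference (the real declaration lives in the C sources):
;       void Sha1_UpdateBlocks_HW(UInt32 state[5], const Byte *data, size_t numBlocks);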
MY_PROC Sha1_UpdateBlocks_HW, 3
    MY_PROLOG

        cmp     rNum, 0
        je      end_c

        movdqu   abcd, [rState]               ; dcba
        movd     e0, dword ptr [rState + 16]  ; 000e

        REVERSE_STATE

        ifdef x64
        LOAD_MASK
        endif

    align 16
    nextBlock:
        movdqa  abcd_save, abcd
        movdqa  e0_save, e0

        ifndef x64
        LOAD_MASK
        endif

        LOAD_W 0
        LOAD_W 1
        LOAD_W 2
        LOAD_W 3

        ; add the initial e into W[0..3]; later groups get e via sha1nexte
        paddd   e0, @CatStr(xmm, %(w_regs))

        k = 0
        rept NUM_ROUNDS4
          RND4 k
          k = k + 1
        endm

        ; feed-forward: add the saved input state of this block
        paddd   abcd, abcd_save

        add     rData, 64
        sub     rNum, 1
        jnz     nextBlock

        REVERSE_STATE

        movdqu  [rState], abcd                ; dcba
        movd    dword ptr [rState + 16], e0   ; 000e

  end_c:
MY_EPILOG

; _TEXT$SHA1OPT ENDS

end