; Sha1Opt.asm -- SHA-1 optimized code for SHA-1 x86 hardware instructions
; 2021-03-10 : Igor Pavlov : Public domain
;
; Syntax : MASM style (macro / equ / @CatStr / rept), Intel operand order.
; Targets: 32-bit x86 and x64; the variant is chosen with the x64, IS_LINUX
;          and IS_CDECL symbols.
; Needs  : 7zAsm.asm, which is expected to provide everything used but not
;          defined here: MY_ASM_START, MY_PROC, MY_ENDP, r0 / r4 / r5,
;          REG_SIZE, REG_ABI_PARAM_0..2, IS_LINUX and IS_CDECL.
;          NOTE(review): r4 is used as the stack pointer and r5 as a frame
;          register throughout this file -- confirm against 7zAsm.asm.
;
; Exports: Sha1_UpdateBlocks_HW (three arguments: state, data, block count).

include 7zAsm.asm

MY_ASM_START


CONST   SEGMENT

align 16
; pshufb control mask that reverses the order of all 16 bytes of a register.
Reverse_Endian_Mask db 15,14,13,12, 11,10,9,8, 7,6,5,4, 3,2,1,0

CONST   ENDS

; _TEXT$SHA1OPT SEGMENT 'CODE'

ifndef x64
    .686
    .xmm
endif

; ---------------------------------------------------------------------------
; Argument registers
;   rState : address of the state (16 bytes at +0, one dword at +16)
;   rData  : address of the input; advanced by 64 for every block processed
;   rNum   : number of 64-byte blocks still to process
;
; LOCAL_SIZE is the amount of stack scratch space reserved by MY_PROLOG:
;   x64 non-Linux : 2 * 16 bytes, spill slots for xmm8 and xmm9
;   x86           : 1 * 16 bytes, the abcd_save slot
;   x64 Linux     : not defined (MY_PROLOG / MY_EPILOG emit nothing there)
; ---------------------------------------------------------------------------
ifdef x64
        rNum            equ REG_ABI_PARAM_2
    if (IS_LINUX eq 0)
        LOCAL_SIZE      equ (16 * 2)
    endif
else
        rNum            equ r0
        LOCAL_SIZE      equ (16 * 1)
endif

rState  equ REG_ABI_PARAM_0
rData   equ REG_ABI_PARAM_1


; ---------------------------------------------------------------------------
; Hand-encoded SHA instructions.
; The opcodes are emitted as raw bytes instead of mnemonics (presumably so
; that assemblers without SHA support can still build this file).
; a1 / a2 are xmm register NUMBERS, not names. The ModRM byte
; (0c0H + a1 * 8 + a2) is the register-direct form with a1 as destination
; and a2 as source; no REX prefix is emitted, so both numbers must be 0..7.
; ---------------------------------------------------------------------------

; sha1rnds4 xmm<a1>, xmm<a2>, imm
MY_sha1rnds4 macro a1, a2, imm
        db 0fH, 03aH, 0ccH, (0c0H + a1 * 8 + a2), imm
endm

; Common encoder for the 0F 38 xx group; cmd is the third opcode byte.
MY_SHA_INSTR macro cmd, a1, a2
        db 0fH, 038H, cmd, (0c0H + a1 * 8 + a2)
endm

cmd_sha1nexte   equ 0c8H
cmd_sha1msg1    equ 0c9H
cmd_sha1msg2    equ 0caH

; sha1nexte xmm<a1>, xmm<a2>
MY_sha1nexte macro a1, a2
        MY_SHA_INSTR    cmd_sha1nexte, a1, a2
endm

; sha1msg1 xmm<a1>, xmm<a2>
MY_sha1msg1 macro a1, a2
        MY_SHA_INSTR    cmd_sha1msg1, a1, a2
endm

; sha1msg2 xmm<a1>, xmm<a2>
MY_sha1msg2 macro a1, a2
        MY_SHA_INSTR    cmd_sha1msg2, a1, a2
endm


; ---------------------------------------------------------------------------
; MY_PROLOG -- function entry code; must be paired with MY_EPILOG.
;
; x64, non-Linux:
;   xmm6 / xmm7 are stored at [r4 + 8] and [r4 + 24], i.e. in the 32 bytes
;   directly above the return address, without moving r4. r4 is then lowered
;   by LOCAL_SIZE + 8 and xmm8 / xmm9 are stored in the new area.
;   All four stores are movdqa, so every address must be 16-byte aligned;
;   that holds when r4 is 8 (mod 16) on entry.
; x64, Linux:
;   nothing is emitted.
; x86:
;   cdecl    : all three arguments are loaded from the stack.
;   fastcall : only rNum is loaded from the stack.
;   r5 is then pushed and set to the current r4 so MY_EPILOG can undo the
;   alignment; r4 is rounded down to a 16-byte boundary and LOCAL_SIZE bytes
;   are reserved, which makes [r4] (abcd_save) usable with movdqa.
; ---------------------------------------------------------------------------
MY_PROLOG macro
    ifdef x64
      if (IS_LINUX eq 0)
        movdqa  [r4 + 8], xmm6
        movdqa  [r4 + 8 + 16], xmm7
        sub     r4, LOCAL_SIZE + 8
        movdqa  [r4     ], xmm8
        movdqa  [r4 + 16], xmm9
      endif
    else ; x86
      if (IS_CDECL gt 0)
        mov     rState, [r4 + REG_SIZE * 1]
        mov     rData,  [r4 + REG_SIZE * 2]
        mov     rNum,   [r4 + REG_SIZE * 3]
      else ; fastcall
        mov     rNum,   [r4 + REG_SIZE * 1]
      endif
        push    r5
        mov     r5, r4
        and     r4, -16
        sub     r4, LOCAL_SIZE
    endif
endm

; ---------------------------------------------------------------------------
; MY_EPILOG -- exact mirror of MY_PROLOG, followed by MY_ENDP, which closes
; the procedure opened by MY_PROC.
; ---------------------------------------------------------------------------
MY_EPILOG macro
    ifdef x64
      if (IS_LINUX eq 0)
        movdqa  xmm8, [r4]
        movdqa  xmm9, [r4 + 16]
        add     r4, LOCAL_SIZE + 8
        movdqa  xmm6, [r4 + 8]
        movdqa  xmm7, [r4 + 8 + 16]
      endif
    else ; x86
        mov     r4, r5
        pop     r5
    endif
    MY_ENDP
endm


; ---------------------------------------------------------------------------
; xmm register allocation.
; The *_N symbols are register numbers; they are needed by the byte-encoded
; instructions above and for the compile-time arithmetic in RND4.
;   xmm0, xmm1 : e0 / e1, used alternately from one round group to the next
;   xmm2       : abcd
;   xmm3       : e0_save, value of e0 at the start of the current block
;   xmm4..xmm7 : four message registers (w_regs + 0 .. w_regs + 3)
; ---------------------------------------------------------------------------
e0_N            equ 0
e1_N            equ 1
abcd_N          equ 2
e0_save_N       equ 3
w_regs          equ 4

e0              equ @CatStr(xmm, %e0_N)
e1              equ @CatStr(xmm, %e1_N)
abcd            equ @CatStr(xmm, %abcd_N)
e0_save         equ @CatStr(xmm, %e0_save_N)


; x64 has xmm8 / xmm9 available for the saved abcd and the byte-swap mask.
; x86 keeps the saved abcd in the aligned stack slot, and the mask shares
; xmm1 with e1, so there it is only valid until the first RND4 of a block.
ifdef x64
        abcd_save       equ xmm8
        mask2           equ xmm9
else
        abcd_save       equ [r4]
        mask2           equ e1
endif

; Load the byte-reversal mask into mask2.
LOAD_MASK macro
        movdqa  mask2, XMMWORD PTR Reverse_Endian_Mask
endm

; Load 16 input bytes at offset 16 * k into message register k (k = 0..3)
; and reverse their byte order. The load is unaligned-safe (movdqu).
LOAD_W macro k:req
        movdqu  @CatStr(xmm, %(w_regs + k)), [rData + (16 * (k))]
        pshufb  @CatStr(xmm, %(w_regs + k)), mask2
endm


; pre2 can be 2 or 3 (recommended)
; pre2 / pre1 select how many round groups ahead of its use a message
; register is updated by the pxor + sha1msg2 and sha1msg1 steps in RND4.
pre2            equ 3
pre1            equ (pre2 + 1)

; Number of RND4 expansions per block (20 groups of 4 rounds).
NUM_ROUNDS4     equ 20

; ---------------------------------------------------------------------------
; RND4 k -- round group k (k = 0 .. NUM_ROUNDS4 - 1).
;
;   1. Copy abcd into the e register that is NOT consumed by this group;
;      e0 and e1 swap roles on every group (selected by k mod 2).
;   2. sha1rnds4 on abcd with the current e register; immediate = k / 5,
;      so it takes the values 0, 1, 2, 3 for five groups each.
;   3. sha1nexte on the other e register with nextM: the message register
;      for group k + 1, or e0_save for the very last group.
;   4. Message register updates for later groups. Each step is emitted only
;      while k is inside its window; the windows depend on pre1 / pre2.
;
; nextM is a redefinable assembly-time variable ('='), not an equ.
; ---------------------------------------------------------------------------
RND4 macro k
        movdqa  @CatStr(xmm, %(e0_N + ((k + 1) mod 2))), abcd
        MY_sha1rnds4 abcd_N, (e0_N + (k mod 2)), k / 5

        nextM = (w_regs + ((k + 1) mod 4))

    if (k EQ NUM_ROUNDS4 - 1)
        nextM = e0_save_N
    endif

        MY_sha1nexte (e0_N + ((k + 1) mod 2)), nextM

    if (k GE (4 - pre2)) AND (k LT (NUM_ROUNDS4 - pre2))
        pxor @CatStr(xmm, %(w_regs + ((k + pre2) mod 4))), @CatStr(xmm, %(w_regs + ((k + pre2 - 2) mod 4)))
    endif

    if (k GE (4 - pre1)) AND (k LT (NUM_ROUNDS4 - pre1))
        MY_sha1msg1 (w_regs + ((k + pre1) mod 4)), (w_regs + ((k + pre1 - 3) mod 4))
    endif

    if (k GE (4 - pre2)) AND (k LT (NUM_ROUNDS4 - pre2))
        MY_sha1msg2 (w_regs + ((k + pre2) mod 4)), (w_regs + ((k + pre2 - 1) mod 4))
    endif
endm


; Reverse the order of the four dwords in abcd and in e0 (pshufd with 01bH).
; Used once after loading the state and once before storing it, to convert
; between the memory layout and the layout used inside the block loop.
REVERSE_STATE macro
                                        ; abcd   ; dcba
                                        ; e0     ; 000e
        pshufd  abcd, abcd, 01bH        ; abcd
        pshufd    e0,   e0, 01bH        ; e000
endm


; ---------------------------------------------------------------------------
; Sha1_UpdateBlocks_HW (rState, rData, rNum)
;
; In   : rState = address of 20 bytes of state (read and written)
;        rData  = address of rNum * 64 bytes of input (read only)
;        rNum   = number of 64-byte blocks
; Out  : the state at [rState] is updated. When rNum is 0 the state is not
;        touched at all.
; Clobb: rData, rNum, xmm0..xmm7, flags. On x64 xmm8 / xmm9 are used as
;        well; in the non-Linux x64 build xmm6..xmm9 are saved and restored
;        by MY_PROLOG / MY_EPILOG.
; Note : the C-level prototype is not visible in this file; the "3" passed
;        to MY_PROC is the argument count.
; ---------------------------------------------------------------------------
MY_PROC Sha1_UpdateBlocks_HW, 3
    MY_PROLOG

        cmp     rNum, 0
        je      end_c                   ; no blocks: skip straight to epilog

        movdqu  abcd, [rState]                  ; dcba
        movd    e0, dword ptr [rState + 16]     ; 000e

        REVERSE_STATE

        ; x64: mask2 has a register of its own, so load it once.
        ifdef x64
        LOAD_MASK
        endif

    align 16
    nextBlock:
        ; Keep the state from before this block for the final additions.
        movdqa  abcd_save, abcd
        movdqa  e0_save, e0

        ; x86: mask2 is xmm1 (e1), which the previous block's rounds
        ; overwrote, so it has to be reloaded for every block.
        ifndef x64
        LOAD_MASK
        endif

        LOAD_W 0
        LOAD_W 1
        LOAD_W 2
        LOAD_W 3

        ; Group 0 has no preceding sha1nexte, so the first four message
        ; words are added to e0 directly.
        paddd   e0, @CatStr(xmm, %(w_regs))
        k = 0
        rept NUM_ROUNDS4
          RND4 k
          k = k + 1
        endm

        ; NUM_ROUNDS4 is even, so the last sha1nexte (the one that takes
        ; e0_save as its source) wrote to e0. abcd is finished here.
        paddd   abcd, abcd_save


        add     rData, 64
        sub     rNum, 1
        jnz     nextBlock

        REVERSE_STATE

        movdqu  [rState], abcd
        movd    dword ptr [rState + 16], e0

  end_c:
MY_EPILOG

; _TEXT$SHA1OPT ENDS

end