#include "x86_arch.h"
.text

# _mul_1x1: carry-less (GF(2)[x]) multiplication of two 64-bit polynomials.
#
# Internal helper with a non-standard calling convention (callers in this
# file save the callee-saved registers before reaching it):
#   In:    %rax = a (64-bit polynomial), %rbp = b, %r8 = 15 (nibble mask,
#          set once by the caller and reused across all three calls)
#   Out:   %rdx:%rax = a * b over GF(2) (high:low 64 bits)
#   Uses:  %rbx, %rcx, %rsi, %rdi, %r9-%r14, %xmm0, %xmm1, 136 bytes of stack
#
# Strategy: build a 16-entry table of i*a (i = 0..15) on the stack, then
# consume b four bits at a time, alternating lookups between a GPR
# accumulator (%rdx:%rax, repositioned with shlq/shrq) and an XMM
# accumulator (%xmm0, repositioned with byte shifts pslldq).
.type	_mul_1x1,@function
.align	16
_mul_1x1:
	subq	$128+8,%rsp		# 16 table slots (128 B) + scratch
	movq	$-1,%r9
	leaq	(%rax,%rax,1),%rsi	# %rsi = a<<1
	shrq	$3,%r9			# %r9 = 2^61 - 1 mask
	leaq	(,%rax,4),%rdi		# %rdi = a<<2
	andq	%rax,%r9		# %r9 = a with top 3 bits cleared
	leaq	(,%rax,8),%r12		# %r12 = a<<3
	sarq	$63,%rax		# broadcast bit 63 of a
	leaq	(%r9,%r9,1),%r10	# %r10 = 2 * (masked a)
	sarq	$63,%rsi		# broadcast bit 62 of a
	leaq	(,%r9,4),%r11		# %r11 = 4 * (masked a)
	andq	%rbp,%rax		# b if a<63> set, else 0
	sarq	$63,%rdi		# broadcast bit 61 of a
	# Fold in the contributions of the three top bits of a that were
	# masked out of the table entries: bit 63 contributes b<<63 (low)
	# and b>>1 (high), etc.
	movq	%rax,%rdx
	shlq	$63,%rax
	andq	%rbp,%rsi		# b if a<62> set, else 0
	shrq	$1,%rdx
	movq	%rsi,%rcx
	shlq	$62,%rsi
	andq	%rbp,%rdi		# b if a<61> set, else 0
	shrq	$2,%rcx
	xorq	%rsi,%rax
	movq	%rdi,%rbx
	shlq	$61,%rdi
	xorq	%rcx,%rdx
	shrq	$3,%rbx
	xorq	%rdi,%rax		# %rax = low-word correction
	xorq	%rbx,%rdx		# %rdx = high-word correction

	# Build the 16-entry window table at 0(%rsp)..120(%rsp):
	# entry i = i * a (GF(2) multiples, i.e. XORs of shifted a),
	# interleaving the XOR computation with the stores.
	movq	%r9,%r13
	movq	$0,0(%rsp)		# 0*a
	xorq	%r10,%r13		# 3*a
	movq	%r9,8(%rsp)		# 1*a
	movq	%r11,%r14
	movq	%r10,16(%rsp)		# 2*a
	xorq	%r12,%r14		# 12*a
	movq	%r13,24(%rsp)		# 3*a

	xorq	%r11,%r9		# 5*a
	movq	%r11,32(%rsp)		# 4*a
	xorq	%r11,%r10		# 6*a
	movq	%r9,40(%rsp)		# 5*a
	xorq	%r11,%r13		# 7*a
	movq	%r10,48(%rsp)		# 6*a
	xorq	%r14,%r9		# 9*a
	movq	%r13,56(%rsp)		# 7*a
	xorq	%r14,%r10		# 10*a

	movq	%r12,64(%rsp)		# 8*a
	xorq	%r14,%r13		# 11*a
	movq	%r9,72(%rsp)		# 9*a
	xorq	%r11,%r9		# 13*a
	movq	%r10,80(%rsp)		# 10*a
	xorq	%r11,%r10		# 14*a
	movq	%r13,88(%rsp)		# 11*a

	xorq	%r11,%r13		# 15*a
	movq	%r14,96(%rsp)		# 12*a
	movq	%r8,%rsi		# %r8 = 15 (nibble mask)
	movq	%r9,104(%rsp)		# 13*a
	andq	%rbp,%rsi		# nibble 0 of b
	movq	%r10,112(%rsp)		# 14*a
	shrq	$4,%rbp
	movq	%r13,120(%rsp)		# 15*a
	movq	%r8,%rdi
	andq	%rbp,%rdi		# nibble 1 of b
	shrq	$4,%rbp

	# Consume the remaining nibbles of b. Even nibbles go through the
	# XMM accumulator (%xmm0, shifted left by whole bytes with pslldq);
	# odd nibbles go through the GPR pair %rdx:%rax (the 64-bit table
	# entry is split with shlq/shrq to land at bit offset 4*i).
	movq	(%rsp,%rsi,8),%xmm0	# acc  = tab[b & 15]
	movq	%r8,%rsi
	andq	%rbp,%rsi		# nibble 2
	shrq	$4,%rbp
	movq	(%rsp,%rdi,8),%rcx	# tab[nibble 1], placed at bit 4
	movq	%r8,%rdi
	movq	%rcx,%rbx
	shlq	$4,%rcx
	andq	%rbp,%rdi		# nibble 3
	movq	(%rsp,%rsi,8),%xmm1
	shrq	$60,%rbx		# bits spilling into the high word
	xorq	%rcx,%rax
	pslldq	$1,%xmm1		# shift by 8 bits (nibble 2 position)
	movq	%r8,%rsi
	shrq	$4,%rbp
	xorq	%rbx,%rdx
	andq	%rbp,%rsi		# nibble 4
	shrq	$4,%rbp
	pxor	%xmm1,%xmm0
	movq	(%rsp,%rdi,8),%rcx	# tab[nibble 3], placed at bit 12
	movq	%r8,%rdi
	movq	%rcx,%rbx
	shlq	$12,%rcx
	andq	%rbp,%rdi		# nibble 5
	movq	(%rsp,%rsi,8),%xmm1
	shrq	$52,%rbx
	xorq	%rcx,%rax
	pslldq	$2,%xmm1		# nibble 4 position
	movq	%r8,%rsi
	shrq	$4,%rbp
	xorq	%rbx,%rdx
	andq	%rbp,%rsi		# nibble 6
	shrq	$4,%rbp
	pxor	%xmm1,%xmm0
	movq	(%rsp,%rdi,8),%rcx	# tab[nibble 5], placed at bit 20
	movq	%r8,%rdi
	movq	%rcx,%rbx
	shlq	$20,%rcx
	andq	%rbp,%rdi		# nibble 7
	movq	(%rsp,%rsi,8),%xmm1
	shrq	$44,%rbx
	xorq	%rcx,%rax
	pslldq	$3,%xmm1		# nibble 6 position
	movq	%r8,%rsi
	shrq	$4,%rbp
	xorq	%rbx,%rdx
	andq	%rbp,%rsi		# nibble 8
	shrq	$4,%rbp
	pxor	%xmm1,%xmm0
	movq	(%rsp,%rdi,8),%rcx	# tab[nibble 7], placed at bit 28
	movq	%r8,%rdi
	movq	%rcx,%rbx
	shlq	$28,%rcx
	andq	%rbp,%rdi		# nibble 9
	movq	(%rsp,%rsi,8),%xmm1
	shrq	$36,%rbx
	xorq	%rcx,%rax
	pslldq	$4,%xmm1		# nibble 8 position
	movq	%r8,%rsi
	shrq	$4,%rbp
	xorq	%rbx,%rdx
	andq	%rbp,%rsi		# nibble 10
	shrq	$4,%rbp
	pxor	%xmm1,%xmm0
	movq	(%rsp,%rdi,8),%rcx	# tab[nibble 9], placed at bit 36
	movq	%r8,%rdi
	movq	%rcx,%rbx
	shlq	$36,%rcx
	andq	%rbp,%rdi		# nibble 11
	movq	(%rsp,%rsi,8),%xmm1
	shrq	$28,%rbx
	xorq	%rcx,%rax
	pslldq	$5,%xmm1		# nibble 10 position
	movq	%r8,%rsi
	shrq	$4,%rbp
	xorq	%rbx,%rdx
	andq	%rbp,%rsi		# nibble 12
	shrq	$4,%rbp
	pxor	%xmm1,%xmm0
	movq	(%rsp,%rdi,8),%rcx	# tab[nibble 11], placed at bit 44
	movq	%r8,%rdi
	movq	%rcx,%rbx
	shlq	$44,%rcx
	andq	%rbp,%rdi		# nibble 13
	movq	(%rsp,%rsi,8),%xmm1
	shrq	$20,%rbx
	xorq	%rcx,%rax
	pslldq	$6,%xmm1		# nibble 12 position
	movq	%r8,%rsi
	shrq	$4,%rbp
	xorq	%rbx,%rdx
	andq	%rbp,%rsi		# nibble 14
	shrq	$4,%rbp
	pxor	%xmm1,%xmm0
	movq	(%rsp,%rdi,8),%rcx	# tab[nibble 13], placed at bit 52
	movq	%r8,%rdi
	movq	%rcx,%rbx
	shlq	$52,%rcx
	andq	%rbp,%rdi		# nibble 15 (last)
	movq	(%rsp,%rsi,8),%xmm1
	shrq	$12,%rbx
	xorq	%rcx,%rax
	pslldq	$7,%xmm1		# nibble 14 position
	movq	%r8,%rsi
	shrq	$4,%rbp
	xorq	%rbx,%rdx
	andq	%rbp,%rsi
	shrq	$4,%rbp
	pxor	%xmm1,%xmm0
	movq	(%rsp,%rdi,8),%rcx	# tab[nibble 15], placed at bit 60
	movq	%rcx,%rbx
	shlq	$60,%rcx
	movd	%xmm0,%rsi		# low 64 bits of XMM accumulator
	shrq	$4,%rbx
	xorq	%rcx,%rax
	psrldq	$8,%xmm0
	xorq	%rbx,%rdx
	movd	%xmm0,%rdi		# high 64 bits of XMM accumulator
	xorq	%rsi,%rax		# merge the two accumulators:
	xorq	%rdi,%rdx		# %rdx:%rax = a * b over GF(2)

	addq	$128+8,%rsp
	retq
.Lend_mul_1x1:
.size	_mul_1x1,.-_mul_1x1

# void bn_GF2m_mul_2x2(BN_ULONG *r, BN_ULONG a1, BN_ULONG a0,
#                      BN_ULONG b1, BN_ULONG b0);
#
# 128x128 -> 256-bit carry-less multiply: r[3:0] = (a1:a0) * (b1:b0) over
# GF(2), via one level of Karatsuba (three 64x64 multiplies).
# SysV AMD64: %rdi = r, %rsi = a1, %rdx = a0, %rcx = b1, %r8 = b0.
# Dispatches on the cached CPUID PCLMUL feature bit.
.hidden	OPENSSL_ia32cap_P
.globl	bn_GF2m_mul_2x2
.type	bn_GF2m_mul_2x2,@function
.align	16
bn_GF2m_mul_2x2:
	movl	OPENSSL_ia32cap_P+4(%rip),%eax
	btl	$IA32CAP_BIT1_PCLMUL,%eax
	jnc	.Lvanilla_mul_2x2	# no PCLMULQDQ -> table-driven path

	# PCLMUL fast path. The .byte sequences below are hand-encoded
	# "pclmulqdq $0, %xmmN, %xmmM" (66 0F 3A 44 /r ib), emitted as
	# raw bytes for old assemblers.
	movd	%rsi,%xmm0		# a1
	movd	%rcx,%xmm1		# b1
	movd	%rdx,%xmm2		# a0
	movd	%r8,%xmm3		# b0
	movdqa	%xmm0,%xmm4
	movdqa	%xmm1,%xmm5
.byte	102,15,58,68,193,0		# pclmulqdq $0,%xmm1,%xmm0: a1*b1
	pxor	%xmm2,%xmm4		# a1^a0
	pxor	%xmm3,%xmm5		# b1^b0
.byte	102,15,58,68,211,0		# pclmulqdq $0,%xmm3,%xmm2: a0*b0
.byte	102,15,58,68,229,0		# pclmulqdq $0,%xmm5,%xmm4: (a1^a0)*(b1^b0)
	xorps	%xmm0,%xmm4		# Karatsuba middle term:
	xorps	%xmm2,%xmm4		# mid = m - hi - lo (XOR in GF(2))
	movdqa	%xmm4,%xmm5
	pslldq	$8,%xmm4		# low half of mid -> bits 64..127
	psrldq	$8,%xmm5		# high half of mid -> bits 128..191
	pxor	%xmm4,%xmm2		# low 128 bits of product
	pxor	%xmm5,%xmm0		# high 128 bits of product
	movdqu	%xmm2,0(%rdi)		# r[1]:r[0]
	movdqu	%xmm0,16(%rdi)		# r[3]:r[2]
	retq

.align	16
.Lvanilla_mul_2x2:
	# Table-driven path: save callee-saved registers that _mul_1x1
	# clobbers, stash the arguments, then do three 1x1 multiplies.
	leaq	-136(%rsp),%rsp
	movq	%r14,80(%rsp)
	movq	%r13,88(%rsp)
	movq	%r12,96(%rsp)
	movq	%rbp,104(%rsp)
	movq	%rbx,112(%rsp)
.Lbody_mul_2x2:
	movq	%rdi,32(%rsp)		# r
	movq	%rsi,40(%rsp)		# a1
	movq	%rdx,48(%rsp)		# a0
	movq	%rcx,56(%rsp)		# b1
	movq	%r8,64(%rsp)		# b0

	movq	$15,%r8			# nibble mask for _mul_1x1
	movq	%rsi,%rax
	movq	%rcx,%rbp
	call	_mul_1x1		# a1 * b1
	movq	%rax,16(%rsp)
	movq	%rdx,24(%rsp)

	movq	48(%rsp),%rax
	movq	64(%rsp),%rbp
	call	_mul_1x1		# a0 * b0
	movq	%rax,0(%rsp)
	movq	%rdx,8(%rsp)

	movq	40(%rsp),%rax
	movq	56(%rsp),%rbp
	xorq	48(%rsp),%rax		# a1 ^ a0
	xorq	64(%rsp),%rbp		# b1 ^ b0
	call	_mul_1x1		# (a1^a0) * (b1^b0) in %rdx:%rax
	movq	0(%rsp),%rbx		# lo(a0*b0)
	movq	8(%rsp),%rcx		# hi(a0*b0)
	movq	16(%rsp),%rdi		# lo(a1*b1)
	movq	24(%rsp),%rsi		# hi(a1*b1)
	movq	32(%rsp),%rbp		# r

	# Karatsuba recombination (all additions are XOR in GF(2)):
	# r0 = lo(a0b0), r3 = hi(a1b1),
	# r1 = lo(mid) ^ lo(a0b0) ^ lo(a1b1) shifted pieces, etc.
	xorq	%rdx,%rax
	xorq	%rcx,%rdx
	xorq	%rbx,%rax
	movq	%rbx,0(%rbp)		# r[0]
	xorq	%rdi,%rdx
	movq	%rsi,24(%rbp)		# r[3]
	xorq	%rsi,%rax
	xorq	%rsi,%rdx
	xorq	%rdx,%rax
	movq	%rdx,16(%rbp)		# r[2]
	movq	%rax,8(%rbp)		# r[1]

	movq	80(%rsp),%r14		# restore callee-saved registers
	movq	88(%rsp),%r13
	movq	96(%rsp),%r12
	movq	104(%rsp),%rbp
	movq	112(%rsp),%rbx
	leaq	136(%rsp),%rsp
	retq
.Lend_mul_2x2:
.size	bn_GF2m_mul_2x2,.-bn_GF2m_mul_2x2
# ASCII: "GF(2^m) Multiplication for x86_64, CRYPTOGAMS by <appro@openssl.org>"
.byte	71,70,40,50,94,109,41,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align	16
#if defined(HAVE_GNU_STACK)
.section	.note.GNU-stack,"",%progbits
#endif