#-----------------------------------------------------------------------
# qhasm-generated x86-64 AVX assembly (AT&T syntax, SysV AMD64 ABI).
#
# C-equivalent signature (presumably, from the PQClean naming — confirm
# against the caller):
#   void PQCLEAN_MCELIECE6688128F_AVX_syndrome_asm(uint8_t *s,      /* rdi */
#                                                  const uint8_t *pk,/* rsi */
#                                                  const uint8_t *e);/* rdx */
#
# What the visible code does:
#   * rsi is advanced by 1044364 = 1663 * 628, then walked DOWN by 628
#     bytes per iteration for 1664 iterations: one 628-byte matrix row
#     per output bit, processed from row 1663 to row 0.
#   * Per row: 19 x 32-byte AVX loads of the row are ANDed with
#     e[208..816) and XOR-accumulated; the 20-byte scalar tail
#     (two movq + one movl, row offsets 608/616/624 vs e offsets
#     816/824/832) is folded in with scalar and/xor.
#   * The parity of the accumulated value is computed with five popcnt
#     ops (scalar tail + four 64-bit words of the 32-byte stack buffer),
#     masked to one bit, and shifted into the byte s[row >> 3].
#   * After the loop, the identity part is applied:
#     s[0..208) ^= e[0..208) (six 32-byte vector XORs + two 64-bit XORs).
#
# Register roles: r8 = row counter; rcx = 32-byte-aligned stack buffer;
# r9 = parity accumulator (c_all); rax/r10 = scratch; ymm0 = running ss,
# ymm1/ymm2 = pp/ee scratch; r11 = stack adjustment, restored before ret.
#
# Straight-line, branch-pattern-independent of secret data (only the
# public row counter controls flow) — do not "optimize" with early exits.
# NOTE(review): no vzeroupper before ret; SysV callers mixing legacy SSE
# afterwards may hit an AVX-SSE transition penalty — confirm this matches
# the other PQClean AVX routines in this build.
#-----------------------------------------------------------------------

# qhasm: int64 input_0

# qhasm: int64 input_1

# qhasm: int64 input_2

# qhasm: int64 input_3

# qhasm: int64 input_4

# qhasm: int64 input_5

# qhasm: stack64 input_6

# qhasm: stack64 input_7

# qhasm: int64 caller_r11

# qhasm: int64 caller_r12

# qhasm: int64 caller_r13

# qhasm: int64 caller_r14

# qhasm: int64 caller_r15

# qhasm: int64 caller_rbx

# qhasm: int64 caller_rbp

# qhasm: int64 b64

# qhasm: int64 synd

# qhasm: int64 addr

# qhasm: int64 c

# qhasm: int64 c_all

# qhasm: int64 row

# qhasm: int64 p

# qhasm: int64 e

# qhasm: int64 s

# qhasm: reg256 pp

# qhasm: reg256 ee

# qhasm: reg256 ss

# qhasm: int64 buf_ptr

# qhasm: stack256 buf

# qhasm: enter syndrome_asm
.p2align 5
.global _PQCLEAN_MCELIECE6688128F_AVX_syndrome_asm
.global PQCLEAN_MCELIECE6688128F_AVX_syndrome_asm
_PQCLEAN_MCELIECE6688128F_AVX_syndrome_asm:
PQCLEAN_MCELIECE6688128F_AVX_syndrome_asm:
# Carve out a 32-byte-aligned scratch region below rsp; r11 keeps the
# exact adjustment so the epilogue can undo it.
mov %rsp,%r11
and $31,%r11
add $32,%r11
sub %r11,%rsp

# qhasm: input_1 += 1044364
# (point rsi at the LAST row: 1663 * 628 bytes past the matrix base)
add $1044364,%rsi

# qhasm: buf_ptr = &buf
leaq 0(%rsp),%rcx

# qhasm: row = 1664
mov $1664,%r8

# qhasm: loop:
._loop:

# qhasm: row -= 1
sub $1,%r8

# --- vector part of <row, e>: 19 lanes of (row & e), XOR-accumulated ---

# qhasm: ss = mem256[ input_1 + 0 ]
vmovupd 0(%rsi),%ymm0

# qhasm: ee = mem256[ input_2 + 208 ]
vmovupd 208(%rdx),%ymm1

# qhasm: ss &= ee
vpand %ymm1,%ymm0,%ymm0

# qhasm: pp = mem256[ input_1 + 32 ]
vmovupd 32(%rsi),%ymm1

# qhasm: ee = mem256[ input_2 + 240 ]
vmovupd 240(%rdx),%ymm2

# qhasm: pp &= ee
vpand %ymm2,%ymm1,%ymm1

# qhasm: ss ^= pp
vpxor %ymm1,%ymm0,%ymm0

# qhasm: pp = mem256[ input_1 + 64 ]
vmovupd 64(%rsi),%ymm1

# qhasm: ee = mem256[ input_2 + 272 ]
vmovupd 272(%rdx),%ymm2

# qhasm: pp &= ee
vpand %ymm2,%ymm1,%ymm1

# qhasm: ss ^= pp
vpxor %ymm1,%ymm0,%ymm0

# qhasm: pp = mem256[ input_1 + 96 ]
vmovupd 96(%rsi),%ymm1

# qhasm: ee = mem256[ input_2 + 304 ]
vmovupd 304(%rdx),%ymm2

# qhasm: pp &= ee
vpand %ymm2,%ymm1,%ymm1

# qhasm: ss ^= pp
vpxor %ymm1,%ymm0,%ymm0

# qhasm: pp = mem256[ input_1 + 128 ]
vmovupd 128(%rsi),%ymm1

# qhasm: ee = mem256[ input_2 + 336 ]
vmovupd 336(%rdx),%ymm2

# qhasm: pp &= ee
vpand %ymm2,%ymm1,%ymm1

# qhasm: ss ^= pp
vpxor %ymm1,%ymm0,%ymm0

# qhasm: pp = mem256[ input_1 + 160 ]
vmovupd 160(%rsi),%ymm1

# qhasm: ee = mem256[ input_2 + 368 ]
vmovupd 368(%rdx),%ymm2

# qhasm: pp &= ee
vpand %ymm2,%ymm1,%ymm1

# qhasm: ss ^= pp
vpxor %ymm1,%ymm0,%ymm0

# qhasm: pp = mem256[ input_1 + 192 ]
vmovupd 192(%rsi),%ymm1

# qhasm: ee = mem256[ input_2 + 400 ]
vmovupd 400(%rdx),%ymm2

# qhasm: pp &= ee
vpand %ymm2,%ymm1,%ymm1

# qhasm: ss ^= pp
vpxor %ymm1,%ymm0,%ymm0

# qhasm: pp = mem256[ input_1 + 224 ]
vmovupd 224(%rsi),%ymm1

# qhasm: ee = mem256[ input_2 + 432 ]
vmovupd 432(%rdx),%ymm2

# qhasm: pp &= ee
vpand %ymm2,%ymm1,%ymm1

# qhasm: ss ^= pp
vpxor %ymm1,%ymm0,%ymm0

# qhasm: pp = mem256[ input_1 + 256 ]
vmovupd 256(%rsi),%ymm1

# qhasm: ee = mem256[ input_2 + 464 ]
vmovupd 464(%rdx),%ymm2

# qhasm: pp &= ee
vpand %ymm2,%ymm1,%ymm1

# qhasm: ss ^= pp
vpxor %ymm1,%ymm0,%ymm0

# qhasm: pp = mem256[ input_1 + 288 ]
vmovupd 288(%rsi),%ymm1

# qhasm: ee = mem256[ input_2 + 496 ]
vmovupd 496(%rdx),%ymm2

# qhasm: pp &= ee
vpand %ymm2,%ymm1,%ymm1

# qhasm: ss ^= pp
vpxor %ymm1,%ymm0,%ymm0

# qhasm: pp = mem256[ input_1 + 320 ]
vmovupd 320(%rsi),%ymm1

# qhasm: ee = mem256[ input_2 + 528 ]
vmovupd 528(%rdx),%ymm2

# qhasm: pp &= ee
vpand %ymm2,%ymm1,%ymm1

# qhasm: ss ^= pp
vpxor %ymm1,%ymm0,%ymm0

# qhasm: pp = mem256[ input_1 + 352 ]
vmovupd 352(%rsi),%ymm1

# qhasm: ee = mem256[ input_2 + 560 ]
vmovupd 560(%rdx),%ymm2

# qhasm: pp &= ee
vpand %ymm2,%ymm1,%ymm1

# qhasm: ss ^= pp
vpxor %ymm1,%ymm0,%ymm0

# qhasm: pp = mem256[ input_1 + 384 ]
vmovupd 384(%rsi),%ymm1

# qhasm: ee = mem256[ input_2 + 592 ]
vmovupd 592(%rdx),%ymm2

# qhasm: pp &= ee
vpand %ymm2,%ymm1,%ymm1

# qhasm: ss ^= pp
vpxor %ymm1,%ymm0,%ymm0

# qhasm: pp = mem256[ input_1 + 416 ]
vmovupd 416(%rsi),%ymm1

# qhasm: ee = mem256[ input_2 + 624 ]
vmovupd 624(%rdx),%ymm2

# qhasm: pp &= ee
vpand %ymm2,%ymm1,%ymm1

# qhasm: ss ^= pp
vpxor %ymm1,%ymm0,%ymm0

# qhasm: pp = mem256[ input_1 + 448 ]
vmovupd 448(%rsi),%ymm1

# qhasm: ee = mem256[ input_2 + 656 ]
vmovupd 656(%rdx),%ymm2

# qhasm: pp &= ee
vpand %ymm2,%ymm1,%ymm1

# qhasm: ss ^= pp
vpxor %ymm1,%ymm0,%ymm0

# qhasm: pp = mem256[ input_1 + 480 ]
vmovupd 480(%rsi),%ymm1

# qhasm: ee = mem256[ input_2 + 688 ]
vmovupd 688(%rdx),%ymm2

# qhasm: pp &= ee
vpand %ymm2,%ymm1,%ymm1

# qhasm: ss ^= pp
vpxor %ymm1,%ymm0,%ymm0

# qhasm: pp = mem256[ input_1 + 512 ]
vmovupd 512(%rsi),%ymm1

# qhasm: ee = mem256[ input_2 + 720 ]
vmovupd 720(%rdx),%ymm2

# qhasm: pp &= ee
vpand %ymm2,%ymm1,%ymm1

# qhasm: ss ^= pp
vpxor %ymm1,%ymm0,%ymm0

# qhasm: pp = mem256[ input_1 + 544 ]
vmovupd 544(%rsi),%ymm1

# qhasm: ee = mem256[ input_2 + 752 ]
vmovupd 752(%rdx),%ymm2

# qhasm: pp &= ee
vpand %ymm2,%ymm1,%ymm1

# qhasm: ss ^= pp
vpxor %ymm1,%ymm0,%ymm0

# qhasm: pp = mem256[ input_1 + 576 ]
vmovupd 576(%rsi),%ymm1

# qhasm: ee = mem256[ input_2 + 784 ]
vmovupd 784(%rdx),%ymm2

# qhasm: pp &= ee
vpand %ymm2,%ymm1,%ymm1

# qhasm: ss ^= pp
vpxor %ymm1,%ymm0,%ymm0

# --- spill the vector accumulator; handle the 20-byte scalar tail ---

# qhasm: buf = ss
vmovapd %ymm0,0(%rsp)

# qhasm: s = mem64[input_1 + 608]
movq 608(%rsi),%r9

# qhasm: e = mem64[input_2 + 816]
movq 816(%rdx),%rax

# qhasm: s &= e
and %rax,%r9

# qhasm: p = mem64[input_1 + 616]
movq 616(%rsi),%rax

# qhasm: e = mem64[input_2 + 824]
movq 824(%rdx),%r10

# qhasm: p &= e
and %r10,%rax

# qhasm: s ^= p
xor %rax,%r9

# qhasm: p = *(uint32 *)(input_1 + 624)
movl 624(%rsi),%eax

# qhasm: e = *(uint32 *)(input_2 + 832)
movl 832(%rdx),%r10d

# qhasm: p &= e
and %r10,%rax

# qhasm: s ^= p
xor %rax,%r9

# --- parity: XOR of five popcounts; only bit 0 is used below ---

# qhasm: c_all = count(s)
popcnt %r9, %r9

# qhasm: b64 = mem64[ buf_ptr + 0 ]
movq 0(%rcx),%rax

# qhasm: c = count(b64)
popcnt %rax, %rax

# qhasm: c_all ^= c
xor %rax,%r9

# qhasm: b64 = mem64[ buf_ptr + 8 ]
movq 8(%rcx),%rax

# qhasm: c = count(b64)
popcnt %rax, %rax

# qhasm: c_all ^= c
xor %rax,%r9

# qhasm: b64 = mem64[ buf_ptr + 16 ]
movq 16(%rcx),%rax

# qhasm: c = count(b64)
popcnt %rax, %rax

# qhasm: c_all ^= c
xor %rax,%r9

# qhasm: b64 = mem64[ buf_ptr + 24 ]
movq 24(%rcx),%rax

# qhasm: c = count(b64)
popcnt %rax, %rax

# qhasm: c_all ^= c
xor %rax,%r9

# --- shift the parity bit into output byte s[row >> 3] ---
# (rows run high-to-low, so each byte fills MSB-first via the shl)

# qhasm: addr = row
mov %r8,%rax

# qhasm: (uint64) addr >>= 3
shr $3,%rax

# qhasm: addr += input_0
add %rdi,%rax

# qhasm: synd = *(uint8 *) (addr + 0)
movzbq 0(%rax),%r10

# qhasm: synd <<= 1
shl $1,%r10

# qhasm: (uint32) c_all &= 1
and $1,%r9d

# qhasm: synd |= c_all
or %r9,%r10

# qhasm: *(uint8 *) (addr + 0) = synd
movb %r10b,0(%rax)

# qhasm: input_1 -= 628
# (step back one 628-byte matrix row)
sub $628,%rsi

# qhasm: =? row-0
cmp $0,%r8
# comment:fp stack unchanged by jump

# qhasm: goto loop if !=
jne ._loop

# --- identity part: s[0..208) ^= e[0..208) ---

# qhasm: ss = mem256[ input_0 + 0 ]
vmovupd 0(%rdi),%ymm0

# qhasm: ee = mem256[ input_2 + 0 ]
vmovupd 0(%rdx),%ymm1

# qhasm: ss ^= ee
vpxor %ymm1,%ymm0,%ymm0

# qhasm: mem256[ input_0 + 0 ] = ss
vmovupd %ymm0,0(%rdi)

# qhasm: ss = mem256[ input_0 + 32 ]
vmovupd 32(%rdi),%ymm0

# qhasm: ee = mem256[ input_2 + 32 ]
vmovupd 32(%rdx),%ymm1

# qhasm: ss ^= ee
vpxor %ymm1,%ymm0,%ymm0

# qhasm: mem256[ input_0 + 32 ] = ss
vmovupd %ymm0,32(%rdi)

# qhasm: ss = mem256[ input_0 + 64 ]
vmovupd 64(%rdi),%ymm0

# qhasm: ee = mem256[ input_2 + 64 ]
vmovupd 64(%rdx),%ymm1

# qhasm: ss ^= ee
vpxor %ymm1,%ymm0,%ymm0

# qhasm: mem256[ input_0 + 64 ] = ss
vmovupd %ymm0,64(%rdi)

# qhasm: ss = mem256[ input_0 + 96 ]
vmovupd 96(%rdi),%ymm0

# qhasm: ee = mem256[ input_2 + 96 ]
vmovupd 96(%rdx),%ymm1

# qhasm: ss ^= ee
vpxor %ymm1,%ymm0,%ymm0

# qhasm: mem256[ input_0 + 96 ] = ss
vmovupd %ymm0,96(%rdi)

# qhasm: ss = mem256[ input_0 + 128 ]
vmovupd 128(%rdi),%ymm0

# qhasm: ee = mem256[ input_2 + 128 ]
vmovupd 128(%rdx),%ymm1

# qhasm: ss ^= ee
vpxor %ymm1,%ymm0,%ymm0

# qhasm: mem256[ input_0 + 128 ] = ss
vmovupd %ymm0,128(%rdi)

# qhasm: ss = mem256[ input_0 + 160 ]
vmovupd 160(%rdi),%ymm0

# qhasm: ee = mem256[ input_2 + 160 ]
vmovupd 160(%rdx),%ymm1

# qhasm: ss ^= ee
vpxor %ymm1,%ymm0,%ymm0

# qhasm: mem256[ input_0 + 160 ] = ss
vmovupd %ymm0,160(%rdi)

# qhasm: s = mem64[ input_0 + 192 ]
movq 192(%rdi),%rsi

# qhasm: e = mem64[ input_2 + 192 ]
movq 192(%rdx),%rcx

# qhasm: s ^= e
xor %rcx,%rsi

# qhasm: mem64[ input_0 + 192 ] = s
movq %rsi,192(%rdi)

# qhasm: s = mem64[ input_0 + 200 ]
movq 200(%rdi),%rsi

# qhasm: e = mem64[ input_2 + 200 ]
movq 200(%rdx),%rdx

# qhasm: s ^= e
xor %rdx,%rsi

# qhasm: mem64[ input_0 + 200 ] = s
movq %rsi,200(%rdi)

# qhasm: return
# Restore the alignment adjustment saved in r11 at entry.
add %r11,%rsp
ret