1# version 20090331 2# Peter Schwabe & Neil Costigan 3# Public domain. 4 5vec128 retp 6vec128 skp 7vec128 xp 8 9input retp 10input skp 11input xp 12 13vec128 sk 14 15vec128 x1_03 16vec128 x1_47 17vec128 x1_811 18vec128 x1_1215 19vec128 x1_1619 20 21vec128 x2_03 22vec128 x2_47 23vec128 x2_811 24vec128 x2_1215 25vec128 x2_1619 26vec128 z2_03 27vec128 z2_47 28vec128 z2_811 29vec128 z2_1215 30vec128 z2_1619 31 32vec128 x3_03 33vec128 x3_47 34vec128 x3_811 35vec128 x3_1215 36vec128 x3_1619 37vec128 z3_03 38vec128 z3_47 39vec128 z3_811 40vec128 z3_1215 41vec128 z3_1619 42 43vec128 z3_2023 44vec128 z3_2427 45vec128 z3_2831 46vec128 z3_3235 47vec128 z3_3639 48 49vec128 a_03 50vec128 a_47 51vec128 a_811 52vec128 a_1215 53vec128 a_1619 54 55vec128 b_03 56vec128 b_47 57vec128 b_811 58vec128 b_1215 59vec128 b_1619 60 61vec128 c_03 62vec128 c_47 63vec128 c_811 64vec128 c_1215 65vec128 c_1619 66 67vec128 d_03 68vec128 d_47 69vec128 d_811 70vec128 d_1215 71vec128 d_1619 72 73vec128 acbd0 74vec128 acbd1 75vec128 acbd2 76vec128 acbd3 77vec128 acbd4 78vec128 acbd5 79vec128 acbd6 80vec128 acbd7 81vec128 acbd8 82vec128 acbd9 83vec128 acbd10 84vec128 acbd11 85vec128 acbd12 86vec128 acbd13 87vec128 acbd14 88vec128 acbd15 89vec128 acbd16 90vec128 acbd17 91vec128 acbd18 92vec128 acbd19 93 94vec128 t20 95vec128 t21 96vec128 t22 97vec128 t23 98vec128 t24 99vec128 t25 100vec128 t26 101vec128 t27 102vec128 t28 103vec128 t29 104vec128 t210 105vec128 t211 106vec128 t212 107vec128 t213 108vec128 t214 109vec128 t215 110vec128 t216 111vec128 t217 112vec128 t218 113vec128 t219 114 115 116vec128 abba0 117vec128 abba1 118vec128 abba2 119vec128 abba3 120vec128 abba4 121vec128 abba5 122vec128 abba6 123vec128 abba7 124vec128 abba8 125vec128 abba9 126vec128 abba10 127vec128 abba11 128vec128 abba12 129vec128 abba13 130vec128 abba14 131vec128 abba15 132vec128 abba16 133vec128 abba17 134vec128 abba18 135vec128 abba19 136 137vec128 et4t1t00 138vec128 et4t1t01 139vec128 et4t1t02 140vec128 et4t1t03 141vec128 
et4t1t04 142vec128 et4t1t05 143vec128 et4t1t06 144vec128 et4t1t07 145vec128 et4t1t08 146vec128 et4t1t09 147vec128 et4t1t010 148vec128 et4t1t011 149vec128 et4t1t012 150vec128 et4t1t013 151vec128 et4t1t014 152vec128 et4t1t015 153vec128 et4t1t016 154vec128 et4t1t017 155vec128 et4t1t018 156vec128 et4t1t019 157 158vec128 aa_a24aadada0 159vec128 aa_a24aadada1 160vec128 aa_a24aadada2 161vec128 aa_a24aadada3 162vec128 aa_a24aadada4 163vec128 aa_a24aadada5 164vec128 aa_a24aadada6 165vec128 aa_a24aadada7 166vec128 aa_a24aadada8 167vec128 aa_a24aadada9 168vec128 aa_a24aadada10 169vec128 aa_a24aadada11 170vec128 aa_a24aadada12 171vec128 aa_a24aadada13 172vec128 aa_a24aadada14 173vec128 aa_a24aadada15 174vec128 aa_a24aadada16 175vec128 aa_a24aadada17 176vec128 aa_a24aadada18 177vec128 aa_a24aadada19 178 179vec128 bb_a24m1bbcb0 180vec128 bb_a24m1bbcb1 181vec128 bb_a24m1bbcb2 182vec128 bb_a24m1bbcb3 183vec128 bb_a24m1bbcb4 184vec128 bb_a24m1bbcb5 185vec128 bb_a24m1bbcb6 186vec128 bb_a24m1bbcb7 187vec128 bb_a24m1bbcb8 188vec128 bb_a24m1bbcb9 189vec128 bb_a24m1bbcb10 190vec128 bb_a24m1bbcb11 191vec128 bb_a24m1bbcb12 192vec128 bb_a24m1bbcb13 193vec128 bb_a24m1bbcb14 194vec128 bb_a24m1bbcb15 195vec128 bb_a24m1bbcb16 196vec128 bb_a24m1bbcb17 197vec128 bb_a24m1bbcb18 198vec128 bb_a24m1bbcb19 199 200 201vec128 2p2p2pcb0 202vec128 2p2p2pcb1 203vec128 2p2p2pcb2 204vec128 2p2p2pcb3 205vec128 2p2p2pcb4 206vec128 2p2p2pcb5 207vec128 2p2p2pcb6 208vec128 2p2p2pcb7 209vec128 2p2p2pcb8 210vec128 2p2p2pcb9 211vec128 2p2p2pcb10 212vec128 2p2p2pcb11 213vec128 2p2p2pcb12 214vec128 2p2p2pcb13 215vec128 2p2p2pcb14 216vec128 2p2p2pcb15 217vec128 2p2p2pcb16 218vec128 2p2p2pcb17 219vec128 2p2p2pcb18 220vec128 2p2p2pcb19 221 222vec128 vec19 223 224vec128 tmp0 225vec128 tmp1 226vec128 tmp2 227vec128 tmp3 228vec128 tmp4 229vec128 tmp5 230vec128 tmp6 231vec128 tmp7 232vec128 tmp8 233vec128 tmp9 234vec128 tmp10 235vec128 tmp11 236vec128 tmp12 237vec128 tmp13 238vec128 tmp14 239vec128 tmp15 240vec128 tmp16 
241vec128 tmp17 242vec128 tmp18 243vec128 tmp19 244 245vec128 et0aat10 246vec128 et0aat11 247vec128 et0aat12 248vec128 et0aat13 249vec128 et0aat14 250vec128 et0aat15 251vec128 et0aat16 252vec128 et0aat17 253vec128 et0aat18 254vec128 et0aat19 255vec128 et0aat110 256vec128 et0aat111 257vec128 et0aat112 258vec128 et0aat113 259vec128 et0aat114 260vec128 et0aat115 261vec128 et0aat116 262vec128 et0aat117 263vec128 et0aat118 264vec128 et0aat119 265 266vec128 t4t0bbt10 267vec128 t4t0bbt11 268vec128 t4t0bbt12 269vec128 t4t0bbt13 270vec128 t4t0bbt14 271vec128 t4t0bbt15 272vec128 t4t0bbt16 273vec128 t4t0bbt17 274vec128 t4t0bbt18 275vec128 t4t0bbt19 276vec128 t4t0bbt110 277vec128 t4t0bbt111 278vec128 t4t0bbt112 279vec128 t4t0bbt113 280vec128 t4t0bbt114 281vec128 t4t0bbt115 282vec128 t4t0bbt116 283vec128 t4t0bbt117 284vec128 t4t0bbt118 285vec128 t4t0bbt119 286 287vec128 aacbbbda0 288vec128 aacbbbda1 289vec128 aacbbbda2 290vec128 aacbbbda3 291vec128 aacbbbda4 292vec128 aacbbbda5 293vec128 aacbbbda6 294vec128 aacbbbda7 295vec128 aacbbbda8 296vec128 aacbbbda9 297vec128 aacbbbda10 298vec128 aacbbbda11 299vec128 aacbbbda12 300vec128 aacbbbda13 301vec128 aacbbbda14 302vec128 aacbbbda15 303vec128 aacbbbda16 304vec128 aacbbbda17 305vec128 aacbbbda18 306vec128 aacbbbda19 307vec128 aacbbbda20 308vec128 aacbbbda21 309vec128 aacbbbda22 310vec128 aacbbbda23 311vec128 aacbbbda24 312vec128 aacbbbda25 313vec128 aacbbbda26 314vec128 aacbbbda27 315vec128 aacbbbda28 316vec128 aacbbbda29 317vec128 aacbbbda30 318vec128 aacbbbda31 319vec128 aacbbbda32 320vec128 aacbbbda33 321vec128 aacbbbda34 322vec128 aacbbbda35 323vec128 aacbbbda36 324vec128 aacbbbda37 325vec128 aacbbbda38 326vec128 aacbbbda39 327 328vec128 z4x5x4t20 329vec128 z4x5x4t21 330vec128 z4x5x4t22 331vec128 z4x5x4t23 332vec128 z4x5x4t24 333vec128 z4x5x4t25 334vec128 z4x5x4t26 335vec128 z4x5x4t27 336vec128 z4x5x4t28 337vec128 z4x5x4t29 338vec128 z4x5x4t210 339vec128 z4x5x4t211 340vec128 z4x5x4t212 341vec128 z4x5x4t213 342vec128 z4x5x4t214 
343vec128 z4x5x4t215 344vec128 z4x5x4t216 345vec128 z4x5x4t217 346vec128 z4x5x4t218 347vec128 z4x5x4t219 348vec128 z4x5x4t220 349vec128 z4x5x4t221 350vec128 z4x5x4t222 351vec128 z4x5x4t223 352vec128 z4x5x4t224 353vec128 z4x5x4t225 354vec128 z4x5x4t226 355vec128 z4x5x4t227 356vec128 z4x5x4t228 357vec128 z4x5x4t229 358vec128 z4x5x4t230 359vec128 z4x5x4t231 360vec128 z4x5x4t232 361vec128 z4x5x4t233 362vec128 z4x5x4t234 363vec128 z4x5x4t235 364vec128 z4x5x4t236 365vec128 z4x5x4t237 366vec128 z4x5x4t238 367vec128 z4x5x4t239 368 369 370vec128 carry 371vec128 carry0 372vec128 carry1 373vec128 carry2 374vec128 carry3 375vec128 carry4 376vec128 carry5 377vec128 carry6 378vec128 carry7 379vec128 carry8 380vec128 carry9 381vec128 carry10 382vec128 carry11 383vec128 carry12 384vec128 carry13 385vec128 carry14 386vec128 carry15 387vec128 carry16 388vec128 carry17 389vec128 carry18 390vec128 carry19 391vec128 red 392vec128 red0 393vec128 red1 394vec128 red2 395vec128 red3 396vec128 red4 397 398vec128 comb13 399vec128 comb22 400vec128 comb31 401vec128 redcoeffmask 402vec128 redcoeffmaskend 403vec128 redcoeffmaskveryend 404vec128 shuf0_01 405vec128 shuf0_2 406vec128 shuf0_3 407vec128 shuf1_01 408vec128 shuf1_2 409vec128 shuf1_3 410vec128 shuf2_01 411vec128 shuf2_2 412vec128 shuf2_3 413vec128 shuf3_01 414vec128 shuf3_2 415vec128 shuf3_3 416vec128 selw0220 417vec128 selw0105 418vec128 selw2325 419vec128 selw0433 420vec128 selw261c0 421vec128 selw0342 422vec128 selw1362 423vec128 selw3333 424vec128 sel01 425vec128 sel12 426vec128 sel23 427vec128 sel30 428vec128 mask12 429vec128 mask13 430 431 432vec128 bit 433vec128 done 434vec128 extbit 435vec128 check 436vec128 prevextbit 437vec128 nprevextbit 438vec128 loopmask 439vec128 flip 440vec128 nflip 441vec128 zero 442vec128 one 443vec128 a24vec 444vec128 2pconsts0 445vec128 2pconsts 446vec128 2p_03 447vec128 2p_47 448vec128 2p_811 449vec128 2p_1215 450vec128 2p_1619 451vec128 swapendian 452 453vec128 tmp00 454vec128 tmp00b 455vec128 tmp01 
456vec128 tmp01b 457vec128 tmp02 458vec128 tmp02b 459vec128 tmp03 460vec128 tmp03b 461vec128 tmp04 462vec128 tmp04n 463vec128 tmp04b 464vec128 tmp04bn 465vec128 tmp10a 466vec128 tmp10b 467vec128 tmp11a 468vec128 tmp11b 469vec128 tmp12a 470vec128 tmp12b 471vec128 tmp13a 472vec128 tmp13b 473vec128 tmp14n 474vec128 tmp14a 475vec128 tmp14an 476vec128 tmp14b 477vec128 tmp14bn 478vec128 tmp20 479vec128 tmp20a 480vec128 tmp20b 481vec128 tmp21 482vec128 tmp21a 483vec128 tmp21b 484vec128 tmp22 485vec128 tmp22a 486vec128 tmp22b 487vec128 tmp23 488vec128 tmp23a 489vec128 tmp23b 490vec128 tmp24 491vec128 tmp24n 492vec128 tmp24a 493vec128 tmp24an 494vec128 tmp24b 495vec128 tmp24bn 496vec128 tmp30 497vec128 tmp30a 498vec128 tmp30b 499vec128 tmp31 500vec128 tmp31a 501vec128 tmp31b 502vec128 tmp32 503vec128 tmp32a 504vec128 tmp32b 505vec128 tmp33 506vec128 tmp33a 507vec128 tmp33b 508vec128 tmp34 509vec128 tmp34n 510vec128 tmp34a 511vec128 tmp34an 512vec128 tmp34b 513vec128 tmp34bn 514 515 516vec128 call0 517vec128 call1 518vec128 call2 519vec128 call3 520vec128 call4 521vec128 call5 522vec128 call6 523vec128 call7 524vec128 call8 525vec128 call9 526vec128 call10 527vec128 call11 528vec128 call12 529vec128 call13 530vec128 call14 531vec128 call15 532vec128 call16 533vec128 call17 534vec128 call18 535vec128 call19 536vec128 call20 537vec128 call21 538vec128 call22 539vec128 call23 540vec128 call24 541vec128 call25 542vec128 call26 543vec128 call27 544vec128 call28 545vec128 call29 546vec128 call30 547vec128 call31 548vec128 call32 549vec128 call33 550vec128 call34 551vec128 call35 552vec128 call36 553vec128 call37 554vec128 call38 555vec128 call39 556vec128 call40 557vec128 call41 558vec128 call42 559vec128 call43 560vec128 call44 561vec128 call45 562vec128 call46 563vec128 call47 564 565caller call0 566caller call1 567caller call2 568caller call3 569caller call4 570caller call5 571caller call6 572caller call7 573caller call8 574caller call9 575caller call10 576caller call11 
577caller call12 578caller call13 579caller call14 580caller call15 581caller call16 582caller call17 583caller call18 584caller call19 585caller call20 586caller call21 587caller call22 588caller call23 589caller call24 590caller call25 591caller call26 592caller call27 593caller call28 594caller call29 595caller call30 596caller call31 597caller call32 598caller call33 599caller call34 600caller call35 601caller call36 602caller call37 603caller call38 604caller call39 605caller call40 606caller call41 607caller call42 608caller call43 609caller call44 610caller call45 611caller call46 612caller call47 613 614stack128 call0_stack 615stack128 call1_stack 616stack128 call2_stack 617stack128 call3_stack 618stack128 call4_stack 619stack128 call5_stack 620stack128 call6_stack 621stack128 call7_stack 622stack128 call8_stack 623stack128 call9_stack 624stack128 call10_stack 625stack128 call11_stack 626stack128 call12_stack 627stack128 call13_stack 628stack128 call14_stack 629stack128 call15_stack 630stack128 call16_stack 631stack128 call17_stack 632stack128 call18_stack 633stack128 call19_stack 634stack128 call20_stack 635stack128 call21_stack 636stack128 call22_stack 637stack128 call23_stack 638stack128 call24_stack 639stack128 call25_stack 640stack128 call26_stack 641stack128 call27_stack 642stack128 call28_stack 643stack128 call29_stack 644stack128 call30_stack 645stack128 call31_stack 646stack128 call32_stack 647stack128 call33_stack 648stack128 call34_stack 649stack128 call35_stack 650stack128 call36_stack 651stack128 call37_stack 652stack128 call38_stack 653stack128 call39_stack 654stack128 call40_stack 655stack128 call41_stack 656stack128 call42_stack 657stack128 call43_stack 658stack128 call44_stack 659stack128 call45_stack 660stack128 call46_stack 661stack128 call47_stack 662 663stack128 prevextbit_stack 664 665vec128 try 666 667enter mladder 668 669call0_stack = call0 670call1_stack = call1 671call2_stack = call2 672call3_stack = call3 673call4_stack = call4 
674call5_stack = call5 675call6_stack = call6 676call7_stack = call7 677call8_stack = call8 678call9_stack = call9 679call10_stack = call10 680call11_stack = call11 681call12_stack = call12 682call13_stack = call13 683call14_stack = call14 684call15_stack = call15 685call16_stack = call16 686call17_stack = call17 687call18_stack = call18 688call19_stack = call19 689call20_stack = call20 690call21_stack = call21 691call22_stack = call22 692call23_stack = call23 693call24_stack = call24 694call25_stack = call25 695call26_stack = call26 696call27_stack = call27 697call28_stack = call28 698call29_stack = call29 699call30_stack = call30 700call31_stack = call31 701call32_stack = call32 702call33_stack = call33 703call34_stack = call34 704call35_stack = call35 705call36_stack = call36 706call37_stack = call37 707call38_stack = call38 708call39_stack = call39 709call40_stack = call40 710call41_stack = call41 711call42_stack = call42 712call43_stack = call43 713call44_stack = call44 714call45_stack = call45 715call46_stack = call46 716call47_stack = call47 717 718int32323232 zero = 0 719one = extern(_one) 720uint32323232 loopmask = 1 721loopmask <<= (8 * 15) 722loopmask <<= (6 % 8) 723uint32323232 prevextbit = 0 724 725swapendian = extern(_swapendian) 726shuf0_01 = extern(_shuf0_01) 727shuf0_2 = extern(_shuf0_2) 728shuf0_3 = extern(_shuf0_3) 729shuf1_01 = extern(_shuf1_01) 730shuf1_2 = extern(_shuf1_2) 731shuf1_3 = extern(_shuf1_3) 732shuf2_01 = extern(_shuf2_01) 733shuf2_2 = extern(_shuf2_2) 734shuf2_3 = extern(_shuf2_3) 735shuf3_01 = extern(_shuf3_01) 736shuf3_2 = extern(_shuf3_2) 737shuf3_3 = extern(_shuf3_3) 738mask12 = extern(_mask12) 739mask13 = extern(_mask13) 740selw0220 = extern(_selw0220) 741selw0105 = extern(_selw0105) 742selw2325 = extern(_selw2325) 743selw0433 = extern (_selw0433) 744selw261c0 = extern(_selw261c0) 745selw0342 = extern(_selw0342) 746selw1362 = extern(_selw1362) 747selw3333 = extern(_selw3333) 748sel01 = extern(select01) 749sel12 = 
extern(select12) 750sel23 = extern(select23) 751sel30 = extern(select30) 7522pconsts0 = extern(_2pconsts0) 7532pconsts = extern(_2pconsts) 754redcoeffmask = extern(redCoeffMask) 755redcoeffmaskend = extern(redCoeffMaskEnd) 756redcoeffmaskveryend = extern(redCoeffMaskVeryEnd) 757 758comb13 = extern(combine13) 759comb22 = extern(combine22) 760comb31 = extern(combine31) 761comb31 = extern(combine31) 762 7632p_03 = extern(_2p_03) 7642p_47 = extern(_2p_47) 7652p_811 = extern(_2p_811) 7662p_1215 = extern(_2p_1215) 7672p_1619 = extern(_2p_1619) 768 769a24vec = extern(_a24vec) 770 771# Initialization 772int32323232 done = 0 773sk = *(vec128 *) ((skp + 16) & ~15) 774sk = select bytes from sk by swapendian 775 776x1_03 = *(vec128 *) ((xp + 0) & ~15) 777x1_47 = *(vec128 *) ((xp + 16) & ~15) 778x1_811 = *(vec128 *) ((xp + 32) & ~15) 779x1_1215 = *(vec128 *) ((xp + 48) & ~15) 780x1_1619 = *(vec128 *) ((xp + 64) & ~15) 781 782x2_03 = one 783x2_47 = zero 784x2_811 = zero 785x2_1215 = zero 786x2_1619 = zero 787 788z2_03 = zero 789z2_47 = zero 790z2_811 = zero 791z2_1215 = zero 792z2_1619 = zero 793 794x3_03 = x1_03 795x3_47 = x1_47 796x3_811 = x1_811 797x3_1215 = x1_1215 798x3_1619 = x1_1619 799 800z3_03 = one 801z3_47 = zero 802z3_811 = zero 803z3_1215 = zero 804z3_1619 = zero 805 806loop: 807 808bit = sk & loopmask 809uint32323232 extbit = 0 - (bit > 0) 810tmp0 = extbit <<< (8*4) 811extbit ^= tmp0 812tmp0 = extbit <<< (8*8) 813extbit ^= tmp0 814loopmask >>= (1 % 8) 815 816#################################################################################### 817####################### Conditionally swap P2 and P3 ########################### 818#################################################################################### 819 820flip = prevextbit ^ extbit 821nflip = ~(flip | zero) 822prevextbit = extbit 823 824tmp0 = x2_03 & nflip 825tmp1 = x3_03 & flip 826tmp2 = x2_03 & flip 827tmp3 = x3_03 & nflip 828x2_03 = tmp0 ^ tmp1 829x3_03 = tmp2 ^ tmp3 830 831tmp0 = x2_47 & nflip 
832tmp1 = x3_47 & flip 833tmp2 = x2_47 & flip 834tmp3 = x3_47 & nflip 835x2_47 = tmp0 ^ tmp1 836x3_47 = tmp2 ^ tmp3 837 838tmp0 = x2_811 & nflip 839tmp1 = x3_811 & flip 840tmp2 = x2_811 & flip 841tmp3 = x3_811 & nflip 842x2_811 = tmp0 ^ tmp1 843x3_811 = tmp2 ^ tmp3 844 845tmp0 = x2_1215 & nflip 846tmp1 = x3_1215 & flip 847tmp2 = x2_1215 & flip 848tmp3 = x3_1215 & nflip 849x2_1215 = tmp0 ^ tmp1 850x3_1215 = tmp2 ^ tmp3 851 852tmp0 = x2_1619 & nflip 853tmp1 = x3_1619 & flip 854tmp2 = x2_1619 & flip 855tmp3 = x3_1619 & nflip 856x2_1619 = tmp0 ^ tmp1 857x3_1619 = tmp2 ^ tmp3 858 859tmp0 = z2_03 & nflip 860tmp1 = z3_03 & flip 861tmp2 = z2_03 & flip 862tmp3 = z3_03 & nflip 863z2_03 = tmp0 ^ tmp1 864z3_03 = tmp2 ^ tmp3 865 866tmp0 = z2_47 & nflip 867tmp1 = z3_47 & flip 868tmp2 = z2_47 & flip 869tmp3 = z3_47 & nflip 870z2_47 = tmp0 ^ tmp1 871z3_47 = tmp2 ^ tmp3 872 873tmp0 = z2_811 & nflip 874tmp1 = z3_811 & flip 875tmp2 = z2_811 & flip 876tmp3 = z3_811 & nflip 877z2_811 = tmp0 ^ tmp1 878z3_811 = tmp2 ^ tmp3 879 880tmp0 = z2_1215 & nflip 881tmp1 = z3_1215 & flip 882tmp2 = z2_1215 & flip 883tmp3 = z3_1215 & nflip 884z2_1215 = tmp0 ^ tmp1 885z3_1215 = tmp2 ^ tmp3 886 887tmp0 = z2_1619 & nflip 888tmp1 = z3_1619 & flip 889tmp2 = z2_1619 & flip 890tmp3 = z3_1619 & nflip 891z2_1619 = tmp0 ^ tmp1 892z3_1619 = tmp2 ^ tmp3 893 894 895################################################################################### 896######################### A = X2+Z2 ############################## 897######################### C = X3+Z3 ############################## 898######################### B = X2-Z2 ############################## 899######################### D = X3-Z3 ############################## 900################################################################################### 901 902int32323232 b_1619 = x2_1619 + 2p_1619 903int32323232 d_1619 = x3_1619 + 2p_1619 904int32323232 a_1619 = x2_1619 + z2_1619 905int32323232 c_1619 = x3_1619 + z3_1619 906int32323232 b_1619 -= z2_1619 
907int32323232 d_1619 -= z3_1619 908 909 910int32323232 b_03 = x2_03 + 2p_03 911acbd16 = combine a_1619 and c_1619 by shuf0_01 912int32323232 d_03 = x3_03 + 2p_03 913acbd17 = combine a_1619 and c_1619 by shuf1_01 914int32323232 a_03 = x2_03 + z2_03 915acbd18 = combine a_1619 and c_1619 by shuf2_01 916int32323232 c_03 = x3_03 + z3_03 917acbd19 = combine a_1619 and c_1619 by shuf3_01 918int32323232 b_03 -= z2_03 919acbd16 = combine acbd16 and b_1619 by shuf0_2 920int32323232 d_03 -= z3_03 921acbd17 = combine acbd17 and b_1619 by shuf1_2 922 923int32323232 b_47 = x2_47 + 2p_47 924acbd18 = combine acbd18 and b_1619 by shuf2_2 925int32323232 d_47 = x3_47 + 2p_47 926acbd19 = combine acbd19 and b_1619 by shuf3_2 927int32323232 a_47 = x2_47 + z2_47 928acbd16 = combine acbd16 and d_1619 by shuf0_3 929int32323232 c_47 = x3_47 + z3_47 930acbd17 = combine acbd17 and d_1619 by shuf1_3 931int32323232 b_47 -= z2_47 932acbd18 = combine acbd18 and d_1619 by shuf2_3 933int32323232 d_47 -= z3_47 934acbd19 = combine acbd19 and d_1619 by shuf3_3 935 936int32323232 b_811 = x2_811 + 2p_811 937acbd0 = combine a_03 and c_03 by shuf0_01 938int32323232 d_811 = x3_811 + 2p_811 939acbd1 = combine a_03 and c_03 by shuf1_01 940int32323232 a_811 = x2_811 + z2_811 941acbd4 = combine a_47 and c_47 by shuf0_01 942int32323232 c_811 = x3_811 + z3_811 943acbd5 = combine a_47 and c_47 by shuf1_01 944int32323232 b_811 -= z2_811 945acbd0 = combine acbd0 and b_03 by shuf0_2 946int32323232 d_811 -= z3_811 947acbd1 = combine acbd1 and b_03 by shuf1_2 948 949int32323232 b_1215 = x2_1215 + 2p_1215 950acbd4 = combine acbd4 and b_47 by shuf0_2 951int32323232 d_1215 = x3_1215 + 2p_1215 952acbd5 = combine acbd5 and b_47 by shuf1_2 953int32323232 a_1215 = x2_1215 + z2_1215 954acbd0 = combine acbd0 and d_03 by shuf0_3 955int32323232 c_1215 = x3_1215 + z3_1215 956acbd1 = combine acbd1 and d_03 by shuf1_3 957int32323232 b_1215 -= z2_1215 958acbd4 = combine acbd4 and d_47 by shuf0_3 959int32323232 d_1215 -= z3_1215 
960acbd5 = combine acbd5 and d_47 by shuf1_3 961 962uint32323232 carry = acbd19 >> 12 963acbd8 = combine a_811 and c_811 by shuf0_01 964 965acbd9 = combine a_811 and c_811 by shuf1_01 966acbd12 = combine a_1215 and c_1215 by shuf0_01 967 968acbd19 &= mask12 969acbd13 = combine a_1215 and c_1215 by shuf1_01 970 971uint32323232 carry = (carry & 0xffff) * 19 972acbd8 = combine acbd8 and b_811 by shuf0_2 973 974acbd9 = combine acbd9 and b_811 by shuf1_2 975acbd12 = combine acbd12 and b_1215 by shuf0_2 976acbd13 = combine acbd13 and b_1215 by shuf1_2 977acbd8 = combine acbd8 and d_811 by shuf0_3 978acbd9 = combine acbd9 and d_811 by shuf1_3 979acbd12 = combine acbd12 and d_1215 by shuf0_3 980 981int32323232 acbd0 += carry 982acbd13 = combine acbd13 and d_1215 by shuf1_3 983 984uint32323232 carry1 = acbd4 >> 13 985vec19 = extern(_vec19) 986uint32323232 carry0 = acbd0 >> 13 987lnop 988uint32323232 carry2 = acbd8 >> 13 989uint32323232 carry3 = acbd12 >> 13 990 991int32323232 acbd5 += carry1 992acbd2 = combine a_03 and c_03 by shuf2_01 993acbd4 &= mask13 994acbd6 = combine a_47 and c_47 by shuf2_01 995int32323232 acbd1 += carry0 996acbd10 = combine a_811 and c_811 by shuf2_01 997acbd0 &= mask13 998acbd14 = combine a_1215 and c_1215 by shuf2_01 999int32323232 acbd9 += carry2 1000acbd2 = combine acbd2 and b_03 by shuf2_2 1001acbd8 &= mask13 1002acbd6 = combine acbd6 and b_47 by shuf2_2 1003int32323232 acbd13 += carry3 1004acbd10 = combine acbd10 and b_811 by shuf2_2 1005acbd12 &= mask13 1006acbd14 = combine acbd14 and b_1215 by shuf2_2 1007 1008uint32323232 carry0 = acbd1 >> 13 1009acbd2 = combine acbd2 and d_03 by shuf2_3 1010uint32323232 carry1 = acbd5 >> 13 1011acbd6 = combine acbd6 and d_47 by shuf2_3 1012uint32323232 carry2 = acbd9 >> 13 1013acbd10 = combine acbd10 and d_811 by shuf2_3 1014uint32323232 carry3 = acbd13 >> 13 1015acbd14 = combine acbd14 and d_1215 by shuf2_3 1016 1017int32323232 acbd2 += carry0 1018acbd3 = combine a_03 and c_03 by shuf3_01 1019acbd1 &= 
mask13 1020acbd7 = combine a_47 and c_47 by shuf3_01 1021int32323232 acbd6 += carry1 1022acbd11 = combine a_811 and c_811 by shuf3_01 1023acbd5 &= mask13 1024acbd15 = combine a_1215 and c_1215 by shuf3_01 1025int32323232 acbd10 += carry2 1026acbd3 = combine acbd3 and b_03 by shuf3_2 1027acbd9 &= mask13 1028acbd7 = combine acbd7 and b_47 by shuf3_2 1029int32323232 acbd14 += carry3 1030acbd11 = combine acbd11 and b_811 by shuf3_2 1031acbd13 &= mask13 1032acbd15 = combine acbd15 and b_1215 by shuf3_2 1033 1034uint32323232 carry0 = acbd2 >> 13 1035acbd3 = combine acbd3 and d_03 by shuf3_3 1036uint32323232 carry1 = acbd6 >> 13 1037acbd7 = combine acbd7 and d_47 by shuf3_3 1038uint32323232 carry2 = acbd10 >> 13 1039acbd11 = combine acbd11 and d_811 by shuf3_3 1040uint32323232 carry3 = acbd14 >> 13 1041acbd15 = combine acbd15 and d_1215 by shuf3_3 1042 1043acbd2 &= mask13 1044acbd6 &= mask13 1045acbd10 &= mask13 1046acbd14 &= mask13 1047 1048int32323232 acbd3 += carry0 1049int32323232 acbd7 += carry1 1050int32323232 acbd11 += carry2 1051int32323232 acbd15 += carry3 1052 1053uint32323232 carry0 = acbd3 >> 12 1054uint32323232 carry1 = acbd7 >> 12 1055uint32323232 carry2 = acbd11 >> 12 1056uint32323232 carry3 = acbd15 >> 12 1057 1058acbd3 &= mask12 1059acbd7 &= mask12 1060acbd11 &= mask12 1061abba0 = select bytes from acbd0 by selw0220 1062acbd15 &= mask12 1063abba1 = select bytes from acbd1 by selw0220 1064 1065int32323232 acbd4 += carry0 1066abba2 = select bytes from acbd2 by selw0220 1067int32323232 acbd8 += carry1 1068abba3 = select bytes from acbd3 by selw0220 1069int32323232 acbd12 += carry2 1070int32323232 acbd16 += carry3 1071 1072uint32323232 carry1 = acbd4 >> 13 1073uint32323232 carry2 = acbd8 >> 13 1074uint32323232 carry3 = acbd12 >> 13 1075uint32323232 carry4 = acbd16 >> 13 1076 1077acbd4 &= mask13 1078acbd8 &= mask13 1079acbd12 &= mask13 1080abba4 = select bytes from acbd4 by selw0220 1081acbd16 &= mask13 1082abba8 = select bytes from acbd8 by selw0220 1083 
1084int32323232 acbd5 += carry1 1085abba12 = select bytes from acbd12 by selw0220 1086int32323232 acbd9 += carry2 1087abba16 = select bytes from acbd16 by selw0220 1088int32323232 acbd13 += carry3 1089int32323232 acbd17 += carry4 1090 1091uint32323232 carry1 = acbd5 >> 13 1092uint32323232 carry2 = acbd9 >> 13 1093uint32323232 carry3 = acbd13 >> 13 1094uint32323232 carry4 = acbd17 >> 13 1095 1096acbd5 &= mask13 1097acbd9 &= mask13 1098acbd13 &= mask13 1099abba5 = select bytes from acbd5 by selw0220 1100acbd17 &= mask13 1101abba9 = select bytes from acbd9 by selw0220 1102 1103int32323232 acbd6 += carry1 1104abba13 = select bytes from acbd13 by selw0220 1105int32323232 acbd10 += carry2 1106abba17 = select bytes from acbd17 by selw0220 1107int32323232 acbd14 += carry3 1108int32323232 acbd18 += carry4 1109 1110uint32323232 carry1 = acbd6 >> 13 1111uint32323232 carry2 = acbd10 >> 13 1112uint32323232 carry3 = acbd14 >> 13 1113uint32323232 carry4 = acbd18 >> 13 1114 1115acbd6 &= mask13 1116acbd10 &= mask13 1117acbd14 &= mask13 1118abba6 = select bytes from acbd6 by selw0220 1119acbd18 &= mask13 1120abba10 = select bytes from acbd10 by selw0220 1121 1122int32323232 acbd7 += carry1 1123abba14 = select bytes from acbd14 by selw0220 1124int32323232 acbd11 += carry2 1125abba18 = select bytes from acbd18 by selw0220 1126int32323232 acbd15 += carry3 1127int32323232 acbd19 += carry4 1128 1129 1130 1131################################################################################### 1132######################### AA = A^2 ############################## 1133######################### CB = C*B ############################## 1134######################### BB = B^2 ############################## 1135######################### DA = D*A ############################## 1136################################################################################### 1137 1138int32323232 aacbbbda0 = (acbd0 & 0xffff) * (abba0 & 0xffff) 1139abba7 = select bytes from acbd7 by selw0220 1140int32323232 
aacbbbda1 = (acbd0 & 0xffff) * (abba1 & 0xffff) 1141abba11 = select bytes from acbd11 by selw0220 1142int32323232 aacbbbda2 = (acbd0 & 0xffff) * (abba2 & 0xffff) 1143abba15 = select bytes from acbd15 by selw0220 1144int32323232 aacbbbda3 = (acbd0 & 0xffff) * (abba3 & 0xffff) 1145abba19 = select bytes from acbd19 by selw0220 1146int32323232 aacbbbda4 = (acbd1 & 0xffff) * (abba3 & 0xffff) 1147int32323232 aacbbbda5 = (acbd2 & 0xffff) * (abba3 & 0xffff) 1148int32323232 aacbbbda6 = (acbd3 & 0xffff) * (abba3 & 0xffff) 1149int32323232 aacbbbda7 = (acbd0 & 0xffff) * (abba7 & 0xffff) 1150int32323232 aacbbbda1 += (acbd1 & 0xffff) * (abba0 & 0xffff) 1151int32323232 aacbbbda2 += (acbd1 & 0xffff) * (abba1 & 0xffff) 1152int32323232 aacbbbda3 += (acbd1 & 0xffff) * (abba2 & 0xffff) 1153int32323232 aacbbbda4 += (acbd2 & 0xffff) * (abba2 & 0xffff) 1154int32323232 aacbbbda5 += (acbd3 & 0xffff) * (abba2 & 0xffff) 1155int32323232 aacbbbda6 <<= 1 1156int32323232 aacbbbda7 += (acbd1 & 0xffff) * (abba6 & 0xffff) 1157int32323232 aacbbbda8 = (acbd1 & 0xffff) * (abba7 & 0xffff) 1158int32323232 aacbbbda2 += (acbd2 & 0xffff) * (abba0 & 0xffff) 1159int32323232 aacbbbda3 += (acbd2 & 0xffff) * (abba1 & 0xffff) 1160int32323232 aacbbbda4 += (acbd3 & 0xffff) * (abba1 & 0xffff) 1161int32323232 aacbbbda5 <<= 1 1162int32323232 aacbbbda6 += (acbd0 & 0xffff) * (abba6 & 0xffff) 1163int32323232 aacbbbda7 += (acbd2 & 0xffff) * (abba5 & 0xffff) 1164int32323232 aacbbbda8 += (acbd2 & 0xffff) * (abba6 & 0xffff) 1165int32323232 aacbbbda9 = (acbd2 & 0xffff) * (abba7 & 0xffff) 1166int32323232 aacbbbda3 += (acbd3 & 0xffff) * (abba0 & 0xffff) 1167int32323232 aacbbbda4 <<= 1 1168int32323232 aacbbbda5 += (acbd0 & 0xffff) * (abba5 & 0xffff) 1169int32323232 aacbbbda6 += (acbd1 & 0xffff) * (abba5 & 0xffff) 1170int32323232 aacbbbda7 += (acbd3 & 0xffff) * (abba4 & 0xffff) 1171int32323232 aacbbbda8 += (acbd3 & 0xffff) * (abba5 & 0xffff) 1172int32323232 aacbbbda9 += (acbd3 & 0xffff) * (abba6 & 0xffff) 1173int32323232 
aacbbbda10 = (acbd3 & 0xffff) * (abba7 & 0xffff) 1174int32323232 aacbbbda4 += (acbd0 & 0xffff) * (abba4 & 0xffff) 1175int32323232 aacbbbda5 += (acbd1 & 0xffff) * (abba4 & 0xffff) 1176int32323232 aacbbbda6 += (acbd2 & 0xffff) * (abba4 & 0xffff) 1177int32323232 aacbbbda7 += (acbd4 & 0xffff) * (abba3 & 0xffff) 1178int32323232 aacbbbda8 += (acbd5 & 0xffff) * (abba3 & 0xffff) 1179int32323232 aacbbbda9 += (acbd6 & 0xffff) * (abba3 & 0xffff) 1180int32323232 aacbbbda10 += (acbd7 & 0xffff) * (abba3 & 0xffff) 1181int32323232 aacbbbda11 = (acbd11 & 0xffff) * (abba0 & 0xffff) 1182int32323232 aacbbbda4 += (acbd4 & 0xffff) * (abba0 & 0xffff) 1183int32323232 aacbbbda5 += (acbd4 & 0xffff) * (abba1 & 0xffff) 1184int32323232 aacbbbda6 += (acbd4 & 0xffff) * (abba2 & 0xffff) 1185int32323232 aacbbbda7 += (acbd5 & 0xffff) * (abba2 & 0xffff) 1186int32323232 aacbbbda8 += (acbd6 & 0xffff) * (abba2 & 0xffff) 1187int32323232 aacbbbda9 += (acbd7 & 0xffff) * (abba2 & 0xffff) 1188int32323232 aacbbbda10 <<= 1 1189int32323232 aacbbbda11 += (acbd10 & 0xffff) * (abba1 & 0xffff) 1190int32323232 aacbbbda12 = (acbd1 & 0xffff) * (abba11 & 0xffff) 1191int32323232 aacbbbda5 += (acbd5 & 0xffff) * (abba0 & 0xffff) 1192int32323232 aacbbbda6 += (acbd5 & 0xffff) * (abba1 & 0xffff) 1193int32323232 aacbbbda7 += (acbd6 & 0xffff) * (abba1 & 0xffff) 1194int32323232 aacbbbda8 += (acbd7 & 0xffff) * (abba1 & 0xffff) 1195int32323232 aacbbbda9 <<= 1 1196int32323232 aacbbbda10 += (acbd0 & 0xffff) * (abba10 & 0xffff) 1197int32323232 aacbbbda11 += (acbd9 & 0xffff) * (abba2 & 0xffff) 1198int32323232 aacbbbda12 += (acbd2 & 0xffff) * (abba10 & 0xffff) 1199int32323232 aacbbbda6 += (acbd6 & 0xffff) * (abba0 & 0xffff) 1200int32323232 aacbbbda7 += (acbd7 & 0xffff) * (abba0 & 0xffff) 1201int32323232 aacbbbda8 <<= 1 1202int32323232 aacbbbda9 += (acbd0 & 0xffff) * (abba9 & 0xffff) 1203int32323232 aacbbbda10 += (acbd1 & 0xffff) * (abba9 & 0xffff) 1204int32323232 aacbbbda11 += (acbd8 & 0xffff) * (abba3 & 0xffff) 1205int32323232 
aacbbbda12 += (acbd3 & 0xffff) * (abba9 & 0xffff) 1206int32323232 aacbbbda13 = (acbd2 & 0xffff) * (abba11 & 0xffff) 1207int32323232 aacbbbda14 = (acbd3 & 0xffff) * (abba11 & 0xffff) 1208int32323232 aacbbbda8 += (acbd0 & 0xffff) * (abba8 & 0xffff) 1209int32323232 aacbbbda9 += (acbd1 & 0xffff) * (abba8 & 0xffff) 1210int32323232 aacbbbda10 += (acbd2 & 0xffff) * (abba8 & 0xffff) 1211int32323232 aacbbbda11 += (acbd7 & 0xffff) * (abba4 & 0xffff) 1212int32323232 aacbbbda12 += (acbd5 & 0xffff) * (abba7 & 0xffff) 1213int32323232 aacbbbda13 += (acbd3 & 0xffff) * (abba10 & 0xffff) 1214int32323232 aacbbbda14 += (acbd7 & 0xffff) * (abba7 & 0xffff) 1215int32323232 aacbbbda8 += (acbd4 & 0xffff) * (abba4 & 0xffff) 1216int32323232 aacbbbda9 += (acbd4 & 0xffff) * (abba5 & 0xffff) 1217int32323232 aacbbbda10 += (acbd4 & 0xffff) * (abba6 & 0xffff) 1218int32323232 aacbbbda11 += (acbd6 & 0xffff) * (abba5 & 0xffff) 1219int32323232 aacbbbda12 += (acbd6 & 0xffff) * (abba6 & 0xffff) 1220int32323232 aacbbbda13 += (acbd6 & 0xffff) * (abba7 & 0xffff) 1221int32323232 aacbbbda14 += (acbd11 & 0xffff) * (abba3 & 0xffff) 1222int32323232 aacbbbda8 += (acbd8 & 0xffff) * (abba0 & 0xffff) 1223int32323232 aacbbbda9 += (acbd5 & 0xffff) * (abba4 & 0xffff) 1224int32323232 aacbbbda10 += (acbd5 & 0xffff) * (abba5 & 0xffff) 1225int32323232 aacbbbda11 += (acbd5 & 0xffff) * (abba6 & 0xffff) 1226int32323232 aacbbbda12 += (acbd7 & 0xffff) * (abba5 & 0xffff) 1227int32323232 aacbbbda13 += (acbd7 & 0xffff) * (abba6 & 0xffff) 1228int32323232 aacbbbda14 <<= 1 1229int32323232 aacbbbda15 = (acbd0 & 0xffff) * (abba15 & 0xffff) 1230int32323232 aacbbbda9 += (acbd8 & 0xffff) * (abba1 & 0xffff) 1231int32323232 aacbbbda10 += (acbd6 & 0xffff) * (abba4 & 0xffff) 1232int32323232 aacbbbda11 += (acbd4 & 0xffff) * (abba7 & 0xffff) 1233int32323232 aacbbbda12 += (acbd9 & 0xffff) * (abba3 & 0xffff) 1234int32323232 aacbbbda13 += (acbd10 & 0xffff) * (abba3 & 0xffff) 1235int32323232 aacbbbda14 += (acbd0 & 0xffff) * (abba14 & 0xffff) 
1236int32323232 aacbbbda15 += (acbd1 & 0xffff) * (abba14 & 0xffff) 1237int32323232 aacbbbda9 += (acbd9 & 0xffff) * (abba0 & 0xffff) 1238int32323232 aacbbbda10 += (acbd8 & 0xffff) * (abba2 & 0xffff) 1239int32323232 aacbbbda11 += (acbd3 & 0xffff) * (abba8 & 0xffff) 1240int32323232 aacbbbda12 += (acbd10 & 0xffff) * (abba2 & 0xffff) 1241int32323232 aacbbbda13 += (acbd11 & 0xffff) * (abba2 & 0xffff) 1242int32323232 aacbbbda14 += (acbd1 & 0xffff) * (abba13 & 0xffff) 1243int32323232 aacbbbda15 += (acbd2 & 0xffff) * (abba13 & 0xffff) 1244int32323232 aacbbbda16 = (acbd1 & 0xffff) * (abba15 & 0xffff) 1245int32323232 aacbbbda10 += (acbd9 & 0xffff) * (abba1 & 0xffff) 1246int32323232 aacbbbda11 += (acbd2 & 0xffff) * (abba9 & 0xffff) 1247int32323232 aacbbbda12 += (acbd11 & 0xffff) * (abba1 & 0xffff) 1248int32323232 aacbbbda13 <<= 1 1249int32323232 aacbbbda14 += (acbd2 & 0xffff) * (abba12 & 0xffff) 1250int32323232 aacbbbda15 += (acbd3 & 0xffff) * (abba12 & 0xffff) 1251int32323232 aacbbbda16 += (acbd2 & 0xffff) * (abba14 & 0xffff) 1252int32323232 aacbbbda10 += (acbd10 & 0xffff) * (abba0 & 0xffff) 1253int32323232 aacbbbda11 += (acbd1 & 0xffff) * (abba10 & 0xffff) 1254int32323232 aacbbbda12 <<= 1 1255int32323232 aacbbbda13 += (acbd0 & 0xffff) * (abba13 & 0xffff) 1256int32323232 aacbbbda14 += (acbd4 & 0xffff) * (abba10 & 0xffff) 1257int32323232 aacbbbda15 += (acbd4 & 0xffff) * (abba11 & 0xffff) 1258int32323232 aacbbbda16 += (acbd3 & 0xffff) * (abba13 & 0xffff) 1259int32323232 aacbbbda17 = (acbd2 & 0xffff) * (abba15 & 0xffff) 1260int32323232 aacbbbda11 += (acbd0 & 0xffff) * (abba11 & 0xffff) 1261int32323232 aacbbbda12 += (acbd0 & 0xffff) * (abba12 & 0xffff) 1262int32323232 aacbbbda13 += (acbd1 & 0xffff) * (abba12 & 0xffff) 1263int32323232 aacbbbda14 += (acbd5 & 0xffff) * (abba9 & 0xffff) 1264int32323232 aacbbbda15 += (acbd5 & 0xffff) * (abba10 & 0xffff) 1265int32323232 aacbbbda16 += (acbd5 & 0xffff) * (abba11 & 0xffff) 1266int32323232 aacbbbda17 += (acbd3 & 0xffff) * (abba14 & 0xffff) 
# Product accumulation, continued: each product statement adds
# (acbd<i> & 0xffff) * (abba<j> & 0xffff) into aacbbbda<i+j>, with the
# statements for different accumulators interleaved.  aacbbbda18 through
# aacbbbda26 are started in this stretch with "=".  aacbbbda16, aacbbbda17,
# aacbbbda18, aacbbbda20, aacbbbda21 and aacbbbda22 are each shifted left by
# one bit once ("<<= 1"): products added before that shift end up doubled,
# products added after it do not.
1267int32323232 aacbbbda18 = (acbd3 & 0xffff) * (abba15 & 0xffff) 1268int32323232 aacbbbda12 += (acbd4 & 0xffff) * (abba8 & 0xffff) 1269int32323232 aacbbbda13 += (acbd4 & 0xffff) * (abba9 & 0xffff) 1270int32323232 aacbbbda14 += (acbd6 & 0xffff) * (abba8 & 0xffff) 1271int32323232 aacbbbda15 += (acbd6 & 0xffff) * (abba9 & 0xffff) 1272int32323232 aacbbbda16 += (acbd6 & 0xffff) * (abba10 & 0xffff) 1273int32323232 aacbbbda17 += (acbd6 & 0xffff) * (abba11 & 0xffff) 1274int32323232 aacbbbda18 += (acbd7 & 0xffff) * (abba11 & 0xffff) 1275int32323232 aacbbbda12 += (acbd8 & 0xffff) * (abba4 & 0xffff) 1276int32323232 aacbbbda13 += (acbd5 & 0xffff) * (abba8 & 0xffff) 1277int32323232 aacbbbda14 += (acbd8 & 0xffff) * (abba6 & 0xffff) 1278int32323232 aacbbbda15 += (acbd7 & 0xffff) * (abba8 & 0xffff) 1279int32323232 aacbbbda16 += (acbd7 & 0xffff) * (abba9 & 0xffff) 1280int32323232 aacbbbda17 += (acbd7 & 0xffff) * (abba10 & 0xffff) 1281int32323232 aacbbbda18 += (acbd11 & 0xffff) * (abba7 & 0xffff) 1282int32323232 aacbbbda12 += (acbd12 & 0xffff) * (abba0 & 0xffff) 1283int32323232 aacbbbda13 += (acbd8 & 0xffff) * (abba5 & 0xffff) 1284int32323232 aacbbbda14 += (acbd9 & 0xffff) * (abba5 & 0xffff) 1285int32323232 aacbbbda15 += (acbd8 & 0xffff) * (abba7 & 0xffff) 1286int32323232 aacbbbda16 += (acbd9 & 0xffff) * (abba7 & 0xffff) 1287int32323232 aacbbbda17 += (acbd10 & 0xffff) * (abba7 & 0xffff) 1288int32323232 aacbbbda18 += (acbd15 & 0xffff) * (abba3 & 0xffff) 1289int32323232 aacbbbda19 = (acbd0 & 0xffff) * (abba19 & 0xffff) 1290int32323232 aacbbbda13 += (acbd9 & 0xffff) * (abba4 & 0xffff) 1291int32323232 aacbbbda14 += (acbd10 & 0xffff) * (abba4 & 0xffff) 1292int32323232 aacbbbda15 += (acbd9 & 0xffff) * (abba6 & 0xffff) 1293int32323232 aacbbbda16 += (acbd10 & 0xffff) * (abba6 & 0xffff) 1294int32323232 aacbbbda17 += (acbd11 & 0xffff) * (abba6 & 0xffff) 1295int32323232 aacbbbda18 <<= 1 1296int32323232 aacbbbda19 += (acbd1 & 0xffff) * (abba18 & 0xffff) 1297int32323232 aacbbbda13 += (acbd12 & 
0xffff) * (abba1 & 0xffff) 1298int32323232 aacbbbda14 += (acbd12 & 0xffff) * (abba2 & 0xffff) 1299int32323232 aacbbbda15 += (acbd10 & 0xffff) * (abba5 & 0xffff) 1300int32323232 aacbbbda16 += (acbd11 & 0xffff) * (abba5 & 0xffff) 1301int32323232 aacbbbda17 += (acbd14 & 0xffff) * (abba3 & 0xffff) 1302int32323232 aacbbbda18 += (acbd0 & 0xffff) * (abba18 & 0xffff) 1303int32323232 aacbbbda19 += (acbd2 & 0xffff) * (abba17 & 0xffff) 1304int32323232 aacbbbda13 += (acbd13 & 0xffff) * (abba0 & 0xffff) 1305int32323232 aacbbbda14 += (acbd13 & 0xffff) * (abba1 & 0xffff) 1306int32323232 aacbbbda15 += (acbd11 & 0xffff) * (abba4 & 0xffff) 1307int32323232 aacbbbda16 += (acbd13 & 0xffff) * (abba3 & 0xffff) 1308int32323232 aacbbbda17 += (acbd15 & 0xffff) * (abba2 & 0xffff) 1309int32323232 aacbbbda18 += (acbd1 & 0xffff) * (abba17 & 0xffff) 1310int32323232 aacbbbda19 += (acbd3 & 0xffff) * (abba16 & 0xffff) 1311int32323232 aacbbbda20 = (acbd1 & 0xffff) * (abba19 & 0xffff) 1312int32323232 aacbbbda14 += (acbd14 & 0xffff) * (abba0 & 0xffff) 1313int32323232 aacbbbda15 += (acbd12 & 0xffff) * (abba3 & 0xffff) 1314int32323232 aacbbbda16 += (acbd14 & 0xffff) * (abba2 & 0xffff) 1315int32323232 aacbbbda17 <<= 1 1316int32323232 aacbbbda18 += (acbd2 & 0xffff) * (abba16 & 0xffff) 1317int32323232 aacbbbda19 += (acbd4 & 0xffff) * (abba15 & 0xffff) 1318int32323232 aacbbbda20 += (acbd2 & 0xffff) * (abba18 & 0xffff) 1319int32323232 aacbbbda21 = (acbd2 & 0xffff) * (abba19 & 0xffff) 1320int32323232 aacbbbda15 += (acbd13 & 0xffff) * (abba2 & 0xffff) 1321int32323232 aacbbbda16 += (acbd15 & 0xffff) * (abba1 & 0xffff) 1322int32323232 aacbbbda17 += (acbd0 & 0xffff) * (abba17 & 0xffff) 1323int32323232 aacbbbda18 += (acbd4 & 0xffff) * (abba14 & 0xffff) 1324int32323232 aacbbbda19 += (acbd5 & 0xffff) * (abba14 & 0xffff) 1325int32323232 aacbbbda20 += (acbd3 & 0xffff) * (abba17 & 0xffff) 1326int32323232 aacbbbda21 += (acbd3 & 0xffff) * (abba18 & 0xffff) 1327int32323232 aacbbbda15 += (acbd14 & 0xffff) * (abba1 & 
0xffff) 1328int32323232 aacbbbda16 <<= 1 1329int32323232 aacbbbda17 += (acbd1 & 0xffff) * (abba16 & 0xffff) 1330int32323232 aacbbbda18 += (acbd5 & 0xffff) * (abba13 & 0xffff) 1331int32323232 aacbbbda19 += (acbd6 & 0xffff) * (abba13 & 0xffff) 1332int32323232 aacbbbda20 += (acbd5 & 0xffff) * (abba15 & 0xffff) 1333int32323232 aacbbbda21 += (acbd6 & 0xffff) * (abba15 & 0xffff) 1334int32323232 aacbbbda15 += (acbd15 & 0xffff) * (abba0 & 0xffff) 1335int32323232 aacbbbda16 += (acbd0 & 0xffff) * (abba16 & 0xffff) 1336int32323232 aacbbbda17 += (acbd4 & 0xffff) * (abba13 & 0xffff) 1337int32323232 aacbbbda18 += (acbd6 & 0xffff) * (abba12 & 0xffff) 1338int32323232 aacbbbda19 += (acbd7 & 0xffff) * (abba12 & 0xffff) 1339int32323232 aacbbbda20 += (acbd6 & 0xffff) * (abba14 & 0xffff) 1340int32323232 aacbbbda21 += (acbd7 & 0xffff) * (abba14 & 0xffff) 1341int32323232 aacbbbda22 = (acbd3 & 0xffff) * (abba19 & 0xffff) 1342int32323232 aacbbbda16 += (acbd4 & 0xffff) * (abba12 & 0xffff) 1343int32323232 aacbbbda17 += (acbd5 & 0xffff) * (abba12 & 0xffff) 1344int32323232 aacbbbda18 += (acbd8 & 0xffff) * (abba10 & 0xffff) 1345int32323232 aacbbbda19 += (acbd8 & 0xffff) * (abba11 & 0xffff) 1346int32323232 aacbbbda20 += (acbd7 & 0xffff) * (abba13 & 0xffff) 1347int32323232 aacbbbda21 += (acbd10 & 0xffff) * (abba11 & 0xffff) 1348int32323232 aacbbbda22 += (acbd7 & 0xffff) * (abba15 & 0xffff) 1349int32323232 aacbbbda16 += (acbd8 & 0xffff) * (abba8 & 0xffff) 1350int32323232 aacbbbda17 += (acbd8 & 0xffff) * (abba9 & 0xffff) 1351int32323232 aacbbbda18 += (acbd9 & 0xffff) * (abba9 & 0xffff) 1352int32323232 aacbbbda19 += (acbd9 & 0xffff) * (abba10 & 0xffff) 1353int32323232 aacbbbda20 += (acbd9 & 0xffff) * (abba11 & 0xffff) 1354int32323232 aacbbbda21 += (acbd11 & 0xffff) * (abba10 & 0xffff) 1355int32323232 aacbbbda22 += (acbd11 & 0xffff) * (abba11 & 0xffff) 1356int32323232 aacbbbda16 += (acbd12 & 0xffff) * (abba4 & 0xffff) 1357int32323232 aacbbbda17 += (acbd9 & 0xffff) * (abba8 & 0xffff) 1358int32323232 
aacbbbda18 += (acbd10 & 0xffff) * (abba8 & 0xffff) 1359int32323232 aacbbbda19 += (acbd10 & 0xffff) * (abba9 & 0xffff) 1360int32323232 aacbbbda20 += (acbd10 & 0xffff) * (abba10 & 0xffff) 1361int32323232 aacbbbda21 += (acbd14 & 0xffff) * (abba7 & 0xffff) 1362int32323232 aacbbbda22 += (acbd15 & 0xffff) * (abba7 & 0xffff) 1363int32323232 aacbbbda16 += (acbd16 & 0xffff) * (abba0 & 0xffff) 1364int32323232 aacbbbda17 += (acbd12 & 0xffff) * (abba5 & 0xffff) 1365int32323232 aacbbbda18 += (acbd12 & 0xffff) * (abba6 & 0xffff) 1366int32323232 aacbbbda19 += (acbd11 & 0xffff) * (abba8 & 0xffff) 1367int32323232 aacbbbda20 += (acbd11 & 0xffff) * (abba9 & 0xffff) 1368int32323232 aacbbbda21 += (acbd15 & 0xffff) * (abba6 & 0xffff) 1369int32323232 aacbbbda22 += (acbd19 & 0xffff) * (abba3 & 0xffff) 1370int32323232 aacbbbda23 = (acbd4 & 0xffff) * (abba19 & 0xffff) 1371int32323232 aacbbbda17 += (acbd13 & 0xffff) * (abba4 & 0xffff) 1372int32323232 aacbbbda18 += (acbd13 & 0xffff) * (abba5 & 0xffff) 1373int32323232 aacbbbda19 += (acbd12 & 0xffff) * (abba7 & 0xffff) 1374int32323232 aacbbbda20 += (acbd13 & 0xffff) * (abba7 & 0xffff) 1375int32323232 aacbbbda21 += (acbd18 & 0xffff) * (abba3 & 0xffff) 1376int32323232 aacbbbda22 <<= 1 1377int32323232 aacbbbda23 += (acbd5 & 0xffff) * (abba18 & 0xffff) 1378int32323232 aacbbbda17 += (acbd16 & 0xffff) * (abba1 & 0xffff) 1379int32323232 aacbbbda18 += (acbd14 & 0xffff) * (abba4 & 0xffff) 1380int32323232 aacbbbda19 += (acbd13 & 0xffff) * (abba6 & 0xffff) 1381int32323232 aacbbbda20 += (acbd14 & 0xffff) * (abba6 & 0xffff) 1382int32323232 aacbbbda21 += (acbd19 & 0xffff) * (abba2 & 0xffff) 1383int32323232 aacbbbda22 += (acbd4 & 0xffff) * (abba18 & 0xffff) 1384int32323232 aacbbbda23 += (acbd6 & 0xffff) * (abba17 & 0xffff) 1385int32323232 aacbbbda17 += (acbd17 & 0xffff) * (abba0 & 0xffff) 1386int32323232 aacbbbda18 += (acbd16 & 0xffff) * (abba2 & 0xffff) 1387int32323232 aacbbbda19 += (acbd14 & 0xffff) * (abba5 & 0xffff) 1388int32323232 aacbbbda20 += (acbd15 & 
0xffff) * (abba5 & 0xffff) 1389int32323232 aacbbbda21 <<= 1 1390int32323232 aacbbbda22 += (acbd5 & 0xffff) * (abba17 & 0xffff) 1391int32323232 aacbbbda23 += (acbd7 & 0xffff) * (abba16 & 0xffff) 1392int32323232 aacbbbda24 = (acbd5 & 0xffff) * (abba19 & 0xffff) 1393int32323232 aacbbbda18 += (acbd17 & 0xffff) * (abba1 & 0xffff) 1394int32323232 aacbbbda19 += (acbd15 & 0xffff) * (abba4 & 0xffff) 1395int32323232 aacbbbda20 += (acbd17 & 0xffff) * (abba3 & 0xffff) 1396int32323232 aacbbbda21 += (acbd4 & 0xffff) * (abba17 & 0xffff) 1397int32323232 aacbbbda22 += (acbd6 & 0xffff) * (abba16 & 0xffff) 1398int32323232 aacbbbda23 += (acbd8 & 0xffff) * (abba15 & 0xffff) 1399int32323232 aacbbbda24 += (acbd6 & 0xffff) * (abba18 & 0xffff) 1400int32323232 aacbbbda18 += (acbd18 & 0xffff) * (abba0 & 0xffff) 1401int32323232 aacbbbda19 += (acbd16 & 0xffff) * (abba3 & 0xffff) 1402int32323232 aacbbbda20 += (acbd18 & 0xffff) * (abba2 & 0xffff) 1403int32323232 aacbbbda21 += (acbd5 & 0xffff) * (abba16 & 0xffff) 1404int32323232 aacbbbda22 += (acbd8 & 0xffff) * (abba14 & 0xffff) 1405int32323232 aacbbbda23 += (acbd9 & 0xffff) * (abba14 & 0xffff) 1406int32323232 aacbbbda24 += (acbd7 & 0xffff) * (abba17 & 0xffff) 1407int32323232 aacbbbda25 = (acbd6 & 0xffff) * (abba19 & 0xffff) 1408int32323232 aacbbbda19 += (acbd17 & 0xffff) * (abba2 & 0xffff) 1409int32323232 aacbbbda20 += (acbd19 & 0xffff) * (abba1 & 0xffff) 1410int32323232 aacbbbda21 += (acbd8 & 0xffff) * (abba13 & 0xffff) 1411int32323232 aacbbbda22 += (acbd9 & 0xffff) * (abba13 & 0xffff) 1412int32323232 aacbbbda23 += (acbd10 & 0xffff) * (abba13 & 0xffff) 1413int32323232 aacbbbda24 += (acbd9 & 0xffff) * (abba15 & 0xffff) 1414int32323232 aacbbbda25 += (acbd7 & 0xffff) * (abba18 & 0xffff) 1415int32323232 aacbbbda19 += (acbd18 & 0xffff) * (abba1 & 0xffff) 1416int32323232 aacbbbda20 <<= 1 1417int32323232 aacbbbda21 += (acbd9 & 0xffff) * (abba12 & 0xffff) 1418int32323232 aacbbbda22 += (acbd10 & 0xffff) * (abba12 & 0xffff) 1419int32323232 aacbbbda23 += 
(acbd11 & 0xffff) * (abba12 & 0xffff) 1420int32323232 aacbbbda24 += (acbd10 & 0xffff) * (abba14 & 0xffff) 1421int32323232 aacbbbda25 += (acbd10 & 0xffff) * (abba15 & 0xffff) 1422int32323232 aacbbbda19 += (acbd19 & 0xffff) * (abba0 & 0xffff) 1423int32323232 aacbbbda20 += (acbd4 & 0xffff) * (abba16 & 0xffff) 1424int32323232 aacbbbda21 += (acbd12 & 0xffff) * (abba9 & 0xffff) 1425int32323232 aacbbbda22 += (acbd12 & 0xffff) * (abba10 & 0xffff) 1426int32323232 aacbbbda23 += (acbd12 & 0xffff) * (abba11 & 0xffff) 1427int32323232 aacbbbda24 += (acbd11 & 0xffff) * (abba13 & 0xffff) 1428int32323232 aacbbbda25 += (acbd11 & 0xffff) * (abba14 & 0xffff) 1429int32323232 aacbbbda26 = (acbd7 & 0xffff) * (abba19 & 0xffff) 1430int32323232 aacbbbda20 += (acbd8 & 0xffff) * (abba12 & 0xffff) 1431int32323232 aacbbbda21 += (acbd13 & 0xffff) * (abba8 & 0xffff) 1432int32323232 aacbbbda22 += (acbd13 & 0xffff) * (abba9 & 0xffff) 1433int32323232 aacbbbda23 += (acbd13 & 0xffff) * (abba10 & 0xffff) 1434int32323232 aacbbbda24 += (acbd13 & 0xffff) * (abba11 & 0xffff) 1435int32323232 aacbbbda25 += (acbd14 & 0xffff) * (abba11 & 0xffff) 1436int32323232 aacbbbda26 += (acbd11 & 0xffff) * (abba15 & 0xffff) 1437int32323232 aacbbbda20 += (acbd12 & 0xffff) * (abba8 & 0xffff) 1438int32323232 aacbbbda21 += (acbd16 & 0xffff) * (abba5 & 0xffff) 1439int32323232 aacbbbda22 += (acbd14 & 0xffff) * (abba8 & 0xffff) 1440int32323232 aacbbbda23 += (acbd14 & 0xffff) * (abba9 & 0xffff) 1441int32323232 aacbbbda24 += (acbd14 & 0xffff) * (abba10 & 0xffff) 1442int32323232 aacbbbda25 += (acbd15 & 0xffff) * (abba10 & 0xffff) 1443int32323232 aacbbbda26 += (acbd15 & 0xffff) * (abba11 & 0xffff) 1444int32323232 aacbbbda20 += (acbd16 & 0xffff) * (abba4 & 0xffff) 1445int32323232 aacbbbda21 += (acbd17 & 0xffff) * (abba4 & 0xffff) 1446int32323232 aacbbbda22 += (acbd16 & 0xffff) * (abba6 & 0xffff) 1447int32323232 aacbbbda23 += (acbd15 & 0xffff) * (abba8 & 0xffff) 1448int32323232 aacbbbda24 += (acbd15 & 0xffff) * (abba9 & 0xffff) 
# Finishes the product accumulation for aacbbbda22..aacbbbda38 (aacbbbda27
# through aacbbbda38 are started here with "="; several accumulators are
# doubled once with "<<= 1" before their remaining products are added), then
# reduces:
#  1. "## Reduction": carries are moved upward through aacbbbda20..aacbbbda38
#     in groups of up to four accumulators.  The carry is taken with >> 13 and
#     the source masked with mask13, except for aacbbbda23/27/31/35, which use
#     >> 12 and mask12.  aacbbbda39 receives aacbbbda38 >> 13.
#  2. aacbbbda<k> += (aacbbbda<k+20> & 0xffff) * (vec19 & 0xffff) for
#     k = 0..19 (vec19 presumably holds 19 - confirm where it is set).
#  3. A serial carry aacbbbda16 -> 17 -> 18 -> 19; the carry out of aacbbbda19
#     (>> 12) is multiplied by 19 as (carry << 4) + 3 * carry and added to
#     aacbbbda0.
#  4. Carry rounds over aacbbbda0..aacbbbda19, four accumulators at a time
#     (>> 12 / mask12 for aacbbbda3/7/11/15, >> 13 / mask13 otherwise).  The
#     masking of the last round continues past the end of this stretch.
# NOTE(review): "aacbbbda19 &= mask12" occurs twice in step 3 with no write
# to aacbbbda19 in between, so the second one looks redundant - confirm it is
# not kept deliberately (e.g. for instruction scheduling) before removing it.
1449int32323232 aacbbbda25 += (acbd18 & 0xffff) * (abba7 & 0xffff) 1450int32323232 aacbbbda26 += (acbd19 & 0xffff) * (abba7 & 0xffff) 1451int32323232 aacbbbda27 = (acbd8 & 0xffff) * (abba19 & 0xffff) 1452int32323232 aacbbbda28 = (acbd9 & 0xffff) * (abba19 & 0xffff) 1453int32323232 aacbbbda22 += (acbd17 & 0xffff) * (abba5 & 0xffff) 1454int32323232 aacbbbda23 += (acbd16 & 0xffff) * (abba7 & 0xffff) 1455int32323232 aacbbbda24 += (acbd17 & 0xffff) * (abba7 & 0xffff) 1456int32323232 aacbbbda25 += (acbd19 & 0xffff) * (abba6 & 0xffff) 1457int32323232 aacbbbda26 <<= 1 1458int32323232 aacbbbda27 += (acbd9 & 0xffff) * (abba18 & 0xffff) 1459int32323232 aacbbbda28 += (acbd10 & 0xffff) * (abba18 & 0xffff) 1460int32323232 aacbbbda22 += (acbd18 & 0xffff) * (abba4 & 0xffff) 1461int32323232 aacbbbda23 += (acbd17 & 0xffff) * (abba6 & 0xffff) 1462int32323232 aacbbbda24 += (acbd18 & 0xffff) * (abba6 & 0xffff) 1463int32323232 aacbbbda25 <<= 1 1464int32323232 aacbbbda26 += (acbd8 & 0xffff) * (abba18 & 0xffff) 1465int32323232 aacbbbda27 += (acbd10 & 0xffff) * (abba17 & 0xffff) 1466int32323232 aacbbbda28 += (acbd11 & 0xffff) * (abba17 & 0xffff) 1467int32323232 aacbbbda29 = (acbd10 & 0xffff) * (abba19 & 0xffff) 1468int32323232 aacbbbda23 += (acbd18 & 0xffff) * (abba5 & 0xffff) 1469int32323232 aacbbbda24 += (acbd19 & 0xffff) * (abba5 & 0xffff) 1470int32323232 aacbbbda25 += (acbd8 & 0xffff) * (abba17 & 0xffff) 1471int32323232 aacbbbda26 += (acbd9 & 0xffff) * (abba17 & 0xffff) 1472int32323232 aacbbbda27 += (acbd11 & 0xffff) * (abba16 & 0xffff) 1473int32323232 aacbbbda28 += (acbd13 & 0xffff) * (abba15 & 0xffff) 1474int32323232 aacbbbda29 += (acbd11 & 0xffff) * (abba18 & 0xffff) 1475int32323232 aacbbbda23 += (acbd19 & 0xffff) * (abba4 & 0xffff) 1476int32323232 aacbbbda24 <<= 1 1477int32323232 aacbbbda25 += (acbd9 & 0xffff) * (abba16 & 0xffff) 1478int32323232 aacbbbda26 += (acbd10 & 0xffff) * (abba16 & 0xffff) 1479int32323232 aacbbbda27 += (acbd12 & 0xffff) * (abba15 & 0xffff) 1480int32323232 
aacbbbda28 += (acbd14 & 0xffff) * (abba14 & 0xffff) 1481int32323232 aacbbbda29 += (acbd14 & 0xffff) * (abba15 & 0xffff) 1482int32323232 aacbbbda30 = (acbd11 & 0xffff) * (abba19 & 0xffff) 1483int32323232 aacbbbda24 += (acbd8 & 0xffff) * (abba16 & 0xffff) 1484int32323232 aacbbbda25 += (acbd12 & 0xffff) * (abba13 & 0xffff) 1485int32323232 aacbbbda26 += (acbd12 & 0xffff) * (abba14 & 0xffff) 1486int32323232 aacbbbda27 += (acbd13 & 0xffff) * (abba14 & 0xffff) 1487int32323232 aacbbbda28 += (acbd15 & 0xffff) * (abba13 & 0xffff) 1488int32323232 aacbbbda29 += (acbd15 & 0xffff) * (abba14 & 0xffff) 1489int32323232 aacbbbda30 += (acbd15 & 0xffff) * (abba15 & 0xffff) 1490int32323232 aacbbbda24 += (acbd12 & 0xffff) * (abba12 & 0xffff) 1491int32323232 aacbbbda25 += (acbd13 & 0xffff) * (abba12 & 0xffff) 1492int32323232 aacbbbda26 += (acbd13 & 0xffff) * (abba13 & 0xffff) 1493int32323232 aacbbbda27 += (acbd14 & 0xffff) * (abba13 & 0xffff) 1494int32323232 aacbbbda28 += (acbd17 & 0xffff) * (abba11 & 0xffff) 1495int32323232 aacbbbda29 += (acbd18 & 0xffff) * (abba11 & 0xffff) 1496int32323232 aacbbbda30 += (acbd19 & 0xffff) * (abba11 & 0xffff) 1497int32323232 aacbbbda24 += (acbd16 & 0xffff) * (abba8 & 0xffff) 1498int32323232 aacbbbda25 += (acbd16 & 0xffff) * (abba9 & 0xffff) 1499int32323232 aacbbbda26 += (acbd14 & 0xffff) * (abba12 & 0xffff) 1500int32323232 aacbbbda27 += (acbd15 & 0xffff) * (abba12 & 0xffff) 1501int32323232 aacbbbda28 += (acbd18 & 0xffff) * (abba10 & 0xffff) 1502int32323232 aacbbbda29 += (acbd19 & 0xffff) * (abba10 & 0xffff) 1503int32323232 aacbbbda30 <<= 1 1504int32323232 aacbbbda31 = (acbd12 & 0xffff) * (abba19 & 0xffff) 1505int32323232 aacbbbda25 += (acbd17 & 0xffff) * (abba8 & 0xffff) 1506int32323232 aacbbbda26 += (acbd16 & 0xffff) * (abba10 & 0xffff) 1507int32323232 aacbbbda27 += (acbd16 & 0xffff) * (abba11 & 0xffff) 1508int32323232 aacbbbda28 += (acbd19 & 0xffff) * (abba9 & 0xffff) 1509int32323232 aacbbbda29 <<= 1 1510int32323232 aacbbbda30 += (acbd12 & 0xffff) * 
(abba18 & 0xffff) 1511int32323232 aacbbbda31 += (acbd13 & 0xffff) * (abba18 & 0xffff) 1512int32323232 aacbbbda32 = (acbd13 & 0xffff) * (abba19 & 0xffff) 1513int32323232 aacbbbda26 += (acbd17 & 0xffff) * (abba9 & 0xffff) 1514int32323232 aacbbbda27 += (acbd17 & 0xffff) * (abba10 & 0xffff) 1515int32323232 aacbbbda28 <<= 1 1516int32323232 aacbbbda29 += (acbd12 & 0xffff) * (abba17 & 0xffff) 1517int32323232 aacbbbda30 += (acbd13 & 0xffff) * (abba17 & 0xffff) 1518int32323232 aacbbbda31 += (acbd14 & 0xffff) * (abba17 & 0xffff) 1519int32323232 aacbbbda32 += (acbd14 & 0xffff) * (abba18 & 0xffff) 1520int32323232 aacbbbda26 += (acbd18 & 0xffff) * (abba8 & 0xffff) 1521int32323232 aacbbbda27 += (acbd18 & 0xffff) * (abba9 & 0xffff) 1522int32323232 aacbbbda28 += (acbd12 & 0xffff) * (abba16 & 0xffff) 1523int32323232 aacbbbda29 += (acbd13 & 0xffff) * (abba16 & 0xffff) 1524int32323232 aacbbbda30 += (acbd14 & 0xffff) * (abba16 & 0xffff) 1525int32323232 aacbbbda31 += (acbd15 & 0xffff) * (abba16 & 0xffff) 1526int32323232 aacbbbda32 += (acbd15 & 0xffff) * (abba17 & 0xffff) 1527int32323232 aacbbbda33 = (acbd14 & 0xffff) * (abba19 & 0xffff) 1528int32323232 aacbbbda27 += (acbd19 & 0xffff) * (abba8 & 0xffff) 1529int32323232 aacbbbda28 += (acbd16 & 0xffff) * (abba12 & 0xffff) 1530int32323232 aacbbbda29 += (acbd16 & 0xffff) * (abba13 & 0xffff) 1531int32323232 aacbbbda30 += (acbd16 & 0xffff) * (abba14 & 0xffff) 1532int32323232 aacbbbda31 += (acbd16 & 0xffff) * (abba15 & 0xffff) 1533int32323232 aacbbbda32 += (acbd17 & 0xffff) * (abba15 & 0xffff) 1534int32323232 aacbbbda33 += (acbd15 & 0xffff) * (abba18 & 0xffff) 1535int32323232 aacbbbda34 = (acbd15 & 0xffff) * (abba19 & 0xffff) 1536int32323232 aacbbbda35 = (acbd16 & 0xffff) * (abba19 & 0xffff) 1537int32323232 aacbbbda29 += (acbd17 & 0xffff) * (abba12 & 0xffff) 1538int32323232 aacbbbda30 += (acbd17 & 0xffff) * (abba13 & 0xffff) 1539int32323232 aacbbbda31 += (acbd17 & 0xffff) * (abba14 & 0xffff) 1540int32323232 aacbbbda32 += (acbd18 & 0xffff) * 
(abba14 & 0xffff) 1541int32323232 aacbbbda33 += (acbd18 & 0xffff) * (abba15 & 0xffff) 1542int32323232 aacbbbda34 += (acbd19 & 0xffff) * (abba15 & 0xffff) 1543int32323232 aacbbbda35 += (acbd17 & 0xffff) * (abba18 & 0xffff) 1544int32323232 aacbbbda36 = (acbd17 & 0xffff) * (abba19 & 0xffff) 1545int32323232 aacbbbda30 += (acbd18 & 0xffff) * (abba12 & 0xffff) 1546int32323232 aacbbbda31 += (acbd18 & 0xffff) * (abba13 & 0xffff) 1547int32323232 aacbbbda32 += (acbd19 & 0xffff) * (abba13 & 0xffff) 1548int32323232 aacbbbda33 += (acbd19 & 0xffff) * (abba14 & 0xffff) 1549int32323232 aacbbbda34 <<= 1 1550int32323232 aacbbbda35 += (acbd18 & 0xffff) * (abba17 & 0xffff) 1551int32323232 aacbbbda36 += (acbd18 & 0xffff) * (abba18 & 0xffff) 1552int32323232 aacbbbda37 = (acbd18 & 0xffff) * (abba19 & 0xffff) 1553int32323232 aacbbbda31 += (acbd19 & 0xffff) * (abba12 & 0xffff) 1554int32323232 aacbbbda32 <<= 1 1555int32323232 aacbbbda33 <<= 1 1556int32323232 aacbbbda34 += (acbd16 & 0xffff) * (abba18 & 0xffff) 1557int32323232 aacbbbda35 += (acbd19 & 0xffff) * (abba16 & 0xffff) 1558int32323232 aacbbbda36 += (acbd19 & 0xffff) * (abba17 & 0xffff) 1559int32323232 aacbbbda37 += (acbd19 & 0xffff) * (abba18 & 0xffff) 1560int32323232 aacbbbda38 = (acbd19 & 0xffff) * (abba19 & 0xffff) 1561int32323232 aacbbbda32 += (acbd16 & 0xffff) * (abba16 & 0xffff) 1562int32323232 aacbbbda33 += (acbd16 & 0xffff) * (abba17 & 0xffff) 1563int32323232 aacbbbda34 += (acbd17 & 0xffff) * (abba17 & 0xffff) 1564int32323232 aacbbbda36 <<= 1 1565int32323232 aacbbbda37 <<= 1 1566int32323232 aacbbbda38 <<= 1 1567int32323232 aacbbbda33 += (acbd17 & 0xffff) * (abba16 & 0xffff) 1568int32323232 aacbbbda34 += (acbd18 & 0xffff) * (abba16 & 0xffff) 1569 1570## Reduction 1571 1572uint32323232 carry0 = aacbbbda20 >> 13 1573uint32323232 carry1 = aacbbbda24 >> 13 1574uint32323232 carry2 = aacbbbda28 >> 13 1575uint32323232 carry3 = aacbbbda32 >> 13 1576 1577int32323232 aacbbbda21 += carry0 1578aacbbbda20 &= mask13 1579int32323232 
aacbbbda25 += carry1 1580aacbbbda24 &= mask13 1581int32323232 aacbbbda29 += carry2 1582aacbbbda28 &= mask13 1583int32323232 aacbbbda33 += carry3 1584aacbbbda32 &= mask13 1585 1586uint32323232 carry0 = aacbbbda21 >> 13 1587uint32323232 carry1 = aacbbbda25 >> 13 1588uint32323232 carry2 = aacbbbda29 >> 13 1589uint32323232 carry3 = aacbbbda33 >> 13 1590 1591int32323232 aacbbbda22 += carry0 1592aacbbbda21 &= mask13 1593int32323232 aacbbbda26 += carry1 1594aacbbbda25 &= mask13 1595int32323232 aacbbbda30 += carry2 1596aacbbbda29 &= mask13 1597int32323232 aacbbbda34 += carry3 1598aacbbbda33 &= mask13 1599 1600uint32323232 carry0 = aacbbbda22 >> 13 1601uint32323232 carry1 = aacbbbda26 >> 13 1602uint32323232 carry2 = aacbbbda30 >> 13 1603uint32323232 carry3 = aacbbbda34 >> 13 1604 1605int32323232 aacbbbda23 += carry0 1606aacbbbda22 &= mask13 1607int32323232 aacbbbda27 += carry1 1608aacbbbda26 &= mask13 1609int32323232 aacbbbda31 += carry2 1610aacbbbda30 &= mask13 1611int32323232 aacbbbda35 += carry3 1612aacbbbda34 &= mask13 1613 1614uint32323232 carry0 = aacbbbda23 >> 12 1615uint32323232 carry1 = aacbbbda27 >> 12 1616uint32323232 carry2 = aacbbbda31 >> 12 1617uint32323232 carry3 = aacbbbda35 >> 12 1618 1619int32323232 aacbbbda24 += carry0 1620aacbbbda23 &= mask12 1621int32323232 aacbbbda28 += carry1 1622aacbbbda27 &= mask12 1623int32323232 aacbbbda32 += carry2 1624aacbbbda31 &= mask12 1625int32323232 aacbbbda36 += carry3 1626aacbbbda35 &= mask12 1627 1628uint32323232 carry1 = aacbbbda24 >> 13 1629uint32323232 carry2 = aacbbbda28 >> 13 1630uint32323232 carry3 = aacbbbda32 >> 13 1631uint32323232 carry4 = aacbbbda36 >> 13 1632 1633int32323232 aacbbbda25 += carry1 1634aacbbbda24 &= mask13 1635int32323232 aacbbbda29 += carry2 1636aacbbbda28 &= mask13 1637int32323232 aacbbbda33 += carry3 1638aacbbbda32 &= mask13 1639int32323232 aacbbbda37 += carry4 1640aacbbbda36 &= mask13 1641 1642uint32323232 carry1 = aacbbbda25 >> 13 1643uint32323232 carry2 = aacbbbda29 >> 13 1644uint32323232 
carry3 = aacbbbda33 >> 13 1645uint32323232 carry4 = aacbbbda37 >> 13 1646 1647int32323232 aacbbbda26 += carry1 1648aacbbbda25 &= mask13 1649int32323232 aacbbbda30 += carry2 1650aacbbbda29 &= mask13 1651int32323232 aacbbbda34 += carry3 1652aacbbbda33 &= mask13 1653int32323232 aacbbbda38 += carry4 1654aacbbbda37 &= mask13 1655 1656uint32323232 carry1 = aacbbbda26 >> 13 1657uint32323232 carry2 = aacbbbda30 >> 13 1658uint32323232 carry3 = aacbbbda34 >> 13 1659uint32323232 aacbbbda39 = aacbbbda38 >> 13 1660 1661int32323232 aacbbbda27 += carry1 1662aacbbbda26 &= mask13 1663int32323232 aacbbbda31 += carry2 1664aacbbbda30 &= mask13 1665int32323232 aacbbbda35 += carry3 1666aacbbbda34 &= mask13 1667 1668uint32323232 carry1 = aacbbbda27 >> 12 1669aacbbbda38 &= mask13 1670uint32323232 carry2 = aacbbbda31 >> 12 1671uint32323232 carry3 = aacbbbda35 >> 12 1672 1673int32323232 aacbbbda28 += carry1 1674aacbbbda27 &= mask12 1675int32323232 aacbbbda32 += carry2 1676aacbbbda31 &= mask12 1677int32323232 aacbbbda36 += carry3 1678aacbbbda35 &= mask12 1679 1680int32323232 aacbbbda0 += (aacbbbda20 & 0xffff) * (vec19 & 0xffff) 1681int32323232 aacbbbda1 += (aacbbbda21 & 0xffff) * (vec19 & 0xffff) 1682int32323232 aacbbbda2 += (aacbbbda22 & 0xffff) * (vec19 & 0xffff) 1683int32323232 aacbbbda3 += (aacbbbda23 & 0xffff) * (vec19 & 0xffff) 1684int32323232 aacbbbda4 += (aacbbbda24 & 0xffff) * (vec19 & 0xffff) 1685int32323232 aacbbbda5 += (aacbbbda25 & 0xffff) * (vec19 & 0xffff) 1686int32323232 aacbbbda6 += (aacbbbda26 & 0xffff) * (vec19 & 0xffff) 1687int32323232 aacbbbda7 += (aacbbbda27 & 0xffff) * (vec19 & 0xffff) 1688int32323232 aacbbbda8 += (aacbbbda28 & 0xffff) * (vec19 & 0xffff) 1689int32323232 aacbbbda9 += (aacbbbda29 & 0xffff) * (vec19 & 0xffff) 1690int32323232 aacbbbda10 += (aacbbbda30 & 0xffff) * (vec19 & 0xffff) 1691int32323232 aacbbbda11 += (aacbbbda31 & 0xffff) * (vec19 & 0xffff) 1692int32323232 aacbbbda12 += (aacbbbda32 & 0xffff) * (vec19 & 0xffff) 1693int32323232 aacbbbda13 += 
(aacbbbda33 & 0xffff) * (vec19 & 0xffff) 1694int32323232 aacbbbda14 += (aacbbbda34 & 0xffff) * (vec19 & 0xffff) 1695int32323232 aacbbbda15 += (aacbbbda35 & 0xffff) * (vec19 & 0xffff) 1696int32323232 aacbbbda16 += (aacbbbda36 & 0xffff) * (vec19 & 0xffff) 1697int32323232 aacbbbda17 += (aacbbbda37 & 0xffff) * (vec19 & 0xffff) 1698int32323232 aacbbbda18 += (aacbbbda38 & 0xffff) * (vec19 & 0xffff) 1699int32323232 aacbbbda19 += (aacbbbda39 & 0xffff) * (vec19 & 0xffff) 1700 1701uint32323232 carry = aacbbbda16 >> 13 1702int32323232 aacbbbda17 += carry 1703uint32323232 carry = aacbbbda17 >> 13 1704int32323232 aacbbbda18 += carry 1705uint32323232 carry = aacbbbda18 >> 13 1706int32323232 aacbbbda19 += carry 1707uint32323232 carry = aacbbbda19 >> 12 1708#Multiply carry by 19 1709int32323232 red = carry << 4 1710int32323232 red += carry 1711int32323232 red += carry 1712int32323232 red += carry 1713 1714int32323232 aacbbbda0 += red 1715aacbbbda19 &= mask12 1716 1717aacbbbda16 &= mask13 1718aacbbbda17 &= mask13 1719aacbbbda18 &= mask13 1720aacbbbda19 &= mask12 1721 1722uint32323232 carry0 = aacbbbda0 >> 13 1723uint32323232 carry1 = aacbbbda4 >> 13 1724uint32323232 carry2 = aacbbbda8 >> 13 1725uint32323232 carry3 = aacbbbda12 >> 13 1726 1727int32323232 aacbbbda1 += carry0 1728int32323232 aacbbbda5 += carry1 1729int32323232 aacbbbda9 += carry2 1730int32323232 aacbbbda13 += carry3 1731 1732aacbbbda0 &= mask13 1733aacbbbda4 &= mask13 1734aacbbbda8 &= mask13 1735aacbbbda12 &= mask13 1736 1737uint32323232 carry0 = aacbbbda1 >> 13 1738uint32323232 carry1 = aacbbbda5 >> 13 1739uint32323232 carry2 = aacbbbda9 >> 13 1740uint32323232 carry3 = aacbbbda13 >> 13 1741 1742int32323232 aacbbbda2 += carry0 1743int32323232 aacbbbda6 += carry1 1744int32323232 aacbbbda10 += carry2 1745int32323232 aacbbbda14 += carry3 1746 1747aacbbbda1 &= mask13 1748aacbbbda5 &= mask13 1749aacbbbda9 &= mask13 1750aacbbbda13 &= mask13 1751 1752uint32323232 carry0 = aacbbbda2 >> 13 1753uint32323232 carry1 = aacbbbda6 
>> 13 1754uint32323232 carry2 = aacbbbda10 >> 13 1755uint32323232 carry3 = aacbbbda14 >> 13 1756 1757int32323232 aacbbbda3 += carry0 1758int32323232 aacbbbda7 += carry1 1759int32323232 aacbbbda11 += carry2 1760int32323232 aacbbbda15 += carry3 1761 1762aacbbbda2 &= mask13 1763aacbbbda6 &= mask13 1764aacbbbda10 &= mask13 1765aacbbbda14 &= mask13 1766 1767uint32323232 carry0 = aacbbbda3 >> 12 1768uint32323232 carry1 = aacbbbda7 >> 12 1769uint32323232 carry2 = aacbbbda11 >> 12 1770uint32323232 carry3 = aacbbbda15 >> 12 1771 1772int32323232 aacbbbda4 += carry0 1773int32323232 aacbbbda8 += carry1 1774int32323232 aacbbbda12 += carry2 1775int32323232 aacbbbda16 += carry3 1776 1777aacbbbda3 &= mask12 1778aacbbbda7 &= mask12 1779aacbbbda11 &= mask12 1780aacbbbda15 &= mask12 1781 1782uint32323232 carry1 = aacbbbda4 >> 13 1783uint32323232 carry2 = aacbbbda8 >> 13 1784uint32323232 carry3 = aacbbbda12 >> 13 1785uint32323232 carry4 = aacbbbda16 >> 13 1786 1787int32323232 aacbbbda5 += carry1 1788int32323232 aacbbbda9 += carry2 1789int32323232 aacbbbda13 += carry3 1790int32323232 aacbbbda17 += carry4 1791 1792aacbbbda4 &= mask13 1793aacbbbda8 &= mask13 1794aacbbbda12 &= mask13 1795aacbbbda16 &= mask13 1796 1797uint32323232 carry1 = aacbbbda5 >> 13 1798uint32323232 carry2 = aacbbbda9 >> 13 1799uint32323232 carry3 = aacbbbda13 >> 13 1800uint32323232 carry4 = aacbbbda17 >> 13 1801 1802int32323232 aacbbbda6 += carry1 1803int32323232 aacbbbda10 += carry2 1804int32323232 aacbbbda14 += carry3 1805int32323232 aacbbbda18 += carry4 1806 1807aacbbbda5 &= mask13 1808aacbbbda9 &= mask13 1809aacbbbda13 &= mask13 1810aacbbbda17 &= mask13 1811 1812uint32323232 carry1 = aacbbbda6 >> 13 1813uint32323232 carry2 = aacbbbda10 >> 13 1814uint32323232 carry3 = aacbbbda14 >> 13 1815uint32323232 carry4 = aacbbbda18 >> 13 1816 1817int32323232 aacbbbda7 += carry1 1818int32323232 aacbbbda11 += carry2 1819int32323232 aacbbbda15 += carry3 1820int32323232 aacbbbda19 += carry4 1821 1822aacbbbda6 &= mask13 
1823aacbbbda10 &= mask13 1824aacbbbda14 &= mask13 1825aacbbbda18 &= mask13 1826 1827################################################################################### 1828################ Multiply aa with a24 and bb with a24-1 ############# 1829################################################################################### 1830 1831int32323232 tmp0 = aacbbbda0 << 16 18322p2p2pcb0 = combine 2pconsts0 and aacbbbda0 by selw0105 1833int32323232 tmp1 = aacbbbda1 << 16 18342p2p2pcb1 = combine 2pconsts and aacbbbda1 by selw0105 1835int32323232 tmp2 = aacbbbda2 << 16 18362p2p2pcb2 = combine 2pconsts and aacbbbda2 by selw0105 1837int32323232 tmp3 = aacbbbda3 << 16 18382p2p2pcb3 = combine 2pconsts and aacbbbda3 by selw2325 1839uint32323232 tmp0 += (tmp0 >> 16) * (a24vec >> 16) 18402p2p2pcb4 = combine 2pconsts and aacbbbda4 by selw0105 1841uint32323232 tmp1 += (tmp1 >> 16) * (a24vec >> 16) 18422p2p2pcb5 = combine 2pconsts and aacbbbda5 by selw0105 1843uint32323232 tmp2 += (tmp2 >> 16) * (a24vec >> 16) 18442p2p2pcb6 = combine 2pconsts and aacbbbda6 by selw0105 1845uint32323232 tmp3 += (tmp3 >> 16) * (a24vec >> 16) 18462p2p2pcb7 = combine 2pconsts and aacbbbda7 by selw2325 1847int32323232 tmp4 = aacbbbda4 << 16 18482p2p2pcb8 = combine 2pconsts and aacbbbda8 by selw0105 1849int32323232 tmp5 = aacbbbda5 << 16 18502p2p2pcb9 = combine 2pconsts and aacbbbda9 by selw0105 1851int32323232 tmp6 = aacbbbda6 << 16 18522p2p2pcb10 = combine 2pconsts and aacbbbda10 by selw0105 1853int32323232 tmp7 = aacbbbda7 << 16 18542p2p2pcb11 = combine 2pconsts and aacbbbda11 by selw2325 1855uint32323232 tmp4 += (tmp4 >> 16) * (a24vec >> 16) 18562p2p2pcb12 = combine 2pconsts and aacbbbda12 by selw0105 1857uint32323232 tmp5 += (tmp5 >> 16) * (a24vec >> 16) 18582p2p2pcb13 = combine 2pconsts and aacbbbda13 by selw0105 1859uint32323232 tmp6 += (tmp6 >> 16) * (a24vec >> 16) 18602p2p2pcb14 = combine 2pconsts and aacbbbda14 by selw0105 1861uint32323232 tmp7 += (tmp7 >> 16) * (a24vec >> 16) 18622p2p2pcb15 = 
combine 2pconsts and aacbbbda15 by selw2325 1863int32323232 tmp8 = aacbbbda8 << 16 18642p2p2pcb16 = combine 2pconsts and aacbbbda16 by selw0105 1865int32323232 tmp9 = aacbbbda9 << 16 18662p2p2pcb17 = combine 2pconsts and aacbbbda17 by selw0105 1867int32323232 tmp10 = aacbbbda10 << 16 18682p2p2pcb18 = combine 2pconsts and aacbbbda18 by selw0105 1869int32323232 tmp11 = aacbbbda11 << 16 18702p2p2pcb19 = combine 2pconsts and aacbbbda19 by selw2325 1871uint32323232 tmp8 += (tmp8 >> 16) * (a24vec >> 16) 1872aa_a24aadada0 = combine aacbbbda0 and tmp0 by selw0433 1873uint32323232 tmp9 += (tmp9 >> 16) * (a24vec >> 16) 1874aa_a24aadada1 = combine aacbbbda1 and tmp1 by selw0433 1875uint32323232 tmp10 += (tmp10 >> 16) * (a24vec >> 16) 1876aa_a24aadada2 = combine aacbbbda2 and tmp2 by selw0433 1877uint32323232 tmp11 += (tmp11 >> 16) * (a24vec >> 16) 1878aa_a24aadada3 = combine aacbbbda3 and tmp3 by selw0433 1879int32323232 tmp12 = aacbbbda12 << 16 1880aa_a24aadada4 = combine aacbbbda4 and tmp4 by selw0433 1881int32323232 tmp13 = aacbbbda13 << 16 1882aa_a24aadada5 = combine aacbbbda5 and tmp5 by selw0433 1883int32323232 tmp14 = aacbbbda14 << 16 1884aa_a24aadada6 = combine aacbbbda6 and tmp6 by selw0433 1885int32323232 tmp15 = aacbbbda15 << 16 1886aa_a24aadada7 = combine aacbbbda7 and tmp7 by selw0433 1887uint32323232 tmp12 += (tmp12 >> 16) * (a24vec >> 16) 1888aa_a24aadada8 = combine aacbbbda8 and tmp8 by selw0433 1889uint32323232 tmp13 += (tmp13 >> 16) * (a24vec >> 16) 1890aa_a24aadada9 = combine aacbbbda9 and tmp9 by selw0433 1891uint32323232 tmp14 += (tmp14 >> 16) * (a24vec >> 16) 1892bb_a24m1bbcb0 = combine aacbbbda0 and tmp0 by selw261c0 1893uint32323232 tmp15 += (tmp15 >> 16) * (a24vec >> 16) 1894bb_a24m1bbcb1 = combine aacbbbda1 and tmp1 by selw261c0 1895int32323232 tmp16 = aacbbbda16 << 16 1896bb_a24m1bbcb2 = combine aacbbbda2 and tmp2 by selw261c0 1897int32323232 tmp17 = aacbbbda17 << 16 1898bb_a24m1bbcb3 = combine aacbbbda3 and tmp3 by selw261c0 1899int32323232 tmp18 = 
# --- a24 multiply tail, then et4t1t0 = aa_a24aadada + 2p2p2pcb - bb_a24m1bbcb ---
# The first token run below completes the "tmp18 = aacbbbda18 << 16" statement
# begun on the preceding line.
# Before the banner: tmp19 = aacbbbda19 << 16, then tmp16..tmp19 each get
#   tmpN += (tmpN >> 16) * (a24vec >> 16),
# interleaved with bb_a24m1bbcb4..9 = combine aacbbbdaN and tmpN by selw261c0.
# After the banner, for k = 0..19:
#   et4t1t0k = aa_a24aadadak + 2p2p2pcbk
# interleaved with the remaining aa_a24aadada10..19 (selw0433) and
# bb_a24m1bbcb10..19 (selw261c0) combines; each aa_a24aadadak is combined
# before the addition that reads it.
# Then et4t1t0k = et4t1t0k - bb_a24m1bbcbk for k = 0..9; the k = 10 statement
# starts at the end of this span and finishes on the following line.
# NOTE(review): adding 2p2p2pcb before the subtraction presumably keeps every
# limb non-negative (the name suggests multiples of p) - confirm against the
# definition of 2pconsts, which is not visible here.
# NOTE(review): per the banner the four 32-bit lanes presumably hold
# E, t4, t1, t0 - confirm lane order against the selw* selector constants.
aacbbbda18 << 16 1900bb_a24m1bbcb4 = combine aacbbbda4 and tmp4 by selw261c0 1901int32323232 tmp19 = aacbbbda19 << 16 1902bb_a24m1bbcb5 = combine aacbbbda5 and tmp5 by selw261c0 1903uint32323232 tmp16 += (tmp16 >> 16) * (a24vec >> 16) 1904bb_a24m1bbcb6 = combine aacbbbda6 and tmp6 by selw261c0 1905uint32323232 tmp17 += (tmp17 >> 16) * (a24vec >> 16) 1906bb_a24m1bbcb7 = combine aacbbbda7 and tmp7 by selw261c0 1907uint32323232 tmp18 += (tmp18 >> 16) * (a24vec >> 16) 1908bb_a24m1bbcb8 = combine aacbbbda8 and tmp8 by selw261c0 1909uint32323232 tmp19 += (tmp19 >> 16) * (a24vec >> 16) 1910bb_a24m1bbcb9 = combine aacbbbda9 and tmp9 by selw261c0 1911 1912################################################################################### 1913######################### E = AA-BB ####################### 1914######################### t4 = a24AA - a24m1BB ####################### 1915######################### t1 = DA-CB ####################### 1916######################### t0 = DA+CB ####################### 1917################################################################################### 1918 1919 1920int32323232 et4t1t00 = aa_a24aadada0 + 2p2p2pcb0 1921aa_a24aadada10 = combine aacbbbda10 and tmp10 by selw0433 1922int32323232 et4t1t01 = aa_a24aadada1 + 2p2p2pcb1 1923aa_a24aadada11 = combine aacbbbda11 and tmp11 by selw0433 1924int32323232 et4t1t02 = aa_a24aadada2 + 2p2p2pcb2 1925aa_a24aadada12 = combine aacbbbda12 and tmp12 by selw0433 1926int32323232 et4t1t03 = aa_a24aadada3 + 2p2p2pcb3 1927aa_a24aadada13 = combine aacbbbda13 and tmp13 by selw0433 1928int32323232 et4t1t04 = aa_a24aadada4 + 2p2p2pcb4 1929aa_a24aadada14 = combine aacbbbda14 and tmp14 by selw0433 1930int32323232 et4t1t05 = aa_a24aadada5 + 2p2p2pcb5 1931aa_a24aadada15 = combine aacbbbda15 and tmp15 by selw0433 1932int32323232 et4t1t06 = aa_a24aadada6 + 2p2p2pcb6 1933aa_a24aadada16 = combine aacbbbda16 and tmp16 by selw0433 1934int32323232 et4t1t07 = aa_a24aadada7 + 2p2p2pcb7 1935aa_a24aadada17 = combine 
aacbbbda17 and tmp17 by selw0433 1936int32323232 et4t1t08 = aa_a24aadada8 + 2p2p2pcb8 1937aa_a24aadada18 = combine aacbbbda18 and tmp18 by selw0433 1938int32323232 et4t1t09 = aa_a24aadada9 + 2p2p2pcb9 1939aa_a24aadada19 = combine aacbbbda19 and tmp19 by selw0433 1940int32323232 et4t1t010 = aa_a24aadada10 + 2p2p2pcb10 1941bb_a24m1bbcb10 = combine aacbbbda10 and tmp10 by selw261c0 1942int32323232 et4t1t011 = aa_a24aadada11 + 2p2p2pcb11 1943bb_a24m1bbcb11 = combine aacbbbda11 and tmp11 by selw261c0 1944int32323232 et4t1t012 = aa_a24aadada12 + 2p2p2pcb12 1945bb_a24m1bbcb12 = combine aacbbbda12 and tmp12 by selw261c0 1946int32323232 et4t1t013 = aa_a24aadada13 + 2p2p2pcb13 1947bb_a24m1bbcb13 = combine aacbbbda13 and tmp13 by selw261c0 1948int32323232 et4t1t014 = aa_a24aadada14 + 2p2p2pcb14 1949bb_a24m1bbcb14 = combine aacbbbda14 and tmp14 by selw261c0 1950int32323232 et4t1t015 = aa_a24aadada15 + 2p2p2pcb15 1951bb_a24m1bbcb15 = combine aacbbbda15 and tmp15 by selw261c0 1952int32323232 et4t1t016 = aa_a24aadada16 + 2p2p2pcb16 1953bb_a24m1bbcb16 = combine aacbbbda16 and tmp16 by selw261c0 1954int32323232 et4t1t017 = aa_a24aadada17 + 2p2p2pcb17 1955bb_a24m1bbcb17 = combine aacbbbda17 and tmp17 by selw261c0 1956int32323232 et4t1t018 = aa_a24aadada18 + 2p2p2pcb18 1957bb_a24m1bbcb18 = combine aacbbbda18 and tmp18 by selw261c0 1958int32323232 et4t1t019 = aa_a24aadada19 + 2p2p2pcb19 1959bb_a24m1bbcb19 = combine aacbbbda19 and tmp19 by selw261c0 1960 1961int32323232 et4t1t00 = et4t1t00 - bb_a24m1bbcb0 1962int32323232 et4t1t01 = et4t1t01 - bb_a24m1bbcb1 1963int32323232 et4t1t02 = et4t1t02 - bb_a24m1bbcb2 1964int32323232 et4t1t03 = et4t1t03 - bb_a24m1bbcb3 1965int32323232 et4t1t04 = et4t1t04 - bb_a24m1bbcb4 1966int32323232 et4t1t05 = et4t1t05 - bb_a24m1bbcb5 1967int32323232 et4t1t06 = et4t1t06 - bb_a24m1bbcb6 1968int32323232 et4t1t07 = et4t1t07 - bb_a24m1bbcb7 1969int32323232 et4t1t08 = et4t1t08 - bb_a24m1bbcb8 1970int32323232 et4t1t09 = et4t1t09 - bb_a24m1bbcb9 1971int32323232 
# --- finish et4t1t0 -= bb_a24m1bbcb, then carry/reduce the 20 limbs ---
# The first statement below completes the k = 10 subtraction begun on the
# preceding line; k = 11..19 follow.
# "Reduction" (top limbs first):
#   carries ripple 16 -> 17 -> 18 -> 19 using shifts of 13, 13, 13;
#   the carry out of limb 19 (shift 12) is multiplied by 19, built as
#   (carry << 4) + carry + carry + carry, and added to limb 0;
#   limbs 16..18 are then masked with mask13 and limb 19 with mask12.
#   NOTE(review): the factor 19 presumably reflects reduction modulo
#   2^255 - 19 - confirm.
# First pass, four chains side by side (carry0..carry3):
#   0/4/8/12 -> 1/5/9/13 -> 2/6/10/14 -> 3/7/11/15 -> 4/8/12/16
#   shift 13 + mask13 out of limbs with index mod 4 in {0,1,2},
#   shift 12 + mask12 out of limbs with index mod 4 == 3.
# Second pass (carry1..carry4):
#   4/8/12/16 -> 5/9/13/17 -> 6/10/14/18 -> 7/11/15/19, shift 13 + mask13.
#   Limbs 7, 11, 15 and 19 receive the last carry and are not masked again
#   in this span.
# Interleaved with the carries, each limb is packed after its last visible
# update: et0aat1N = combine et4t1t0N and aacbbbdaN by selw0342, and
# t4t0bbt1N = combine et4t1t0N and aacbbbdaN by selw1362 (as far as this span
# reaches; the remaining combines follow later in the file).
# NOTE(review): from the names, et0aat1 presumably packs lanes (E, t0, AA, t1)
# and t4t0bbt1 packs (t4, t0, BB, t1) - confirm against the selw0342 /
# selw1362 constants, which are defined outside this span.
# The span ends with the first lines of the next section's banner.
et4t1t010 = et4t1t010 - bb_a24m1bbcb10 1972int32323232 et4t1t011 = et4t1t011 - bb_a24m1bbcb11 1973int32323232 et4t1t012 = et4t1t012 - bb_a24m1bbcb12 1974int32323232 et4t1t013 = et4t1t013 - bb_a24m1bbcb13 1975int32323232 et4t1t014 = et4t1t014 - bb_a24m1bbcb14 1976int32323232 et4t1t015 = et4t1t015 - bb_a24m1bbcb15 1977int32323232 et4t1t016 = et4t1t016 - bb_a24m1bbcb16 1978int32323232 et4t1t017 = et4t1t017 - bb_a24m1bbcb17 1979int32323232 et4t1t018 = et4t1t018 - bb_a24m1bbcb18 1980int32323232 et4t1t019 = et4t1t019 - bb_a24m1bbcb19 1981 1982## Reduction 1983uint32323232 carry0 = et4t1t016 >> 13 1984int32323232 et4t1t017 += carry0 1985uint32323232 carry1 = et4t1t017 >> 13 1986int32323232 et4t1t018 += carry1 1987uint32323232 carry = et4t1t018 >> 13 1988int32323232 et4t1t019 += carry 1989uint32323232 carry = et4t1t019 >> 12 1990 1991#Multiply carry by 19 1992int32323232 red = carry << 4 1993int32323232 red += carry 1994int32323232 red += carry 1995int32323232 red += carry 1996int32323232 et4t1t00 += red 1997 1998et4t1t016 &= mask13 1999et4t1t017 &= mask13 2000et4t1t018 &= mask13 2001et4t1t019 &= mask12 2002 2003uint32323232 carry0 = et4t1t00 >> 13 2004uint32323232 carry1 = et4t1t04 >> 13 2005uint32323232 carry2 = et4t1t08 >> 13 2006uint32323232 carry3 = et4t1t012 >> 13 2007 2008et4t1t00 &= mask13 2009et4t1t04 &= mask13 2010et4t1t08 &= mask13 2011et4t1t012 &= mask13 2012 2013int32323232 et4t1t01 += carry0 2014int32323232 et4t1t05 += carry1 2015int32323232 et4t1t09 += carry2 2016int32323232 et4t1t013 += carry3 2017 2018uint32323232 carry0 = et4t1t01 >> 13 2019uint32323232 carry1 = et4t1t05 >> 13 2020uint32323232 carry2 = et4t1t09 >> 13 2021uint32323232 carry3 = et4t1t013 >> 13 2022 2023et4t1t01 &= mask13 2024et4t1t05 &= mask13 2025et4t1t09 &= mask13 2026et4t1t013 &= mask13 2027 2028int32323232 et4t1t02 += carry0 2029int32323232 et4t1t06 += carry1 2030int32323232 et4t1t010 += carry2 2031int32323232 et4t1t014 += carry3 2032 2033uint32323232 carry0 = et4t1t02 >> 13 
2034uint32323232 carry1 = et4t1t06 >> 13 2035uint32323232 carry2 = et4t1t010 >> 13 2036uint32323232 carry3 = et4t1t014 >> 13 2037 2038et4t1t02 &= mask13 2039et4t1t06 &= mask13 2040et4t1t010 &= mask13 2041et4t1t014 &= mask13 2042 2043int32323232 et4t1t03 += carry0 2044int32323232 et4t1t07 += carry1 2045int32323232 et4t1t011 += carry2 2046int32323232 et4t1t015 += carry3 2047 2048uint32323232 carry0 = et4t1t03 >> 12 2049uint32323232 carry1 = et4t1t07 >> 12 2050uint32323232 carry2 = et4t1t011 >> 12 2051uint32323232 carry3 = et4t1t015 >> 12 2052 2053et4t1t03 &= mask12 2054et4t1t07 &= mask12 2055et4t1t011 &= mask12 2056et0aat10 = combine et4t1t00 and aacbbbda0 by selw0342 2057et4t1t015 &= mask12 2058et0aat11 = combine et4t1t01 and aacbbbda1 by selw0342 2059 2060int32323232 et4t1t04 += carry0 2061et0aat12 = combine et4t1t02 and aacbbbda2 by selw0342 2062int32323232 et4t1t08 += carry1 2063et0aat13 = combine et4t1t03 and aacbbbda3 by selw0342 2064int32323232 et4t1t012 += carry2 2065t4t0bbt10 = combine et4t1t00 and aacbbbda0 by selw1362 2066int32323232 et4t1t016 += carry3 2067t4t0bbt11 = combine et4t1t01 and aacbbbda1 by selw1362 2068 2069uint32323232 carry1 = et4t1t04 >> 13 2070t4t0bbt12 = combine et4t1t02 and aacbbbda2 by selw1362 2071uint32323232 carry2 = et4t1t08 >> 13 2072t4t0bbt13 = combine et4t1t03 and aacbbbda3 by selw1362 2073uint32323232 carry3 = et4t1t012 >> 13 2074uint32323232 carry4 = et4t1t016 >> 13 2075 2076et4t1t04 &= mask13 2077et4t1t08 &= mask13 2078et4t1t012 &= mask13 2079et0aat14 = combine et4t1t04 and aacbbbda4 by selw0342 2080et4t1t016 &= mask13 2081et0aat18 = combine et4t1t08 and aacbbbda8 by selw0342 2082 2083int32323232 et4t1t05 += carry1 2084et0aat112 = combine et4t1t012 and aacbbbda12 by selw0342 2085int32323232 et4t1t09 += carry2 2086et0aat116 = combine et4t1t016 and aacbbbda16 by selw0342 2087int32323232 et4t1t013 += carry3 2088t4t0bbt14 = combine et4t1t04 and aacbbbda4 by selw1362 2089int32323232 et4t1t017 += carry4 2090t4t0bbt18 = combine 
et4t1t08 and aacbbbda8 by selw1362 2091 2092uint32323232 carry1 = et4t1t05 >> 13 2093t4t0bbt112 = combine et4t1t012 and aacbbbda12 by selw1362 2094uint32323232 carry2 = et4t1t09 >> 13 2095t4t0bbt116 = combine et4t1t016 and aacbbbda16 by selw1362 2096uint32323232 carry3 = et4t1t013 >> 13 2097uint32323232 carry4 = et4t1t017 >> 13 2098 2099et4t1t05 &= mask13 2100et4t1t09 &= mask13 2101et4t1t013 &= mask13 2102et0aat15 = combine et4t1t05 and aacbbbda5 by selw0342 2103et4t1t017 &= mask13 2104et0aat19 = combine et4t1t09 and aacbbbda9 by selw0342 2105 2106int32323232 et4t1t06 += carry1 2107et0aat113 = combine et4t1t013 and aacbbbda13 by selw0342 2108int32323232 et4t1t010 += carry2 2109et0aat117 = combine et4t1t017 and aacbbbda17 by selw0342 2110int32323232 et4t1t014 += carry3 2111t4t0bbt15 = combine et4t1t05 and aacbbbda5 by selw1362 2112int32323232 et4t1t018 += carry4 2113t4t0bbt19 = combine et4t1t09 and aacbbbda9 by selw1362 2114 2115uint32323232 carry1 = et4t1t06 >> 13 2116t4t0bbt113 = combine et4t1t013 and aacbbbda13 by selw1362 2117uint32323232 carry2 = et4t1t010 >> 13 2118t4t0bbt117 = combine et4t1t017 and aacbbbda17 by selw1362 2119uint32323232 carry3 = et4t1t014 >> 13 2120uint32323232 carry4 = et4t1t018 >> 13 2121 2122et4t1t06 &= mask13 2123et4t1t010 &= mask13 2124et4t1t014 &= mask13 2125et0aat16 = combine et4t1t06 and aacbbbda6 by selw0342 2126et4t1t018 &= mask13 2127et0aat110 = combine et4t1t010 and aacbbbda10 by selw0342 2128 2129int32323232 et4t1t07 += carry1 2130et0aat114 = combine et4t1t014 and aacbbbda14 by selw0342 2131int32323232 et4t1t011 += carry2 2132et0aat118 = combine et4t1t018 and aacbbbda18 by selw0342 2133int32323232 et4t1t015 += carry3 2134t4t0bbt16 = combine et4t1t06 and aacbbbda6 by selw1362 2135int32323232 et4t1t019 += carry4 2136t4t0bbt110 = combine et4t1t010 and aacbbbda10 by selw1362 2137 2138################################################################################## 2139######################### Z4 = E*t4 ######################### 
2140######################### X5 = t0^2 ######################### 2141######################### X4 = AA*BB ######################### 2142######################### t2 = t1^2 ######################### 2143################################################################################## 2144 2145int32323232 z4x5x4t20 = (t4t0bbt10 & 0xffff) * (et0aat10 & 0xffff) 2146et0aat17 = combine et4t1t07 and aacbbbda7 by selw0342 2147int32323232 z4x5x4t21 = (t4t0bbt10 & 0xffff) * (et0aat11 & 0xffff) 2148t4t0bbt17 = combine et4t1t07 and aacbbbda7 by selw1362 2149int32323232 z4x5x4t22 = (t4t0bbt10 & 0xffff) * (et0aat12 & 0xffff) 2150et0aat111 = combine et4t1t011 and aacbbbda11 by selw0342 2151int32323232 z4x5x4t23 = (t4t0bbt10 & 0xffff) * (et0aat13 & 0xffff) 2152t4t0bbt111 = combine et4t1t011 and aacbbbda11 by selw1362 2153int32323232 z4x5x4t24 = (t4t0bbt11 & 0xffff) * (et0aat13 & 0xffff) 2154t4t0bbt114 = combine et4t1t014 and aacbbbda14 by selw1362 2155int32323232 z4x5x4t25 = (t4t0bbt12 & 0xffff) * (et0aat13 & 0xffff) 2156et0aat115 = combine et4t1t015 and aacbbbda15 by selw0342 2157int32323232 z4x5x4t26 = (t4t0bbt13 & 0xffff) * (et0aat13 & 0xffff) 2158t4t0bbt115 = combine et4t1t015 and aacbbbda15 by selw1362 2159int32323232 z4x5x4t27 = (t4t0bbt10 & 0xffff) * (et0aat17 & 0xffff) 2160t4t0bbt118 = combine et4t1t018 and aacbbbda18 by selw1362 2161int32323232 z4x5x4t21 += (t4t0bbt11 & 0xffff) * (et0aat10 & 0xffff) 2162et0aat119 = combine et4t1t019 and aacbbbda19 by selw0342 2163int32323232 z4x5x4t22 += (t4t0bbt11 & 0xffff) * (et0aat11 & 0xffff) 2164t4t0bbt119 = combine et4t1t019 and aacbbbda19 by selw1362 2165int32323232 z4x5x4t23 += (t4t0bbt11 & 0xffff) * (et0aat12 & 0xffff) 2166int32323232 z4x5x4t24 += (t4t0bbt12 & 0xffff) * (et0aat12 & 0xffff) 2167int32323232 z4x5x4t25 += (t4t0bbt13 & 0xffff) * (et0aat12 & 0xffff) 2168int32323232 z4x5x4t26 <<= 1 2169int32323232 z4x5x4t27 += (t4t0bbt11 & 0xffff) * (et0aat16 & 0xffff) 2170int32323232 z4x5x4t28 = (t4t0bbt11 & 0xffff) * (et0aat17 & 
0xffff) 2171int32323232 z4x5x4t22 += (t4t0bbt12 & 0xffff) * (et0aat10 & 0xffff) 2172int32323232 z4x5x4t23 += (t4t0bbt12 & 0xffff) * (et0aat11 & 0xffff) 2173int32323232 z4x5x4t24 += (t4t0bbt13 & 0xffff) * (et0aat11 & 0xffff) 2174int32323232 z4x5x4t25 <<= 1 2175int32323232 z4x5x4t26 += (t4t0bbt10 & 0xffff) * (et0aat16 & 0xffff) 2176int32323232 z4x5x4t27 += (t4t0bbt12 & 0xffff) * (et0aat15 & 0xffff) 2177int32323232 z4x5x4t28 += (t4t0bbt12 & 0xffff) * (et0aat16 & 0xffff) 2178int32323232 z4x5x4t29 = (t4t0bbt12 & 0xffff) * (et0aat17 & 0xffff) 2179int32323232 z4x5x4t23 += (t4t0bbt13 & 0xffff) * (et0aat10 & 0xffff) 2180int32323232 z4x5x4t24 <<= 1 2181int32323232 z4x5x4t25 += (t4t0bbt10 & 0xffff) * (et0aat15 & 0xffff) 2182int32323232 z4x5x4t26 += (t4t0bbt11 & 0xffff) * (et0aat15 & 0xffff) 2183int32323232 z4x5x4t27 += (t4t0bbt13 & 0xffff) * (et0aat14 & 0xffff) 2184int32323232 z4x5x4t28 += (t4t0bbt13 & 0xffff) * (et0aat15 & 0xffff) 2185int32323232 z4x5x4t29 += (t4t0bbt13 & 0xffff) * (et0aat16 & 0xffff) 2186int32323232 z4x5x4t210 = (t4t0bbt13 & 0xffff) * (et0aat17 & 0xffff) 2187int32323232 z4x5x4t24 += (t4t0bbt10 & 0xffff) * (et0aat14 & 0xffff) 2188int32323232 z4x5x4t25 += (t4t0bbt11 & 0xffff) * (et0aat14 & 0xffff) 2189int32323232 z4x5x4t26 += (t4t0bbt12 & 0xffff) * (et0aat14 & 0xffff) 2190int32323232 z4x5x4t27 += (t4t0bbt14 & 0xffff) * (et0aat13 & 0xffff) 2191int32323232 z4x5x4t28 += (t4t0bbt15 & 0xffff) * (et0aat13 & 0xffff) 2192int32323232 z4x5x4t29 += (t4t0bbt16 & 0xffff) * (et0aat13 & 0xffff) 2193int32323232 z4x5x4t210 += (t4t0bbt17 & 0xffff) * (et0aat13 & 0xffff) 2194int32323232 z4x5x4t211 = (t4t0bbt111 & 0xffff) * (et0aat10 & 0xffff) 2195int32323232 z4x5x4t24 += (t4t0bbt14 & 0xffff) * (et0aat10 & 0xffff) 2196int32323232 z4x5x4t25 += (t4t0bbt14 & 0xffff) * (et0aat11 & 0xffff) 2197int32323232 z4x5x4t26 += (t4t0bbt14 & 0xffff) * (et0aat12 & 0xffff) 2198int32323232 z4x5x4t27 += (t4t0bbt15 & 0xffff) * (et0aat12 & 0xffff) 2199int32323232 z4x5x4t28 += (t4t0bbt16 & 0xffff) * 
(et0aat12 & 0xffff) 2200int32323232 z4x5x4t29 += (t4t0bbt17 & 0xffff) * (et0aat12 & 0xffff) 2201int32323232 z4x5x4t210 <<= 1 2202int32323232 z4x5x4t211 += (t4t0bbt110 & 0xffff) * (et0aat11 & 0xffff) 2203int32323232 z4x5x4t212 = (t4t0bbt11 & 0xffff) * (et0aat111 & 0xffff) 2204int32323232 z4x5x4t25 += (t4t0bbt15 & 0xffff) * (et0aat10 & 0xffff) 2205int32323232 z4x5x4t26 += (t4t0bbt15 & 0xffff) * (et0aat11 & 0xffff) 2206int32323232 z4x5x4t27 += (t4t0bbt16 & 0xffff) * (et0aat11 & 0xffff) 2207int32323232 z4x5x4t28 += (t4t0bbt17 & 0xffff) * (et0aat11 & 0xffff) 2208int32323232 z4x5x4t29 <<= 1 2209int32323232 z4x5x4t210 += (t4t0bbt10 & 0xffff) * (et0aat110 & 0xffff) 2210int32323232 z4x5x4t211 += (t4t0bbt19 & 0xffff) * (et0aat12 & 0xffff) 2211int32323232 z4x5x4t212 += (t4t0bbt12 & 0xffff) * (et0aat110 & 0xffff) 2212int32323232 z4x5x4t26 += (t4t0bbt16 & 0xffff) * (et0aat10 & 0xffff) 2213int32323232 z4x5x4t27 += (t4t0bbt17 & 0xffff) * (et0aat10 & 0xffff) 2214int32323232 z4x5x4t28 <<= 1 2215int32323232 z4x5x4t29 += (t4t0bbt10 & 0xffff) * (et0aat19 & 0xffff) 2216int32323232 z4x5x4t210 += (t4t0bbt11 & 0xffff) * (et0aat19 & 0xffff) 2217int32323232 z4x5x4t211 += (t4t0bbt18 & 0xffff) * (et0aat13 & 0xffff) 2218int32323232 z4x5x4t212 += (t4t0bbt13 & 0xffff) * (et0aat19 & 0xffff) 2219int32323232 z4x5x4t213 = (t4t0bbt12 & 0xffff) * (et0aat111 & 0xffff) 2220int32323232 z4x5x4t214 = (t4t0bbt13 & 0xffff) * (et0aat111 & 0xffff) 2221int32323232 z4x5x4t28 += (t4t0bbt10 & 0xffff) * (et0aat18 & 0xffff) 2222int32323232 z4x5x4t29 += (t4t0bbt11 & 0xffff) * (et0aat18 & 0xffff) 2223int32323232 z4x5x4t210 += (t4t0bbt12 & 0xffff) * (et0aat18 & 0xffff) 2224int32323232 z4x5x4t211 += (t4t0bbt17 & 0xffff) * (et0aat14 & 0xffff) 2225int32323232 z4x5x4t212 += (t4t0bbt15 & 0xffff) * (et0aat17 & 0xffff) 2226int32323232 z4x5x4t213 += (t4t0bbt13 & 0xffff) * (et0aat110 & 0xffff) 2227int32323232 z4x5x4t214 += (t4t0bbt17 & 0xffff) * (et0aat17 & 0xffff) 2228int32323232 z4x5x4t28 += (t4t0bbt14 & 0xffff) * (et0aat14 & 
0xffff) 2229int32323232 z4x5x4t29 += (t4t0bbt14 & 0xffff) * (et0aat15 & 0xffff) 2230int32323232 z4x5x4t210 += (t4t0bbt14 & 0xffff) * (et0aat16 & 0xffff) 2231int32323232 z4x5x4t211 += (t4t0bbt16 & 0xffff) * (et0aat15 & 0xffff) 2232int32323232 z4x5x4t212 += (t4t0bbt16 & 0xffff) * (et0aat16 & 0xffff) 2233int32323232 z4x5x4t213 += (t4t0bbt16 & 0xffff) * (et0aat17 & 0xffff) 2234int32323232 z4x5x4t214 += (t4t0bbt111 & 0xffff) * (et0aat13 & 0xffff) 2235int32323232 z4x5x4t28 += (t4t0bbt18 & 0xffff) * (et0aat10 & 0xffff) 2236int32323232 z4x5x4t29 += (t4t0bbt15 & 0xffff) * (et0aat14 & 0xffff) 2237int32323232 z4x5x4t210 += (t4t0bbt15 & 0xffff) * (et0aat15 & 0xffff) 2238int32323232 z4x5x4t211 += (t4t0bbt15 & 0xffff) * (et0aat16 & 0xffff) 2239int32323232 z4x5x4t212 += (t4t0bbt17 & 0xffff) * (et0aat15 & 0xffff) 2240int32323232 z4x5x4t213 += (t4t0bbt17 & 0xffff) * (et0aat16 & 0xffff) 2241int32323232 z4x5x4t214 <<= 1 2242int32323232 z4x5x4t215 = (t4t0bbt10 & 0xffff) * (et0aat115 & 0xffff) 2243int32323232 z4x5x4t29 += (t4t0bbt18 & 0xffff) * (et0aat11 & 0xffff) 2244int32323232 z4x5x4t210 += (t4t0bbt16 & 0xffff) * (et0aat14 & 0xffff) 2245int32323232 z4x5x4t211 += (t4t0bbt14 & 0xffff) * (et0aat17 & 0xffff) 2246int32323232 z4x5x4t212 += (t4t0bbt19 & 0xffff) * (et0aat13 & 0xffff) 2247int32323232 z4x5x4t213 += (t4t0bbt110 & 0xffff) * (et0aat13 & 0xffff) 2248int32323232 z4x5x4t214 += (t4t0bbt10 & 0xffff) * (et0aat114 & 0xffff) 2249int32323232 z4x5x4t215 += (t4t0bbt11 & 0xffff) * (et0aat114 & 0xffff) 2250int32323232 z4x5x4t29 += (t4t0bbt19 & 0xffff) * (et0aat10 & 0xffff) 2251int32323232 z4x5x4t210 += (t4t0bbt18 & 0xffff) * (et0aat12 & 0xffff) 2252int32323232 z4x5x4t211 += (t4t0bbt13 & 0xffff) * (et0aat18 & 0xffff) 2253int32323232 z4x5x4t212 += (t4t0bbt110 & 0xffff) * (et0aat12 & 0xffff) 2254int32323232 z4x5x4t213 += (t4t0bbt111 & 0xffff) * (et0aat12 & 0xffff) 2255int32323232 z4x5x4t214 += (t4t0bbt11 & 0xffff) * (et0aat113 & 0xffff) 2256int32323232 z4x5x4t215 += (t4t0bbt12 & 0xffff) * 
(et0aat113 & 0xffff) 2257int32323232 z4x5x4t216 = (t4t0bbt11 & 0xffff) * (et0aat115 & 0xffff) 2258int32323232 z4x5x4t210 += (t4t0bbt19 & 0xffff) * (et0aat11 & 0xffff) 2259int32323232 z4x5x4t211 += (t4t0bbt12 & 0xffff) * (et0aat19 & 0xffff) 2260int32323232 z4x5x4t212 += (t4t0bbt111 & 0xffff) * (et0aat11 & 0xffff) 2261int32323232 z4x5x4t213 <<= 1 2262int32323232 z4x5x4t214 += (t4t0bbt12 & 0xffff) * (et0aat112 & 0xffff) 2263int32323232 z4x5x4t215 += (t4t0bbt13 & 0xffff) * (et0aat112 & 0xffff) 2264int32323232 z4x5x4t216 += (t4t0bbt12 & 0xffff) * (et0aat114 & 0xffff) 2265int32323232 z4x5x4t210 += (t4t0bbt110 & 0xffff) * (et0aat10 & 0xffff) 2266int32323232 z4x5x4t211 += (t4t0bbt11 & 0xffff) * (et0aat110 & 0xffff) 2267int32323232 z4x5x4t212 <<= 1 2268int32323232 z4x5x4t213 += (t4t0bbt10 & 0xffff) * (et0aat113 & 0xffff) 2269int32323232 z4x5x4t214 += (t4t0bbt14 & 0xffff) * (et0aat110 & 0xffff) 2270int32323232 z4x5x4t215 += (t4t0bbt14 & 0xffff) * (et0aat111 & 0xffff) 2271int32323232 z4x5x4t216 += (t4t0bbt13 & 0xffff) * (et0aat113 & 0xffff) 2272int32323232 z4x5x4t217 = (t4t0bbt12 & 0xffff) * (et0aat115 & 0xffff) 2273int32323232 z4x5x4t211 += (t4t0bbt10 & 0xffff) * (et0aat111 & 0xffff) 2274int32323232 z4x5x4t212 += (t4t0bbt10 & 0xffff) * (et0aat112 & 0xffff) 2275int32323232 z4x5x4t213 += (t4t0bbt11 & 0xffff) * (et0aat112 & 0xffff) 2276int32323232 z4x5x4t214 += (t4t0bbt15 & 0xffff) * (et0aat19 & 0xffff) 2277int32323232 z4x5x4t215 += (t4t0bbt15 & 0xffff) * (et0aat110 & 0xffff) 2278int32323232 z4x5x4t216 += (t4t0bbt15 & 0xffff) * (et0aat111 & 0xffff) 2279int32323232 z4x5x4t217 += (t4t0bbt13 & 0xffff) * (et0aat114 & 0xffff) 2280int32323232 z4x5x4t218 = (t4t0bbt13 & 0xffff) * (et0aat115 & 0xffff) 2281int32323232 z4x5x4t212 += (t4t0bbt14 & 0xffff) * (et0aat18 & 0xffff) 2282int32323232 z4x5x4t213 += (t4t0bbt14 & 0xffff) * (et0aat19 & 0xffff) 2283int32323232 z4x5x4t214 += (t4t0bbt16 & 0xffff) * (et0aat18 & 0xffff) 2284int32323232 z4x5x4t215 += (t4t0bbt16 & 0xffff) * (et0aat19 & 
0xffff) 2285int32323232 z4x5x4t216 += (t4t0bbt16 & 0xffff) * (et0aat110 & 0xffff) 2286int32323232 z4x5x4t217 += (t4t0bbt16 & 0xffff) * (et0aat111 & 0xffff) 2287int32323232 z4x5x4t218 += (t4t0bbt17 & 0xffff) * (et0aat111 & 0xffff) 2288int32323232 z4x5x4t212 += (t4t0bbt18 & 0xffff) * (et0aat14 & 0xffff) 2289int32323232 z4x5x4t213 += (t4t0bbt15 & 0xffff) * (et0aat18 & 0xffff) 2290int32323232 z4x5x4t214 += (t4t0bbt18 & 0xffff) * (et0aat16 & 0xffff) 2291int32323232 z4x5x4t215 += (t4t0bbt17 & 0xffff) * (et0aat18 & 0xffff) 2292int32323232 z4x5x4t216 += (t4t0bbt17 & 0xffff) * (et0aat19 & 0xffff) 2293int32323232 z4x5x4t217 += (t4t0bbt17 & 0xffff) * (et0aat110 & 0xffff) 2294int32323232 z4x5x4t218 += (t4t0bbt111 & 0xffff) * (et0aat17 & 0xffff) 2295int32323232 z4x5x4t212 += (t4t0bbt112 & 0xffff) * (et0aat10 & 0xffff) 2296int32323232 z4x5x4t213 += (t4t0bbt18 & 0xffff) * (et0aat15 & 0xffff) 2297int32323232 z4x5x4t214 += (t4t0bbt19 & 0xffff) * (et0aat15 & 0xffff) 2298int32323232 z4x5x4t215 += (t4t0bbt18 & 0xffff) * (et0aat17 & 0xffff) 2299int32323232 z4x5x4t216 += (t4t0bbt19 & 0xffff) * (et0aat17 & 0xffff) 2300int32323232 z4x5x4t217 += (t4t0bbt110 & 0xffff) * (et0aat17 & 0xffff) 2301int32323232 z4x5x4t218 += (t4t0bbt115 & 0xffff) * (et0aat13 & 0xffff) 2302int32323232 z4x5x4t219 = (t4t0bbt10 & 0xffff) * (et0aat119 & 0xffff) 2303int32323232 z4x5x4t213 += (t4t0bbt19 & 0xffff) * (et0aat14 & 0xffff) 2304int32323232 z4x5x4t214 += (t4t0bbt110 & 0xffff) * (et0aat14 & 0xffff) 2305int32323232 z4x5x4t215 += (t4t0bbt19 & 0xffff) * (et0aat16 & 0xffff) 2306int32323232 z4x5x4t216 += (t4t0bbt110 & 0xffff) * (et0aat16 & 0xffff) 2307int32323232 z4x5x4t217 += (t4t0bbt111 & 0xffff) * (et0aat16 & 0xffff) 2308int32323232 z4x5x4t218 <<= 1 2309int32323232 z4x5x4t219 += (t4t0bbt11 & 0xffff) * (et0aat118 & 0xffff) 2310int32323232 z4x5x4t213 += (t4t0bbt112 & 0xffff) * (et0aat11 & 0xffff) 2311int32323232 z4x5x4t214 += (t4t0bbt112 & 0xffff) * (et0aat12 & 0xffff) 2312int32323232 z4x5x4t215 += (t4t0bbt110 & 
0xffff) * (et0aat15 & 0xffff) 2313int32323232 z4x5x4t216 += (t4t0bbt111 & 0xffff) * (et0aat15 & 0xffff) 2314int32323232 z4x5x4t217 += (t4t0bbt114 & 0xffff) * (et0aat13 & 0xffff) 2315int32323232 z4x5x4t218 += (t4t0bbt10 & 0xffff) * (et0aat118 & 0xffff) 2316int32323232 z4x5x4t219 += (t4t0bbt12 & 0xffff) * (et0aat117 & 0xffff) 2317int32323232 z4x5x4t213 += (t4t0bbt113 & 0xffff) * (et0aat10 & 0xffff) 2318int32323232 z4x5x4t214 += (t4t0bbt113 & 0xffff) * (et0aat11 & 0xffff) 2319int32323232 z4x5x4t215 += (t4t0bbt111 & 0xffff) * (et0aat14 & 0xffff) 2320int32323232 z4x5x4t216 += (t4t0bbt113 & 0xffff) * (et0aat13 & 0xffff) 2321int32323232 z4x5x4t217 += (t4t0bbt115 & 0xffff) * (et0aat12 & 0xffff) 2322int32323232 z4x5x4t218 += (t4t0bbt11 & 0xffff) * (et0aat117 & 0xffff) 2323int32323232 z4x5x4t219 += (t4t0bbt13 & 0xffff) * (et0aat116 & 0xffff) 2324int32323232 z4x5x4t220 = (t4t0bbt11 & 0xffff) * (et0aat119 & 0xffff) 2325int32323232 z4x5x4t214 += (t4t0bbt114 & 0xffff) * (et0aat10 & 0xffff) 2326int32323232 z4x5x4t215 += (t4t0bbt112 & 0xffff) * (et0aat13 & 0xffff) 2327int32323232 z4x5x4t216 += (t4t0bbt114 & 0xffff) * (et0aat12 & 0xffff) 2328int32323232 z4x5x4t217 <<= 1 2329int32323232 z4x5x4t218 += (t4t0bbt12 & 0xffff) * (et0aat116 & 0xffff) 2330int32323232 z4x5x4t219 += (t4t0bbt14 & 0xffff) * (et0aat115 & 0xffff) 2331int32323232 z4x5x4t220 += (t4t0bbt12 & 0xffff) * (et0aat118 & 0xffff) 2332int32323232 z4x5x4t221 = (t4t0bbt12 & 0xffff) * (et0aat119 & 0xffff) 2333int32323232 z4x5x4t215 += (t4t0bbt113 & 0xffff) * (et0aat12 & 0xffff) 2334int32323232 z4x5x4t216 += (t4t0bbt115 & 0xffff) * (et0aat11 & 0xffff) 2335int32323232 z4x5x4t217 += (t4t0bbt10 & 0xffff) * (et0aat117 & 0xffff) 2336int32323232 z4x5x4t218 += (t4t0bbt14 & 0xffff) * (et0aat114 & 0xffff) 2337int32323232 z4x5x4t219 += (t4t0bbt15 & 0xffff) * (et0aat114 & 0xffff) 2338int32323232 z4x5x4t220 += (t4t0bbt13 & 0xffff) * (et0aat117 & 0xffff) 2339int32323232 z4x5x4t221 += (t4t0bbt13 & 0xffff) * (et0aat118 & 0xffff) 
2340int32323232 z4x5x4t215 += (t4t0bbt114 & 0xffff) * (et0aat11 & 0xffff) 2341int32323232 z4x5x4t216 <<= 1 2342int32323232 z4x5x4t217 += (t4t0bbt11 & 0xffff) * (et0aat116 & 0xffff) 2343int32323232 z4x5x4t218 += (t4t0bbt15 & 0xffff) * (et0aat113 & 0xffff) 2344int32323232 z4x5x4t219 += (t4t0bbt16 & 0xffff) * (et0aat113 & 0xffff) 2345int32323232 z4x5x4t220 += (t4t0bbt15 & 0xffff) * (et0aat115 & 0xffff) 2346int32323232 z4x5x4t221 += (t4t0bbt16 & 0xffff) * (et0aat115 & 0xffff) 2347int32323232 z4x5x4t215 += (t4t0bbt115 & 0xffff) * (et0aat10 & 0xffff) 2348int32323232 z4x5x4t216 += (t4t0bbt10 & 0xffff) * (et0aat116 & 0xffff) 2349int32323232 z4x5x4t217 += (t4t0bbt14 & 0xffff) * (et0aat113 & 0xffff) 2350int32323232 z4x5x4t218 += (t4t0bbt16 & 0xffff) * (et0aat112 & 0xffff) 2351int32323232 z4x5x4t219 += (t4t0bbt17 & 0xffff) * (et0aat112 & 0xffff) 2352int32323232 z4x5x4t220 += (t4t0bbt16 & 0xffff) * (et0aat114 & 0xffff) 2353int32323232 z4x5x4t221 += (t4t0bbt17 & 0xffff) * (et0aat114 & 0xffff) 2354int32323232 z4x5x4t222 = (t4t0bbt13 & 0xffff) * (et0aat119 & 0xffff) 2355int32323232 z4x5x4t216 += (t4t0bbt14 & 0xffff) * (et0aat112 & 0xffff) 2356int32323232 z4x5x4t217 += (t4t0bbt15 & 0xffff) * (et0aat112 & 0xffff) 2357int32323232 z4x5x4t218 += (t4t0bbt18 & 0xffff) * (et0aat110 & 0xffff) 2358int32323232 z4x5x4t219 += (t4t0bbt18 & 0xffff) * (et0aat111 & 0xffff) 2359int32323232 z4x5x4t220 += (t4t0bbt17 & 0xffff) * (et0aat113 & 0xffff) 2360int32323232 z4x5x4t221 += (t4t0bbt110 & 0xffff) * (et0aat111 & 0xffff) 2361int32323232 z4x5x4t222 += (t4t0bbt17 & 0xffff) * (et0aat115 & 0xffff) 2362int32323232 z4x5x4t216 += (t4t0bbt18 & 0xffff) * (et0aat18 & 0xffff) 2363int32323232 z4x5x4t217 += (t4t0bbt18 & 0xffff) * (et0aat19 & 0xffff) 2364int32323232 z4x5x4t218 += (t4t0bbt19 & 0xffff) * (et0aat19 & 0xffff) 2365int32323232 z4x5x4t219 += (t4t0bbt19 & 0xffff) * (et0aat110 & 0xffff) 2366int32323232 z4x5x4t220 += (t4t0bbt19 & 0xffff) * (et0aat111 & 0xffff) 2367int32323232 z4x5x4t221 += (t4t0bbt111 & 
0xffff) * (et0aat110 & 0xffff) 2368int32323232 z4x5x4t222 += (t4t0bbt111 & 0xffff) * (et0aat111 & 0xffff) 2369int32323232 z4x5x4t216 += (t4t0bbt112 & 0xffff) * (et0aat14 & 0xffff) 2370int32323232 z4x5x4t217 += (t4t0bbt19 & 0xffff) * (et0aat18 & 0xffff) 2371int32323232 z4x5x4t218 += (t4t0bbt110 & 0xffff) * (et0aat18 & 0xffff) 2372int32323232 z4x5x4t219 += (t4t0bbt110 & 0xffff) * (et0aat19 & 0xffff) 2373int32323232 z4x5x4t220 += (t4t0bbt110 & 0xffff) * (et0aat110 & 0xffff) 2374int32323232 z4x5x4t221 += (t4t0bbt114 & 0xffff) * (et0aat17 & 0xffff) 2375int32323232 z4x5x4t222 += (t4t0bbt115 & 0xffff) * (et0aat17 & 0xffff) 2376int32323232 z4x5x4t216 += (t4t0bbt116 & 0xffff) * (et0aat10 & 0xffff) 2377int32323232 z4x5x4t217 += (t4t0bbt112 & 0xffff) * (et0aat15 & 0xffff) 2378int32323232 z4x5x4t218 += (t4t0bbt112 & 0xffff) * (et0aat16 & 0xffff) 2379int32323232 z4x5x4t219 += (t4t0bbt111 & 0xffff) * (et0aat18 & 0xffff) 2380int32323232 z4x5x4t220 += (t4t0bbt111 & 0xffff) * (et0aat19 & 0xffff) 2381int32323232 z4x5x4t221 += (t4t0bbt115 & 0xffff) * (et0aat16 & 0xffff) 2382int32323232 z4x5x4t222 += (t4t0bbt119 & 0xffff) * (et0aat13 & 0xffff) 2383int32323232 z4x5x4t223 = (t4t0bbt14 & 0xffff) * (et0aat119 & 0xffff) 2384int32323232 z4x5x4t217 += (t4t0bbt113 & 0xffff) * (et0aat14 & 0xffff) 2385int32323232 z4x5x4t218 += (t4t0bbt113 & 0xffff) * (et0aat15 & 0xffff) 2386int32323232 z4x5x4t219 += (t4t0bbt112 & 0xffff) * (et0aat17 & 0xffff) 2387int32323232 z4x5x4t220 += (t4t0bbt113 & 0xffff) * (et0aat17 & 0xffff) 2388int32323232 z4x5x4t221 += (t4t0bbt118 & 0xffff) * (et0aat13 & 0xffff) 2389int32323232 z4x5x4t222 <<= 1 2390int32323232 z4x5x4t223 += (t4t0bbt15 & 0xffff) * (et0aat118 & 0xffff) 2391int32323232 z4x5x4t217 += (t4t0bbt116 & 0xffff) * (et0aat11 & 0xffff) 2392int32323232 z4x5x4t218 += (t4t0bbt114 & 0xffff) * (et0aat14 & 0xffff) 2393int32323232 z4x5x4t219 += (t4t0bbt113 & 0xffff) * (et0aat16 & 0xffff) 2394int32323232 z4x5x4t220 += (t4t0bbt114 & 0xffff) * (et0aat16 & 0xffff) 
2395int32323232 z4x5x4t221 += (t4t0bbt119 & 0xffff) * (et0aat12 & 0xffff) 2396int32323232 z4x5x4t222 += (t4t0bbt14 & 0xffff) * (et0aat118 & 0xffff) 2397int32323232 z4x5x4t223 += (t4t0bbt16 & 0xffff) * (et0aat117 & 0xffff) 2398int32323232 z4x5x4t217 += (t4t0bbt117 & 0xffff) * (et0aat10 & 0xffff) 2399int32323232 z4x5x4t218 += (t4t0bbt116 & 0xffff) * (et0aat12 & 0xffff) 2400int32323232 z4x5x4t219 += (t4t0bbt114 & 0xffff) * (et0aat15 & 0xffff) 2401int32323232 z4x5x4t220 += (t4t0bbt115 & 0xffff) * (et0aat15 & 0xffff) 2402int32323232 z4x5x4t221 <<= 1 2403int32323232 z4x5x4t222 += (t4t0bbt15 & 0xffff) * (et0aat117 & 0xffff) 2404int32323232 z4x5x4t223 += (t4t0bbt17 & 0xffff) * (et0aat116 & 0xffff) 2405int32323232 z4x5x4t224 = (t4t0bbt15 & 0xffff) * (et0aat119 & 0xffff) 2406int32323232 z4x5x4t218 += (t4t0bbt117 & 0xffff) * (et0aat11 & 0xffff) 2407int32323232 z4x5x4t219 += (t4t0bbt115 & 0xffff) * (et0aat14 & 0xffff) 2408int32323232 z4x5x4t220 += (t4t0bbt117 & 0xffff) * (et0aat13 & 0xffff) 2409int32323232 z4x5x4t221 += (t4t0bbt14 & 0xffff) * (et0aat117 & 0xffff) 2410int32323232 z4x5x4t222 += (t4t0bbt16 & 0xffff) * (et0aat116 & 0xffff) 2411int32323232 z4x5x4t223 += (t4t0bbt18 & 0xffff) * (et0aat115 & 0xffff) 2412int32323232 z4x5x4t224 += (t4t0bbt16 & 0xffff) * (et0aat118 & 0xffff) 2413int32323232 z4x5x4t218 += (t4t0bbt118 & 0xffff) * (et0aat10 & 0xffff) 2414int32323232 z4x5x4t219 += (t4t0bbt116 & 0xffff) * (et0aat13 & 0xffff) 2415int32323232 z4x5x4t220 += (t4t0bbt118 & 0xffff) * (et0aat12 & 0xffff) 2416int32323232 z4x5x4t221 += (t4t0bbt15 & 0xffff) * (et0aat116 & 0xffff) 2417int32323232 z4x5x4t222 += (t4t0bbt18 & 0xffff) * (et0aat114 & 0xffff) 2418int32323232 z4x5x4t223 += (t4t0bbt19 & 0xffff) * (et0aat114 & 0xffff) 2419int32323232 z4x5x4t224 += (t4t0bbt17 & 0xffff) * (et0aat117 & 0xffff) 2420int32323232 z4x5x4t225 = (t4t0bbt16 & 0xffff) * (et0aat119 & 0xffff) 2421int32323232 z4x5x4t219 += (t4t0bbt117 & 0xffff) * (et0aat12 & 0xffff) 2422int32323232 z4x5x4t220 += (t4t0bbt119 & 
0xffff) * (et0aat11 & 0xffff) 2423int32323232 z4x5x4t221 += (t4t0bbt18 & 0xffff) * (et0aat113 & 0xffff) 2424int32323232 z4x5x4t222 += (t4t0bbt19 & 0xffff) * (et0aat113 & 0xffff) 2425int32323232 z4x5x4t223 += (t4t0bbt110 & 0xffff) * (et0aat113 & 0xffff) 2426int32323232 z4x5x4t224 += (t4t0bbt19 & 0xffff) * (et0aat115 & 0xffff) 2427int32323232 z4x5x4t225 += (t4t0bbt17 & 0xffff) * (et0aat118 & 0xffff) 2428int32323232 z4x5x4t219 += (t4t0bbt118 & 0xffff) * (et0aat11 & 0xffff) 2429int32323232 z4x5x4t220 <<= 1 2430int32323232 z4x5x4t221 += (t4t0bbt19 & 0xffff) * (et0aat112 & 0xffff) 2431int32323232 z4x5x4t222 += (t4t0bbt110 & 0xffff) * (et0aat112 & 0xffff) 2432int32323232 z4x5x4t223 += (t4t0bbt111 & 0xffff) * (et0aat112 & 0xffff) 2433int32323232 z4x5x4t224 += (t4t0bbt110 & 0xffff) * (et0aat114 & 0xffff) 2434int32323232 z4x5x4t225 += (t4t0bbt110 & 0xffff) * (et0aat115 & 0xffff) 2435int32323232 z4x5x4t219 += (t4t0bbt119 & 0xffff) * (et0aat10 & 0xffff) 2436int32323232 z4x5x4t220 += (t4t0bbt14 & 0xffff) * (et0aat116 & 0xffff) 2437int32323232 z4x5x4t221 += (t4t0bbt112 & 0xffff) * (et0aat19 & 0xffff) 2438int32323232 z4x5x4t222 += (t4t0bbt112 & 0xffff) * (et0aat110 & 0xffff) 2439int32323232 z4x5x4t223 += (t4t0bbt112 & 0xffff) * (et0aat111 & 0xffff) 2440int32323232 z4x5x4t224 += (t4t0bbt111 & 0xffff) * (et0aat113 & 0xffff) 2441int32323232 z4x5x4t225 += (t4t0bbt111 & 0xffff) * (et0aat114 & 0xffff) 2442int32323232 z4x5x4t226 = (t4t0bbt17 & 0xffff) * (et0aat119 & 0xffff) 2443int32323232 z4x5x4t220 += (t4t0bbt18 & 0xffff) * (et0aat112 & 0xffff) 2444int32323232 z4x5x4t221 += (t4t0bbt113 & 0xffff) * (et0aat18 & 0xffff) 2445int32323232 z4x5x4t222 += (t4t0bbt113 & 0xffff) * (et0aat19 & 0xffff) 2446int32323232 z4x5x4t223 += (t4t0bbt113 & 0xffff) * (et0aat110 & 0xffff) 2447int32323232 z4x5x4t224 += (t4t0bbt113 & 0xffff) * (et0aat111 & 0xffff) 2448int32323232 z4x5x4t225 += (t4t0bbt114 & 0xffff) * (et0aat111 & 0xffff) 2449int32323232 z4x5x4t226 += (t4t0bbt111 & 0xffff) * (et0aat115 & 0xffff) 
2450int32323232 z4x5x4t220 += (t4t0bbt112 & 0xffff) * (et0aat18 & 0xffff) 2451int32323232 z4x5x4t221 += (t4t0bbt116 & 0xffff) * (et0aat15 & 0xffff) 2452int32323232 z4x5x4t222 += (t4t0bbt114 & 0xffff) * (et0aat18 & 0xffff) 2453int32323232 z4x5x4t223 += (t4t0bbt114 & 0xffff) * (et0aat19 & 0xffff) 2454int32323232 z4x5x4t224 += (t4t0bbt114 & 0xffff) * (et0aat110 & 0xffff) 2455int32323232 z4x5x4t225 += (t4t0bbt115 & 0xffff) * (et0aat110 & 0xffff) 2456int32323232 z4x5x4t226 += (t4t0bbt115 & 0xffff) * (et0aat111 & 0xffff) 2457int32323232 z4x5x4t220 += (t4t0bbt116 & 0xffff) * (et0aat14 & 0xffff) 2458int32323232 z4x5x4t221 += (t4t0bbt117 & 0xffff) * (et0aat14 & 0xffff) 2459int32323232 z4x5x4t222 += (t4t0bbt116 & 0xffff) * (et0aat16 & 0xffff) 2460int32323232 z4x5x4t223 += (t4t0bbt115 & 0xffff) * (et0aat18 & 0xffff) 2461int32323232 z4x5x4t224 += (t4t0bbt115 & 0xffff) * (et0aat19 & 0xffff) 2462int32323232 z4x5x4t225 += (t4t0bbt118 & 0xffff) * (et0aat17 & 0xffff) 2463int32323232 z4x5x4t226 += (t4t0bbt119 & 0xffff) * (et0aat17 & 0xffff) 2464int32323232 z4x5x4t227 = (t4t0bbt18 & 0xffff) * (et0aat119 & 0xffff) 2465int32323232 z4x5x4t228 = (t4t0bbt19 & 0xffff) * (et0aat119 & 0xffff) 2466int32323232 z4x5x4t222 += (t4t0bbt117 & 0xffff) * (et0aat15 & 0xffff) 2467int32323232 z4x5x4t223 += (t4t0bbt116 & 0xffff) * (et0aat17 & 0xffff) 2468int32323232 z4x5x4t224 += (t4t0bbt117 & 0xffff) * (et0aat17 & 0xffff) 2469int32323232 z4x5x4t225 += (t4t0bbt119 & 0xffff) * (et0aat16 & 0xffff) 2470int32323232 z4x5x4t226 <<= 1 2471int32323232 z4x5x4t227 += (t4t0bbt19 & 0xffff) * (et0aat118 & 0xffff) 2472int32323232 z4x5x4t228 += (t4t0bbt110 & 0xffff) * (et0aat118 & 0xffff) 2473int32323232 z4x5x4t222 += (t4t0bbt118 & 0xffff) * (et0aat14 & 0xffff) 2474int32323232 z4x5x4t223 += (t4t0bbt117 & 0xffff) * (et0aat16 & 0xffff) 2475int32323232 z4x5x4t224 += (t4t0bbt118 & 0xffff) * (et0aat16 & 0xffff) 2476int32323232 z4x5x4t225 <<= 1 2477int32323232 z4x5x4t226 += (t4t0bbt18 & 0xffff) * (et0aat118 & 0xffff) 
# High-limb product accumulation, continued: accumulators z4x5x4t223 ..
# z4x5x4t238 receive the remaining products of the low 16 bits of
# t4t0bbt1<i> and et0aat1<j>.  Reading the trailing number of each name as a
# limb index, every product below satisfies i + j = accumulator index.
# "=" initialises an accumulator, "+=" extends it, and "<<= 1" doubles the
# value accumulated so far (products added afterwards are not doubled).
int32323232 z4x5x4t227 += (t4t0bbt110 & 0xffff) * (et0aat117 & 0xffff)
int32323232 z4x5x4t228 += (t4t0bbt111 & 0xffff) * (et0aat117 & 0xffff)
int32323232 z4x5x4t229 = (t4t0bbt110 & 0xffff) * (et0aat119 & 0xffff)
int32323232 z4x5x4t223 += (t4t0bbt118 & 0xffff) * (et0aat15 & 0xffff)
int32323232 z4x5x4t224 += (t4t0bbt119 & 0xffff) * (et0aat15 & 0xffff)
int32323232 z4x5x4t225 += (t4t0bbt18 & 0xffff) * (et0aat117 & 0xffff)
int32323232 z4x5x4t226 += (t4t0bbt19 & 0xffff) * (et0aat117 & 0xffff)
int32323232 z4x5x4t227 += (t4t0bbt111 & 0xffff) * (et0aat116 & 0xffff)
int32323232 z4x5x4t228 += (t4t0bbt113 & 0xffff) * (et0aat115 & 0xffff)
int32323232 z4x5x4t229 += (t4t0bbt111 & 0xffff) * (et0aat118 & 0xffff)
int32323232 z4x5x4t223 += (t4t0bbt119 & 0xffff) * (et0aat14 & 0xffff)
int32323232 z4x5x4t224 <<= 1
int32323232 z4x5x4t225 += (t4t0bbt19 & 0xffff) * (et0aat116 & 0xffff)
int32323232 z4x5x4t226 += (t4t0bbt110 & 0xffff) * (et0aat116 & 0xffff)
int32323232 z4x5x4t227 += (t4t0bbt112 & 0xffff) * (et0aat115 & 0xffff)
int32323232 z4x5x4t228 += (t4t0bbt114 & 0xffff) * (et0aat114 & 0xffff)
int32323232 z4x5x4t229 += (t4t0bbt114 & 0xffff) * (et0aat115 & 0xffff)
int32323232 z4x5x4t230 = (t4t0bbt111 & 0xffff) * (et0aat119 & 0xffff)
int32323232 z4x5x4t224 += (t4t0bbt18 & 0xffff) * (et0aat116 & 0xffff)
int32323232 z4x5x4t225 += (t4t0bbt112 & 0xffff) * (et0aat113 & 0xffff)
int32323232 z4x5x4t226 += (t4t0bbt112 & 0xffff) * (et0aat114 & 0xffff)
int32323232 z4x5x4t227 += (t4t0bbt113 & 0xffff) * (et0aat114 & 0xffff)
int32323232 z4x5x4t228 += (t4t0bbt115 & 0xffff) * (et0aat113 & 0xffff)
int32323232 z4x5x4t229 += (t4t0bbt115 & 0xffff) * (et0aat114 & 0xffff)
int32323232 z4x5x4t230 += (t4t0bbt115 & 0xffff) * (et0aat115 & 0xffff)
int32323232 z4x5x4t224 += (t4t0bbt112 & 0xffff) * (et0aat112 & 0xffff)
int32323232 z4x5x4t225 += (t4t0bbt113 & 0xffff) * (et0aat112 & 0xffff)
int32323232 z4x5x4t226 += (t4t0bbt113 & 0xffff) * (et0aat113 & 0xffff)
int32323232 z4x5x4t227 += (t4t0bbt114 & 0xffff) * (et0aat113 & 0xffff)
int32323232 z4x5x4t228 += (t4t0bbt117 & 0xffff) * (et0aat111 & 0xffff)
int32323232 z4x5x4t229 += (t4t0bbt118 & 0xffff) * (et0aat111 & 0xffff)
int32323232 z4x5x4t230 += (t4t0bbt119 & 0xffff) * (et0aat111 & 0xffff)
int32323232 z4x5x4t224 += (t4t0bbt116 & 0xffff) * (et0aat18 & 0xffff)
int32323232 z4x5x4t225 += (t4t0bbt116 & 0xffff) * (et0aat19 & 0xffff)
int32323232 z4x5x4t226 += (t4t0bbt114 & 0xffff) * (et0aat112 & 0xffff)
int32323232 z4x5x4t227 += (t4t0bbt115 & 0xffff) * (et0aat112 & 0xffff)
int32323232 z4x5x4t228 += (t4t0bbt118 & 0xffff) * (et0aat110 & 0xffff)
int32323232 z4x5x4t229 += (t4t0bbt119 & 0xffff) * (et0aat110 & 0xffff)
int32323232 z4x5x4t230 <<= 1
int32323232 z4x5x4t231 = (t4t0bbt112 & 0xffff) * (et0aat119 & 0xffff)
int32323232 z4x5x4t225 += (t4t0bbt117 & 0xffff) * (et0aat18 & 0xffff)
int32323232 z4x5x4t226 += (t4t0bbt116 & 0xffff) * (et0aat110 & 0xffff)
int32323232 z4x5x4t227 += (t4t0bbt116 & 0xffff) * (et0aat111 & 0xffff)
int32323232 z4x5x4t228 += (t4t0bbt119 & 0xffff) * (et0aat19 & 0xffff)
int32323232 z4x5x4t229 <<= 1
int32323232 z4x5x4t230 += (t4t0bbt112 & 0xffff) * (et0aat118 & 0xffff)
int32323232 z4x5x4t231 += (t4t0bbt113 & 0xffff) * (et0aat118 & 0xffff)
int32323232 z4x5x4t232 = (t4t0bbt113 & 0xffff) * (et0aat119 & 0xffff)
int32323232 z4x5x4t226 += (t4t0bbt117 & 0xffff) * (et0aat19 & 0xffff)
int32323232 z4x5x4t227 += (t4t0bbt117 & 0xffff) * (et0aat110 & 0xffff)
int32323232 z4x5x4t228 <<= 1
int32323232 z4x5x4t229 += (t4t0bbt112 & 0xffff) * (et0aat117 & 0xffff)
int32323232 z4x5x4t230 += (t4t0bbt113 & 0xffff) * (et0aat117 & 0xffff)
int32323232 z4x5x4t231 += (t4t0bbt114 & 0xffff) * (et0aat117 & 0xffff)
int32323232 z4x5x4t232 += (t4t0bbt114 & 0xffff) * (et0aat118 & 0xffff)
int32323232 z4x5x4t226 += (t4t0bbt118 & 0xffff) * (et0aat18 & 0xffff)
int32323232 z4x5x4t227 += (t4t0bbt118 & 0xffff) * (et0aat19 & 0xffff)
int32323232 z4x5x4t228 += (t4t0bbt112 & 0xffff) * (et0aat116 & 0xffff)
int32323232 z4x5x4t229 += (t4t0bbt113 & 0xffff) * (et0aat116 & 0xffff)
int32323232 z4x5x4t230 += (t4t0bbt114 & 0xffff) * (et0aat116 & 0xffff)
int32323232 z4x5x4t231 += (t4t0bbt115 & 0xffff) * (et0aat116 & 0xffff)
int32323232 z4x5x4t232 += (t4t0bbt115 & 0xffff) * (et0aat117 & 0xffff)
int32323232 z4x5x4t233 = (t4t0bbt114 & 0xffff) * (et0aat119 & 0xffff)
int32323232 z4x5x4t227 += (t4t0bbt119 & 0xffff) * (et0aat18 & 0xffff)
int32323232 z4x5x4t228 += (t4t0bbt116 & 0xffff) * (et0aat112 & 0xffff)
int32323232 z4x5x4t229 += (t4t0bbt116 & 0xffff) * (et0aat113 & 0xffff)
int32323232 z4x5x4t230 += (t4t0bbt116 & 0xffff) * (et0aat114 & 0xffff)
int32323232 z4x5x4t231 += (t4t0bbt116 & 0xffff) * (et0aat115 & 0xffff)
int32323232 z4x5x4t232 += (t4t0bbt117 & 0xffff) * (et0aat115 & 0xffff)
int32323232 z4x5x4t233 += (t4t0bbt115 & 0xffff) * (et0aat118 & 0xffff)
int32323232 z4x5x4t234 = (t4t0bbt115 & 0xffff) * (et0aat119 & 0xffff)
int32323232 z4x5x4t235 = (t4t0bbt116 & 0xffff) * (et0aat119 & 0xffff)
int32323232 z4x5x4t229 += (t4t0bbt117 & 0xffff) * (et0aat112 & 0xffff)
int32323232 z4x5x4t230 += (t4t0bbt117 & 0xffff) * (et0aat113 & 0xffff)
int32323232 z4x5x4t231 += (t4t0bbt117 & 0xffff) * (et0aat114 & 0xffff)
int32323232 z4x5x4t232 += (t4t0bbt118 & 0xffff) * (et0aat114 & 0xffff)
int32323232 z4x5x4t233 += (t4t0bbt118 & 0xffff) * (et0aat115 & 0xffff)
int32323232 z4x5x4t234 += (t4t0bbt119 & 0xffff) * (et0aat115 & 0xffff)
int32323232 z4x5x4t235 += (t4t0bbt117 & 0xffff) * (et0aat118 & 0xffff)
int32323232 z4x5x4t236 = (t4t0bbt117 & 0xffff) * (et0aat119 & 0xffff)
int32323232 z4x5x4t230 += (t4t0bbt118 & 0xffff) * (et0aat112 & 0xffff)
int32323232 z4x5x4t231 += (t4t0bbt118 & 0xffff) * (et0aat113 & 0xffff)
int32323232 z4x5x4t232 += (t4t0bbt119 & 0xffff) * (et0aat113 & 0xffff)
int32323232 z4x5x4t233 += (t4t0bbt119 & 0xffff) * (et0aat114 & 0xffff)
int32323232 z4x5x4t234 <<= 1
int32323232 z4x5x4t235 += (t4t0bbt118 & 0xffff) * (et0aat117 & 0xffff)
int32323232 z4x5x4t236 += (t4t0bbt118 & 0xffff) * (et0aat118 & 0xffff)
int32323232 z4x5x4t237 = (t4t0bbt118 & 0xffff) * (et0aat119 & 0xffff)
int32323232 z4x5x4t231 += (t4t0bbt119 & 0xffff) * (et0aat112 & 0xffff)
int32323232 z4x5x4t232 <<= 1
int32323232 z4x5x4t233 <<= 1
int32323232 z4x5x4t234 += (t4t0bbt116 & 0xffff) * (et0aat118 & 0xffff)
int32323232 z4x5x4t235 += (t4t0bbt119 & 0xffff) * (et0aat116 & 0xffff)
int32323232 z4x5x4t236 += (t4t0bbt119 & 0xffff) * (et0aat117 & 0xffff)
int32323232 z4x5x4t237 += (t4t0bbt119 & 0xffff) * (et0aat118 & 0xffff)
int32323232 z4x5x4t238 = (t4t0bbt119 & 0xffff) * (et0aat119 & 0xffff)
int32323232 z4x5x4t232 += (t4t0bbt116 & 0xffff) * (et0aat116 & 0xffff)
int32323232 z4x5x4t233 += (t4t0bbt116 & 0xffff) * (et0aat117 & 0xffff)
int32323232 z4x5x4t234 += (t4t0bbt117 & 0xffff) * (et0aat117 & 0xffff)
int32323232 z4x5x4t236 <<= 1
int32323232 z4x5x4t237 <<= 1
int32323232 z4x5x4t238 <<= 1
int32323232 z4x5x4t233 += (t4t0bbt117 & 0xffff) * (et0aat116 & 0xffff)
int32323232 z4x5x4t234 += (t4t0bbt118 & 0xffff) * (et0aat116 & 0xffff)


## Reduction
# Carry propagation over the high accumulators z4x5x4t220..z4x5x4t238.
# Four independent chains are advanced together, starting at limbs 20, 24, 28
# and 32.  Each step shifts a limb right to obtain its carry, adds the carry
# to the next limb, and masks the source limb.  Limbs 23, 27, 31 and 35 use
# ">> 12" with mask12; all other limbs here use ">> 13" with mask13.
# mask12/mask13 are defined outside this excerpt (presumably 12- and 13-bit
# masks in every lane -- confirm).

uint32323232 carry0 = z4x5x4t220 >> 13
uint32323232 carry1 = z4x5x4t224 >> 13
uint32323232 carry2 = z4x5x4t228 >> 13
uint32323232 carry3 = z4x5x4t232 >> 13

int32323232 z4x5x4t221 += carry0
z4x5x4t220 &= mask13
int32323232 z4x5x4t225 += carry1
z4x5x4t224 &= mask13
int32323232 z4x5x4t229 += carry2
z4x5x4t228 &= mask13
int32323232 z4x5x4t233 += carry3
z4x5x4t232 &= mask13

uint32323232 carry0 = z4x5x4t221 >> 13
uint32323232 carry1 = z4x5x4t225 >> 13
uint32323232 carry2 = z4x5x4t229 >> 13
uint32323232 carry3 = z4x5x4t233 >> 13

int32323232 z4x5x4t222 += carry0
z4x5x4t221 &= mask13
int32323232 z4x5x4t226 += carry1
z4x5x4t225 &= mask13
int32323232 z4x5x4t230 += carry2
z4x5x4t229 &= mask13
int32323232 z4x5x4t234 += carry3
z4x5x4t233 &= mask13

uint32323232 carry0 = z4x5x4t222 >> 13
uint32323232 carry1 = z4x5x4t226 >> 13
uint32323232 carry2 = z4x5x4t230 >> 13
uint32323232 carry3 = z4x5x4t234 >> 13

int32323232 z4x5x4t223 += carry0
z4x5x4t222 &= mask13
int32323232 z4x5x4t227 += carry1
z4x5x4t226 &= mask13
int32323232 z4x5x4t231 += carry2
z4x5x4t230 &= mask13
int32323232 z4x5x4t235 += carry3
z4x5x4t234 &= mask13

# Limbs 23, 27, 31, 35: shift by 12 and mask with mask12.
uint32323232 carry0 = z4x5x4t223 >> 12
uint32323232 carry1 = z4x5x4t227 >> 12
uint32323232 carry2 = z4x5x4t231 >> 12
uint32323232 carry3 = z4x5x4t235 >> 12

int32323232 z4x5x4t224 += carry0
z4x5x4t223 &= mask12
int32323232 z4x5x4t228 += carry1
z4x5x4t227 &= mask12
int32323232 z4x5x4t232 += carry2
z4x5x4t231 &= mask12
int32323232 z4x5x4t236 += carry3
z4x5x4t235 &= mask12

# Second pass: the chains now start at limbs 24, 28, 32 and 36, which have
# just received the carries produced above.
uint32323232 carry1 = z4x5x4t224 >> 13
uint32323232 carry2 = z4x5x4t228 >> 13
uint32323232 carry3 = z4x5x4t232 >> 13
uint32323232 carry4 = z4x5x4t236 >> 13

int32323232 z4x5x4t225 += carry1
z4x5x4t224 &= mask13
int32323232 z4x5x4t229 += carry2
z4x5x4t228 &= mask13
int32323232 z4x5x4t233 += carry3
z4x5x4t232 &= mask13
int32323232 z4x5x4t237 += carry4
z4x5x4t236 &= mask13

uint32323232 carry1 = z4x5x4t225 >> 13
uint32323232 carry2 = z4x5x4t229 >> 13
uint32323232 carry3 = z4x5x4t233 >> 13
uint32323232 carry4 = z4x5x4t237 >> 13

int32323232 z4x5x4t226 += carry1
z4x5x4t225 &= mask13
int32323232 z4x5x4t230 += carry2
z4x5x4t229 &= mask13
int32323232 z4x5x4t234 += carry3
z4x5x4t233 &= mask13
int32323232 z4x5x4t238 += carry4
z4x5x4t237 &= mask13

# Tail of the second carry pass over the high accumulators.  The carry out of
# limb 38 is not added to an existing limb; it becomes the new limb
# z4x5x4t239.
uint32323232 carry1 = z4x5x4t226 >> 13
uint32323232 carry2 = z4x5x4t230 >> 13
uint32323232 carry3 = z4x5x4t234 >> 13
uint32323232 z4x5x4t239 = z4x5x4t238 >> 13

int32323232 z4x5x4t227 += carry1
z4x5x4t226 &= mask13
int32323232 z4x5x4t231 += carry2
z4x5x4t230 &= mask13
int32323232 z4x5x4t235 += carry3
z4x5x4t234 &= mask13


# Limbs 27, 31, 35 again use the 12-bit shift/mask; limb 38 is masked here
# now that its carry has been moved into z4x5x4t239.
uint32323232 carry1 = z4x5x4t227 >> 12
z4x5x4t238 &= mask13
uint32323232 carry2 = z4x5x4t231 >> 12
uint32323232 carry3 = z4x5x4t235 >> 12

int32323232 z4x5x4t228 += carry1
z4x5x4t227 &= mask12
int32323232 z4x5x4t232 += carry2
z4x5x4t231 &= mask12
int32323232 z4x5x4t236 += carry3
z4x5x4t235 &= mask12

# Fold the high half onto the low half: low limb k gains
# (low 16 bits of high limb 20+k) * (low 16 bits of vec19).
# NOTE(review): vec19 is declared in the file header but set outside this
# excerpt; presumably it holds the constant 19 in every lane, matching the
# explicit "Multiply carry by 19" step later in the routine -- confirm.
# Limbs 28, 32 and 36 were not re-masked after receiving the last carries;
# only their low 16 bits enter the product.
int32323232 z4x5x4t20 += (z4x5x4t220 & 0xffff) * (vec19 & 0xffff)
int32323232 z4x5x4t21 += (z4x5x4t221 & 0xffff) * (vec19 & 0xffff)
int32323232 z4x5x4t22 += (z4x5x4t222 & 0xffff) * (vec19 & 0xffff)
int32323232 z4x5x4t23 += (z4x5x4t223 & 0xffff) * (vec19 & 0xffff)
int32323232 z4x5x4t24 += (z4x5x4t224 & 0xffff) * (vec19 & 0xffff)
int32323232 z4x5x4t25 += (z4x5x4t225 & 0xffff) * (vec19 & 0xffff)
int32323232 z4x5x4t26 += (z4x5x4t226 & 0xffff) * (vec19 & 0xffff)
int32323232 z4x5x4t27 += (z4x5x4t227 & 0xffff) * (vec19 & 0xffff)
int32323232 z4x5x4t28 += (z4x5x4t228 & 0xffff) * (vec19 & 0xffff)
int32323232 z4x5x4t29 += (z4x5x4t229 & 0xffff) * (vec19 & 0xffff)
int32323232 z4x5x4t210 += (z4x5x4t230 & 0xffff) * (vec19 & 0xffff)
int32323232 z4x5x4t211 += (z4x5x4t231 & 0xffff) * (vec19 & 0xffff)
int32323232 z4x5x4t212 += (z4x5x4t232 & 0xffff) * (vec19 & 0xffff)
int32323232 z4x5x4t213 += (z4x5x4t233 & 0xffff) * (vec19 & 0xffff)
int32323232 z4x5x4t214 += (z4x5x4t234 & 0xffff) * (vec19 & 0xffff)
int32323232 z4x5x4t215 += (z4x5x4t235 & 0xffff) * (vec19 & 0xffff)
int32323232 z4x5x4t216 += (z4x5x4t236 & 0xffff) * (vec19 & 0xffff)
int32323232 z4x5x4t217 += (z4x5x4t237 & 0xffff) * (vec19 & 0xffff)
# Last two steps of the fold of the high half onto the low half
# (low limb k += high limb 20+k times vec19, low 16 bits of each operand).
int32323232 z4x5x4t218 += (z4x5x4t238 & 0xffff) * (vec19 & 0xffff)
int32323232 z4x5x4t219 += (z4x5x4t239 & 0xffff) * (vec19 & 0xffff)

# Sequential carry through the top limbs 16 -> 17 -> 18 -> 19.  The carry out
# of limb 19 (shift by 12) wraps around to limb 0 after multiplication by 19.
uint32323232 carry = z4x5x4t216 >> 13
int32323232 z4x5x4t217 += carry
uint32323232 carry = z4x5x4t217 >> 13
int32323232 z4x5x4t218 += carry
uint32323232 carry = z4x5x4t218 >> 13
int32323232 z4x5x4t219 += carry
uint32323232 carry = z4x5x4t219 >> 12
# Multiply carry by 19: red = 16*carry + carry + carry + carry
int32323232 red = carry << 4
int32323232 red += carry
int32323232 red += carry
int32323232 red += carry

int32323232 z4x5x4t20 += red

# Mask the four top limbs once each.  (The original masked z4x5x4t219 with
# mask12 twice with no intervening write to it; the redundant copy is gone.)
z4x5x4t216 &= mask13
z4x5x4t217 &= mask13
z4x5x4t218 &= mask13
z4x5x4t219 &= mask12

# Carry propagation over the low limbs, four chains in parallel starting at
# limbs 0, 4, 8 and 12.  Limbs 3, 7, 11, 15 use ">> 12"/mask12, the others
# ">> 13"/mask13.
uint32323232 carry0 = z4x5x4t20 >> 13
uint32323232 carry1 = z4x5x4t24 >> 13
uint32323232 carry2 = z4x5x4t28 >> 13
uint32323232 carry3 = z4x5x4t212 >> 13

int32323232 z4x5x4t21 += carry0
z4x5x4t20 &= mask13
int32323232 z4x5x4t25 += carry1
z4x5x4t24 &= mask13
int32323232 z4x5x4t29 += carry2
z4x5x4t28 &= mask13
int32323232 z4x5x4t213 += carry3
z4x5x4t212 &= mask13

uint32323232 carry0 = z4x5x4t21 >> 13
uint32323232 carry1 = z4x5x4t25 >> 13
uint32323232 carry2 = z4x5x4t29 >> 13
uint32323232 carry3 = z4x5x4t213 >> 13

int32323232 z4x5x4t22 += carry0
z4x5x4t21 &= mask13
int32323232 z4x5x4t26 += carry1
z4x5x4t25 &= mask13
int32323232 z4x5x4t210 += carry2
z4x5x4t29 &= mask13
int32323232 z4x5x4t214 += carry3
z4x5x4t213 &= mask13

uint32323232 carry0 = z4x5x4t22 >> 13
uint32323232 carry1 = z4x5x4t26 >> 13
uint32323232 carry2 = z4x5x4t210 >> 13
uint32323232 carry3 = z4x5x4t214 >> 13

int32323232 z4x5x4t23 += carry0
z4x5x4t22 &= mask13
int32323232 z4x5x4t27 += carry1
z4x5x4t26 &= mask13
int32323232 z4x5x4t211 += carry2
z4x5x4t210 &= mask13
int32323232 z4x5x4t215 += carry3
z4x5x4t214 &= mask13

uint32323232 carry0 = z4x5x4t23 >> 12
uint32323232 carry1 = z4x5x4t27 >> 12
uint32323232 carry2 = z4x5x4t211 >> 12
uint32323232 carry3 = z4x5x4t215 >> 12

# From here on the extraction of t2<k> from finished limbs is interleaved
# with the remaining carry steps.  Each t2<k> is taken from z4x5x4t2<k> with
# byte-selection pattern selw3333 once that limb has its final value.
# NOTE(review): selw3333 is defined outside this excerpt; presumably it
# replicates one 32-bit word of the source into all four lanes -- confirm.
z4x5x4t23 &= mask12
z4x5x4t27 &= mask12
z4x5x4t211 &= mask12
t20 = select bytes from z4x5x4t20 by selw3333
z4x5x4t215 &= mask12
t21 = select bytes from z4x5x4t21 by selw3333

int32323232 z4x5x4t24 += carry0
t22 = select bytes from z4x5x4t22 by selw3333
int32323232 z4x5x4t28 += carry1
t23 = select bytes from z4x5x4t23 by selw3333
int32323232 z4x5x4t212 += carry2
int32323232 z4x5x4t216 += carry3

# Second pass: chains start at limbs 4, 8, 12 and 16.
uint32323232 carry1 = z4x5x4t24 >> 13
uint32323232 carry2 = z4x5x4t28 >> 13
uint32323232 carry3 = z4x5x4t212 >> 13
uint32323232 carry4 = z4x5x4t216 >> 13

z4x5x4t24 &= mask13
z4x5x4t28 &= mask13
z4x5x4t212 &= mask13
t24 = select bytes from z4x5x4t24 by selw3333
z4x5x4t216 &= mask13
t28 = select bytes from z4x5x4t28 by selw3333

int32323232 z4x5x4t25 += carry1
t212 = select bytes from z4x5x4t212 by selw3333
int32323232 z4x5x4t29 += carry2
t216 = select bytes from z4x5x4t216 by selw3333
int32323232 z4x5x4t213 += carry3
int32323232 z4x5x4t217 += carry4

uint32323232 carry1 = z4x5x4t25 >> 13
uint32323232 carry2 = z4x5x4t29 >> 13
uint32323232 carry3 = z4x5x4t213 >> 13
uint32323232 carry4 = z4x5x4t217 >> 13

z4x5x4t25 &= mask13
z4x5x4t29 &= mask13
z4x5x4t213 &= mask13
t25 = select bytes from z4x5x4t25 by selw3333
z4x5x4t217 &= mask13
t29 = select bytes from z4x5x4t29 by selw3333

int32323232 z4x5x4t26 += carry1
t213 = select bytes from z4x5x4t213 by selw3333
int32323232 z4x5x4t210 += carry2
t217 = select bytes from z4x5x4t217 by selw3333
int32323232 z4x5x4t214 += carry3
int32323232 z4x5x4t218 += carry4

uint32323232 carry1 = z4x5x4t26 >> 13
uint32323232 carry2 = z4x5x4t210 >> 13
# Final step of the second low-limb carry pass: limbs 6, 10, 14, 18 pass
# their carries to limbs 7, 11, 15, 19, and the matching t2<k> values are
# extracted with selw3333 as soon as each limb is final.
uint32323232 carry3 = z4x5x4t214 >> 13
uint32323232 carry4 = z4x5x4t218 >> 13

z4x5x4t26 &= mask13
z4x5x4t210 &= mask13
z4x5x4t214 &= mask13
t26 = select bytes from z4x5x4t26 by selw3333
z4x5x4t218 &= mask13
t210 = select bytes from z4x5x4t210 by selw3333

int32323232 z4x5x4t27 += carry1
t214 = select bytes from z4x5x4t214 by selw3333
int32323232 z4x5x4t211 += carry2
t218 = select bytes from z4x5x4t218 by selw3333
int32323232 z4x5x4t215 += carry3
int32323232 z4x5x4t219 += carry4


#################################################################
### Multiplications by t20..t23                               ###
#################################################################
# Start of the product of the t2<k> values with the five x1_* vectors
# (x1_03, x1_47, x1_811, x1_1215, x1_1619), accumulated into z3_*.
# Interleaved with it, the x2_* vectors are assembled from the z4x5x4t2<k>
# limbs with the shuf2_* patterns.
# NOTE(review): the shuf*/comb* patterns are defined outside this excerpt;
# what each one selects cannot be confirmed from here.

# tmp1x = t21 * x1 group x (low 16 bits of every lane).
int32323232 tmp10 = (t21 & 0xffff) * (x1_03 & 0xffff)
t27 = select bytes from z4x5x4t27 by selw3333
int32323232 tmp11 = (t21 & 0xffff) * (x1_47 & 0xffff)
t211 = select bytes from z4x5x4t211 by selw3333
int32323232 tmp12 = (t21 & 0xffff) * (x1_811 & 0xffff)
t215 = select bytes from z4x5x4t215 by selw3333
int32323232 tmp13 = (t21 & 0xffff) * (x1_1215 & 0xffff)
t219 = select bytes from z4x5x4t219 by selw3333
int32323232 tmp14 = (t21 & 0xffff) * (x1_1619 & 0xffff)
x2_03 = combine z4x5x4t20 and z4x5x4t21 by shuf2_01


# z3_* are initialised with t20 * x1 (no realignment needed for t20).
int32323232 z3_03 = (t20 & 0xffff) * (x1_03 & 0xffff)
x2_47 = combine z4x5x4t24 and z4x5x4t25 by shuf2_01
int32323232 z3_47 = (t20 & 0xffff) * (x1_47 & 0xffff)
x2_811 = combine z4x5x4t28 and z4x5x4t29 by shuf2_01
int32323232 z3_811 = (t20 & 0xffff) * (x1_811 & 0xffff)
x2_1215 = combine z4x5x4t212 and z4x5x4t213 by shuf2_01
int32323232 z3_1215 = (t20 & 0xffff) * (x1_1215 & 0xffff)
x2_1619 = combine z4x5x4t216 and z4x5x4t217 by shuf2_01
int32323232 z3_1619 = (t20 & 0xffff) * (x1_1619 & 0xffff)

# Realign the t21 products: tmp10b is tmp10 shifted right by 8*4 = 32 bits
# (presumably a shift of the whole 128-bit register by one 32-bit lane --
# confirm); tmp1xa is the lane-wise doubled copy used by the comb13 merges
# further down.
tmp10b = tmp10 >> (8 * 4)
int32323232 tmp10a = tmp10 << 1
x2_03 = combine x2_03 and z4x5x4t22 by shuf2_2
int32323232 tmp11a = tmp11 << 1
# Multiplication of t21..t28 by the x1_* vectors, accumulated into z3_*,
# interleaved with the assembly of x2_*, z2_* and x3_* from the z4x5x4t2<k>
# limbs (patterns shuf2_*, shuf0_* and shuf1_* respectively).
#
# Naming pattern visible in the code:
#   tmpRx  = t2<k> * x1 group x          (R = k mod 4, x = 0..4)
#   tmpRxa = tmpRx << 1                  (lane-wise doubling, x = 0..3)
#   tmpRxb = combine tmpR(x-1)a and tmpRx by combR(4-R)
#   tmpR0b = tmpR0 >> (8 * 4R)   and   tmpR4a = tmpR4 << (8 * (16 - 4R))
# The "b" values are added to five consecutive z3 groups and the doubled
# tmpR4a goes to the group after them.  tmp0x (R = 0) is added without any
# realignment.
# NOTE(review): the untyped ">> (8 * n)" / "<< (8 * n)" presumably shift the
# whole 128-bit register by 8*n bits, and comb13/comb22/comb31 presumably
# splice lanes of the two inputs -- both defined outside this excerpt; confirm.
x2_47 = combine x2_47 and z4x5x4t26 by shuf2_2
int32323232 tmp12a = tmp12 << 1
x2_811 = combine x2_811 and z4x5x4t210 by shuf2_2
int32323232 tmp13a = tmp13 << 1
tmp14a = tmp14 << (8 * 12)
# tmp2x = t22 * x1 group x.
int32323232 tmp20 = (t22 & 0xffff) * (x1_03 & 0xffff)
tmp11b = combine tmp10a and tmp11 by comb13
int32323232 tmp21 = (t22 & 0xffff) * (x1_47 & 0xffff)
tmp12b = combine tmp11a and tmp12 by comb13
int32323232 tmp22 = (t22 & 0xffff) * (x1_811 & 0xffff)
tmp13b = combine tmp12a and tmp13 by comb13
int32323232 tmp23 = (t22 & 0xffff) * (x1_1215 & 0xffff)
tmp14b = combine tmp13a and tmp14 by comb13
int32323232 tmp24 = (t22 & 0xffff) * (x1_1619 & 0xffff)
x2_1215 = combine x2_1215 and z4x5x4t214 by shuf2_2
# Add the realigned t21 products; z3_2023 is initialised from tmp14a.
int32323232 z3_03 += tmp10b
x2_1619 = combine x2_1619 and z4x5x4t218 by shuf2_2
int32323232 z3_47 += tmp11b
x2_03 = combine x2_03 and z4x5x4t23 by shuf2_3
int32323232 z3_811 += tmp12b
x2_47 = combine x2_47 and z4x5x4t27 by shuf2_3
int32323232 z3_1215 += tmp13b
x2_811 = combine x2_811 and z4x5x4t211 by shuf2_3
int32323232 z3_1619 += tmp14b
x2_1215 = combine x2_1215 and z4x5x4t215 by shuf2_3
int32323232 z3_2023 = tmp14a << 1

# Realign the t22 products (shift amounts 8*8) while z2_* assembly starts.
tmp20b = tmp20 >> (8 * 8)
int32323232 tmp20a = tmp20 << 1
x2_1619 = combine x2_1619 and z4x5x4t219 by shuf2_3
int32323232 tmp21a = tmp21 << 1
z2_03 = combine z4x5x4t20 and z4x5x4t21 by shuf0_01
int32323232 tmp22a = tmp22 << 1
tmp24a = tmp24 << (8 * 8)
int32323232 tmp23a = tmp23 << 1
z2_47 = combine z4x5x4t24 and z4x5x4t25 by shuf0_01
# tmp3x = t23 * x1 group x.
int32323232 tmp30 = (t23 & 0xffff) * (x1_03 & 0xffff)
z2_811 = combine z4x5x4t28 and z4x5x4t29 by shuf0_01
int32323232 tmp31 = (t23 & 0xffff) * (x1_47 & 0xffff)
tmp21b = combine tmp20a and tmp21 by comb22
int32323232 tmp32 = (t23 & 0xffff) * (x1_811 & 0xffff)
tmp22b = combine tmp21a and tmp22 by comb22
int32323232 tmp33 = (t23 & 0xffff) * (x1_1215 & 0xffff)
tmp23b = combine tmp22a and tmp23 by comb22
int32323232 tmp34 = (t23 & 0xffff) * (x1_1619 & 0xffff)
tmp24b = combine tmp23a and tmp24 by comb22
int32323232 tmp24a <<= 1
z2_1215 = combine z4x5x4t212 and z4x5x4t213 by shuf0_01
int32323232 z3_03 += tmp20b
z2_1619 = combine z4x5x4t216 and z4x5x4t217 by shuf0_01
int32323232 z3_47 += tmp21b
z2_03 = combine z2_03 and z4x5x4t22 by shuf0_2
int32323232 z3_811 += tmp22b
z2_47 = combine z2_47 and z4x5x4t26 by shuf0_2
int32323232 z3_1215 += tmp23b
z2_811 = combine z2_811 and z4x5x4t210 by shuf0_2
int32323232 z3_1619 += tmp24b
z2_1215 = combine z2_1215 and z4x5x4t214 by shuf0_2
int32323232 z3_2023 += tmp24a

# Realign the t23 products (shift amounts 8*12 and 8*4).
tmp30b = tmp30 >> (8 * 12)
int32323232 tmp30a = tmp30 << 1
z2_1619 = combine z2_1619 and z4x5x4t218 by shuf0_2
int32323232 tmp31a = tmp31 << 1
z2_03 = combine z2_03 and z4x5x4t23 by shuf0_3
int32323232 tmp32a = tmp32 << 1
tmp34a = tmp34 << (8 * 4)
int32323232 tmp33a = tmp33 << 1
z2_47 = combine z2_47 and z4x5x4t27 by shuf0_3
# tmp0x = t24 * x1 group x; consumed at the start of the next section.
int32323232 tmp00 = (t24 & 0xffff) * (x1_03 & 0xffff)
z2_811 = combine z2_811 and z4x5x4t211 by shuf0_3
int32323232 tmp01 = (t24 & 0xffff) * (x1_47 & 0xffff)
tmp31b = combine tmp30a and tmp31 by comb31
int32323232 tmp02 = (t24 & 0xffff) * (x1_811 & 0xffff)
tmp32b = combine tmp31a and tmp32 by comb31
int32323232 tmp03 = (t24 & 0xffff) * (x1_1215 & 0xffff)
tmp33b = combine tmp32a and tmp33 by comb31
int32323232 tmp04 = (t24 & 0xffff) * (x1_1619 & 0xffff)
tmp34b = combine tmp33a and tmp34 by comb31
int32323232 tmp34a <<= 1
z2_1215 = combine z2_1215 and z4x5x4t215 by shuf0_3
int32323232 z3_03 += tmp30b
z2_1619 = combine z2_1619 and z4x5x4t219 by shuf0_3
int32323232 z3_47 += tmp31b
x3_03 = combine z4x5x4t20 and z4x5x4t21 by shuf1_01

int32323232 z3_811 += tmp32b
x3_47 = combine z4x5x4t24 and z4x5x4t25 by shuf1_01
int32323232 z3_1215 += tmp33b
x3_811 = combine z4x5x4t28 and z4x5x4t29 by shuf1_01
int32323232 z3_1619 += tmp34b
x3_1215 = combine z4x5x4t212 and z4x5x4t213 by shuf1_01
int32323232 z3_2023 += tmp34a
x3_1619 = combine z4x5x4t216 and z4x5x4t217 by shuf1_01

#################################################################
### Multiplications by t24..t27                               ###
#################################################################
# Same pattern one group higher: the results land in z3_47 .. z3_2427.
# The products by t24 (tmp0x) were already computed above; the products by
# t28 are computed at the end of this section for the next one.

int32323232 tmp10 = (t25 & 0xffff) * (x1_03 & 0xffff)
x3_03 = combine x3_03 and z4x5x4t22 by shuf1_2
int32323232 tmp11 = (t25 & 0xffff) * (x1_47 & 0xffff)
x3_47 = combine x3_47 and z4x5x4t26 by shuf1_2
int32323232 tmp12 = (t25 & 0xffff) * (x1_811 & 0xffff)
x3_811 = combine x3_811 and z4x5x4t210 by shuf1_2
int32323232 tmp13 = (t25 & 0xffff) * (x1_1215 & 0xffff)
x3_1215 = combine x3_1215 and z4x5x4t214 by shuf1_2
int32323232 tmp14 = (t25 & 0xffff) * (x1_1619 & 0xffff)
x3_1619 = combine x3_1619 and z4x5x4t218 by shuf1_2

# Add the t24 products (no realignment).
int32323232 z3_47 += tmp00
x3_03 = combine x3_03 and z4x5x4t23 by shuf1_3
int32323232 z3_811 += tmp01
x3_47 = combine x3_47 and z4x5x4t27 by shuf1_3
int32323232 z3_1215 += tmp02
x3_811 = combine x3_811 and z4x5x4t211 by shuf1_3
int32323232 z3_1619 += tmp03
tmp10b = tmp10 >> (8 * 4)
int32323232 z3_2023 += tmp04
x3_1215 = combine x3_1215 and z4x5x4t215 by shuf1_3

int32323232 tmp10a = tmp10 << 1
x3_1619 = combine x3_1619 and z4x5x4t219 by shuf1_3
int32323232 tmp11a = tmp11 << 1
int32323232 tmp12a = tmp12 << 1
int32323232 tmp13a = tmp13 << 1
tmp14a = tmp14 << (8 * 12)
int32323232 tmp20 = (t26 & 0xffff) * (x1_03 & 0xffff)
tmp11b = combine tmp10a and tmp11 by comb13
int32323232 tmp21 = (t26 & 0xffff) * (x1_47 & 0xffff)
tmp12b = combine tmp11a and tmp12 by comb13
int32323232 tmp22 = (t26 & 0xffff) * (x1_811 & 0xffff)
tmp13b = combine tmp12a and tmp13 by comb13
int32323232 tmp23 = (t26 & 0xffff) * (x1_1215 & 0xffff)
tmp14b = combine tmp13a and tmp14 by comb13
int32323232 tmp24 = (t26 & 0xffff) * (x1_1619 & 0xffff)
# Add the realigned t25 products; z3_2427 is initialised from tmp14a.
int32323232 z3_47 += tmp10b
int32323232 z3_811 += tmp11b
int32323232 z3_1215 += tmp12b
int32323232 z3_1619 += tmp13b
int32323232 z3_2023 += tmp14b
int32323232 z3_2427 = tmp14a << 1

tmp20b = tmp20 >> (8 * 8)
int32323232 tmp20a = tmp20 << 1
int32323232 tmp21a = tmp21 << 1
int32323232 tmp22a = tmp22 << 1
tmp24a = tmp24 << (8 * 8)
int32323232 tmp23a = tmp23 << 1
int32323232 tmp30 = (t27 & 0xffff) * (x1_03 & 0xffff)
int32323232 tmp31 = (t27 & 0xffff) * (x1_47 & 0xffff)
tmp21b = combine tmp20a and tmp21 by comb22
int32323232 tmp32 = (t27 & 0xffff) * (x1_811 & 0xffff)
tmp22b = combine tmp21a and tmp22 by comb22
int32323232 tmp33 = (t27 & 0xffff) * (x1_1215 & 0xffff)
tmp23b = combine tmp22a and tmp23 by comb22
int32323232 tmp34 = (t27 & 0xffff) * (x1_1619 & 0xffff)
tmp24b = combine tmp23a and tmp24 by comb22
int32323232 tmp24a <<= 1
# Add the realigned t26 products.
int32323232 z3_47 += tmp20b
int32323232 z3_811 += tmp21b
int32323232 z3_1215 += tmp22b
int32323232 z3_1619 += tmp23b
int32323232 z3_2023 += tmp24b
int32323232 z3_2427 += tmp24a

tmp30b = tmp30 >> (8 * 12)
int32323232 tmp30a = tmp30 << 1
int32323232 tmp31a = tmp31 << 1
int32323232 tmp32a = tmp32 << 1
tmp34a = tmp34 << (8 * 4)
int32323232 tmp33a = tmp33 << 1
# tmp0x = t28 * x1 group x; consumed at the start of the next section.
int32323232 tmp00 = (t28 & 0xffff) * (x1_03 & 0xffff)
int32323232 tmp01 = (t28 & 0xffff) * (x1_47 & 0xffff)
tmp31b = combine tmp30a and tmp31 by comb31
int32323232 tmp02 = (t28 & 0xffff) * (x1_811 & 0xffff)
tmp32b = combine tmp31a and tmp32 by comb31
int32323232 tmp03 = (t28 & 0xffff) * (x1_1215 & 0xffff)
tmp33b = combine tmp32a and tmp33 by comb31
int32323232 tmp04 = (t28 & 0xffff) * (x1_1619 & 0xffff)
tmp34b = combine tmp33a and tmp34 by comb31
int32323232 tmp34a <<= 1
# Add the realigned t27 products (tmp3xb / tmp34a, computed just before this
# point) into z3_47 .. z3_2427.
int32323232 z3_47 += tmp30b
int32323232 z3_811 += tmp31b
int32323232 z3_1215 += tmp32b
int32323232 z3_1619 += tmp33b
int32323232 z3_2023 += tmp34b
int32323232 z3_2427 += tmp34a

#################################################################
### Multiplications by t28..t211                              ###
#################################################################
# Pattern used in each of the three sections below:
#   tmp0x = first t2 value of the group * x1 group x, added without
#           realignment (computed at the end of the preceding section);
#   tmp1x / tmp2x / tmp3x = the next three t2 values * x1 group x; their "b"
#           forms are built with shifts of 8*4 / 8*8 / 8*12 and the
#           comb13 / comb22 / comb31 patterns, the "a" forms by "<< 1".
# Results go to five consecutive z3 groups plus one freshly initialised
# group (z3_2831, z3_3235, z3_3639 respectively).
# NOTE(review): comb13/comb22/comb31 and the semantics of the untyped
# ">> (8 * n)" / "<< (8 * n)" are defined outside this excerpt -- confirm.

int32323232 tmp10 = (t29 & 0xffff) * (x1_03 & 0xffff)
int32323232 tmp11 = (t29 & 0xffff) * (x1_47 & 0xffff)
int32323232 tmp12 = (t29 & 0xffff) * (x1_811 & 0xffff)
int32323232 tmp13 = (t29 & 0xffff) * (x1_1215 & 0xffff)
int32323232 tmp14 = (t29 & 0xffff) * (x1_1619 & 0xffff)

# t28 products.
int32323232 z3_811 += tmp00
int32323232 z3_1215 += tmp01
int32323232 z3_1619 += tmp02
int32323232 z3_2023 += tmp03
int32323232 z3_2427 += tmp04

tmp10b = tmp10 >> (8 * 4)
int32323232 tmp10a = tmp10 << 1
int32323232 tmp11a = tmp11 << 1
int32323232 tmp12a = tmp12 << 1
tmp14a = tmp14 << (8 * 12)
int32323232 tmp13a = tmp13 << 1
int32323232 tmp20 = (t210 & 0xffff) * (x1_03 & 0xffff)
int32323232 tmp21 = (t210 & 0xffff) * (x1_47 & 0xffff)
tmp11b = combine tmp10a and tmp11 by comb13
int32323232 tmp22 = (t210 & 0xffff) * (x1_811 & 0xffff)
tmp12b = combine tmp11a and tmp12 by comb13
int32323232 tmp23 = (t210 & 0xffff) * (x1_1215 & 0xffff)
tmp13b = combine tmp12a and tmp13 by comb13
int32323232 tmp24 = (t210 & 0xffff) * (x1_1619 & 0xffff)
tmp14b = combine tmp13a and tmp14 by comb13
# t29 products; z3_2831 is initialised from tmp14a.
int32323232 z3_811 += tmp10b
int32323232 z3_1215 += tmp11b
int32323232 z3_1619 += tmp12b
int32323232 z3_2023 += tmp13b
int32323232 z3_2427 += tmp14b
tmp20b = tmp20 >> (8 * 8)
int32323232 z3_2831 = tmp14a << 1

int32323232 tmp20a = tmp20 << 1
int32323232 tmp21a = tmp21 << 1
int32323232 tmp22a = tmp22 << 1
int32323232 tmp23a = tmp23 << 1
tmp24a = tmp24 << (8 * 8)
int32323232 tmp30 = (t211 & 0xffff) * (x1_03 & 0xffff)
tmp21b = combine tmp20a and tmp21 by comb22
int32323232 tmp31 = (t211 & 0xffff) * (x1_47 & 0xffff)
tmp22b = combine tmp21a and tmp22 by comb22
int32323232 tmp32 = (t211 & 0xffff) * (x1_811 & 0xffff)
tmp23b = combine tmp22a and tmp23 by comb22
int32323232 tmp33 = (t211 & 0xffff) * (x1_1215 & 0xffff)
tmp24b = combine tmp23a and tmp24 by comb22
int32323232 tmp34 = (t211 & 0xffff) * (x1_1619 & 0xffff)
int32323232 tmp24a <<= 1
# t210 products.
int32323232 z3_811 += tmp20b
int32323232 z3_1215 += tmp21b
int32323232 z3_1619 += tmp22b
int32323232 z3_2023 += tmp23b
int32323232 z3_2427 += tmp24b
tmp30b = tmp30 >> (8 * 12)
int32323232 z3_2831 += tmp24a

int32323232 tmp30a = tmp30 << 1
int32323232 tmp31a = tmp31 << 1
int32323232 tmp32a = tmp32 << 1
int32323232 tmp33a = tmp33 << 1
tmp34a = tmp34 << (8 * 4)
# tmp0x = t212 * x1 group x, for the next section.
int32323232 tmp00 = (t212 & 0xffff) * (x1_03 & 0xffff)
tmp31b = combine tmp30a and tmp31 by comb31
int32323232 tmp01 = (t212 & 0xffff) * (x1_47 & 0xffff)
tmp32b = combine tmp31a and tmp32 by comb31
int32323232 tmp02 = (t212 & 0xffff) * (x1_811 & 0xffff)
tmp33b = combine tmp32a and tmp33 by comb31
int32323232 tmp03 = (t212 & 0xffff) * (x1_1215 & 0xffff)
tmp34b = combine tmp33a and tmp34 by comb31
int32323232 tmp04 = (t212 & 0xffff) * (x1_1619 & 0xffff)
int32323232 tmp34a <<= 1
# t211 products.
int32323232 z3_811 += tmp30b
int32323232 z3_1215 += tmp31b
int32323232 z3_1619 += tmp32b
int32323232 z3_2023 += tmp33b
int32323232 z3_2427 += tmp34b
int32323232 z3_2831 += tmp34a

#################################################################
### Multiplications by t212..t215                             ###
#################################################################

int32323232 tmp10 = (t213 & 0xffff) * (x1_03 & 0xffff)
int32323232 tmp11 = (t213 & 0xffff) * (x1_47 & 0xffff)
int32323232 tmp12 = (t213 & 0xffff) * (x1_811 & 0xffff)
int32323232 tmp13 = (t213 & 0xffff) * (x1_1215 & 0xffff)
int32323232 tmp14 = (t213 & 0xffff) * (x1_1619 & 0xffff)

# t212 products.
int32323232 z3_1215 += tmp00
int32323232 z3_1619 += tmp01
int32323232 z3_2023 += tmp02
int32323232 z3_2427 += tmp03
tmp10b = tmp10 >> (8 * 4)
int32323232 z3_2831 += tmp04

int32323232 tmp10a = tmp10 << 1
int32323232 tmp11a = tmp11 << 1
int32323232 tmp12a = tmp12 << 1
int32323232 tmp13a = tmp13 << 1
tmp14a = tmp14 << (8 * 12)
int32323232 tmp20 = (t214 & 0xffff) * (x1_03 & 0xffff)
tmp11b = combine tmp10a and tmp11 by comb13
int32323232 tmp21 = (t214 & 0xffff) * (x1_47 & 0xffff)
tmp12b = combine tmp11a and tmp12 by comb13
int32323232 tmp22 = (t214 & 0xffff) * (x1_811 & 0xffff)
tmp13b = combine tmp12a and tmp13 by comb13
int32323232 tmp23 = (t214 & 0xffff) * (x1_1215 & 0xffff)
tmp14b = combine tmp13a and tmp14 by comb13
int32323232 tmp24 = (t214 & 0xffff) * (x1_1619 & 0xffff)
# t213 products; z3_3235 is initialised from tmp14a.
int32323232 z3_1215 += tmp10b
int32323232 z3_1619 += tmp11b
int32323232 z3_2023 += tmp12b
int32323232 z3_2427 += tmp13b
int32323232 z3_2831 += tmp14b
int32323232 z3_3235 = tmp14a << 1

tmp20b = tmp20 >> (8 * 8)
int32323232 tmp20a = tmp20 << 1
int32323232 tmp21a = tmp21 << 1
int32323232 tmp22a = tmp22 << 1
tmp24a = tmp24 << (8 * 8)
int32323232 tmp23a = tmp23 << 1
int32323232 tmp30 = (t215 & 0xffff) * (x1_03 & 0xffff)
int32323232 tmp31 = (t215 & 0xffff) * (x1_47 & 0xffff)
tmp21b = combine tmp20a and tmp21 by comb22
int32323232 tmp32 = (t215 & 0xffff) * (x1_811 & 0xffff)
tmp22b = combine tmp21a and tmp22 by comb22
int32323232 tmp33 = (t215 & 0xffff) * (x1_1215 & 0xffff)
tmp23b = combine tmp22a and tmp23 by comb22
int32323232 tmp34 = (t215 & 0xffff) * (x1_1619 & 0xffff)
tmp24b = combine tmp23a and tmp24 by comb22
int32323232 tmp24a <<= 1
# t214 products.
int32323232 z3_1215 += tmp20b
int32323232 z3_1619 += tmp21b
int32323232 z3_2023 += tmp22b
int32323232 z3_2427 += tmp23b
int32323232 z3_2831 += tmp24b
int32323232 z3_3235 += tmp24a

tmp30b = tmp30 >> (8 * 12)
int32323232 tmp30a = tmp30 << 1
int32323232 tmp31a = tmp31 << 1
int32323232 tmp32a = tmp32 << 1
tmp34a = tmp34 << (8 * 4)
int32323232 tmp33a = tmp33 << 1
# tmp0x = t216 * x1 group x, for the next section.
int32323232 tmp00 = (t216 & 0xffff) * (x1_03 & 0xffff)
int32323232 tmp01 = (t216 & 0xffff) * (x1_47 & 0xffff)
tmp31b = combine tmp30a and tmp31 by comb31
int32323232 tmp02 = (t216 & 0xffff) * (x1_811 & 0xffff)
tmp32b = combine tmp31a and tmp32 by comb31
int32323232 tmp03 = (t216 & 0xffff) * (x1_1215 & 0xffff)
tmp33b = combine tmp32a and tmp33 by comb31
int32323232 tmp04 = (t216 & 0xffff) * (x1_1619 & 0xffff)
tmp34b = combine tmp33a and tmp34 by comb31
int32323232 tmp34a <<= 1
# t215 products.
int32323232 z3_1215 += tmp30b
int32323232 z3_1619 += tmp31b
int32323232 z3_2023 += tmp32b
int32323232 z3_2427 += tmp33b
int32323232 z3_2831 += tmp34b
int32323232 z3_3235 += tmp34a

#################################################################
### Multiplications by t216..t219                             ###
#################################################################

int32323232 tmp10 = (t217 & 0xffff) * (x1_03 & 0xffff)
int32323232 tmp11 = (t217 & 0xffff) * (x1_47 & 0xffff)
int32323232 tmp12 = (t217 & 0xffff) * (x1_811 & 0xffff)
int32323232 tmp13 = (t217 & 0xffff) * (x1_1215 & 0xffff)
int32323232 tmp14 = (t217 & 0xffff) * (x1_1619 & 0xffff)

# t216 products.
int32323232 z3_1619 += tmp00
int32323232 z3_2023 += tmp01
int32323232 z3_2427 += tmp02
int32323232 z3_2831 += tmp03
int32323232 z3_3235 += tmp04

tmp10b = tmp10 >> (8 * 4)
int32323232 tmp10a = tmp10 << 1
int32323232 tmp11a = tmp11 << 1
int32323232 tmp12a = tmp12 << 1
tmp14a = tmp14 << (8 * 12)
int32323232 tmp13a = tmp13 << 1
int32323232 tmp20 = (t218 & 0xffff) * (x1_03 & 0xffff)
int32323232 tmp21 = (t218 & 0xffff) * (x1_47 & 0xffff)
tmp11b = combine tmp10a and tmp11 by comb13
int32323232 tmp22 = (t218 & 0xffff) * (x1_811 & 0xffff)
tmp12b = combine tmp11a and tmp12 by comb13
int32323232 tmp23 = (t218 & 0xffff) * (x1_1215 & 0xffff)
tmp13b = combine tmp12a and tmp13 by comb13
int32323232 tmp24 = (t218 & 0xffff) * (x1_1619 & 0xffff)
tmp14b = combine tmp13a and tmp14 by comb13
# t217 products; z3_3639 is initialised from tmp14a.
int32323232 z3_1619 += tmp10b
int32323232 z3_2023 += tmp11b
int32323232 z3_2427 += tmp12b
int32323232 z3_2831 += tmp13b
int32323232 z3_3235 += tmp14b
tmp20b = tmp20 >> (8 * 8)
int32323232 z3_3639 = tmp14a << 1

int32323232 tmp20a = tmp20 << 1
int32323232 tmp21a = tmp21 << 1
int32323232 tmp22a = tmp22 << 1
int32323232 tmp23a = tmp23 << 1
tmp24a = tmp24 << (8 * 8)
int32323232 tmp30 = (t219 & 0xffff) * (x1_03 & 0xffff)
tmp21b = combine tmp20a and tmp21 by comb22
int32323232 tmp31 = (t219 & 0xffff) * (x1_47 & 0xffff)
tmp22b = combine tmp21a and tmp22 by comb22
int32323232 tmp32 = (t219 & 0xffff) * (x1_811 & 0xffff)
tmp23b = combine tmp22a and tmp23 by comb22
int32323232 tmp33 = (t219 & 0xffff) * (x1_1215 & 0xffff)
tmp24b = combine tmp23a and tmp24 by comb22
int32323232 tmp34 = (t219 & 0xffff) * (x1_1619 & 0xffff)
int32323232 tmp24a <<= 1
# t218 products, interleaved with the realignment of the t219 products.
int32323232 z3_1619 += tmp20b
int32323232 z3_2023 += tmp21b
int32323232 tmp30a = tmp30 << 1
tmp30b = tmp30 >> (8 * 12)
int32323232 tmp31a = tmp31 << 1
int32323232 tmp32a = tmp32 << 1
int32323232 tmp33a = tmp33 << 1
tmp34a = tmp34 << (8 * 4)

int32323232 z3_2427 += tmp22b
tmp31b = combine tmp30a and tmp31 by comb31
int32323232 z3_2831 += tmp23b
tmp32b = combine tmp31a and tmp32 by comb31
int32323232 z3_3235 += tmp24b
tmp33b = combine tmp32a and tmp33 by comb31
int32323232 z3_3639 += tmp24a
tmp34b = combine tmp33a and tmp34 by comb31

# Tail of the multiply-accumulate that begins above this excerpt: shift
# tmp34a left by one more bit per 32-bit word, then add the last group of
# partial products (tmp30b..tmp34b and tmp34a) into z3_1619..z3_3639.
int32323232 tmp34a <<= 1
int32323232 z3_1619 += tmp30b
int32323232 z3_2023 += tmp31b
int32323232 z3_2427 += tmp32b
int32323232 z3_2831 += tmp33b
int32323232 z3_3235 += tmp34b
int32323232 z3_3639 += tmp34a

# Reduce coefficients
#
# Upper half, pass 1 (z3_2023..z3_3235).  Each of the first three rounds
# selects bytes with sel01 / sel12 / sel23, shifts every 32-bit word right
# by 13 and adds the result back into the SAME vector.
# NOTE(review): the sel* byte patterns are defined outside this excerpt;
# presumably selXY routes word X into the slot of word Y so the carry lands
# in the neighbouring coefficient -- confirm against their definitions.
carry0 = select bytes from z3_2023 by sel01
carry1 = select bytes from z3_2427 by sel01
carry2 = select bytes from z3_2831 by sel01
carry3 = select bytes from z3_3235 by sel01
uint32323232 carry0 >>= 13
uint32323232 carry1 >>= 13
uint32323232 carry2 >>= 13
uint32323232 carry3 >>= 13
int32323232 z3_2023 += carry0
int32323232 z3_2427 += carry1
int32323232 z3_2831 += carry2
int32323232 z3_3235 += carry3

carry0 = select bytes from z3_2023 by sel12
carry1 = select bytes from z3_2427 by sel12
carry2 = select bytes from z3_2831 by sel12
carry3 = select bytes from z3_3235 by sel12
uint32323232 carry0 >>= 13
uint32323232 carry1 >>= 13
uint32323232 carry2 >>= 13
uint32323232 carry3 >>= 13
int32323232 z3_2023 += carry0
int32323232 z3_2427 += carry1
int32323232 z3_2831 += carry2
int32323232 z3_3235 += carry3

carry0 = select bytes from z3_2023 by sel23
carry1 = select bytes from z3_2427 by sel23
carry2 = select bytes from z3_2831 by sel23
carry3 = select bytes from z3_3235 by sel23
uint32323232 carry0 >>= 13
uint32323232 carry1 >>= 13
uint32323232 carry2 >>= 13
uint32323232 carry3 >>= 13
int32323232 z3_2023 += carry0
int32323232 z3_2427 += carry1
int32323232 z3_2831 += carry2
int32323232 z3_3235 += carry3

# Fourth round: the sel30 selection is shifted right by 12 (not 13) and,
# after masking each vector with redcoeffmask, is added into the NEXT
# higher vector instead of the one it came from.
carry0 = select bytes from z3_2023 by sel30
carry1 = select bytes from z3_2427 by sel30
carry2 = select bytes from z3_2831 by sel30
carry3 = select bytes from z3_3235 by sel30
uint32323232 carry0 >>= 12
uint32323232 carry1 >>= 12
uint32323232 carry2 >>= 12
uint32323232 carry3 >>= 12
z3_2023 &= redcoeffmask
z3_2427 &= redcoeffmask
z3_2831 &= redcoeffmask
z3_3235 &= redcoeffmask

int32323232 z3_2427 += carry0
int32323232 z3_2831 += carry1
int32323232 z3_3235 += carry2
int32323232 z3_3639 += carry3


# Upper half, pass 2 (z3_2427..z3_3639): same three in-vector rounds, now
# including the top vector z3_3639.
carry1 = select bytes from z3_2427 by sel01
carry2 = select bytes from z3_2831 by sel01
carry3 = select bytes from z3_3235 by sel01
carry4 = select bytes from z3_3639 by sel01
uint32323232 carry1 >>= 13
uint32323232 carry2 >>= 13
uint32323232 carry3 >>= 13
uint32323232 carry4 >>= 13
int32323232 z3_2427 += carry1
int32323232 z3_2831 += carry2
int32323232 z3_3235 += carry3
int32323232 z3_3639 += carry4

carry1 = select bytes from z3_2427 by sel12
carry2 = select bytes from z3_2831 by sel12
carry3 = select bytes from z3_3235 by sel12
carry4 = select bytes from z3_3639 by sel12
uint32323232 carry1 >>= 13
uint32323232 carry2 >>= 13
uint32323232 carry3 >>= 13
uint32323232 carry4 >>= 13
int32323232 z3_2427 += carry1
int32323232 z3_2831 += carry2
int32323232 z3_3235 += carry3
int32323232 z3_3639 += carry4

carry1 = select bytes from z3_2427 by sel23
carry2 = select bytes from z3_2831 by sel23
carry3 = select bytes from z3_3235 by sel23
carry4 = select bytes from z3_3639 by sel23
uint32323232 carry1 >>= 13
uint32323232 carry2 >>= 13
uint32323232 carry3 >>= 13
uint32323232 carry4 >>= 13
int32323232 z3_2427 += carry1
int32323232 z3_2831 += carry2
int32323232 z3_3235 += carry3
int32323232 z3_3639 += carry4

# Cross-vector round of pass 2.  No carry is taken out of z3_3639 here; it
# is masked with redcoeffmaskend rather than redcoeffmask and only receives
# carry3.
carry1 = select bytes from z3_2427 by sel30
carry2 = select bytes from z3_2831 by sel30
carry3 = select bytes from z3_3235 by sel30
uint32323232 carry1 >>= 12
uint32323232 carry2 >>= 12
uint32323232 carry3 >>= 12
z3_2427 &= redcoeffmask
z3_2831 &= redcoeffmask
z3_3235 &= redcoeffmask
z3_3639 &= redcoeffmaskend

int32323232 z3_2831 += carry1
int32323232 z3_3235 += carry2
int32323232 z3_3639 += carry3

# Reduce polynomial
#
# Multiply the low 16 bits of every word of the five upper vectors by 19
# and add the products into the five lower vectors:
#   z3_2023 -> z3_03, z3_2427 -> z3_47, z3_2831 -> z3_811,
#   z3_3235 -> z3_1215, z3_3639 -> z3_1619.
# NOTE(review): presumably the wrap-around for arithmetic modulo
# 2^255 - 19 -- confirm against the field representation used above.

uint32323232 red0 = (z3_2023 & 0xffff) * 19
uint32323232 red4 = (z3_3639 & 0xffff) * 19
uint32323232 red1 = (z3_2427 & 0xffff) * 19
uint32323232 red2 = (z3_2831 & 0xffff) * 19
uint32323232 red3 = (z3_3235 & 0xffff) * 19

int32323232 z3_03 += red0
int32323232 z3_1619 += red4
int32323232 z3_47 += red1
int32323232 z3_811 += red2
int32323232 z3_1215 += red3

# Reduce coefficients ctd.
#
# z3_1619 is handled first: three in-vector rounds, then its sel30 carry is
# multiplied by 19 and added into z3_03.
carry = select bytes from z3_1619 by sel01
uint32323232 carry >>= 13
int32323232 z3_1619 += carry

carry = select bytes from z3_1619 by sel12
uint32323232 carry >>= 13
int32323232 z3_1619 += carry

carry = select bytes from z3_1619 by sel23
uint32323232 carry >>= 13
int32323232 z3_1619 += carry

carry = select bytes from z3_1619 by sel30
uint32323232 carry >>= 12
# 19 * carry is built from one shift and three adds (16*carry + 3*carry)
# in place of the 16-bit multiply kept below for reference.
#int32323232 red = (carry & 0xffff) * 19
int32323232 red = carry << 4
int32323232 red = red + carry
int32323232 red = red + carry
int32323232 red = red + carry

int32323232 z3_03 += red

z3_1619 &= redcoeffmask


# Lower half, pass 1 (z3_03..z3_1215): three in-vector rounds followed by
# one cross-vector round, as for the upper half.
carry0 = select bytes from z3_03 by sel01
carry1 = select bytes from z3_47 by sel01
carry2 = select bytes from z3_811 by sel01
carry3 = select bytes from z3_1215 by sel01
uint32323232 carry0 >>= 13
uint32323232 carry1 >>= 13
uint32323232 carry2 >>= 13
uint32323232 carry3 >>= 13
int32323232 z3_03 += carry0
int32323232 z3_47 += carry1
int32323232 z3_811 += carry2
int32323232 z3_1215 += carry3

carry0 = select bytes from z3_03 by sel12
carry1 = select bytes from z3_47 by sel12
carry2 = select bytes from z3_811 by sel12
carry3 = select bytes from z3_1215 by sel12
uint32323232 carry0 >>= 13
uint32323232 carry1 >>= 13
uint32323232 carry2 >>= 13
uint32323232 carry3 >>= 13
int32323232 z3_03 += carry0
int32323232 z3_47 += carry1
int32323232 z3_811 += carry2
int32323232 z3_1215 += carry3

carry0 = select bytes from z3_03 by sel23
carry1 = select bytes from z3_47 by sel23
carry2 = select bytes from z3_811 by sel23
carry3 = select bytes from z3_1215 by sel23
uint32323232 carry0 >>= 13
uint32323232 carry1 >>= 13
uint32323232 carry2 >>= 13
uint32323232 carry3 >>= 13
int32323232 z3_03 += carry0
int32323232 z3_47 += carry1
int32323232 z3_811 += carry2
int32323232 z3_1215 += carry3

carry0 = select bytes from z3_03 by sel30
carry1 = select bytes from z3_47 by sel30
carry2 = select bytes from z3_811 by sel30
carry3 = select bytes from z3_1215 by sel30
uint32323232 carry0 >>= 12
uint32323232 carry1 >>= 12
uint32323232 carry2 >>= 12
uint32323232 carry3 >>= 12
z3_03 &= redcoeffmask
z3_47 &= redcoeffmask
z3_811 &= redcoeffmask
z3_1215 &= redcoeffmask
int32323232 z3_47 += carry0
int32323232 z3_811 += carry1
int32323232 z3_1215 += carry2
int32323232 z3_1619 += carry3


# Lower half, pass 2 (z3_47..z3_1619): three in-vector rounds only.  There
# is no sel30 round; the four vectors are masked with redcoeffmaskveryend
# instead.  z3_03 is not touched in this pass.
carry1 = select bytes from z3_47 by sel01
carry2 = select bytes from z3_811 by sel01
carry3 = select bytes from z3_1215 by sel01
carry4 = select bytes from z3_1619 by sel01
uint32323232 carry1 >>= 13
uint32323232 carry2 >>= 13
uint32323232 carry3 >>= 13
uint32323232 carry4 >>= 13
int32323232 z3_47 += carry1
int32323232 z3_811 += carry2
int32323232 z3_1215 += carry3
int32323232 z3_1619 += carry4

carry1 = select bytes from z3_47 by sel12
carry2 = select bytes from z3_811 by sel12
carry3 = select bytes from z3_1215 by sel12
carry4 = select bytes from z3_1619 by sel12
uint32323232 carry1 >>= 13
uint32323232 carry2 >>= 13
uint32323232 carry3 >>= 13
uint32323232 carry4 >>= 13
int32323232 z3_47 += carry1
int32323232 z3_811 += carry2
int32323232 z3_1215 += carry3
int32323232 z3_1619 += carry4

carry1 = select bytes from z3_47 by sel23
carry2 = select bytes from z3_811 by sel23
carry3 = select bytes from z3_1215 by sel23
carry4 = select bytes from z3_1619 by sel23
uint32323232 carry1 >>= 13
uint32323232 carry2 >>= 13
uint32323232 carry3 >>= 13
uint32323232 carry4 >>= 13
int32323232 z3_47 += carry1
int32323232 z3_811 += carry2
int32323232 z3_1215 += carry3
int32323232 z3_1619 += carry4

z3_47 &= redcoeffmaskveryend
z3_811 &= redcoeffmaskveryend
z3_1215 &= redcoeffmaskveryend
z3_1619 &= redcoeffmaskveryend

###################################################################################
########################## Write Result ##############################
####################################################################################
#
# Disabled stores of all twenty x2/z2/x3/z3 vectors to retp + 0 .. retp + 304.
# They are kept commented out; the live stores are after the end: label.
#
##*(vec128 *) ((retp + 0) & ~15) = x2_03
##*(vec128 *) ((retp + 16) & ~15) = x2_47
##*(vec128 *) ((retp + 32) & ~15) = x2_811
##*(vec128 *) ((retp + 48) & ~15) = x2_1215
##*(vec128 *) ((retp + 64) & ~15) = x2_1619
##*(vec128 *) ((retp + 80) & ~15) = z2_03
##*(vec128 *) ((retp + 96) & ~15) = z2_47
##*(vec128 *) ((retp + 112) & ~15) = z2_811
##*(vec128 *) ((retp + 128) & ~15) = z2_1215
##*(vec128 *) ((retp + 144) & ~15) = z2_1619
##*(vec128 *) ((retp + 160) & ~15) = x3_03
##*(vec128 *) ((retp + 176) & ~15) = x3_47
##*(vec128 *) ((retp + 192) & ~15) = x3_811
##*(vec128 *) ((retp + 208) & ~15) = x3_1215
##*(vec128 *) ((retp + 224) & ~15) = x3_1619
##*(vec128 *) ((retp + 240) & ~15) = z3_03
##*(vec128 *) ((retp + 256) & ~15) = z3_47
##*(vec128 *) ((retp + 272) & ~15) = z3_811
##*(vec128 *) ((retp + 288) & ~15) = z3_1215
##*(vec128 *) ((retp + 304) & ~15) = z3_1619

# Loop control: OR the four 32-bit words of loopmask together and jump back
# to loop while any bit is still set.
uint32323232 check = loopmask[0] | loopmask[1] | loopmask[2] | loopmask[3]
goto loop if (check & 0xffffffff)

# loopmask is exhausted.  If the low 16 bits of done are non-zero, jump to
# end; otherwise fall through to the statements that follow.
goto end if (done & 0xffff)

# Reached when loopmask has run out and done is still clear: load the key
# vector at skp + 0 (address forced to 16-byte alignment), re-arm loopmask,
# set done and run the loop again.
# prevextbit is copied to its stack slot before the reload and read back
# afterwards.
# NOTE(review): "loopmask = 1" followed by the (8 * 15) and (7 % 8) shifts
# presumably leaves only the most significant bit of the 128-bit register
# set (byte shift, then bit shift) -- confirm against the qhasm machine
# description for untyped "<<=".
prevextbit_stack = prevextbit
sk = *(vec128 *) ((skp + 0) & ~15)
uint32323232 loopmask = 1
loopmask <<= (8 * 15)
loopmask <<= (7 % 8)
# Reorder the bytes of the freshly loaded key vector with the swapendian
# pattern (defined outside this excerpt).
sk = select bytes from sk by swapendian
prevextbit = prevextbit_stack
int32323232 done = 1
goto loop

####################################################################################
####################### Conditionally swap P2 and P3 ###########################
####################################################################################

end:

# Branch-free swap driven by extbit: for every vector pair
#   new 2 = (old 2 & nflip) ^ (old 3 & flip)
#   new 3 = (old 2 & flip)  ^ (old 3 & nflip)
# so bits where flip is set are exchanged and all others are kept.
# nflip is the complement of flip, formed as ~(flip | zero).
# NOTE(review): zero is presumably an all-zero vector and extbit an
# all-ones / all-zeros mask; both are set outside this excerpt.
flip = extbit
nflip = ~(flip | zero)

tmp0 = x2_03 & nflip
tmp1 = x3_03 & flip
tmp2 = x2_03 & flip
tmp3 = x3_03 & nflip
x2_03 = tmp0 ^ tmp1
x3_03 = tmp2 ^ tmp3

tmp0 = x2_47 & nflip
tmp1 = x3_47 & flip
tmp2 = x2_47 & flip
tmp3 = x3_47 & nflip
x2_47 = tmp0 ^ tmp1
x3_47 = tmp2 ^ tmp3

tmp0 = x2_811 & nflip
tmp1 = x3_811 & flip
tmp2 = x2_811 & flip
tmp3 = x3_811 & nflip
x2_811 = tmp0 ^ tmp1
x3_811 = tmp2 ^ tmp3

tmp0 = x2_1215 & nflip
tmp1 = x3_1215 & flip
tmp2 = x2_1215 & flip
tmp3 = x3_1215 & nflip
x2_1215 = tmp0 ^ tmp1
x3_1215 = tmp2 ^ tmp3

tmp0 = x2_1619 & nflip
tmp1 = x3_1619 & flip
tmp2 = x2_1619 & flip
tmp3 = x3_1619 & nflip
x2_1619 = tmp0 ^ tmp1
x3_1619 = tmp2 ^ tmp3

tmp0 = z2_03 & nflip
tmp1 = z3_03 & flip
tmp2 = z2_03 & flip
tmp3 = z3_03 & nflip
z2_03 = tmp0 ^ tmp1
z3_03 = tmp2 ^ tmp3

tmp0 = z2_47 & nflip
tmp1 = z3_47 & flip
tmp2 = z2_47 & flip
tmp3 = z3_47 & nflip
z2_47 = tmp0 ^ tmp1
z3_47 = tmp2 ^ tmp3

tmp0 = z2_811 & nflip
tmp1 = z3_811 & flip
tmp2 = z2_811 & flip
tmp3 = z3_811 & nflip
z2_811 = tmp0 ^ tmp1
z3_811 = tmp2 ^ tmp3

tmp0 = z2_1215 & nflip
tmp1 = z3_1215 & flip
tmp2 = z2_1215 & flip
tmp3 = z3_1215 & nflip
z2_1215 = tmp0 ^ tmp1
z3_1215 = tmp2 ^ tmp3

tmp0 = z2_1619 & nflip
tmp1 = z3_1619 & flip
tmp2 = z2_1619 & flip
tmp3 = z3_1619 & nflip
z2_1619 = tmp0 ^ tmp1
z3_1619 = tmp2 ^ tmp3


# Write the result: the five x2 vectors go to retp + 0 .. retp + 64 and the
# five z2 vectors to retp + 80 .. retp + 144.  Every address is masked with
# ~15, i.e. rounded down to a 16-byte boundary.  x3/z3 are not stored.
*(vec128 *) ((retp + 0) & ~15) = x2_03
*(vec128 *) ((retp + 16) & ~15) = x2_47
*(vec128 *) ((retp + 32) & ~15) = x2_811
*(vec128 *) ((retp + 48) & ~15) = x2_1215
*(vec128 *) ((retp + 64) & ~15) = x2_1619
*(vec128 *) ((retp + 80) & ~15) = z2_03
*(vec128 *) ((retp + 96) & ~15) = z2_47
*(vec128 *) ((retp + 112) & ~15) = z2_811
*(vec128 *) ((retp + 128) & ~15) = z2_1215
*(vec128 *) ((retp + 144) & ~15) = z2_1619

# Reload call0..call47 from their stack copies before returning.
# NOTE(review): presumably these are the callee-saved registers spilled in
# the prologue, which is outside this excerpt -- confirm.
call0 = call0_stack
call1 = call1_stack
call2 = call2_stack
call3 = call3_stack
call4 = call4_stack
call5 = call5_stack
call6 = call6_stack
call7 = call7_stack
call8 = call8_stack
call9 = call9_stack
call10 = call10_stack
call11 = call11_stack
call12 = call12_stack
call13 = call13_stack
call14 = call14_stack
call15 = call15_stack
call16 = call16_stack
call17 = call17_stack
call18 = call18_stack
call19 = call19_stack
call20 = call20_stack
call21 = call21_stack
call22 = call22_stack
call23 = call23_stack
call24 = call24_stack
call25 = call25_stack
call26 = call26_stack
call27 = call27_stack
call28 = call28_stack
call29 = call29_stack
call30 = call30_stack
call31 = call31_stack
call32 = call32_stack
call33 = call33_stack
call34 = call34_stack
call35 = call35_stack
call36 = call36_stack
call37 = call37_stack
call38 = call38_stack
call39 = call39_stack
call40 = call40_stack
call41 = call41_stack
call42 = call42_stack
call43 = call43_stack
call44 = call44_stack
call45 = call45_stack
call46 = call46_stack
call47 = call47_stack


leave