1# version 20090331
2# Peter Schwabe & Neil Costigan
3# Public domain.
4
# Arguments of the routine: three vec128 variables, each declared as `input`.
# The visible body dereferences skp and xp as addresses (after masking with ~15):
#   skp - address the scalar vector `sk` is loaded from (at offset 16)
#   xp  - address the five x1_* vectors are loaded from (offsets 0..64)
# retp is not referenced in the visible part of this file
# (presumably the result pointer -- TODO confirm).
5vec128 retp
6vec128 skp
7vec128 xp
8
9input retp
10input skp
11input xp
12
# sk: one vector loaded from (skp + 16), byte-permuted with `swapendian`,
# and tested against `loopmask` at the top of the loop.
13vec128 sk
14
# x1_*: five vectors loaded from xp at byte offsets 0, 16, 32, 48, 64.
# They are copied into x3_* during initialization.
# NOTE(review): the suffixes (_03, _47, _811, _1215, _1619) look like
# coefficient index ranges 0-3, 4-7, ... -- confirm.
15vec128 x1_03
16vec128 x1_47
17vec128 x1_811
18vec128 x1_1215
19vec128 x1_1619
20
# x2_* / z2_*: initialized to (one, zero, zero, zero, zero) and all-zero
# respectively; conditionally swapped with x3_* / z3_* on every loop iteration.
21vec128 x2_03
22vec128 x2_47
23vec128 x2_811
24vec128 x2_1215
25vec128 x2_1619
26vec128 z2_03
27vec128 z2_47
28vec128 z2_811
29vec128 z2_1215
30vec128 z2_1619
31
# x3_* / z3_*: initialized to x1_* and (one, zero, zero, zero, zero).
32vec128 x3_03
33vec128 x3_47
34vec128 x3_811
35vec128 x3_1215
36vec128 x3_1619
37vec128 z3_03
38vec128 z3_47
39vec128 z3_811
40vec128 z3_1215
41vec128 z3_1619
42
# z3_2023 .. z3_3639: not referenced in the visible part of this file.
43vec128 z3_2023
44vec128 z3_2427
45vec128 z3_2831
46vec128 z3_3235
47vec128 z3_3639
48
# a_*, b_*, c_*, d_*: per-iteration sums and differences of the working
# coordinates, computed in the loop body as
#   a = x2 + z2          c = x3 + z3
#   b = (x2 + 2p) - z2   d = (x3 + 2p) - z3
# where 2p_* is added before the subtraction.
49vec128 a_03
50vec128 a_47
51vec128 a_811
52vec128 a_1215
53vec128 a_1619
54
55vec128 b_03
56vec128 b_47
57vec128 b_811
58vec128 b_1215
59vec128 b_1619
60
61vec128 c_03
62vec128 c_47
63vec128 c_811
64vec128 c_1215
65vec128 c_1619
66
67vec128 d_03
68vec128 d_47
69vec128 d_811
70vec128 d_1215
71vec128 d_1619
72
# acbd0 .. acbd19: each is assembled with `combine ... by shufN_01/_2/_3`
# from the a, c, b and d vectors (in that order), then carry-propagated
# using right shifts by 13 (or 12) and the masks mask13 / mask12.
73vec128 acbd0
74vec128 acbd1
75vec128 acbd2
76vec128 acbd3
77vec128 acbd4
78vec128 acbd5
79vec128 acbd6
80vec128 acbd7
81vec128 acbd8
82vec128 acbd9
83vec128 acbd10
84vec128 acbd11
85vec128 acbd12
86vec128 acbd13
87vec128 acbd14
88vec128 acbd15
89vec128 acbd16
90vec128 acbd17
91vec128 acbd18
92vec128 acbd19
93
# t20 .. t219: twenty vectors; not referenced in the visible part of this file.
94vec128 t20
95vec128 t21
96vec128 t22
97vec128 t23
98vec128 t24
99vec128 t25
100vec128 t26
101vec128 t27
102vec128 t28
103vec128 t29
104vec128 t210
105vec128 t211
106vec128 t212
107vec128 t213
108vec128 t214
109vec128 t215
110vec128 t216
111vec128 t217
112vec128 t218
113vec128 t219
114
115
# abba0 .. abba19: abbaN is produced from acbdN by
# `select bytes from acbdN by selw0220`, and is the second operand of the
# 16-bit multiplications that accumulate into aacbbbda*.
116vec128 abba0
117vec128 abba1
118vec128 abba2
119vec128 abba3
120vec128 abba4
121vec128 abba5
122vec128 abba6
123vec128 abba7
124vec128 abba8
125vec128 abba9
126vec128 abba10
127vec128 abba11
128vec128 abba12
129vec128 abba13
130vec128 abba14
131vec128 abba15
132vec128 abba16
133vec128 abba17
134vec128 abba18
135vec128 abba19
136
# Four groups of twenty vectors each (index suffix 0..19).
# None of them is referenced in the visible part of this file.
# NOTE(review): the name prefixes presumably list the four values packed into
# the word slots of each vector -- confirm against the later ladder steps.

# et4t1t0<N>
137vec128 et4t1t00
138vec128 et4t1t01
139vec128 et4t1t02
140vec128 et4t1t03
141vec128 et4t1t04
142vec128 et4t1t05
143vec128 et4t1t06
144vec128 et4t1t07
145vec128 et4t1t08
146vec128 et4t1t09
147vec128 et4t1t010
148vec128 et4t1t011
149vec128 et4t1t012
150vec128 et4t1t013
151vec128 et4t1t014
152vec128 et4t1t015
153vec128 et4t1t016
154vec128 et4t1t017
155vec128 et4t1t018
156vec128 et4t1t019
157
# aa_a24aadada<N>
158vec128 aa_a24aadada0
159vec128 aa_a24aadada1
160vec128 aa_a24aadada2
161vec128 aa_a24aadada3
162vec128 aa_a24aadada4
163vec128 aa_a24aadada5
164vec128 aa_a24aadada6
165vec128 aa_a24aadada7
166vec128 aa_a24aadada8
167vec128 aa_a24aadada9
168vec128 aa_a24aadada10
169vec128 aa_a24aadada11
170vec128 aa_a24aadada12
171vec128 aa_a24aadada13
172vec128 aa_a24aadada14
173vec128 aa_a24aadada15
174vec128 aa_a24aadada16
175vec128 aa_a24aadada17
176vec128 aa_a24aadada18
177vec128 aa_a24aadada19
178
# bb_a24m1bbcb<N>
179vec128 bb_a24m1bbcb0
180vec128 bb_a24m1bbcb1
181vec128 bb_a24m1bbcb2
182vec128 bb_a24m1bbcb3
183vec128 bb_a24m1bbcb4
184vec128 bb_a24m1bbcb5
185vec128 bb_a24m1bbcb6
186vec128 bb_a24m1bbcb7
187vec128 bb_a24m1bbcb8
188vec128 bb_a24m1bbcb9
189vec128 bb_a24m1bbcb10
190vec128 bb_a24m1bbcb11
191vec128 bb_a24m1bbcb12
192vec128 bb_a24m1bbcb13
193vec128 bb_a24m1bbcb14
194vec128 bb_a24m1bbcb15
195vec128 bb_a24m1bbcb16
196vec128 bb_a24m1bbcb17
197vec128 bb_a24m1bbcb18
198vec128 bb_a24m1bbcb19
199
200
# 2p2p2pcb<N>
201vec128 2p2p2pcb0
202vec128 2p2p2pcb1
203vec128 2p2p2pcb2
204vec128 2p2p2pcb3
205vec128 2p2p2pcb4
206vec128 2p2p2pcb5
207vec128 2p2p2pcb6
208vec128 2p2p2pcb7
209vec128 2p2p2pcb8
210vec128 2p2p2pcb9
211vec128 2p2p2pcb10
212vec128 2p2p2pcb11
213vec128 2p2p2pcb12
214vec128 2p2p2pcb13
215vec128 2p2p2pcb14
216vec128 2p2p2pcb15
217vec128 2p2p2pcb16
218vec128 2p2p2pcb17
219vec128 2p2p2pcb18
220vec128 2p2p2pcb19
221
# vec19: loaded from the external symbol _vec19 inside the loop body.
222vec128 vec19
223
# tmp0 .. tmp19: scratch vectors. In the visible code tmp0 is used to spread
# the extracted key bit across the vector (rotates by 32 and 64 bits), and
# tmp0..tmp3 hold the masked halves of the conditional swap.
# tmp4..tmp19 are not referenced in the visible part of this file.
224vec128 tmp0
225vec128 tmp1
226vec128 tmp2
227vec128 tmp3
228vec128 tmp4
229vec128 tmp5
230vec128 tmp6
231vec128 tmp7
232vec128 tmp8
233vec128 tmp9
234vec128 tmp10
235vec128 tmp11
236vec128 tmp12
237vec128 tmp13
238vec128 tmp14
239vec128 tmp15
240vec128 tmp16
241vec128 tmp17
242vec128 tmp18
243vec128 tmp19
244
# et0aat1<N> and t4t0bbt1<N> (N = 0..19): two groups of twenty vectors.
# Neither group is referenced in the visible part of this file.
# NOTE(review): presumably operands of later multiplications in the ladder
# step -- confirm against the remainder of the routine.
245vec128 et0aat10
246vec128 et0aat11
247vec128 et0aat12
248vec128 et0aat13
249vec128 et0aat14
250vec128 et0aat15
251vec128 et0aat16
252vec128 et0aat17
253vec128 et0aat18
254vec128 et0aat19
255vec128 et0aat110
256vec128 et0aat111
257vec128 et0aat112
258vec128 et0aat113
259vec128 et0aat114
260vec128 et0aat115
261vec128 et0aat116
262vec128 et0aat117
263vec128 et0aat118
264vec128 et0aat119
265
266vec128 t4t0bbt10
267vec128 t4t0bbt11
268vec128 t4t0bbt12
269vec128 t4t0bbt13
270vec128 t4t0bbt14
271vec128 t4t0bbt15
272vec128 t4t0bbt16
273vec128 t4t0bbt17
274vec128 t4t0bbt18
275vec128 t4t0bbt19
276vec128 t4t0bbt110
277vec128 t4t0bbt111
278vec128 t4t0bbt112
279vec128 t4t0bbt113
280vec128 t4t0bbt114
281vec128 t4t0bbt115
282vec128 t4t0bbt116
283vec128 t4t0bbt117
284vec128 t4t0bbt118
285vec128 t4t0bbt119
286
# aacbbbda0 .. aacbbbda39: accumulators for the section of the loop body
# headed "AA = A^2, CB = C*B, BB = B^2, DA = D*A". Each is built from
# products of the form (acbdI & 0xffff) * (abbaJ & 0xffff), with some
# accumulators doubled (<<= 1) part-way through.
# Only aacbbbda0..aacbbbda17 are written in the visible part of this file.
287vec128 aacbbbda0
288vec128 aacbbbda1
289vec128 aacbbbda2
290vec128 aacbbbda3
291vec128 aacbbbda4
292vec128 aacbbbda5
293vec128 aacbbbda6
294vec128 aacbbbda7
295vec128 aacbbbda8
296vec128 aacbbbda9
297vec128 aacbbbda10
298vec128 aacbbbda11
299vec128 aacbbbda12
300vec128 aacbbbda13
301vec128 aacbbbda14
302vec128 aacbbbda15
303vec128 aacbbbda16
304vec128 aacbbbda17
305vec128 aacbbbda18
306vec128 aacbbbda19
307vec128 aacbbbda20
308vec128 aacbbbda21
309vec128 aacbbbda22
310vec128 aacbbbda23
311vec128 aacbbbda24
312vec128 aacbbbda25
313vec128 aacbbbda26
314vec128 aacbbbda27
315vec128 aacbbbda28
316vec128 aacbbbda29
317vec128 aacbbbda30
318vec128 aacbbbda31
319vec128 aacbbbda32
320vec128 aacbbbda33
321vec128 aacbbbda34
322vec128 aacbbbda35
323vec128 aacbbbda36
324vec128 aacbbbda37
325vec128 aacbbbda38
326vec128 aacbbbda39
327
# z4x5x4t2<N> (N = 0..39): forty vectors, not referenced in the visible part
# of this file.
# NOTE(review): the count of 40 matches aacbbbda0..39, so these presumably
# hold another unreduced product -- confirm against the rest of the routine.
328vec128 z4x5x4t20
329vec128 z4x5x4t21
330vec128 z4x5x4t22
331vec128 z4x5x4t23
332vec128 z4x5x4t24
333vec128 z4x5x4t25
334vec128 z4x5x4t26
335vec128 z4x5x4t27
336vec128 z4x5x4t28
337vec128 z4x5x4t29
338vec128 z4x5x4t210
339vec128 z4x5x4t211
340vec128 z4x5x4t212
341vec128 z4x5x4t213
342vec128 z4x5x4t214
343vec128 z4x5x4t215
344vec128 z4x5x4t216
345vec128 z4x5x4t217
346vec128 z4x5x4t218
347vec128 z4x5x4t219
348vec128 z4x5x4t220
349vec128 z4x5x4t221
350vec128 z4x5x4t222
351vec128 z4x5x4t223
352vec128 z4x5x4t224
353vec128 z4x5x4t225
354vec128 z4x5x4t226
355vec128 z4x5x4t227
356vec128 z4x5x4t228
357vec128 z4x5x4t229
358vec128 z4x5x4t230
359vec128 z4x5x4t231
360vec128 z4x5x4t232
361vec128 z4x5x4t233
362vec128 z4x5x4t234
363vec128 z4x5x4t235
364vec128 z4x5x4t236
365vec128 z4x5x4t237
366vec128 z4x5x4t238
367vec128 z4x5x4t239
368
369
# Carry vectors. In the visible code:
#   carry          - top part of acbd19 (>> 12), multiplied by 19 and added
#                    into acbd0
#   carry0..carry4 - acbdN >> 13 (or >> 12), added into the next acbd vector
# carry5..carry19 are not referenced in the visible part of this file.
370vec128 carry
371vec128 carry0
372vec128 carry1
373vec128 carry2
374vec128 carry3
375vec128 carry4
376vec128 carry5
377vec128 carry6
378vec128 carry7
379vec128 carry8
380vec128 carry9
381vec128 carry10
382vec128 carry11
383vec128 carry12
384vec128 carry13
385vec128 carry14
386vec128 carry15
387vec128 carry16
388vec128 carry17
389vec128 carry18
390vec128 carry19
# red, red0..red4: not referenced in the visible part of this file
# (presumably used by the reduction step -- TODO confirm).
391vec128 red
392vec128 red0
393vec128 red1
394vec128 red2
395vec128 red3
396vec128 red4
397
# Constant vectors. Each one is loaded once from an external symbol
# (`name = extern(...)`) right after the routine is entered.
#
# comb13/comb22/comb31 and redcoeffmask*: loaded, but not otherwise used in
# the visible part of this file.
398vec128 comb13
399vec128 comb22
400vec128 comb31
401vec128 redcoeffmask
402vec128 redcoeffmaskend
403vec128 redcoeffmaskveryend
# shufK_01 / shufK_2 / shufK_3 (K = 0..3): patterns for `combine X and Y by ...`.
# The _01 pattern merges the a and c vectors, _2 merges in b, _3 merges in d,
# producing the acbd* vectors.
404vec128 shuf0_01
405vec128 shuf0_2
406vec128 shuf0_3
407vec128 shuf1_01
408vec128 shuf1_2
409vec128 shuf1_3
410vec128 shuf2_01
411vec128 shuf2_2
412vec128 shuf2_3
413vec128 shuf3_01
414vec128 shuf3_2
415vec128 shuf3_3
# selw*: patterns for `select bytes from X by ...`. Only selw0220 is used in
# the visible code (it derives abbaN from acbdN).
416vec128 selw0220
417vec128 selw0105
418vec128 selw2325
419vec128 selw0433
420vec128 selw261c0
421vec128 selw0342
422vec128 selw1362
423vec128 selw3333
# sel01..sel30: loaded from select01..select30; not used in the visible code.
424vec128 sel01
425vec128 sel12
426vec128 sel23
427vec128 sel30
# mask12 / mask13: ANDed onto acbd* vectors after the corresponding carry was
# extracted with a right shift by 12 / 13.
428vec128 mask12
429vec128 mask13
430
431
# Loop-control state.
#   bit        - sk & loopmask
#   done       - set to 0 during initialization
#   extbit     - 0 - (bit > 0), then XOR-spread via rotates by 32 and 64 bits
#   prevextbit - extbit of the previous iteration (starts at 0)
#   loopmask   - starts as 1 << (8*15 + 6), shifted right by 1 each iteration
#   flip       - prevextbit ^ extbit; mask for the conditional swap
#   nflip      - ~(flip | zero), i.e. the complement of flip
# check and nprevextbit are not referenced in the visible part of this file.
432vec128 bit
433vec128 done
434vec128 extbit
435vec128 check
436vec128 prevextbit
437vec128 nprevextbit
438vec128 loopmask
439vec128 flip
440vec128 nflip
# zero is set to 0; one is loaded from the external symbol _one.
441vec128 zero
442vec128 one
# a24vec, 2pconsts0, 2pconsts: loaded from external symbols; not otherwise
# used in the visible part of this file.
443vec128 a24vec
444vec128 2pconsts0
445vec128 2pconsts
# 2p_*: loaded from _2p_03 .. _2p_1619 and added to x2_* / x3_* before z2_* /
# z3_* is subtracted (when forming b_* and d_*).
446vec128 2p_03
447vec128 2p_47
448vec128 2p_811
449vec128 2p_1215
450vec128 2p_1619
# swapendian: byte-selection pattern applied to sk right after it is loaded.
451vec128 swapendian
452
# tmpRC scratch vectors (R = 0..3, C = 0..4) with suffix variants
# a / b / n / an / bn. None of them is referenced in the visible part of this
# file.
# NOTE(review): the "n" variants exist only for column 4 -- presumably a
# special case for the last column; confirm against the rest of the routine.
453vec128 tmp00
454vec128 tmp00b
455vec128 tmp01
456vec128 tmp01b
457vec128 tmp02
458vec128 tmp02b
459vec128 tmp03
460vec128 tmp03b
461vec128 tmp04
462vec128 tmp04n
463vec128 tmp04b
464vec128 tmp04bn
465vec128 tmp10a
466vec128 tmp10b
467vec128 tmp11a
468vec128 tmp11b
469vec128 tmp12a
470vec128 tmp12b
471vec128 tmp13a
472vec128 tmp13b
473vec128 tmp14n
474vec128 tmp14a
475vec128 tmp14an
476vec128 tmp14b
477vec128 tmp14bn
478vec128 tmp20
479vec128 tmp20a
480vec128 tmp20b
481vec128 tmp21
482vec128 tmp21a
483vec128 tmp21b
484vec128 tmp22
485vec128 tmp22a
486vec128 tmp22b
487vec128 tmp23
488vec128 tmp23a
489vec128 tmp23b
490vec128 tmp24
491vec128 tmp24n
492vec128 tmp24a
493vec128 tmp24an
494vec128 tmp24b
495vec128 tmp24bn
496vec128 tmp30
497vec128 tmp30a
498vec128 tmp30b
499vec128 tmp31
500vec128 tmp31a
501vec128 tmp31b
502vec128 tmp32
503vec128 tmp32a
504vec128 tmp32b
505vec128 tmp33
506vec128 tmp33a
507vec128 tmp33b
508vec128 tmp34
509vec128 tmp34n
510vec128 tmp34a
511vec128 tmp34an
512vec128 tmp34b
513vec128 tmp34bn
514
515
# call0 .. call47: forty-eight vec128 variables that are subsequently declared
# with `caller`. Immediately after `enter mladder`, each callN is copied into
# its stack slot callN_stack.
# NOTE(review): presumably the caller's registers that this routine must
# preserve -- confirm against the qhasm machine description.
516vec128 call0
517vec128 call1
518vec128 call2
519vec128 call3
520vec128 call4
521vec128 call5
522vec128 call6
523vec128 call7
524vec128 call8
525vec128 call9
526vec128 call10
527vec128 call11
528vec128 call12
529vec128 call13
530vec128 call14
531vec128 call15
532vec128 call16
533vec128 call17
534vec128 call18
535vec128 call19
536vec128 call20
537vec128 call21
538vec128 call22
539vec128 call23
540vec128 call24
541vec128 call25
542vec128 call26
543vec128 call27
544vec128 call28
545vec128 call29
546vec128 call30
547vec128 call31
548vec128 call32
549vec128 call33
550vec128 call34
551vec128 call35
552vec128 call36
553vec128 call37
554vec128 call38
555vec128 call39
556vec128 call40
557vec128 call41
558vec128 call42
559vec128 call43
560vec128 call44
561vec128 call45
562vec128 call46
563vec128 call47
564
# Marks call0 .. call47 as `caller` variables. The routine copies every one of
# them to a stack slot as its first action after `enter mladder`.
# NOTE(review): `caller` presumably binds each variable to a register whose
# incoming value belongs to the calling function -- confirm in the qhasm docs.
565caller call0
566caller call1
567caller call2
568caller call3
569caller call4
570caller call5
571caller call6
572caller call7
573caller call8
574caller call9
575caller call10
576caller call11
577caller call12
578caller call13
579caller call14
580caller call15
581caller call16
582caller call17
583caller call18
584caller call19
585caller call20
586caller call21
587caller call22
588caller call23
589caller call24
590caller call25
591caller call26
592caller call27
593caller call28
594caller call29
595caller call30
596caller call31
597caller call32
598caller call33
599caller call34
600caller call35
601caller call36
602caller call37
603caller call38
604caller call39
605caller call40
606caller call41
607caller call42
608caller call43
609caller call44
610caller call45
611caller call46
612caller call47
613
# One 128-bit stack slot per call0 .. call47. The routine stores callN into
# callN_stack right after `enter mladder`.
# NOTE(review): the matching restore is expected at the end of the routine,
# outside the visible part of this file -- confirm.
614stack128 call0_stack
615stack128 call1_stack
616stack128 call2_stack
617stack128 call3_stack
618stack128 call4_stack
619stack128 call5_stack
620stack128 call6_stack
621stack128 call7_stack
622stack128 call8_stack
623stack128 call9_stack
624stack128 call10_stack
625stack128 call11_stack
626stack128 call12_stack
627stack128 call13_stack
628stack128 call14_stack
629stack128 call15_stack
630stack128 call16_stack
631stack128 call17_stack
632stack128 call18_stack
633stack128 call19_stack
634stack128 call20_stack
635stack128 call21_stack
636stack128 call22_stack
637stack128 call23_stack
638stack128 call24_stack
639stack128 call25_stack
640stack128 call26_stack
641stack128 call27_stack
642stack128 call28_stack
643stack128 call29_stack
644stack128 call30_stack
645stack128 call31_stack
646stack128 call32_stack
647stack128 call33_stack
648stack128 call34_stack
649stack128 call35_stack
650stack128 call36_stack
651stack128 call37_stack
652stack128 call38_stack
653stack128 call39_stack
654stack128 call40_stack
655stack128 call41_stack
656stack128 call42_stack
657stack128 call43_stack
658stack128 call44_stack
659stack128 call45_stack
660stack128 call46_stack
661stack128 call47_stack
662
# Stack slot named after prevextbit; not referenced in the visible part of
# this file (presumably used to spill prevextbit -- TODO confirm).
663stack128 prevextbit_stack
664
# try: not referenced in the visible part of this file.
665vec128 try
666
667enter mladder
668
669call0_stack = call0
670call1_stack = call1
671call2_stack = call2
672call3_stack = call3
673call4_stack = call4
674call5_stack = call5
675call6_stack = call6
676call7_stack = call7
677call8_stack = call8
678call9_stack = call9
679call10_stack = call10
680call11_stack = call11
681call12_stack = call12
682call13_stack = call13
683call14_stack = call14
684call15_stack = call15
685call16_stack = call16
686call17_stack = call17
687call18_stack = call18
688call19_stack = call19
689call20_stack = call20
690call21_stack = call21
691call22_stack = call22
692call23_stack = call23
693call24_stack = call24
694call25_stack = call25
695call26_stack = call26
696call27_stack = call27
697call28_stack = call28
698call29_stack = call29
699call30_stack = call30
700call31_stack = call31
701call32_stack = call32
702call33_stack = call33
703call34_stack = call34
704call35_stack = call35
705call36_stack = call36
706call37_stack = call37
707call38_stack = call38
708call39_stack = call39
709call40_stack = call40
710call41_stack = call41
711call42_stack = call42
712call43_stack = call43
713call44_stack = call44
714call45_stack = call45
715call46_stack = call46
716call47_stack = call47
717
718int32323232 zero = 0
719one = extern(_one)
720uint32323232 loopmask = 1
721loopmask <<= (8 * 15)
722loopmask <<= (6 % 8)
723uint32323232 prevextbit = 0
724
725swapendian = extern(_swapendian)
726shuf0_01 = extern(_shuf0_01)
727shuf0_2 = extern(_shuf0_2)
728shuf0_3 = extern(_shuf0_3)
729shuf1_01 = extern(_shuf1_01)
730shuf1_2 = extern(_shuf1_2)
731shuf1_3 = extern(_shuf1_3)
732shuf2_01 = extern(_shuf2_01)
733shuf2_2 = extern(_shuf2_2)
734shuf2_3 = extern(_shuf2_3)
735shuf3_01 = extern(_shuf3_01)
736shuf3_2 = extern(_shuf3_2)
737shuf3_3 = extern(_shuf3_3)
738mask12 = extern(_mask12)
739mask13 = extern(_mask13)
740selw0220 = extern(_selw0220)
741selw0105 = extern(_selw0105)
742selw2325 = extern(_selw2325)
743selw0433 = extern (_selw0433)
744selw261c0 = extern(_selw261c0)
745selw0342 = extern(_selw0342)
746selw1362 = extern(_selw1362)
747selw3333 = extern(_selw3333)
748sel01 = extern(select01)
749sel12 = extern(select12)
750sel23 = extern(select23)
751sel30 = extern(select30)
7522pconsts0 = extern(_2pconsts0)
7532pconsts = extern(_2pconsts)
754redcoeffmask = extern(redCoeffMask)
755redcoeffmaskend = extern(redCoeffMaskEnd)
756redcoeffmaskveryend = extern(redCoeffMaskVeryEnd)
757
758comb13 = extern(combine13)
759comb22 = extern(combine22)
760comb31 = extern(combine31)
761comb31 = extern(combine31)
762
7632p_03   = extern(_2p_03)
7642p_47   = extern(_2p_47)
7652p_811  = extern(_2p_811)
7662p_1215 = extern(_2p_1215)
7672p_1619 = extern(_2p_1619)
768
769a24vec = extern(_a24vec)
770
771# Initialization
772int32323232 done = 0
773sk = *(vec128 *) ((skp + 16) & ~15)
774sk = select bytes from sk by swapendian
775
776x1_03   = *(vec128 *) ((xp +  0) & ~15)
777x1_47   = *(vec128 *) ((xp + 16) & ~15)
778x1_811  = *(vec128 *) ((xp + 32) & ~15)
779x1_1215 = *(vec128 *) ((xp + 48) & ~15)
780x1_1619 = *(vec128 *) ((xp + 64) & ~15)
781
782x2_03     = one
783x2_47     = zero
784x2_811    = zero
785x2_1215   = zero
786x2_1619   = zero
787
788z2_03     = zero
789z2_47     = zero
790z2_811    = zero
791z2_1215   = zero
792z2_1619   = zero
793
794x3_03   = x1_03
795x3_47   = x1_47
796x3_811  = x1_811
797x3_1215 = x1_1215
798x3_1619 = x1_1619
799
800z3_03   = one
801z3_47   = zero
802z3_811  = zero
803z3_1215 = zero
804z3_1619 = zero
805
806loop:
807
808bit = sk & loopmask
809uint32323232 extbit = 0 - (bit > 0)
810tmp0 = extbit <<< (8*4)
811extbit ^= tmp0
812tmp0 = extbit <<< (8*8)
813extbit ^= tmp0
814loopmask >>= (1 % 8)
815
816####################################################################################
817#######################   Conditionally swap P2 and P3   ###########################
818####################################################################################
819
820flip = prevextbit ^ extbit
821nflip = ~(flip | zero)
822prevextbit = extbit
823
824tmp0 = x2_03   & nflip
825tmp1 = x3_03   & flip
826tmp2 = x2_03   & flip
827tmp3 = x3_03   & nflip
828x2_03 = tmp0 ^ tmp1
829x3_03 = tmp2 ^ tmp3
830
831tmp0 = x2_47   & nflip
832tmp1 = x3_47   & flip
833tmp2 = x2_47   & flip
834tmp3 = x3_47   & nflip
835x2_47 = tmp0 ^ tmp1
836x3_47 = tmp2 ^ tmp3
837
838tmp0 = x2_811  & nflip
839tmp1 = x3_811  & flip
840tmp2 = x2_811  & flip
841tmp3 = x3_811  & nflip
842x2_811 = tmp0 ^ tmp1
843x3_811 = tmp2 ^ tmp3
844
845tmp0 = x2_1215 & nflip
846tmp1 = x3_1215 & flip
847tmp2 = x2_1215 & flip
848tmp3 = x3_1215 & nflip
849x2_1215 = tmp0 ^ tmp1
850x3_1215 = tmp2 ^ tmp3
851
852tmp0 = x2_1619 & nflip
853tmp1 = x3_1619 & flip
854tmp2 = x2_1619 & flip
855tmp3 = x3_1619 & nflip
856x2_1619 = tmp0 ^ tmp1
857x3_1619 = tmp2 ^ tmp3
858
859tmp0 = z2_03   & nflip
860tmp1 = z3_03   & flip
861tmp2 = z2_03   & flip
862tmp3 = z3_03   & nflip
863z2_03 = tmp0 ^ tmp1
864z3_03 = tmp2 ^ tmp3
865
866tmp0 = z2_47   & nflip
867tmp1 = z3_47   & flip
868tmp2 = z2_47   & flip
869tmp3 = z3_47   & nflip
870z2_47 = tmp0 ^ tmp1
871z3_47 = tmp2 ^ tmp3
872
873tmp0 = z2_811  & nflip
874tmp1 = z3_811  & flip
875tmp2 = z2_811  & flip
876tmp3 = z3_811  & nflip
877z2_811 = tmp0 ^ tmp1
878z3_811 = tmp2 ^ tmp3
879
880tmp0 = z2_1215 & nflip
881tmp1 = z3_1215 & flip
882tmp2 = z2_1215 & flip
883tmp3 = z3_1215 & nflip
884z2_1215 = tmp0 ^ tmp1
885z3_1215 = tmp2 ^ tmp3
886
887tmp0 = z2_1619 & nflip
888tmp1 = z3_1619 & flip
889tmp2 = z2_1619 & flip
890tmp3 = z3_1619 & nflip
891z2_1619 = tmp0 ^ tmp1
892z3_1619 = tmp2 ^ tmp3
893
894
895###################################################################################
896#########################          A = X2+Z2         ##############################
897#########################          C = X3+Z3         ##############################
898#########################          B = X2-Z2         ##############################
899#########################          D = X3-Z3         ##############################
900###################################################################################
901
902int32323232 b_1619 = x2_1619 + 2p_1619
903int32323232 d_1619 = x3_1619 + 2p_1619
904int32323232 a_1619 = x2_1619 + z2_1619
905int32323232 c_1619 = x3_1619 + z3_1619
906int32323232 b_1619 -= z2_1619
907int32323232 d_1619 -= z3_1619
908
909
910int32323232 b_03 = x2_03 + 2p_03
911acbd16 = combine a_1619 and c_1619 by shuf0_01
912int32323232 d_03 = x3_03 + 2p_03
913acbd17 = combine a_1619 and c_1619 by shuf1_01
914int32323232 a_03 = x2_03 + z2_03
915acbd18 = combine a_1619 and c_1619 by shuf2_01
916int32323232 c_03 = x3_03 + z3_03
917acbd19 = combine a_1619 and c_1619 by shuf3_01
918int32323232 b_03 -= z2_03
919acbd16 = combine acbd16 and b_1619 by shuf0_2
920int32323232 d_03 -= z3_03
921acbd17 = combine acbd17 and b_1619 by shuf1_2
922
923int32323232 b_47 = x2_47 + 2p_47
924acbd18 = combine acbd18 and b_1619 by shuf2_2
925int32323232 d_47 = x3_47 + 2p_47
926acbd19 = combine acbd19 and b_1619 by shuf3_2
927int32323232 a_47 = x2_47 + z2_47
928acbd16 = combine acbd16 and d_1619 by shuf0_3
929int32323232 c_47 = x3_47 + z3_47
930acbd17 = combine acbd17 and d_1619 by shuf1_3
931int32323232 b_47 -= z2_47
932acbd18 = combine acbd18 and d_1619 by shuf2_3
933int32323232 d_47 -= z3_47
934acbd19 = combine acbd19 and d_1619 by shuf3_3
935
936int32323232 b_811 = x2_811 + 2p_811
937acbd0 = combine a_03 and c_03 by shuf0_01
938int32323232 d_811 = x3_811 + 2p_811
939acbd1 = combine a_03 and c_03 by shuf1_01
940int32323232 a_811 = x2_811 + z2_811
941acbd4 = combine a_47 and c_47 by shuf0_01
942int32323232 c_811 = x3_811 + z3_811
943acbd5 = combine a_47 and c_47 by shuf1_01
944int32323232 b_811 -= z2_811
945acbd0 = combine acbd0 and b_03 by shuf0_2
946int32323232 d_811 -= z3_811
947acbd1 = combine acbd1 and b_03 by shuf1_2
948
949int32323232 b_1215 = x2_1215 + 2p_1215
950acbd4 = combine acbd4 and b_47 by shuf0_2
951int32323232 d_1215 = x3_1215 + 2p_1215
952acbd5 = combine acbd5 and b_47 by shuf1_2
953int32323232 a_1215 = x2_1215 + z2_1215
954acbd0 = combine acbd0 and d_03 by shuf0_3
955int32323232 c_1215 = x3_1215 + z3_1215
956acbd1 = combine acbd1 and d_03 by shuf1_3
957int32323232 b_1215 -= z2_1215
958acbd4 = combine acbd4 and d_47 by shuf0_3
959int32323232 d_1215 -= z3_1215
960acbd5 = combine acbd5 and d_47 by shuf1_3
961
962uint32323232 carry = acbd19 >> 12
963acbd8 = combine a_811 and c_811 by shuf0_01
964
965acbd9 = combine a_811 and c_811 by shuf1_01
966acbd12 = combine a_1215 and c_1215 by shuf0_01
967
968acbd19 &= mask12
969acbd13 = combine a_1215 and c_1215 by shuf1_01
970
971uint32323232 carry = (carry & 0xffff) * 19
972acbd8 = combine acbd8 and b_811 by shuf0_2
973
974acbd9 = combine acbd9 and b_811 by shuf1_2
975acbd12 = combine acbd12 and b_1215 by shuf0_2
976acbd13 = combine acbd13 and b_1215 by shuf1_2
977acbd8 = combine acbd8 and d_811 by shuf0_3
978acbd9 = combine acbd9 and d_811 by shuf1_3
979acbd12 = combine acbd12 and d_1215 by shuf0_3
980
981int32323232 acbd0 += carry
982acbd13 = combine acbd13 and d_1215 by shuf1_3
983
984uint32323232 carry1 = acbd4  >> 13
985vec19 = extern(_vec19)
986uint32323232 carry0 = acbd0  >> 13
987lnop
988uint32323232 carry2 = acbd8  >> 13
989uint32323232 carry3 = acbd12 >> 13
990
991int32323232 acbd5  += carry1
992acbd2 = combine a_03 and c_03 by shuf2_01
993acbd4  &= mask13
994acbd6 = combine a_47 and c_47 by shuf2_01
995int32323232 acbd1  += carry0
996acbd10 = combine a_811 and c_811 by shuf2_01
997acbd0  &= mask13
998acbd14 = combine a_1215 and c_1215 by shuf2_01
999int32323232 acbd9  += carry2
1000acbd2 = combine acbd2 and b_03 by shuf2_2
1001acbd8  &= mask13
1002acbd6 = combine acbd6 and b_47 by shuf2_2
1003int32323232 acbd13 += carry3
1004acbd10 = combine acbd10 and b_811 by shuf2_2
1005acbd12 &= mask13
1006acbd14 = combine acbd14 and b_1215 by shuf2_2
1007
1008uint32323232 carry0 = acbd1  >> 13
1009acbd2 = combine acbd2 and d_03 by shuf2_3
1010uint32323232 carry1 = acbd5  >> 13
1011acbd6 = combine acbd6 and d_47 by shuf2_3
1012uint32323232 carry2 = acbd9  >> 13
1013acbd10 = combine acbd10 and d_811 by shuf2_3
1014uint32323232 carry3 = acbd13 >> 13
1015acbd14 = combine acbd14 and d_1215 by shuf2_3
1016
1017int32323232 acbd2  += carry0
1018acbd3 = combine a_03 and c_03 by shuf3_01
1019acbd1  &= mask13
1020acbd7 = combine a_47 and c_47 by shuf3_01
1021int32323232 acbd6  += carry1
1022acbd11 = combine a_811 and c_811 by shuf3_01
1023acbd5  &= mask13
1024acbd15 = combine a_1215 and c_1215 by shuf3_01
1025int32323232 acbd10 += carry2
1026acbd3 = combine acbd3 and b_03 by shuf3_2
1027acbd9  &= mask13
1028acbd7 = combine acbd7 and b_47 by shuf3_2
1029int32323232 acbd14 += carry3
1030acbd11 = combine acbd11 and b_811 by shuf3_2
1031acbd13 &= mask13
1032acbd15 = combine acbd15 and b_1215 by shuf3_2
1033
1034uint32323232 carry0 = acbd2  >> 13
1035acbd3 = combine acbd3 and d_03 by shuf3_3
1036uint32323232 carry1 = acbd6  >> 13
1037acbd7 = combine acbd7 and d_47 by shuf3_3
1038uint32323232 carry2 = acbd10 >> 13
1039acbd11 = combine acbd11 and d_811 by shuf3_3
1040uint32323232 carry3 = acbd14 >> 13
1041acbd15 = combine acbd15 and d_1215 by shuf3_3
1042
1043acbd2  &= mask13
1044acbd6  &= mask13
1045acbd10 &= mask13
1046acbd14 &= mask13
1047
1048int32323232 acbd3  += carry0
1049int32323232 acbd7  += carry1
1050int32323232 acbd11 += carry2
1051int32323232 acbd15 += carry3
1052
1053uint32323232 carry0 = acbd3  >> 12
1054uint32323232 carry1 = acbd7  >> 12
1055uint32323232 carry2 = acbd11 >> 12
1056uint32323232 carry3 = acbd15 >> 12
1057
1058acbd3  &= mask12
1059acbd7  &= mask12
1060acbd11 &= mask12
1061abba0  = select bytes from acbd0  by selw0220
1062acbd15 &= mask12
1063abba1  = select bytes from acbd1  by selw0220
1064
1065int32323232 acbd4  += carry0
1066abba2  = select bytes from acbd2  by selw0220
1067int32323232 acbd8  += carry1
1068abba3  = select bytes from acbd3  by selw0220
1069int32323232 acbd12 += carry2
1070int32323232 acbd16 += carry3
1071
1072uint32323232 carry1 = acbd4  >> 13
1073uint32323232 carry2 = acbd8  >> 13
1074uint32323232 carry3 = acbd12 >> 13
1075uint32323232 carry4 = acbd16 >> 13
1076
1077acbd4  &= mask13
1078acbd8  &= mask13
1079acbd12 &= mask13
1080abba4  = select bytes from acbd4  by selw0220
1081acbd16  &= mask13
1082abba8  = select bytes from acbd8  by selw0220
1083
1084int32323232 acbd5  += carry1
1085abba12 = select bytes from acbd12 by selw0220
1086int32323232 acbd9  += carry2
1087abba16 = select bytes from acbd16 by selw0220
1088int32323232 acbd13 += carry3
1089int32323232 acbd17  += carry4
1090
1091uint32323232 carry1 = acbd5  >> 13
1092uint32323232 carry2 = acbd9  >> 13
1093uint32323232 carry3 = acbd13 >> 13
1094uint32323232 carry4 = acbd17 >> 13
1095
1096acbd5  &= mask13
1097acbd9  &= mask13
1098acbd13 &= mask13
1099abba5  = select bytes from acbd5  by selw0220
1100acbd17 &= mask13
1101abba9  = select bytes from acbd9  by selw0220
1102
1103int32323232 acbd6  += carry1
1104abba13 = select bytes from acbd13 by selw0220
1105int32323232 acbd10 += carry2
1106abba17 = select bytes from acbd17 by selw0220
1107int32323232 acbd14 += carry3
1108int32323232 acbd18 += carry4
1109
1110uint32323232 carry1 = acbd6  >> 13
1111uint32323232 carry2 = acbd10 >> 13
1112uint32323232 carry3 = acbd14 >> 13
1113uint32323232 carry4 = acbd18  >> 13
1114
1115acbd6  &= mask13
1116acbd10 &= mask13
1117acbd14 &= mask13
1118abba6  = select bytes from acbd6  by selw0220
1119acbd18  &= mask13
1120abba10 = select bytes from acbd10 by selw0220
1121
1122int32323232 acbd7  += carry1
1123abba14 = select bytes from acbd14 by selw0220
1124int32323232 acbd11 += carry2
1125abba18 = select bytes from acbd18 by selw0220
1126int32323232 acbd15 += carry3
1127int32323232 acbd19  += carry4
1128
1129
1130
1131###################################################################################
1132#########################          AA = A^2          ##############################
1133#########################          CB = C*B          ##############################
1134#########################          BB = B^2          ##############################
1135#########################          DA = D*A          ##############################
1136###################################################################################
1137
1138int32323232 aacbbbda0  = (acbd0 & 0xffff) * (abba0 & 0xffff)
1139abba7  = select bytes from acbd7  by selw0220
1140int32323232 aacbbbda1  = (acbd0 & 0xffff) * (abba1 & 0xffff)
1141abba11 = select bytes from acbd11 by selw0220
1142int32323232 aacbbbda2  = (acbd0 & 0xffff) * (abba2 & 0xffff)
1143abba15 = select bytes from acbd15 by selw0220
1144int32323232 aacbbbda3  = (acbd0 & 0xffff) * (abba3 & 0xffff)
1145abba19 = select bytes from acbd19 by selw0220
1146int32323232 aacbbbda4  = (acbd1 & 0xffff) * (abba3 & 0xffff)
1147int32323232 aacbbbda5  = (acbd2 & 0xffff) * (abba3 & 0xffff)
1148int32323232 aacbbbda6  = (acbd3 & 0xffff) * (abba3 & 0xffff)
1149int32323232 aacbbbda7  = (acbd0 & 0xffff) * (abba7 & 0xffff)
1150int32323232 aacbbbda1 += (acbd1 & 0xffff) * (abba0 & 0xffff)
1151int32323232 aacbbbda2 += (acbd1 & 0xffff) * (abba1 & 0xffff)
1152int32323232 aacbbbda3 += (acbd1 & 0xffff) * (abba2 & 0xffff)
1153int32323232 aacbbbda4 += (acbd2 & 0xffff) * (abba2 & 0xffff)
1154int32323232 aacbbbda5 += (acbd3 & 0xffff) * (abba2 & 0xffff)
1155int32323232 aacbbbda6 <<= 1
1156int32323232 aacbbbda7 += (acbd1 & 0xffff) * (abba6 & 0xffff)
1157int32323232 aacbbbda8  = (acbd1 & 0xffff) * (abba7 & 0xffff)
1158int32323232 aacbbbda2 += (acbd2 & 0xffff) * (abba0 & 0xffff)
1159int32323232 aacbbbda3 += (acbd2 & 0xffff) * (abba1 & 0xffff)
1160int32323232 aacbbbda4 += (acbd3 & 0xffff) * (abba1 & 0xffff)
1161int32323232 aacbbbda5 <<= 1
1162int32323232 aacbbbda6 += (acbd0 & 0xffff) * (abba6 & 0xffff)
1163int32323232 aacbbbda7 += (acbd2 & 0xffff) * (abba5 & 0xffff)
1164int32323232 aacbbbda8 += (acbd2 & 0xffff) * (abba6 & 0xffff)
1165int32323232 aacbbbda9  = (acbd2 & 0xffff) * (abba7 & 0xffff)
1166int32323232 aacbbbda3 += (acbd3 & 0xffff) * (abba0 & 0xffff)
1167int32323232 aacbbbda4 <<= 1
1168int32323232 aacbbbda5 += (acbd0 & 0xffff) * (abba5 & 0xffff)
1169int32323232 aacbbbda6 += (acbd1 & 0xffff) * (abba5 & 0xffff)
1170int32323232 aacbbbda7 += (acbd3 & 0xffff) * (abba4 & 0xffff)
1171int32323232 aacbbbda8 += (acbd3 & 0xffff) * (abba5 & 0xffff)
1172int32323232 aacbbbda9 += (acbd3 & 0xffff) * (abba6 & 0xffff)
1173int32323232 aacbbbda10  = (acbd3  & 0xffff) * (abba7  & 0xffff)
1174int32323232 aacbbbda4 += (acbd0 & 0xffff) * (abba4 & 0xffff)
1175int32323232 aacbbbda5 += (acbd1 & 0xffff) * (abba4 & 0xffff)
1176int32323232 aacbbbda6 += (acbd2 & 0xffff) * (abba4 & 0xffff)
1177int32323232 aacbbbda7 += (acbd4 & 0xffff) * (abba3 & 0xffff)
1178int32323232 aacbbbda8 += (acbd5 & 0xffff) * (abba3 & 0xffff)
1179int32323232 aacbbbda9 += (acbd6 & 0xffff) * (abba3 & 0xffff)
1180int32323232 aacbbbda10 += (acbd7  & 0xffff) * (abba3  & 0xffff)
1181int32323232 aacbbbda11  = (acbd11 & 0xffff) * (abba0  & 0xffff)
1182int32323232 aacbbbda4 += (acbd4 & 0xffff) * (abba0 & 0xffff)
1183int32323232 aacbbbda5 += (acbd4 & 0xffff) * (abba1 & 0xffff)
1184int32323232 aacbbbda6 += (acbd4 & 0xffff) * (abba2 & 0xffff)
1185int32323232 aacbbbda7 += (acbd5 & 0xffff) * (abba2 & 0xffff)
1186int32323232 aacbbbda8 += (acbd6 & 0xffff) * (abba2 & 0xffff)
1187int32323232 aacbbbda9 += (acbd7 & 0xffff) * (abba2 & 0xffff)
1188int32323232 aacbbbda10 <<= 1
1189int32323232 aacbbbda11 += (acbd10 & 0xffff) * (abba1  & 0xffff)
1190int32323232 aacbbbda12  = (acbd1  & 0xffff) * (abba11 & 0xffff)
1191int32323232 aacbbbda5 += (acbd5 & 0xffff) * (abba0 & 0xffff)
1192int32323232 aacbbbda6 += (acbd5 & 0xffff) * (abba1 & 0xffff)
1193int32323232 aacbbbda7 += (acbd6 & 0xffff) * (abba1 & 0xffff)
1194int32323232 aacbbbda8 += (acbd7 & 0xffff) * (abba1 & 0xffff)
1195int32323232 aacbbbda9 <<= 1
1196int32323232 aacbbbda10 += (acbd0  & 0xffff) * (abba10 & 0xffff)
1197int32323232 aacbbbda11 += (acbd9  & 0xffff) * (abba2  & 0xffff)
1198int32323232 aacbbbda12 += (acbd2  & 0xffff) * (abba10 & 0xffff)
1199int32323232 aacbbbda6 += (acbd6 & 0xffff) * (abba0 & 0xffff)
1200int32323232 aacbbbda7 += (acbd7 & 0xffff) * (abba0 & 0xffff)
1201int32323232 aacbbbda8 <<= 1
1202int32323232 aacbbbda9 += (acbd0 & 0xffff) * (abba9 & 0xffff)
1203int32323232 aacbbbda10 += (acbd1  & 0xffff) * (abba9  & 0xffff)
1204int32323232 aacbbbda11 += (acbd8  & 0xffff) * (abba3  & 0xffff)
1205int32323232 aacbbbda12 += (acbd3  & 0xffff) * (abba9  & 0xffff)
1206int32323232 aacbbbda13  = (acbd2  & 0xffff) * (abba11 & 0xffff)
1207int32323232 aacbbbda14  = (acbd3  & 0xffff) * (abba11 & 0xffff)
1208int32323232 aacbbbda8 += (acbd0 & 0xffff) * (abba8 & 0xffff)
1209int32323232 aacbbbda9 += (acbd1 & 0xffff) * (abba8 & 0xffff)
1210int32323232 aacbbbda10 += (acbd2  & 0xffff) * (abba8  & 0xffff)
1211int32323232 aacbbbda11 += (acbd7  & 0xffff) * (abba4  & 0xffff)
1212int32323232 aacbbbda12 += (acbd5  & 0xffff) * (abba7  & 0xffff)
1213int32323232 aacbbbda13 += (acbd3  & 0xffff) * (abba10 & 0xffff)
1214int32323232 aacbbbda14 += (acbd7  & 0xffff) * (abba7  & 0xffff)
1215int32323232 aacbbbda8 += (acbd4 & 0xffff) * (abba4 & 0xffff)
1216int32323232 aacbbbda9 += (acbd4 & 0xffff) * (abba5 & 0xffff)
1217int32323232 aacbbbda10 += (acbd4  & 0xffff) * (abba6  & 0xffff)
1218int32323232 aacbbbda11 += (acbd6  & 0xffff) * (abba5  & 0xffff)
1219int32323232 aacbbbda12 += (acbd6  & 0xffff) * (abba6  & 0xffff)
1220int32323232 aacbbbda13 += (acbd6  & 0xffff) * (abba7  & 0xffff)
1221int32323232 aacbbbda14 += (acbd11 & 0xffff) * (abba3  & 0xffff)
1222int32323232 aacbbbda8 += (acbd8 & 0xffff) * (abba0 & 0xffff)
1223int32323232 aacbbbda9 += (acbd5 & 0xffff) * (abba4 & 0xffff)
1224int32323232 aacbbbda10 += (acbd5  & 0xffff) * (abba5  & 0xffff)
1225int32323232 aacbbbda11 += (acbd5  & 0xffff) * (abba6  & 0xffff)
1226int32323232 aacbbbda12 += (acbd7  & 0xffff) * (abba5  & 0xffff)
1227int32323232 aacbbbda13 += (acbd7  & 0xffff) * (abba6  & 0xffff)
1228int32323232 aacbbbda14 <<= 1
1229int32323232 aacbbbda15  = (acbd0  & 0xffff) * (abba15 & 0xffff)
1230int32323232 aacbbbda9 += (acbd8 & 0xffff) * (abba1 & 0xffff)
1231int32323232 aacbbbda10 += (acbd6  & 0xffff) * (abba4  & 0xffff)
1232int32323232 aacbbbda11 += (acbd4  & 0xffff) * (abba7  & 0xffff)
1233int32323232 aacbbbda12 += (acbd9  & 0xffff) * (abba3  & 0xffff)
1234int32323232 aacbbbda13 += (acbd10 & 0xffff) * (abba3  & 0xffff)
1235int32323232 aacbbbda14 += (acbd0  & 0xffff) * (abba14 & 0xffff)
1236int32323232 aacbbbda15 += (acbd1  & 0xffff) * (abba14 & 0xffff)
1237int32323232 aacbbbda9 += (acbd9 & 0xffff) * (abba0 & 0xffff)
1238int32323232 aacbbbda10 += (acbd8  & 0xffff) * (abba2  & 0xffff)
1239int32323232 aacbbbda11 += (acbd3  & 0xffff) * (abba8  & 0xffff)
1240int32323232 aacbbbda12 += (acbd10 & 0xffff) * (abba2  & 0xffff)
1241int32323232 aacbbbda13 += (acbd11 & 0xffff) * (abba2  & 0xffff)
1242int32323232 aacbbbda14 += (acbd1  & 0xffff) * (abba13 & 0xffff)
1243int32323232 aacbbbda15 += (acbd2  & 0xffff) * (abba13 & 0xffff)
1244int32323232 aacbbbda16  = (acbd1  & 0xffff) * (abba15 & 0xffff)
1245int32323232 aacbbbda10 += (acbd9  & 0xffff) * (abba1  & 0xffff)
1246int32323232 aacbbbda11 += (acbd2  & 0xffff) * (abba9  & 0xffff)
1247int32323232 aacbbbda12 += (acbd11 & 0xffff) * (abba1  & 0xffff)
1248int32323232 aacbbbda13 <<= 1
1249int32323232 aacbbbda14 += (acbd2  & 0xffff) * (abba12 & 0xffff)
1250int32323232 aacbbbda15 += (acbd3  & 0xffff) * (abba12 & 0xffff)
1251int32323232 aacbbbda16 += (acbd2  & 0xffff) * (abba14 & 0xffff)
1252int32323232 aacbbbda10 += (acbd10 & 0xffff) * (abba0  & 0xffff)
1253int32323232 aacbbbda11 += (acbd1  & 0xffff) * (abba10 & 0xffff)
1254int32323232 aacbbbda12 <<= 1
1255int32323232 aacbbbda13 += (acbd0  & 0xffff) * (abba13 & 0xffff)
1256int32323232 aacbbbda14 += (acbd4  & 0xffff) * (abba10 & 0xffff)
1257int32323232 aacbbbda15 += (acbd4  & 0xffff) * (abba11 & 0xffff)
1258int32323232 aacbbbda16 += (acbd3  & 0xffff) * (abba13 & 0xffff)
1259int32323232 aacbbbda17  = (acbd2  & 0xffff) * (abba15 & 0xffff)
1260int32323232 aacbbbda11 += (acbd0  & 0xffff) * (abba11 & 0xffff)
1261int32323232 aacbbbda12 += (acbd0  & 0xffff) * (abba12 & 0xffff)
1262int32323232 aacbbbda13 += (acbd1  & 0xffff) * (abba12 & 0xffff)
1263int32323232 aacbbbda14 += (acbd5  & 0xffff) * (abba9  & 0xffff)
1264int32323232 aacbbbda15 += (acbd5  & 0xffff) * (abba10 & 0xffff)
1265int32323232 aacbbbda16 += (acbd5  & 0xffff) * (abba11 & 0xffff)
1266int32323232 aacbbbda17 += (acbd3  & 0xffff) * (abba14 & 0xffff)
1267int32323232 aacbbbda18  = (acbd3  & 0xffff) * (abba15 & 0xffff)
1268int32323232 aacbbbda12 += (acbd4  & 0xffff) * (abba8  & 0xffff)
1269int32323232 aacbbbda13 += (acbd4  & 0xffff) * (abba9  & 0xffff)
1270int32323232 aacbbbda14 += (acbd6  & 0xffff) * (abba8  & 0xffff)
1271int32323232 aacbbbda15 += (acbd6  & 0xffff) * (abba9  & 0xffff)
1272int32323232 aacbbbda16 += (acbd6  & 0xffff) * (abba10 & 0xffff)
1273int32323232 aacbbbda17 += (acbd6  & 0xffff) * (abba11 & 0xffff)
1274int32323232 aacbbbda18 += (acbd7  & 0xffff) * (abba11 & 0xffff)
1275int32323232 aacbbbda12 += (acbd8  & 0xffff) * (abba4  & 0xffff)
1276int32323232 aacbbbda13 += (acbd5  & 0xffff) * (abba8  & 0xffff)
1277int32323232 aacbbbda14 += (acbd8  & 0xffff) * (abba6  & 0xffff)
1278int32323232 aacbbbda15 += (acbd7  & 0xffff) * (abba8  & 0xffff)
1279int32323232 aacbbbda16 += (acbd7  & 0xffff) * (abba9  & 0xffff)
1280int32323232 aacbbbda17 += (acbd7  & 0xffff) * (abba10 & 0xffff)
1281int32323232 aacbbbda18 += (acbd11 & 0xffff) * (abba7  & 0xffff)
1282int32323232 aacbbbda12 += (acbd12 & 0xffff) * (abba0  & 0xffff)
1283int32323232 aacbbbda13 += (acbd8  & 0xffff) * (abba5  & 0xffff)
1284int32323232 aacbbbda14 += (acbd9  & 0xffff) * (abba5  & 0xffff)
1285int32323232 aacbbbda15 += (acbd8  & 0xffff) * (abba7  & 0xffff)
1286int32323232 aacbbbda16 += (acbd9  & 0xffff) * (abba7  & 0xffff)
1287int32323232 aacbbbda17 += (acbd10 & 0xffff) * (abba7  & 0xffff)
1288int32323232 aacbbbda18 += (acbd15 & 0xffff) * (abba3  & 0xffff)
1289int32323232 aacbbbda19  = (acbd0  & 0xffff) * (abba19 & 0xffff)
1290int32323232 aacbbbda13 += (acbd9  & 0xffff) * (abba4  & 0xffff)
1291int32323232 aacbbbda14 += (acbd10 & 0xffff) * (abba4  & 0xffff)
1292int32323232 aacbbbda15 += (acbd9  & 0xffff) * (abba6  & 0xffff)
1293int32323232 aacbbbda16 += (acbd10 & 0xffff) * (abba6  & 0xffff)
1294int32323232 aacbbbda17 += (acbd11 & 0xffff) * (abba6  & 0xffff)
1295int32323232 aacbbbda18 <<= 1
1296int32323232 aacbbbda19 += (acbd1  & 0xffff) * (abba18 & 0xffff)
1297int32323232 aacbbbda13 += (acbd12 & 0xffff) * (abba1  & 0xffff)
1298int32323232 aacbbbda14 += (acbd12 & 0xffff) * (abba2  & 0xffff)
1299int32323232 aacbbbda15 += (acbd10 & 0xffff) * (abba5  & 0xffff)
1300int32323232 aacbbbda16 += (acbd11 & 0xffff) * (abba5  & 0xffff)
1301int32323232 aacbbbda17 += (acbd14 & 0xffff) * (abba3  & 0xffff)
1302int32323232 aacbbbda18 += (acbd0  & 0xffff) * (abba18 & 0xffff)
1303int32323232 aacbbbda19 += (acbd2  & 0xffff) * (abba17 & 0xffff)
1304int32323232 aacbbbda13 += (acbd13 & 0xffff) * (abba0  & 0xffff)
1305int32323232 aacbbbda14 += (acbd13 & 0xffff) * (abba1  & 0xffff)
1306int32323232 aacbbbda15 += (acbd11 & 0xffff) * (abba4  & 0xffff)
1307int32323232 aacbbbda16 += (acbd13 & 0xffff) * (abba3  & 0xffff)
1308int32323232 aacbbbda17 += (acbd15 & 0xffff) * (abba2  & 0xffff)
1309int32323232 aacbbbda18 += (acbd1  & 0xffff) * (abba17 & 0xffff)
1310int32323232 aacbbbda19 += (acbd3  & 0xffff) * (abba16 & 0xffff)
1311int32323232 aacbbbda20  = (acbd1  & 0xffff) * (abba19 & 0xffff)
1312int32323232 aacbbbda14 += (acbd14 & 0xffff) * (abba0  & 0xffff)
1313int32323232 aacbbbda15 += (acbd12 & 0xffff) * (abba3  & 0xffff)
1314int32323232 aacbbbda16 += (acbd14 & 0xffff) * (abba2  & 0xffff)
1315int32323232 aacbbbda17 <<= 1
1316int32323232 aacbbbda18 += (acbd2  & 0xffff) * (abba16 & 0xffff)
1317int32323232 aacbbbda19 += (acbd4  & 0xffff) * (abba15 & 0xffff)
1318int32323232 aacbbbda20 += (acbd2  & 0xffff) * (abba18 & 0xffff)
1319int32323232 aacbbbda21  = (acbd2  & 0xffff) * (abba19 & 0xffff)
1320int32323232 aacbbbda15 += (acbd13 & 0xffff) * (abba2  & 0xffff)
1321int32323232 aacbbbda16 += (acbd15 & 0xffff) * (abba1  & 0xffff)
1322int32323232 aacbbbda17 += (acbd0  & 0xffff) * (abba17 & 0xffff)
1323int32323232 aacbbbda18 += (acbd4  & 0xffff) * (abba14 & 0xffff)
1324int32323232 aacbbbda19 += (acbd5  & 0xffff) * (abba14 & 0xffff)
1325int32323232 aacbbbda20 += (acbd3  & 0xffff) * (abba17 & 0xffff)
1326int32323232 aacbbbda21 += (acbd3  & 0xffff) * (abba18 & 0xffff)
1327int32323232 aacbbbda15 += (acbd14 & 0xffff) * (abba1  & 0xffff)
1328int32323232 aacbbbda16 <<= 1
1329int32323232 aacbbbda17 += (acbd1  & 0xffff) * (abba16 & 0xffff)
1330int32323232 aacbbbda18 += (acbd5  & 0xffff) * (abba13 & 0xffff)
1331int32323232 aacbbbda19 += (acbd6  & 0xffff) * (abba13 & 0xffff)
1332int32323232 aacbbbda20 += (acbd5  & 0xffff) * (abba15 & 0xffff)
1333int32323232 aacbbbda21 += (acbd6  & 0xffff) * (abba15 & 0xffff)
1334int32323232 aacbbbda15 += (acbd15 & 0xffff) * (abba0  & 0xffff)
1335int32323232 aacbbbda16 += (acbd0  & 0xffff) * (abba16 & 0xffff)
1336int32323232 aacbbbda17 += (acbd4  & 0xffff) * (abba13 & 0xffff)
1337int32323232 aacbbbda18 += (acbd6  & 0xffff) * (abba12 & 0xffff)
1338int32323232 aacbbbda19 += (acbd7  & 0xffff) * (abba12 & 0xffff)
1339int32323232 aacbbbda20 += (acbd6  & 0xffff) * (abba14 & 0xffff)
1340int32323232 aacbbbda21 += (acbd7  & 0xffff) * (abba14 & 0xffff)
1341int32323232 aacbbbda22  = (acbd3  & 0xffff) * (abba19 & 0xffff)
1342int32323232 aacbbbda16 += (acbd4  & 0xffff) * (abba12 & 0xffff)
1343int32323232 aacbbbda17 += (acbd5  & 0xffff) * (abba12 & 0xffff)
1344int32323232 aacbbbda18 += (acbd8  & 0xffff) * (abba10 & 0xffff)
1345int32323232 aacbbbda19 += (acbd8  & 0xffff) * (abba11 & 0xffff)
1346int32323232 aacbbbda20 += (acbd7  & 0xffff) * (abba13 & 0xffff)
1347int32323232 aacbbbda21 += (acbd10 & 0xffff) * (abba11 & 0xffff)
1348int32323232 aacbbbda22 += (acbd7  & 0xffff) * (abba15 & 0xffff)
1349int32323232 aacbbbda16 += (acbd8  & 0xffff) * (abba8  & 0xffff)
1350int32323232 aacbbbda17 += (acbd8  & 0xffff) * (abba9  & 0xffff)
1351int32323232 aacbbbda18 += (acbd9  & 0xffff) * (abba9  & 0xffff)
1352int32323232 aacbbbda19 += (acbd9  & 0xffff) * (abba10 & 0xffff)
1353int32323232 aacbbbda20 += (acbd9  & 0xffff) * (abba11 & 0xffff)
1354int32323232 aacbbbda21 += (acbd11 & 0xffff) * (abba10 & 0xffff)
1355int32323232 aacbbbda22 += (acbd11 & 0xffff) * (abba11 & 0xffff)
1356int32323232 aacbbbda16 += (acbd12 & 0xffff) * (abba4  & 0xffff)
1357int32323232 aacbbbda17 += (acbd9  & 0xffff) * (abba8  & 0xffff)
1358int32323232 aacbbbda18 += (acbd10 & 0xffff) * (abba8  & 0xffff)
1359int32323232 aacbbbda19 += (acbd10 & 0xffff) * (abba9  & 0xffff)
1360int32323232 aacbbbda20 += (acbd10 & 0xffff) * (abba10 & 0xffff)
1361int32323232 aacbbbda21 += (acbd14 & 0xffff) * (abba7  & 0xffff)
1362int32323232 aacbbbda22 += (acbd15 & 0xffff) * (abba7  & 0xffff)
1363int32323232 aacbbbda16 += (acbd16 & 0xffff) * (abba0  & 0xffff)
1364int32323232 aacbbbda17 += (acbd12 & 0xffff) * (abba5  & 0xffff)
1365int32323232 aacbbbda18 += (acbd12 & 0xffff) * (abba6  & 0xffff)
1366int32323232 aacbbbda19 += (acbd11 & 0xffff) * (abba8  & 0xffff)
1367int32323232 aacbbbda20 += (acbd11 & 0xffff) * (abba9  & 0xffff)
1368int32323232 aacbbbda21 += (acbd15 & 0xffff) * (abba6  & 0xffff)
1369int32323232 aacbbbda22 += (acbd19 & 0xffff) * (abba3  & 0xffff)
1370int32323232 aacbbbda23  = (acbd4  & 0xffff) * (abba19 & 0xffff)
1371int32323232 aacbbbda17 += (acbd13 & 0xffff) * (abba4  & 0xffff)
1372int32323232 aacbbbda18 += (acbd13 & 0xffff) * (abba5  & 0xffff)
1373int32323232 aacbbbda19 += (acbd12 & 0xffff) * (abba7  & 0xffff)
1374int32323232 aacbbbda20 += (acbd13 & 0xffff) * (abba7  & 0xffff)
1375int32323232 aacbbbda21 += (acbd18 & 0xffff) * (abba3  & 0xffff)
1376int32323232 aacbbbda22 <<= 1
1377int32323232 aacbbbda23 += (acbd5  & 0xffff) * (abba18 & 0xffff)
1378int32323232 aacbbbda17 += (acbd16 & 0xffff) * (abba1  & 0xffff)
1379int32323232 aacbbbda18 += (acbd14 & 0xffff) * (abba4  & 0xffff)
1380int32323232 aacbbbda19 += (acbd13 & 0xffff) * (abba6  & 0xffff)
1381int32323232 aacbbbda20 += (acbd14 & 0xffff) * (abba6  & 0xffff)
1382int32323232 aacbbbda21 += (acbd19 & 0xffff) * (abba2  & 0xffff)
1383int32323232 aacbbbda22 += (acbd4  & 0xffff) * (abba18 & 0xffff)
1384int32323232 aacbbbda23 += (acbd6  & 0xffff) * (abba17 & 0xffff)
1385int32323232 aacbbbda17 += (acbd17 & 0xffff) * (abba0  & 0xffff)
1386int32323232 aacbbbda18 += (acbd16 & 0xffff) * (abba2  & 0xffff)
1387int32323232 aacbbbda19 += (acbd14 & 0xffff) * (abba5  & 0xffff)
1388int32323232 aacbbbda20 += (acbd15 & 0xffff) * (abba5  & 0xffff)
1389int32323232 aacbbbda21 <<= 1
1390int32323232 aacbbbda22 += (acbd5  & 0xffff) * (abba17 & 0xffff)
1391int32323232 aacbbbda23 += (acbd7  & 0xffff) * (abba16 & 0xffff)
1392int32323232 aacbbbda24  = (acbd5  & 0xffff) * (abba19  & 0xffff)
1393int32323232 aacbbbda18 += (acbd17 & 0xffff) * (abba1  & 0xffff)
1394int32323232 aacbbbda19 += (acbd15 & 0xffff) * (abba4  & 0xffff)
1395int32323232 aacbbbda20 += (acbd17 & 0xffff) * (abba3  & 0xffff)
1396int32323232 aacbbbda21 += (acbd4  & 0xffff) * (abba17  & 0xffff)
1397int32323232 aacbbbda22 += (acbd6  & 0xffff) * (abba16 & 0xffff)
1398int32323232 aacbbbda23 += (acbd8  & 0xffff) * (abba15 & 0xffff)
1399int32323232 aacbbbda24 += (acbd6  & 0xffff) * (abba18  & 0xffff)
1400int32323232 aacbbbda18 += (acbd18 & 0xffff) * (abba0  & 0xffff)
1401int32323232 aacbbbda19 += (acbd16 & 0xffff) * (abba3  & 0xffff)
1402int32323232 aacbbbda20 += (acbd18 & 0xffff) * (abba2  & 0xffff)
1403int32323232 aacbbbda21 += (acbd5  & 0xffff) * (abba16  & 0xffff)
1404int32323232 aacbbbda22 += (acbd8  & 0xffff) * (abba14 & 0xffff)
1405int32323232 aacbbbda23 += (acbd9  & 0xffff) * (abba14 & 0xffff)
1406int32323232 aacbbbda24 += (acbd7  & 0xffff) * (abba17  & 0xffff)
1407int32323232 aacbbbda25  = (acbd6  & 0xffff) * (abba19 & 0xffff)
1408int32323232 aacbbbda19 += (acbd17 & 0xffff) * (abba2  & 0xffff)
1409int32323232 aacbbbda20 += (acbd19 & 0xffff) * (abba1  & 0xffff)
1410int32323232 aacbbbda21 += (acbd8  & 0xffff) * (abba13  & 0xffff)
1411int32323232 aacbbbda22 += (acbd9  & 0xffff) * (abba13 & 0xffff)
1412int32323232 aacbbbda23 += (acbd10 & 0xffff) * (abba13 & 0xffff)
1413int32323232 aacbbbda24 += (acbd9  & 0xffff) * (abba15  & 0xffff)
1414int32323232 aacbbbda25 += (acbd7  & 0xffff) * (abba18 & 0xffff)
1415int32323232 aacbbbda19 += (acbd18 & 0xffff) * (abba1  & 0xffff)
1416int32323232 aacbbbda20 <<= 1
1417int32323232 aacbbbda21 += (acbd9  & 0xffff) * (abba12  & 0xffff)
1418int32323232 aacbbbda22 += (acbd10 & 0xffff) * (abba12 & 0xffff)
1419int32323232 aacbbbda23 += (acbd11 & 0xffff) * (abba12 & 0xffff)
1420int32323232 aacbbbda24 += (acbd10 & 0xffff) * (abba14  & 0xffff)
1421int32323232 aacbbbda25 += (acbd10 & 0xffff) * (abba15 & 0xffff)
1422int32323232 aacbbbda19 += (acbd19 & 0xffff) * (abba0  & 0xffff)
1423int32323232 aacbbbda20 += (acbd4  & 0xffff) * (abba16 & 0xffff)
1424int32323232 aacbbbda21 += (acbd12 & 0xffff) * (abba9  & 0xffff)
1425int32323232 aacbbbda22 += (acbd12 & 0xffff) * (abba10 & 0xffff)
1426int32323232 aacbbbda23 += (acbd12 & 0xffff) * (abba11 & 0xffff)
1427int32323232 aacbbbda24 += (acbd11 & 0xffff) * (abba13  & 0xffff)
1428int32323232 aacbbbda25 += (acbd11 & 0xffff) * (abba14 & 0xffff)
1429int32323232 aacbbbda26  = (acbd7  & 0xffff) * (abba19 & 0xffff)
1430int32323232 aacbbbda20 += (acbd8  & 0xffff) * (abba12 & 0xffff)
1431int32323232 aacbbbda21 += (acbd13 & 0xffff) * (abba8  & 0xffff)
1432int32323232 aacbbbda22 += (acbd13 & 0xffff) * (abba9  & 0xffff)
1433int32323232 aacbbbda23 += (acbd13 & 0xffff) * (abba10 & 0xffff)
1434int32323232 aacbbbda24 += (acbd13 & 0xffff) * (abba11  & 0xffff)
1435int32323232 aacbbbda25 += (acbd14 & 0xffff) * (abba11 & 0xffff)
1436int32323232 aacbbbda26 += (acbd11 & 0xffff) * (abba15 & 0xffff)
1437int32323232 aacbbbda20 += (acbd12 & 0xffff) * (abba8  & 0xffff)
1438int32323232 aacbbbda21 += (acbd16 & 0xffff) * (abba5  & 0xffff)
1439int32323232 aacbbbda22 += (acbd14 & 0xffff) * (abba8  & 0xffff)
1440int32323232 aacbbbda23 += (acbd14 & 0xffff) * (abba9  & 0xffff)
1441int32323232 aacbbbda24 += (acbd14 & 0xffff) * (abba10  & 0xffff)
1442int32323232 aacbbbda25 += (acbd15 & 0xffff) * (abba10 & 0xffff)
1443int32323232 aacbbbda26 += (acbd15 & 0xffff) * (abba11 & 0xffff)
1444int32323232 aacbbbda20 += (acbd16 & 0xffff) * (abba4  & 0xffff)
1445int32323232 aacbbbda21 += (acbd17 & 0xffff) * (abba4  & 0xffff)
1446int32323232 aacbbbda22 += (acbd16 & 0xffff) * (abba6  & 0xffff)
1447int32323232 aacbbbda23 += (acbd15 & 0xffff) * (abba8  & 0xffff)
1448int32323232 aacbbbda24 += (acbd15 & 0xffff) * (abba9  & 0xffff)
1449int32323232 aacbbbda25 += (acbd18 & 0xffff) * (abba7  & 0xffff)
1450int32323232 aacbbbda26 += (acbd19 & 0xffff) * (abba7  & 0xffff)
1451int32323232 aacbbbda27  = (acbd8  & 0xffff) * (abba19 & 0xffff)
1452int32323232 aacbbbda28  = (acbd9  & 0xffff) * (abba19 & 0xffff)
1453int32323232 aacbbbda22 += (acbd17 & 0xffff) * (abba5  & 0xffff)
1454int32323232 aacbbbda23 += (acbd16 & 0xffff) * (abba7  & 0xffff)
1455int32323232 aacbbbda24 += (acbd17 & 0xffff) * (abba7  & 0xffff)
1456int32323232 aacbbbda25 += (acbd19 & 0xffff) * (abba6  & 0xffff)
1457int32323232 aacbbbda26 <<= 1
1458int32323232 aacbbbda27 += (acbd9  & 0xffff) * (abba18 & 0xffff)
1459int32323232 aacbbbda28 += (acbd10 & 0xffff) * (abba18 & 0xffff)
1460int32323232 aacbbbda22 += (acbd18 & 0xffff) * (abba4  & 0xffff)
1461int32323232 aacbbbda23 += (acbd17 & 0xffff) * (abba6  & 0xffff)
1462int32323232 aacbbbda24 += (acbd18 & 0xffff) * (abba6  & 0xffff)
1463int32323232 aacbbbda25 <<= 1
1464int32323232 aacbbbda26 += (acbd8  & 0xffff) * (abba18 & 0xffff)
1465int32323232 aacbbbda27 += (acbd10 & 0xffff) * (abba17 & 0xffff)
1466int32323232 aacbbbda28 += (acbd11 & 0xffff) * (abba17 & 0xffff)
1467int32323232 aacbbbda29  = (acbd10 & 0xffff) * (abba19 & 0xffff)
1468int32323232 aacbbbda23 += (acbd18 & 0xffff) * (abba5  & 0xffff)
1469int32323232 aacbbbda24 += (acbd19 & 0xffff) * (abba5  & 0xffff)
1470int32323232 aacbbbda25 += (acbd8  & 0xffff) * (abba17 & 0xffff)
1471int32323232 aacbbbda26 += (acbd9  & 0xffff) * (abba17 & 0xffff)
1472int32323232 aacbbbda27 += (acbd11 & 0xffff) * (abba16 & 0xffff)
1473int32323232 aacbbbda28 += (acbd13 & 0xffff) * (abba15 & 0xffff)
1474int32323232 aacbbbda29 += (acbd11 & 0xffff) * (abba18 & 0xffff)
1475int32323232 aacbbbda23 += (acbd19 & 0xffff) * (abba4  & 0xffff)
1476int32323232 aacbbbda24 <<= 1
1477int32323232 aacbbbda25 += (acbd9  & 0xffff) * (abba16 & 0xffff)
1478int32323232 aacbbbda26 += (acbd10 & 0xffff) * (abba16 & 0xffff)
1479int32323232 aacbbbda27 += (acbd12 & 0xffff) * (abba15 & 0xffff)
1480int32323232 aacbbbda28 += (acbd14 & 0xffff) * (abba14 & 0xffff)
1481int32323232 aacbbbda29 += (acbd14 & 0xffff) * (abba15 & 0xffff)
1482int32323232 aacbbbda30  = (acbd11 & 0xffff) * (abba19 & 0xffff)
1483int32323232 aacbbbda24 += (acbd8  & 0xffff) * (abba16 & 0xffff)
1484int32323232 aacbbbda25 += (acbd12 & 0xffff) * (abba13 & 0xffff)
1485int32323232 aacbbbda26 += (acbd12 & 0xffff) * (abba14 & 0xffff)
1486int32323232 aacbbbda27 += (acbd13 & 0xffff) * (abba14 & 0xffff)
1487int32323232 aacbbbda28 += (acbd15 & 0xffff) * (abba13 & 0xffff)
1488int32323232 aacbbbda29 += (acbd15 & 0xffff) * (abba14 & 0xffff)
1489int32323232 aacbbbda30 += (acbd15 & 0xffff) * (abba15 & 0xffff)
1490int32323232 aacbbbda24 += (acbd12 & 0xffff) * (abba12 & 0xffff)
1491int32323232 aacbbbda25 += (acbd13 & 0xffff) * (abba12 & 0xffff)
1492int32323232 aacbbbda26 += (acbd13 & 0xffff) * (abba13 & 0xffff)
1493int32323232 aacbbbda27 += (acbd14 & 0xffff) * (abba13 & 0xffff)
1494int32323232 aacbbbda28 += (acbd17 & 0xffff) * (abba11 & 0xffff)
1495int32323232 aacbbbda29 += (acbd18 & 0xffff) * (abba11 & 0xffff)
1496int32323232 aacbbbda30 += (acbd19 & 0xffff) * (abba11 & 0xffff)
1497int32323232 aacbbbda24 += (acbd16 & 0xffff) * (abba8  & 0xffff)
1498int32323232 aacbbbda25 += (acbd16 & 0xffff) * (abba9  & 0xffff)
1499int32323232 aacbbbda26 += (acbd14 & 0xffff) * (abba12 & 0xffff)
1500int32323232 aacbbbda27 += (acbd15 & 0xffff) * (abba12 & 0xffff)
1501int32323232 aacbbbda28 += (acbd18 & 0xffff) * (abba10 & 0xffff)
1502int32323232 aacbbbda29 += (acbd19 & 0xffff) * (abba10 & 0xffff)
1503int32323232 aacbbbda30 <<= 1
1504int32323232 aacbbbda31  = (acbd12 & 0xffff) * (abba19 & 0xffff)
1505int32323232 aacbbbda25 += (acbd17 & 0xffff) * (abba8  & 0xffff)
1506int32323232 aacbbbda26 += (acbd16 & 0xffff) * (abba10 & 0xffff)
1507int32323232 aacbbbda27 += (acbd16 & 0xffff) * (abba11 & 0xffff)
1508int32323232 aacbbbda28 += (acbd19 & 0xffff) * (abba9  & 0xffff)
1509int32323232 aacbbbda29 <<= 1
1510int32323232 aacbbbda30 += (acbd12 & 0xffff) * (abba18 & 0xffff)
1511int32323232 aacbbbda31 += (acbd13 & 0xffff) * (abba18 & 0xffff)
1512int32323232 aacbbbda32  = (acbd13 & 0xffff) * (abba19 & 0xffff)
1513int32323232 aacbbbda26 += (acbd17 & 0xffff) * (abba9  & 0xffff)
1514int32323232 aacbbbda27 += (acbd17 & 0xffff) * (abba10 & 0xffff)
1515int32323232 aacbbbda28 <<= 1
1516int32323232 aacbbbda29 += (acbd12 & 0xffff) * (abba17 & 0xffff)
1517int32323232 aacbbbda30 += (acbd13 & 0xffff) * (abba17 & 0xffff)
1518int32323232 aacbbbda31 += (acbd14 & 0xffff) * (abba17 & 0xffff)
1519int32323232 aacbbbda32 += (acbd14 & 0xffff) * (abba18 & 0xffff)
1520int32323232 aacbbbda26 += (acbd18 & 0xffff) * (abba8  & 0xffff)
1521int32323232 aacbbbda27 += (acbd18 & 0xffff) * (abba9  & 0xffff)
1522int32323232 aacbbbda28 += (acbd12 & 0xffff) * (abba16 & 0xffff)
1523int32323232 aacbbbda29 += (acbd13 & 0xffff) * (abba16 & 0xffff)
1524int32323232 aacbbbda30 += (acbd14 & 0xffff) * (abba16 & 0xffff)
1525int32323232 aacbbbda31 += (acbd15 & 0xffff) * (abba16 & 0xffff)
1526int32323232 aacbbbda32 += (acbd15 & 0xffff) * (abba17 & 0xffff)
1527int32323232 aacbbbda33  = (acbd14 & 0xffff) * (abba19 & 0xffff)
1528int32323232 aacbbbda27 += (acbd19 & 0xffff) * (abba8  & 0xffff)
1529int32323232 aacbbbda28 += (acbd16 & 0xffff) * (abba12 & 0xffff)
1530int32323232 aacbbbda29 += (acbd16 & 0xffff) * (abba13 & 0xffff)
1531int32323232 aacbbbda30 += (acbd16 & 0xffff) * (abba14 & 0xffff)
1532int32323232 aacbbbda31 += (acbd16 & 0xffff) * (abba15 & 0xffff)
1533int32323232 aacbbbda32 += (acbd17 & 0xffff) * (abba15 & 0xffff)
1534int32323232 aacbbbda33 += (acbd15 & 0xffff) * (abba18 & 0xffff)
1535int32323232 aacbbbda34 = (acbd15 & 0xffff) * (abba19 & 0xffff)
1536int32323232 aacbbbda35 = (acbd16 & 0xffff) * (abba19 & 0xffff)
1537int32323232 aacbbbda29 += (acbd17 & 0xffff) * (abba12 & 0xffff)
1538int32323232 aacbbbda30 += (acbd17 & 0xffff) * (abba13 & 0xffff)
1539int32323232 aacbbbda31 += (acbd17 & 0xffff) * (abba14 & 0xffff)
1540int32323232 aacbbbda32 += (acbd18 & 0xffff) * (abba14 & 0xffff)
1541int32323232 aacbbbda33 += (acbd18 & 0xffff) * (abba15 & 0xffff)
1542int32323232 aacbbbda34 += (acbd19 & 0xffff) * (abba15 & 0xffff)
1543int32323232 aacbbbda35 += (acbd17 & 0xffff) * (abba18 & 0xffff)
1544int32323232 aacbbbda36 = (acbd17 & 0xffff) * (abba19 & 0xffff)
1545int32323232 aacbbbda30 += (acbd18 & 0xffff) * (abba12 & 0xffff)
1546int32323232 aacbbbda31 += (acbd18 & 0xffff) * (abba13 & 0xffff)
1547int32323232 aacbbbda32 += (acbd19 & 0xffff) * (abba13 & 0xffff)
1548int32323232 aacbbbda33 += (acbd19 & 0xffff) * (abba14 & 0xffff)
1549int32323232 aacbbbda34 <<= 1
1550int32323232 aacbbbda35 += (acbd18 & 0xffff) * (abba17 & 0xffff)
1551int32323232 aacbbbda36 += (acbd18 & 0xffff) * (abba18 & 0xffff)
1552int32323232 aacbbbda37 = (acbd18 & 0xffff) * (abba19 & 0xffff)
1553int32323232 aacbbbda31 += (acbd19 & 0xffff) * (abba12 & 0xffff)
1554int32323232 aacbbbda32 <<= 1
1555int32323232 aacbbbda33 <<= 1
1556int32323232 aacbbbda34 += (acbd16 & 0xffff) * (abba18 & 0xffff)
1557int32323232 aacbbbda35 += (acbd19 & 0xffff) * (abba16 & 0xffff)
1558int32323232 aacbbbda36 += (acbd19 & 0xffff) * (abba17 & 0xffff)
1559int32323232 aacbbbda37 += (acbd19 & 0xffff) * (abba18 & 0xffff)
1560int32323232 aacbbbda38 = (acbd19 & 0xffff) * (abba19 & 0xffff)
1561int32323232 aacbbbda32 += (acbd16 & 0xffff) * (abba16 & 0xffff)
1562int32323232 aacbbbda33 += (acbd16 & 0xffff) * (abba17 & 0xffff)
1563int32323232 aacbbbda34 += (acbd17 & 0xffff) * (abba17 & 0xffff)
1564int32323232 aacbbbda36 <<= 1
1565int32323232 aacbbbda37 <<= 1
1566int32323232 aacbbbda38 <<= 1
1567int32323232 aacbbbda33 += (acbd17 & 0xffff) * (abba16 & 0xffff)
1568int32323232 aacbbbda34 += (acbd18 & 0xffff) * (abba16 & 0xffff)
1569
1570## Reduction
1571
# Carry propagation across the upper product limbs aacbbbda20..aacbbbda38.
# Every step takes the bits above a limb's width (shift right by 13, or by 12
# for limbs 23, 27, 31 and 35), adds them to the next limb, and then masks the
# source limb with mask13 / mask12.  Four chains, starting at limbs 20, 24, 28
# and 32, are advanced side by side.
# NOTE(review): mask13 / mask12 are defined outside this chunk; presumably they
# keep the low 13 / 12 bits of every 32-bit word -- confirm.
#
# First pass, step 1: limbs 20, 24, 28, 32 -> 21, 25, 29, 33.
1572uint32323232 carry0 = aacbbbda20 >> 13
1573uint32323232 carry1 = aacbbbda24 >> 13
1574uint32323232 carry2 = aacbbbda28 >> 13
1575uint32323232 carry3 = aacbbbda32 >> 13
1576
1577int32323232 aacbbbda21 += carry0
1578aacbbbda20 &= mask13
1579int32323232 aacbbbda25 += carry1
1580aacbbbda24 &= mask13
1581int32323232 aacbbbda29 += carry2
1582aacbbbda28 &= mask13
1583int32323232 aacbbbda33 += carry3
1584aacbbbda32 &= mask13
1585
# First pass, step 2: limbs 21, 25, 29, 33 -> 22, 26, 30, 34.
1586uint32323232 carry0 = aacbbbda21 >> 13
1587uint32323232 carry1 = aacbbbda25 >> 13
1588uint32323232 carry2 = aacbbbda29 >> 13
1589uint32323232 carry3 = aacbbbda33 >> 13
1590
1591int32323232 aacbbbda22 += carry0
1592aacbbbda21 &= mask13
1593int32323232 aacbbbda26 += carry1
1594aacbbbda25 &= mask13
1595int32323232 aacbbbda30 += carry2
1596aacbbbda29 &= mask13
1597int32323232 aacbbbda34 += carry3
1598aacbbbda33 &= mask13
1599
# First pass, step 3: limbs 22, 26, 30, 34 -> 23, 27, 31, 35.
1600uint32323232 carry0 = aacbbbda22 >> 13
1601uint32323232 carry1 = aacbbbda26 >> 13
1602uint32323232 carry2 = aacbbbda30 >> 13
1603uint32323232 carry3 = aacbbbda34 >> 13
1604
1605int32323232 aacbbbda23 += carry0
1606aacbbbda22 &= mask13
1607int32323232 aacbbbda27 += carry1
1608aacbbbda26 &= mask13
1609int32323232 aacbbbda31 += carry2
1610aacbbbda30 &= mask13
1611int32323232 aacbbbda35 += carry3
1612aacbbbda34 &= mask13
1613
# First pass, step 4: limbs 23, 27, 31, 35 (shift 12, mask12) -> 24, 28, 32, 36.
1614uint32323232 carry0 = aacbbbda23 >> 12
1615uint32323232 carry1 = aacbbbda27 >> 12
1616uint32323232 carry2 = aacbbbda31 >> 12
1617uint32323232 carry3 = aacbbbda35 >> 12
1618
1619int32323232 aacbbbda24 += carry0
1620aacbbbda23 &= mask12
1621int32323232 aacbbbda28 += carry1
1622aacbbbda27 &= mask12
1623int32323232 aacbbbda32 += carry2
1624aacbbbda31 &= mask12
1625int32323232 aacbbbda36 += carry3
1626aacbbbda35 &= mask12
1627
# Second pass, step 1: limbs 24, 28, 32, 36 (which just received a carry)
# -> 25, 29, 33, 37.  The carry registers are now named carry1..carry4.
1628uint32323232 carry1 = aacbbbda24 >> 13
1629uint32323232 carry2 = aacbbbda28 >> 13
1630uint32323232 carry3 = aacbbbda32 >> 13
1631uint32323232 carry4 = aacbbbda36 >> 13
1632
1633int32323232 aacbbbda25 += carry1
1634aacbbbda24 &= mask13
1635int32323232 aacbbbda29 += carry2
1636aacbbbda28 &= mask13
1637int32323232 aacbbbda33 += carry3
1638aacbbbda32 &= mask13
1639int32323232 aacbbbda37 += carry4
1640aacbbbda36 &= mask13
1641
# Second pass, step 2: limbs 25, 29, 33, 37 -> 26, 30, 34, 38.
1642uint32323232 carry1 = aacbbbda25 >> 13
1643uint32323232 carry2 = aacbbbda29 >> 13
1644uint32323232 carry3 = aacbbbda33 >> 13
1645uint32323232 carry4 = aacbbbda37 >> 13
1646
1647int32323232 aacbbbda26 += carry1
1648aacbbbda25 &= mask13
1649int32323232 aacbbbda30 += carry2
1650aacbbbda29 &= mask13
1651int32323232 aacbbbda34 += carry3
1652aacbbbda33 &= mask13
1653int32323232 aacbbbda38 += carry4
1654aacbbbda37 &= mask13
1655
# Second pass, step 3: limbs 26, 30, 34 -> 27, 31, 35.
# The overflow of limb 38 is not added to an existing limb; it becomes the
# initial value of aacbbbda39.
1656uint32323232 carry1 = aacbbbda26 >> 13
1657uint32323232 carry2 = aacbbbda30 >> 13
1658uint32323232 carry3 = aacbbbda34 >> 13
1659uint32323232 aacbbbda39 = aacbbbda38 >> 13
1660
1661int32323232 aacbbbda27 += carry1
1662aacbbbda26 &= mask13
1663int32323232 aacbbbda31 += carry2
1664aacbbbda30 &= mask13
1665int32323232 aacbbbda35 += carry3
1666aacbbbda34 &= mask13
1667
# Second pass, step 4: limbs 27, 31, 35 (shift 12, mask12) -> 28, 32, 36.
# The mask of limb 38 (whose overflow was extracted above) is interleaved here.
# Limbs 28, 32 and 36 receive this last carry and are not masked again in this
# section.
1668uint32323232 carry1 = aacbbbda27 >> 12
1669aacbbbda38 &= mask13
1670uint32323232 carry2 = aacbbbda31 >> 12
1671uint32323232 carry3 = aacbbbda35 >> 12
1672
1673int32323232 aacbbbda28 += carry1
1674aacbbbda27 &= mask12
1675int32323232 aacbbbda32 += carry2
1676aacbbbda31 &= mask12
1677int32323232 aacbbbda36 += carry3
1678aacbbbda35 &= mask12
1679
# Fold the upper half of the product into the lower half:
#   aacbbbda[k] += (aacbbbda[k+20] & 0xffff) * (vec19 & 0xffff)   for k = 0..19.
# As written, each multiply uses only the low 16 bits of every 32-bit word of
# both operands.
# NOTE(review): vec19 is defined outside this chunk; presumably it holds the
# constant 19 in every word, so that this step reduces modulo 2^255-19 --
# confirm against its definition.
1680int32323232 aacbbbda0  += (aacbbbda20 & 0xffff) * (vec19 & 0xffff)
1681int32323232 aacbbbda1  += (aacbbbda21 & 0xffff) * (vec19 & 0xffff)
1682int32323232 aacbbbda2  += (aacbbbda22 & 0xffff) * (vec19 & 0xffff)
1683int32323232 aacbbbda3  += (aacbbbda23 & 0xffff) * (vec19 & 0xffff)
1684int32323232 aacbbbda4  += (aacbbbda24 & 0xffff) * (vec19 & 0xffff)
1685int32323232 aacbbbda5  += (aacbbbda25 & 0xffff) * (vec19 & 0xffff)
1686int32323232 aacbbbda6  += (aacbbbda26 & 0xffff) * (vec19 & 0xffff)
1687int32323232 aacbbbda7  += (aacbbbda27 & 0xffff) * (vec19 & 0xffff)
1688int32323232 aacbbbda8  += (aacbbbda28 & 0xffff) * (vec19 & 0xffff)
1689int32323232 aacbbbda9  += (aacbbbda29 & 0xffff) * (vec19 & 0xffff)
1690int32323232 aacbbbda10 += (aacbbbda30 & 0xffff) * (vec19 & 0xffff)
1691int32323232 aacbbbda11 += (aacbbbda31 & 0xffff) * (vec19 & 0xffff)
1692int32323232 aacbbbda12 += (aacbbbda32 & 0xffff) * (vec19 & 0xffff)
1693int32323232 aacbbbda13 += (aacbbbda33 & 0xffff) * (vec19 & 0xffff)
1694int32323232 aacbbbda14 += (aacbbbda34 & 0xffff) * (vec19 & 0xffff)
1695int32323232 aacbbbda15 += (aacbbbda35 & 0xffff) * (vec19 & 0xffff)
1696int32323232 aacbbbda16 += (aacbbbda36 & 0xffff) * (vec19 & 0xffff)
1697int32323232 aacbbbda17 += (aacbbbda37 & 0xffff) * (vec19 & 0xffff)
1698int32323232 aacbbbda18 += (aacbbbda38 & 0xffff) * (vec19 & 0xffff)
1699int32323232 aacbbbda19 += (aacbbbda39 & 0xffff) * (vec19 & 0xffff)
1700
# Carry through the top four limbs and wrap the final overflow around:
#   16 -> 17 -> 18 -> 19 (shift 13 each), then the bits of limb 19 above bit 12
#   are multiplied by 19 and added to limb 0.
# The same register `carry` is reused for every step, so the order of the
# shift/add pairs below must be kept.
# NOTE(review): the factor 19 presumably reflects reduction modulo 2^255-19
# (overflow of the top limb re-enters at limb 0) -- confirm.
1701uint32323232 carry = aacbbbda16 >> 13
1702int32323232 aacbbbda17 += carry
1703uint32323232 carry = aacbbbda17 >> 13
1704int32323232 aacbbbda18 += carry
1705uint32323232 carry = aacbbbda18 >> 13
1706int32323232 aacbbbda19 += carry
1707uint32323232 carry = aacbbbda19 >> 12
1708#Multiply carry by 19
# red = 16*carry + carry + carry + carry = 19*carry
1709int32323232 red = carry << 4
1710int32323232 red += carry
1711int32323232 red += carry
1712int32323232 red += carry
1713
1714int32323232 aacbbbda0 += red
# Limb 19 is masked exactly once; the second, identical `&= mask12` that used
# to follow the mask of limb 18 was a no-op and has been dropped.
1715aacbbbda19 &= mask12
1716
# The overflow bits of limbs 16..18 were already read above; clear them now.
1717aacbbbda16 &= mask13
1718aacbbbda17 &= mask13
1719aacbbbda18 &= mask13
1721
# Carry propagation across the lower limbs aacbbbda0..aacbbbda19.
# Same pattern as for the upper limbs: shift right by 13 (by 12 for limbs 3, 7,
# 11 and 15), add to the next limb, mask the source limb with mask13 / mask12.
# Four chains, starting at limbs 0, 4, 8 and 12, are advanced side by side.
# NOTE(review): mask13 / mask12 are defined outside this chunk; presumably they
# keep the low 13 / 12 bits of every 32-bit word -- confirm.
#
# First pass, step 1: limbs 0, 4, 8, 12 -> 1, 5, 9, 13.
1722uint32323232 carry0 = aacbbbda0  >> 13
1723uint32323232 carry1 = aacbbbda4  >> 13
1724uint32323232 carry2 = aacbbbda8  >> 13
1725uint32323232 carry3 = aacbbbda12 >> 13
1726
1727int32323232 aacbbbda1  += carry0
1728int32323232 aacbbbda5  += carry1
1729int32323232 aacbbbda9  += carry2
1730int32323232 aacbbbda13 += carry3
1731
1732aacbbbda0  &= mask13
1733aacbbbda4  &= mask13
1734aacbbbda8  &= mask13
1735aacbbbda12 &= mask13
1736
# First pass, step 2: limbs 1, 5, 9, 13 -> 2, 6, 10, 14.
1737uint32323232 carry0 = aacbbbda1  >> 13
1738uint32323232 carry1 = aacbbbda5  >> 13
1739uint32323232 carry2 = aacbbbda9  >> 13
1740uint32323232 carry3 = aacbbbda13 >> 13
1741
1742int32323232 aacbbbda2  += carry0
1743int32323232 aacbbbda6  += carry1
1744int32323232 aacbbbda10 += carry2
1745int32323232 aacbbbda14 += carry3
1746
1747aacbbbda1  &= mask13
1748aacbbbda5  &= mask13
1749aacbbbda9  &= mask13
1750aacbbbda13 &= mask13
1751
# First pass, step 3: limbs 2, 6, 10, 14 -> 3, 7, 11, 15.
1752uint32323232 carry0 = aacbbbda2  >> 13
1753uint32323232 carry1 = aacbbbda6  >> 13
1754uint32323232 carry2 = aacbbbda10 >> 13
1755uint32323232 carry3 = aacbbbda14 >> 13
1756
1757int32323232 aacbbbda3  += carry0
1758int32323232 aacbbbda7  += carry1
1759int32323232 aacbbbda11 += carry2
1760int32323232 aacbbbda15 += carry3
1761
1762aacbbbda2  &= mask13
1763aacbbbda6  &= mask13
1764aacbbbda10 &= mask13
1765aacbbbda14 &= mask13
1766
# First pass, step 4: limbs 3, 7, 11, 15 (shift 12, mask12) -> 4, 8, 12, 16.
1767uint32323232 carry0 = aacbbbda3  >> 12
1768uint32323232 carry1 = aacbbbda7  >> 12
1769uint32323232 carry2 = aacbbbda11 >> 12
1770uint32323232 carry3 = aacbbbda15 >> 12
1771
1772int32323232 aacbbbda4  += carry0
1773int32323232 aacbbbda8  += carry1
1774int32323232 aacbbbda12 += carry2
1775int32323232 aacbbbda16 += carry3
1776
1777aacbbbda3  &= mask12
1778aacbbbda7  &= mask12
1779aacbbbda11 &= mask12
1780aacbbbda15 &= mask12
1781
# Second pass, step 1: limbs 4, 8, 12, 16 (which just received a carry)
# -> 5, 9, 13, 17.  The carry registers are now named carry1..carry4.
1782uint32323232 carry1 = aacbbbda4  >> 13
1783uint32323232 carry2 = aacbbbda8  >> 13
1784uint32323232 carry3 = aacbbbda12 >> 13
1785uint32323232 carry4 = aacbbbda16 >> 13
1786
1787int32323232 aacbbbda5  += carry1
1788int32323232 aacbbbda9  += carry2
1789int32323232 aacbbbda13 += carry3
1790int32323232 aacbbbda17  += carry4
1791
1792aacbbbda4  &= mask13
1793aacbbbda8  &= mask13
1794aacbbbda12 &= mask13
1795aacbbbda16 &= mask13
1796
# Second pass, step 2: limbs 5, 9, 13, 17 -> 6, 10, 14, 18.
1797uint32323232 carry1 = aacbbbda5  >> 13
1798uint32323232 carry2 = aacbbbda9  >> 13
1799uint32323232 carry3 = aacbbbda13 >> 13
1800uint32323232 carry4 = aacbbbda17 >> 13
1801
1802int32323232 aacbbbda6  += carry1
1803int32323232 aacbbbda10 += carry2
1804int32323232 aacbbbda14 += carry3
1805int32323232 aacbbbda18 += carry4
1806
1807aacbbbda5  &= mask13
1808aacbbbda9  &= mask13
1809aacbbbda13 &= mask13
1810aacbbbda17 &= mask13
1811
# Second pass, step 3: limbs 6, 10, 14, 18 -> 7, 11, 15, 19.
# Limbs 7, 11, 15 and 19 receive this last carry and are not masked again in
# this section.
1812uint32323232 carry1 = aacbbbda6  >> 13
1813uint32323232 carry2 = aacbbbda10 >> 13
1814uint32323232 carry3 = aacbbbda14 >> 13
1815uint32323232 carry4 = aacbbbda18  >> 13
1816
1817int32323232 aacbbbda7  += carry1
1818int32323232 aacbbbda11 += carry2
1819int32323232 aacbbbda15 += carry3
1820int32323232 aacbbbda19  += carry4
1821
1822aacbbbda6  &= mask13
1823aacbbbda10 &= mask13
1824aacbbbda14 &= mask13
1825aacbbbda18  &= mask13
1827###################################################################################
1828################         Multiply aa with a24 and bb with a24-1       #############
1829###################################################################################
1830
# Scale all 20 limbs of aacbbbda by a24vec and prepare the operands of the
# add/subtract step that follows.
#
# For every limb K (0..19), per 32-bit lane:
#   tmpK  = aacbbbdaK << 16
#   tmpK += (tmpK >> 16) * (a24vec >> 16)
# i.e. tmpK = (aacbbbdaK << 16) + (low halfword of aacbbbdaK) * (high halfword
# of a24vec).
# NOTE(review): presumably the high halfwords of a24vec hold the constant
# minus 2^16, so that tmpK is the full product -- a24vec is defined outside
# this section, confirm there.
#
# Three families of vectors are assembled with lane-selection patterns that
# are defined elsewhere (their lane layout is not visible here):
#   2p2p2pcbK     = combine 2pconsts and aacbbbdaK (selw0105; selw2325 for
#                   K = 3, 7, 11, 15, 19, which are the limbs masked with
#                   mask12 instead of mask13 in the carry code further down).
#                   Limb 0 alone reads 2pconsts0 instead of 2pconsts.
#                   NOTE(review): presumably intentional (a distinct constant
#                   for the lowest limb) -- confirm against the declarations.
#   aa_a24aadadaK = combine aacbbbdaK and tmpK by selw0433
#   bb_a24m1bbcbK = combine aacbbbdaK and tmpK by selw261c0
# Only K = 0..9 of the last two families are built in this section; K = 10..19
# are built further down, interleaved with the first additions.
#
# Arithmetic and combine statements alternate line by line.
# NOTE(review): presumably deliberate instruction scheduling -- do not reorder
# without re-checking every tmpK/aacbbbdaK dependency.
1831int32323232 tmp0 =  aacbbbda0 << 16
18322p2p2pcb0 = combine 2pconsts0 and aacbbbda0 by selw0105
1833int32323232 tmp1 =  aacbbbda1 << 16
18342p2p2pcb1 = combine 2pconsts and aacbbbda1 by selw0105
1835int32323232 tmp2 =  aacbbbda2 << 16
18362p2p2pcb2 = combine 2pconsts and aacbbbda2 by selw0105
1837int32323232 tmp3 =  aacbbbda3 << 16
18382p2p2pcb3 = combine 2pconsts and aacbbbda3 by selw2325
1839uint32323232 tmp0  += (tmp0  >> 16) * (a24vec >> 16)
18402p2p2pcb4 = combine 2pconsts and aacbbbda4 by selw0105
1841uint32323232 tmp1  += (tmp1  >> 16) * (a24vec >> 16)
18422p2p2pcb5 = combine 2pconsts and aacbbbda5 by selw0105
1843uint32323232 tmp2  += (tmp2  >> 16) * (a24vec >> 16)
18442p2p2pcb6 = combine 2pconsts and aacbbbda6 by selw0105
1845uint32323232 tmp3  += (tmp3  >> 16) * (a24vec >> 16)
18462p2p2pcb7 = combine 2pconsts and aacbbbda7 by selw2325
1847int32323232 tmp4 =  aacbbbda4 << 16
18482p2p2pcb8 = combine 2pconsts and aacbbbda8 by selw0105
1849int32323232 tmp5 =  aacbbbda5 << 16
18502p2p2pcb9 = combine 2pconsts and aacbbbda9 by selw0105
1851int32323232 tmp6 =  aacbbbda6 << 16
18522p2p2pcb10 = combine 2pconsts and aacbbbda10 by selw0105
1853int32323232 tmp7 =  aacbbbda7 << 16
18542p2p2pcb11 = combine 2pconsts and aacbbbda11 by selw2325
1855uint32323232 tmp4  += (tmp4  >> 16) * (a24vec >> 16)
18562p2p2pcb12 = combine 2pconsts and aacbbbda12 by selw0105
1857uint32323232 tmp5  += (tmp5  >> 16) * (a24vec >> 16)
18582p2p2pcb13 = combine 2pconsts and aacbbbda13 by selw0105
1859uint32323232 tmp6  += (tmp6  >> 16) * (a24vec >> 16)
18602p2p2pcb14 = combine 2pconsts and aacbbbda14 by selw0105
1861uint32323232 tmp7  += (tmp7  >> 16) * (a24vec >> 16)
18622p2p2pcb15 = combine 2pconsts and aacbbbda15 by selw2325
1863int32323232 tmp8 =  aacbbbda8 << 16
18642p2p2pcb16 = combine 2pconsts and aacbbbda16 by selw0105
1865int32323232 tmp9 =  aacbbbda9 << 16
18662p2p2pcb17 = combine 2pconsts and aacbbbda17 by selw0105
1867int32323232 tmp10 = aacbbbda10 << 16
18682p2p2pcb18 = combine 2pconsts and aacbbbda18 by selw0105
1869int32323232 tmp11 = aacbbbda11 << 16
18702p2p2pcb19 = combine 2pconsts and aacbbbda19 by selw2325
1871uint32323232 tmp8  += (tmp8  >> 16) * (a24vec >> 16)
1872aa_a24aadada0 = combine aacbbbda0 and tmp0 by selw0433
1873uint32323232 tmp9  += (tmp9  >> 16) * (a24vec >> 16)
1874aa_a24aadada1 = combine aacbbbda1 and tmp1 by selw0433
1875uint32323232 tmp10 += (tmp10 >> 16) * (a24vec >> 16)
1876aa_a24aadada2 = combine aacbbbda2 and tmp2 by selw0433
1877uint32323232 tmp11 += (tmp11 >> 16) * (a24vec >> 16)
1878aa_a24aadada3 = combine aacbbbda3 and tmp3 by selw0433
1879int32323232 tmp12 = aacbbbda12 << 16
1880aa_a24aadada4 = combine aacbbbda4 and tmp4 by selw0433
1881int32323232 tmp13 = aacbbbda13 << 16
1882aa_a24aadada5 = combine aacbbbda5 and tmp5 by selw0433
1883int32323232 tmp14 = aacbbbda14 << 16
1884aa_a24aadada6 = combine aacbbbda6 and tmp6 by selw0433
1885int32323232 tmp15 = aacbbbda15 << 16
1886aa_a24aadada7 = combine aacbbbda7 and tmp7 by selw0433
1887uint32323232 tmp12 += (tmp12 >> 16) * (a24vec >> 16)
1888aa_a24aadada8 = combine aacbbbda8 and tmp8 by selw0433
1889uint32323232 tmp13 += (tmp13 >> 16) * (a24vec >> 16)
1890aa_a24aadada9 = combine aacbbbda9 and tmp9 by selw0433
1891uint32323232 tmp14 += (tmp14 >> 16) * (a24vec >> 16)
1892bb_a24m1bbcb0 = combine aacbbbda0 and tmp0 by selw261c0
1893uint32323232 tmp15 += (tmp15 >> 16) * (a24vec >> 16)
1894bb_a24m1bbcb1 = combine aacbbbda1 and tmp1 by selw261c0
1895int32323232 tmp16 = aacbbbda16 << 16
1896bb_a24m1bbcb2 = combine aacbbbda2 and tmp2 by selw261c0
1897int32323232 tmp17 = aacbbbda17 << 16
1898bb_a24m1bbcb3 = combine aacbbbda3 and tmp3 by selw261c0
1899int32323232 tmp18 = aacbbbda18 << 16
1900bb_a24m1bbcb4 = combine aacbbbda4 and tmp4 by selw261c0
1901int32323232 tmp19 = aacbbbda19 << 16
1902bb_a24m1bbcb5 = combine aacbbbda5 and tmp5 by selw261c0
1903uint32323232 tmp16 += (tmp16 >> 16) * (a24vec >> 16)
1904bb_a24m1bbcb6 = combine aacbbbda6 and tmp6 by selw261c0
1905uint32323232 tmp17 += (tmp17 >> 16) * (a24vec >> 16)
1906bb_a24m1bbcb7 = combine aacbbbda7 and tmp7 by selw261c0
1907uint32323232 tmp18 += (tmp18 >> 16) * (a24vec >> 16)
1908bb_a24m1bbcb8 = combine aacbbbda8 and tmp8 by selw261c0
1909uint32323232 tmp19 += (tmp19 >> 16) * (a24vec >> 16)
1910bb_a24m1bbcb9 = combine aacbbbda9 and tmp9 by selw261c0
1911
1912###################################################################################
1913#########################          E = AA-BB 		    #######################
1914#########################          t4 = a24AA - a24m1BB     #######################
1915#########################          t1 = DA-CB               #######################
1916#########################          t0 = DA+CB               #######################
1917###################################################################################
1918
1919
# First half of the add/subtract step: for K = 0..19,
#   et4t1t0K = aa_a24aadadaK + 2p2p2pcbK
# The matching subtraction of bb_a24m1bbcbK is done in the run that follows.
# NOTE(review): the 2p2p2pcb addend presumably supplies a multiple of the
# modulus so that the later subtraction cannot go negative -- confirm against
# the definition of 2pconsts / 2pconsts0.
#
# The combine statements interleaved here finish building aa_a24aadada10..19
# (selw0433) and bb_a24m1bbcb10..19 (selw261c0) from aacbbbdaK and tmpK.
# Each aa_a24aadadaK with K >= 10 is produced above the addition that reads
# it, so the statement order must be kept.
1920int32323232 et4t1t00 = aa_a24aadada0 + 2p2p2pcb0
1921aa_a24aadada10 = combine aacbbbda10 and tmp10 by selw0433
1922int32323232 et4t1t01 = aa_a24aadada1 + 2p2p2pcb1
1923aa_a24aadada11 = combine aacbbbda11 and tmp11 by selw0433
1924int32323232 et4t1t02 = aa_a24aadada2 + 2p2p2pcb2
1925aa_a24aadada12 = combine aacbbbda12 and tmp12 by selw0433
1926int32323232 et4t1t03 = aa_a24aadada3 + 2p2p2pcb3
1927aa_a24aadada13 = combine aacbbbda13 and tmp13 by selw0433
1928int32323232 et4t1t04 = aa_a24aadada4 + 2p2p2pcb4
1929aa_a24aadada14 = combine aacbbbda14 and tmp14 by selw0433
1930int32323232 et4t1t05 = aa_a24aadada5 + 2p2p2pcb5
1931aa_a24aadada15 = combine aacbbbda15 and tmp15 by selw0433
1932int32323232 et4t1t06 = aa_a24aadada6 + 2p2p2pcb6
1933aa_a24aadada16 = combine aacbbbda16 and tmp16 by selw0433
1934int32323232 et4t1t07 = aa_a24aadada7 + 2p2p2pcb7
1935aa_a24aadada17 = combine aacbbbda17 and tmp17 by selw0433
1936int32323232 et4t1t08 = aa_a24aadada8 + 2p2p2pcb8
1937aa_a24aadada18 = combine aacbbbda18 and tmp18 by selw0433
1938int32323232 et4t1t09 = aa_a24aadada9 + 2p2p2pcb9
1939aa_a24aadada19 = combine aacbbbda19 and tmp19 by selw0433
1940int32323232 et4t1t010 = aa_a24aadada10 + 2p2p2pcb10
1941bb_a24m1bbcb10 = combine aacbbbda10 and tmp10 by selw261c0
1942int32323232 et4t1t011 = aa_a24aadada11 + 2p2p2pcb11
1943bb_a24m1bbcb11 = combine aacbbbda11 and tmp11 by selw261c0
1944int32323232 et4t1t012 = aa_a24aadada12 + 2p2p2pcb12
1945bb_a24m1bbcb12 = combine aacbbbda12 and tmp12 by selw261c0
1946int32323232 et4t1t013 = aa_a24aadada13 + 2p2p2pcb13
1947bb_a24m1bbcb13 = combine aacbbbda13 and tmp13 by selw261c0
1948int32323232 et4t1t014 = aa_a24aadada14 + 2p2p2pcb14
1949bb_a24m1bbcb14 = combine aacbbbda14 and tmp14 by selw261c0
1950int32323232 et4t1t015 = aa_a24aadada15 + 2p2p2pcb15
1951bb_a24m1bbcb15 = combine aacbbbda15 and tmp15 by selw261c0
1952int32323232 et4t1t016 = aa_a24aadada16 + 2p2p2pcb16
1953bb_a24m1bbcb16 = combine aacbbbda16 and tmp16 by selw261c0
1954int32323232 et4t1t017 = aa_a24aadada17 + 2p2p2pcb17
1955bb_a24m1bbcb17 = combine aacbbbda17 and tmp17 by selw261c0
1956int32323232 et4t1t018 = aa_a24aadada18 + 2p2p2pcb18
1957bb_a24m1bbcb18 = combine aacbbbda18 and tmp18 by selw261c0
1958int32323232 et4t1t019 = aa_a24aadada19 + 2p2p2pcb19
1959bb_a24m1bbcb19 = combine aacbbbda19 and tmp19 by selw261c0
1960
# Second half of the add/subtract step: for K = 0..19,
#   et4t1t0K = et4t1t0K - bb_a24m1bbcbK
# so that overall et4t1t0K = aa_a24aadadaK + 2p2p2pcbK - bb_a24m1bbcbK in
# every 32-bit lane. The limbs are left unreduced here; the carry handling is
# done by the reduction code that follows.
1961int32323232 et4t1t00 = et4t1t00 - bb_a24m1bbcb0
1962int32323232 et4t1t01 = et4t1t01 - bb_a24m1bbcb1
1963int32323232 et4t1t02 = et4t1t02 - bb_a24m1bbcb2
1964int32323232 et4t1t03 = et4t1t03 - bb_a24m1bbcb3
1965int32323232 et4t1t04 = et4t1t04 - bb_a24m1bbcb4
1966int32323232 et4t1t05 = et4t1t05 - bb_a24m1bbcb5
1967int32323232 et4t1t06 = et4t1t06 - bb_a24m1bbcb6
1968int32323232 et4t1t07 = et4t1t07 - bb_a24m1bbcb7
1969int32323232 et4t1t08 = et4t1t08 - bb_a24m1bbcb8
1970int32323232 et4t1t09 = et4t1t09 - bb_a24m1bbcb9
1971int32323232 et4t1t010 = et4t1t010 - bb_a24m1bbcb10
1972int32323232 et4t1t011 = et4t1t011 - bb_a24m1bbcb11
1973int32323232 et4t1t012 = et4t1t012 - bb_a24m1bbcb12
1974int32323232 et4t1t013 = et4t1t013 - bb_a24m1bbcb13
1975int32323232 et4t1t014 = et4t1t014 - bb_a24m1bbcb14
1976int32323232 et4t1t015 = et4t1t015 - bb_a24m1bbcb15
1977int32323232 et4t1t016 = et4t1t016 - bb_a24m1bbcb16
1978int32323232 et4t1t017 = et4t1t017 - bb_a24m1bbcb17
1979int32323232 et4t1t018 = et4t1t018 - bb_a24m1bbcb18
1980int32323232 et4t1t019 = et4t1t019 - bb_a24m1bbcb19
1981
# Top-limb reduction of et4t1t0: ripple a carry sequentially through limbs
# 16 -> 17 -> 18 -> 19, then wrap the overflow of limb 19 around into limb 0.
# Limbs 16..18 are treated as 13-bit (>> 13, mask13) and limb 19 as 12-bit
# (>> 12, mask12).
# Each carry is extracted with an unsigned shift BEFORE the source limb is
# masked; the four masks are applied together at the end of this run.
1982## Reduction
1983uint32323232 carry0 = et4t1t016 >> 13
1984int32323232 et4t1t017 += carry0
1985uint32323232 carry1 = et4t1t017 >> 13
1986int32323232 et4t1t018 += carry1
1987uint32323232 carry = et4t1t018 >> 13
1988int32323232 et4t1t019 += carry
1989uint32323232 carry = et4t1t019 >> 12
1990
# red = 16*carry + carry + carry + carry = 19*carry, added to the lowest limb.
# NOTE(review): with fifteen 13-bit and five 12-bit limbs (255 bits in total)
# a wrap-around factor of 19 matches arithmetic modulo 2^255 - 19 --
# presumably the field prime; confirm against the file header/paper.
1991#Multiply carry by 19
1992int32323232 red = carry << 4
1993int32323232 red += carry
1994int32323232 red += carry
1995int32323232 red += carry
1996int32323232 et4t1t00 += red
1997
# Drop the bits that were carried out of limbs 16..19 above.
1998et4t1t016 &= mask13
1999et4t1t017 &= mask13
2000et4t1t018 &= mask13
2001et4t1t019 &= mask12
2002
# Carry propagation over et4t1t0, done as four independent chains that run
# side by side, in two passes.
#
# Pass 1: chains start at limbs 0, 4, 8 and 12. Each step extracts
#   carry = limb >> 13, masks the limb with mask13 and adds the carry to the
#   next limb; the fourth limb of each chain (3, 7, 11, 15) uses >> 12 and
#   mask12 instead, and its carry feeds the first limb of the NEXT chain
#   (4, 8, 12, 16).
# Pass 2: chains start at limbs 4, 8, 12 and 16 (carry1..carry4) and ripple
#   the same way through 5/9/13/17 and 6/10/14/18 into limbs 7, 11, 15, 19.
#   Those last four limbs receive the final carry and are not masked again
#   within this section.
#
# Interleaved with the carries, the operand vectors of the next stage are
# assembled as soon as a limb has been masked for the last time:
#   et0aat1K  = combine et4t1t0K and aacbbbdaK by selw0342
#   t4t0bbt1K = combine et4t1t0K and aacbbbdaK by selw1362
# (lane layout of the two selection patterns is defined elsewhere). The
# vectors for limbs 7, 11, 15, 19 and t4t0bbt114 / t4t0bbt118 are not built
# here; they are emitted at the start of the following multiplication section.
# NOTE(review): the interleaving is presumably hand scheduling -- keep the
# statement order, every combine depends on the mask that precedes it.
2003uint32323232 carry0 = et4t1t00  >> 13
2004uint32323232 carry1 = et4t1t04  >> 13
2005uint32323232 carry2 = et4t1t08  >> 13
2006uint32323232 carry3 = et4t1t012 >> 13
2007
2008et4t1t00  &= mask13
2009et4t1t04  &= mask13
2010et4t1t08  &= mask13
2011et4t1t012 &= mask13
2012
2013int32323232 et4t1t01  += carry0
2014int32323232 et4t1t05  += carry1
2015int32323232 et4t1t09  += carry2
2016int32323232 et4t1t013 += carry3
2017
2018uint32323232 carry0 = et4t1t01  >> 13
2019uint32323232 carry1 = et4t1t05  >> 13
2020uint32323232 carry2 = et4t1t09  >> 13
2021uint32323232 carry3 = et4t1t013 >> 13
2022
2023et4t1t01  &= mask13
2024et4t1t05  &= mask13
2025et4t1t09  &= mask13
2026et4t1t013 &= mask13
2027
2028int32323232 et4t1t02  += carry0
2029int32323232 et4t1t06  += carry1
2030int32323232 et4t1t010 += carry2
2031int32323232 et4t1t014 += carry3
2032
2033uint32323232 carry0 = et4t1t02  >> 13
2034uint32323232 carry1 = et4t1t06  >> 13
2035uint32323232 carry2 = et4t1t010 >> 13
2036uint32323232 carry3 = et4t1t014 >> 13
2037
2038et4t1t02  &= mask13
2039et4t1t06  &= mask13
2040et4t1t010 &= mask13
2041et4t1t014 &= mask13
2042
2043int32323232 et4t1t03  += carry0
2044int32323232 et4t1t07  += carry1
2045int32323232 et4t1t011 += carry2
2046int32323232 et4t1t015 += carry3
2047
# Limbs 3, 7, 11, 15 are the 12-bit limbs: shift and mask by 12 here.
2048uint32323232 carry0 = et4t1t03  >> 12
2049uint32323232 carry1 = et4t1t07  >> 12
2050uint32323232 carry2 = et4t1t011 >> 12
2051uint32323232 carry3 = et4t1t015 >> 12
2052
2053et4t1t03  &= mask12
2054et4t1t07  &= mask12
2055et4t1t011 &= mask12
2056et0aat10 = combine et4t1t00 and aacbbbda0 by selw0342
2057et4t1t015 &= mask12
2058et0aat11 = combine et4t1t01 and aacbbbda1 by selw0342
2059
# Hand the carries of limbs 3/7/11/15 to limbs 4/8/12/16; this starts pass 2.
2060int32323232 et4t1t04  += carry0
2061et0aat12 = combine et4t1t02 and aacbbbda2 by selw0342
2062int32323232 et4t1t08  += carry1
2063et0aat13 = combine et4t1t03 and aacbbbda3 by selw0342
2064int32323232 et4t1t012 += carry2
2065t4t0bbt10 = combine et4t1t00 and aacbbbda0 by selw1362
2066int32323232 et4t1t016 += carry3
2067t4t0bbt11 = combine et4t1t01 and aacbbbda1 by selw1362
2068
2069uint32323232 carry1 = et4t1t04  >> 13
2070t4t0bbt12 = combine et4t1t02 and aacbbbda2 by selw1362
2071uint32323232 carry2 = et4t1t08  >> 13
2072t4t0bbt13 = combine et4t1t03 and aacbbbda3 by selw1362
2073uint32323232 carry3 = et4t1t012 >> 13
2074uint32323232 carry4 = et4t1t016 >> 13
2075
2076et4t1t04  &= mask13
2077et4t1t08  &= mask13
2078et4t1t012 &= mask13
2079et0aat14 = combine et4t1t04 and aacbbbda4 by selw0342
2080et4t1t016 &= mask13
2081et0aat18 = combine et4t1t08 and aacbbbda8 by selw0342
2082
2083int32323232 et4t1t05  += carry1
2084et0aat112 = combine et4t1t012 and aacbbbda12 by selw0342
2085int32323232 et4t1t09  += carry2
2086et0aat116 = combine et4t1t016 and aacbbbda16 by selw0342
2087int32323232 et4t1t013 += carry3
2088t4t0bbt14 = combine et4t1t04 and aacbbbda4 by selw1362
2089int32323232 et4t1t017  += carry4
2090t4t0bbt18 = combine et4t1t08 and aacbbbda8 by selw1362
2091
2092uint32323232 carry1 = et4t1t05  >> 13
2093t4t0bbt112 = combine et4t1t012 and aacbbbda12 by selw1362
2094uint32323232 carry2 = et4t1t09  >> 13
2095t4t0bbt116 = combine et4t1t016 and aacbbbda16 by selw1362
2096uint32323232 carry3 = et4t1t013 >> 13
2097uint32323232 carry4 = et4t1t017 >> 13
2098
2099et4t1t05  &= mask13
2100et4t1t09  &= mask13
2101et4t1t013 &= mask13
2102et0aat15 = combine et4t1t05 and aacbbbda5 by selw0342
2103et4t1t017 &= mask13
2104et0aat19 = combine et4t1t09 and aacbbbda9 by selw0342
2105
2106int32323232 et4t1t06  += carry1
2107et0aat113 = combine et4t1t013 and aacbbbda13 by selw0342
2108int32323232 et4t1t010 += carry2
2109et0aat117 = combine et4t1t017 and aacbbbda17 by selw0342
2110int32323232 et4t1t014 += carry3
2111t4t0bbt15 = combine et4t1t05 and aacbbbda5 by selw1362
2112int32323232 et4t1t018 += carry4
2113t4t0bbt19 = combine et4t1t09 and aacbbbda9 by selw1362
2114
2115uint32323232 carry1 = et4t1t06  >> 13
2116t4t0bbt113 = combine et4t1t013 and aacbbbda13 by selw1362
2117uint32323232 carry2 = et4t1t010 >> 13
2118t4t0bbt117 = combine et4t1t017 and aacbbbda17 by selw1362
2119uint32323232 carry3 = et4t1t014 >> 13
2120uint32323232 carry4 = et4t1t018  >> 13
2121
2122et4t1t06  &= mask13
2123et4t1t010 &= mask13
2124et4t1t014 &= mask13
2125et0aat16 = combine et4t1t06 and aacbbbda6 by selw0342
2126et4t1t018 &= mask13
2127et0aat110 = combine et4t1t010 and aacbbbda10 by selw0342
2128
# Final carries land in limbs 7, 11, 15, 19; no further shift/mask follows in
# this section.
2129int32323232 et4t1t07  += carry1
2130et0aat114 = combine et4t1t014 and aacbbbda14 by selw0342
2131int32323232 et4t1t011 += carry2
2132et0aat118 = combine et4t1t018 and aacbbbda18 by selw0342
2133int32323232 et4t1t015 += carry3
2134t4t0bbt16 = combine et4t1t06 and aacbbbda6 by selw1362
2135int32323232 et4t1t019  += carry4
2136t4t0bbt110 = combine et4t1t010 and aacbbbda10 by selw1362
2137
2138##################################################################################
2139#########################            Z4 = E*t4           #########################
2140#########################            X5 = t0^2           #########################
2141#########################            X4 = AA*BB          #########################
2142#########################            t2 = t1^2           #########################
2143##################################################################################
2144
2145int32323232 z4x5x4t20  = (t4t0bbt10 & 0xffff) * (et0aat10 & 0xffff)
2146et0aat17 = combine et4t1t07 and aacbbbda7 by selw0342
2147int32323232 z4x5x4t21  = (t4t0bbt10 & 0xffff) * (et0aat11 & 0xffff)
2148t4t0bbt17 = combine et4t1t07 and aacbbbda7 by selw1362
2149int32323232 z4x5x4t22  = (t4t0bbt10 & 0xffff) * (et0aat12 & 0xffff)
2150et0aat111 = combine et4t1t011 and aacbbbda11 by selw0342
2151int32323232 z4x5x4t23  = (t4t0bbt10 & 0xffff) * (et0aat13 & 0xffff)
2152t4t0bbt111 = combine et4t1t011 and aacbbbda11 by selw1362
2153int32323232 z4x5x4t24  = (t4t0bbt11 & 0xffff) * (et0aat13 & 0xffff)
2154t4t0bbt114 = combine et4t1t014 and aacbbbda14 by selw1362
2155int32323232 z4x5x4t25  = (t4t0bbt12 & 0xffff) * (et0aat13 & 0xffff)
2156et0aat115 = combine et4t1t015 and aacbbbda15 by selw0342
2157int32323232 z4x5x4t26  = (t4t0bbt13 & 0xffff) * (et0aat13 & 0xffff)
2158t4t0bbt115 = combine et4t1t015 and aacbbbda15 by selw1362
2159int32323232 z4x5x4t27  = (t4t0bbt10 & 0xffff) * (et0aat17 & 0xffff)
2160t4t0bbt118 = combine et4t1t018 and aacbbbda18 by selw1362
2161int32323232 z4x5x4t21 += (t4t0bbt11 & 0xffff) * (et0aat10 & 0xffff)
2162et0aat119 = combine et4t1t019 and aacbbbda19 by selw0342
2163int32323232 z4x5x4t22 += (t4t0bbt11 & 0xffff) * (et0aat11 & 0xffff)
2164t4t0bbt119 = combine et4t1t019 and aacbbbda19 by selw1362
2165int32323232 z4x5x4t23 += (t4t0bbt11 & 0xffff) * (et0aat12 & 0xffff)
2166int32323232 z4x5x4t24 += (t4t0bbt12 & 0xffff) * (et0aat12 & 0xffff)
2167int32323232 z4x5x4t25 += (t4t0bbt13 & 0xffff) * (et0aat12 & 0xffff)
2168int32323232 z4x5x4t26 <<= 1
2169int32323232 z4x5x4t27 += (t4t0bbt11 & 0xffff) * (et0aat16 & 0xffff)
2170int32323232 z4x5x4t28  = (t4t0bbt11 & 0xffff) * (et0aat17 & 0xffff)
2171int32323232 z4x5x4t22 += (t4t0bbt12 & 0xffff) * (et0aat10 & 0xffff)
2172int32323232 z4x5x4t23 += (t4t0bbt12 & 0xffff) * (et0aat11 & 0xffff)
2173int32323232 z4x5x4t24 += (t4t0bbt13 & 0xffff) * (et0aat11 & 0xffff)
2174int32323232 z4x5x4t25 <<= 1
2175int32323232 z4x5x4t26 += (t4t0bbt10 & 0xffff) * (et0aat16 & 0xffff)
2176int32323232 z4x5x4t27 += (t4t0bbt12 & 0xffff) * (et0aat15 & 0xffff)
2177int32323232 z4x5x4t28 += (t4t0bbt12 & 0xffff) * (et0aat16 & 0xffff)
2178int32323232 z4x5x4t29  = (t4t0bbt12 & 0xffff) * (et0aat17 & 0xffff)
2179int32323232 z4x5x4t23 += (t4t0bbt13 & 0xffff) * (et0aat10 & 0xffff)
2180int32323232 z4x5x4t24 <<= 1
2181int32323232 z4x5x4t25 += (t4t0bbt10 & 0xffff) * (et0aat15 & 0xffff)
2182int32323232 z4x5x4t26 += (t4t0bbt11 & 0xffff) * (et0aat15 & 0xffff)
2183int32323232 z4x5x4t27 += (t4t0bbt13 & 0xffff) * (et0aat14 & 0xffff)
2184int32323232 z4x5x4t28 += (t4t0bbt13 & 0xffff) * (et0aat15 & 0xffff)
2185int32323232 z4x5x4t29 += (t4t0bbt13 & 0xffff) * (et0aat16 & 0xffff)
2186int32323232 z4x5x4t210  = (t4t0bbt13  & 0xffff) * (et0aat17  & 0xffff)
2187int32323232 z4x5x4t24 += (t4t0bbt10 & 0xffff) * (et0aat14 & 0xffff)
2188int32323232 z4x5x4t25 += (t4t0bbt11 & 0xffff) * (et0aat14 & 0xffff)
2189int32323232 z4x5x4t26 += (t4t0bbt12 & 0xffff) * (et0aat14 & 0xffff)
2190int32323232 z4x5x4t27 += (t4t0bbt14 & 0xffff) * (et0aat13 & 0xffff)
2191int32323232 z4x5x4t28 += (t4t0bbt15 & 0xffff) * (et0aat13 & 0xffff)
2192int32323232 z4x5x4t29 += (t4t0bbt16 & 0xffff) * (et0aat13 & 0xffff)
2193int32323232 z4x5x4t210 += (t4t0bbt17  & 0xffff) * (et0aat13  & 0xffff)
2194int32323232 z4x5x4t211  = (t4t0bbt111 & 0xffff) * (et0aat10  & 0xffff)
2195int32323232 z4x5x4t24 += (t4t0bbt14 & 0xffff) * (et0aat10 & 0xffff)
2196int32323232 z4x5x4t25 += (t4t0bbt14 & 0xffff) * (et0aat11 & 0xffff)
2197int32323232 z4x5x4t26 += (t4t0bbt14 & 0xffff) * (et0aat12 & 0xffff)
2198int32323232 z4x5x4t27 += (t4t0bbt15 & 0xffff) * (et0aat12 & 0xffff)
2199int32323232 z4x5x4t28 += (t4t0bbt16 & 0xffff) * (et0aat12 & 0xffff)
2200int32323232 z4x5x4t29 += (t4t0bbt17 & 0xffff) * (et0aat12 & 0xffff)
2201int32323232 z4x5x4t210 <<= 1
2202int32323232 z4x5x4t211 += (t4t0bbt110 & 0xffff) * (et0aat11  & 0xffff)
2203int32323232 z4x5x4t212  = (t4t0bbt11  & 0xffff) * (et0aat111 & 0xffff)
2204int32323232 z4x5x4t25 += (t4t0bbt15 & 0xffff) * (et0aat10 & 0xffff)
2205int32323232 z4x5x4t26 += (t4t0bbt15 & 0xffff) * (et0aat11 & 0xffff)
2206int32323232 z4x5x4t27 += (t4t0bbt16 & 0xffff) * (et0aat11 & 0xffff)
2207int32323232 z4x5x4t28 += (t4t0bbt17 & 0xffff) * (et0aat11 & 0xffff)
2208int32323232 z4x5x4t29 <<= 1
2209int32323232 z4x5x4t210 += (t4t0bbt10  & 0xffff) * (et0aat110 & 0xffff)
2210int32323232 z4x5x4t211 += (t4t0bbt19  & 0xffff) * (et0aat12  & 0xffff)
2211int32323232 z4x5x4t212 += (t4t0bbt12  & 0xffff) * (et0aat110 & 0xffff)
2212int32323232 z4x5x4t26 += (t4t0bbt16 & 0xffff) * (et0aat10 & 0xffff)
2213int32323232 z4x5x4t27 += (t4t0bbt17 & 0xffff) * (et0aat10 & 0xffff)
2214int32323232 z4x5x4t28 <<= 1
2215int32323232 z4x5x4t29 += (t4t0bbt10 & 0xffff) * (et0aat19 & 0xffff)
2216int32323232 z4x5x4t210 += (t4t0bbt11  & 0xffff) * (et0aat19  & 0xffff)
2217int32323232 z4x5x4t211 += (t4t0bbt18  & 0xffff) * (et0aat13  & 0xffff)
2218int32323232 z4x5x4t212 += (t4t0bbt13  & 0xffff) * (et0aat19  & 0xffff)
2219int32323232 z4x5x4t213  = (t4t0bbt12  & 0xffff) * (et0aat111 & 0xffff)
2220int32323232 z4x5x4t214  = (t4t0bbt13  & 0xffff) * (et0aat111 & 0xffff)
2221int32323232 z4x5x4t28 += (t4t0bbt10 & 0xffff) * (et0aat18 & 0xffff)
2222int32323232 z4x5x4t29 += (t4t0bbt11 & 0xffff) * (et0aat18 & 0xffff)
2223int32323232 z4x5x4t210 += (t4t0bbt12  & 0xffff) * (et0aat18  & 0xffff)
2224int32323232 z4x5x4t211 += (t4t0bbt17  & 0xffff) * (et0aat14  & 0xffff)
2225int32323232 z4x5x4t212 += (t4t0bbt15  & 0xffff) * (et0aat17  & 0xffff)
2226int32323232 z4x5x4t213 += (t4t0bbt13  & 0xffff) * (et0aat110 & 0xffff)
2227int32323232 z4x5x4t214 += (t4t0bbt17  & 0xffff) * (et0aat17  & 0xffff)
2228int32323232 z4x5x4t28 += (t4t0bbt14 & 0xffff) * (et0aat14 & 0xffff)
2229int32323232 z4x5x4t29 += (t4t0bbt14 & 0xffff) * (et0aat15 & 0xffff)
2230int32323232 z4x5x4t210 += (t4t0bbt14  & 0xffff) * (et0aat16  & 0xffff)
2231int32323232 z4x5x4t211 += (t4t0bbt16  & 0xffff) * (et0aat15  & 0xffff)
2232int32323232 z4x5x4t212 += (t4t0bbt16  & 0xffff) * (et0aat16  & 0xffff)
2233int32323232 z4x5x4t213 += (t4t0bbt16  & 0xffff) * (et0aat17  & 0xffff)
2234int32323232 z4x5x4t214 += (t4t0bbt111 & 0xffff) * (et0aat13  & 0xffff)
2235int32323232 z4x5x4t28 += (t4t0bbt18 & 0xffff) * (et0aat10 & 0xffff)
2236int32323232 z4x5x4t29 += (t4t0bbt15 & 0xffff) * (et0aat14 & 0xffff)
2237int32323232 z4x5x4t210 += (t4t0bbt15  & 0xffff) * (et0aat15  & 0xffff)
2238int32323232 z4x5x4t211 += (t4t0bbt15  & 0xffff) * (et0aat16  & 0xffff)
2239int32323232 z4x5x4t212 += (t4t0bbt17  & 0xffff) * (et0aat15  & 0xffff)
2240int32323232 z4x5x4t213 += (t4t0bbt17  & 0xffff) * (et0aat16  & 0xffff)
2241int32323232 z4x5x4t214 <<= 1
2242int32323232 z4x5x4t215  = (t4t0bbt10  & 0xffff) * (et0aat115 & 0xffff)
2243int32323232 z4x5x4t29 += (t4t0bbt18 & 0xffff) * (et0aat11 & 0xffff)
2244int32323232 z4x5x4t210 += (t4t0bbt16  & 0xffff) * (et0aat14  & 0xffff)
2245int32323232 z4x5x4t211 += (t4t0bbt14  & 0xffff) * (et0aat17  & 0xffff)
2246int32323232 z4x5x4t212 += (t4t0bbt19  & 0xffff) * (et0aat13  & 0xffff)
2247int32323232 z4x5x4t213 += (t4t0bbt110 & 0xffff) * (et0aat13  & 0xffff)
2248int32323232 z4x5x4t214 += (t4t0bbt10  & 0xffff) * (et0aat114 & 0xffff)
2249int32323232 z4x5x4t215 += (t4t0bbt11  & 0xffff) * (et0aat114 & 0xffff)
2250int32323232 z4x5x4t29 += (t4t0bbt19 & 0xffff) * (et0aat10 & 0xffff)
2251int32323232 z4x5x4t210 += (t4t0bbt18  & 0xffff) * (et0aat12  & 0xffff)
2252int32323232 z4x5x4t211 += (t4t0bbt13  & 0xffff) * (et0aat18  & 0xffff)
2253int32323232 z4x5x4t212 += (t4t0bbt110 & 0xffff) * (et0aat12  & 0xffff)
2254int32323232 z4x5x4t213 += (t4t0bbt111 & 0xffff) * (et0aat12  & 0xffff)
2255int32323232 z4x5x4t214 += (t4t0bbt11  & 0xffff) * (et0aat113 & 0xffff)
2256int32323232 z4x5x4t215 += (t4t0bbt12  & 0xffff) * (et0aat113 & 0xffff)
2257int32323232 z4x5x4t216  = (t4t0bbt11  & 0xffff) * (et0aat115 & 0xffff)
2258int32323232 z4x5x4t210 += (t4t0bbt19  & 0xffff) * (et0aat11  & 0xffff)
2259int32323232 z4x5x4t211 += (t4t0bbt12  & 0xffff) * (et0aat19  & 0xffff)
2260int32323232 z4x5x4t212 += (t4t0bbt111 & 0xffff) * (et0aat11  & 0xffff)
2261int32323232 z4x5x4t213 <<= 1
2262int32323232 z4x5x4t214 += (t4t0bbt12  & 0xffff) * (et0aat112 & 0xffff)
2263int32323232 z4x5x4t215 += (t4t0bbt13  & 0xffff) * (et0aat112 & 0xffff)
2264int32323232 z4x5x4t216 += (t4t0bbt12  & 0xffff) * (et0aat114 & 0xffff)
2265int32323232 z4x5x4t210 += (t4t0bbt110 & 0xffff) * (et0aat10  & 0xffff)
2266int32323232 z4x5x4t211 += (t4t0bbt11  & 0xffff) * (et0aat110 & 0xffff)
2267int32323232 z4x5x4t212 <<= 1
2268int32323232 z4x5x4t213 += (t4t0bbt10  & 0xffff) * (et0aat113 & 0xffff)
2269int32323232 z4x5x4t214 += (t4t0bbt14  & 0xffff) * (et0aat110 & 0xffff)
2270int32323232 z4x5x4t215 += (t4t0bbt14  & 0xffff) * (et0aat111 & 0xffff)
2271int32323232 z4x5x4t216 += (t4t0bbt13  & 0xffff) * (et0aat113 & 0xffff)
2272int32323232 z4x5x4t217  = (t4t0bbt12  & 0xffff) * (et0aat115 & 0xffff)
2273int32323232 z4x5x4t211 += (t4t0bbt10  & 0xffff) * (et0aat111 & 0xffff)
2274int32323232 z4x5x4t212 += (t4t0bbt10  & 0xffff) * (et0aat112 & 0xffff)
2275int32323232 z4x5x4t213 += (t4t0bbt11  & 0xffff) * (et0aat112 & 0xffff)
2276int32323232 z4x5x4t214 += (t4t0bbt15  & 0xffff) * (et0aat19  & 0xffff)
2277int32323232 z4x5x4t215 += (t4t0bbt15  & 0xffff) * (et0aat110 & 0xffff)
2278int32323232 z4x5x4t216 += (t4t0bbt15  & 0xffff) * (et0aat111 & 0xffff)
2279int32323232 z4x5x4t217 += (t4t0bbt13  & 0xffff) * (et0aat114 & 0xffff)
2280int32323232 z4x5x4t218  = (t4t0bbt13  & 0xffff) * (et0aat115 & 0xffff)
2281int32323232 z4x5x4t212 += (t4t0bbt14  & 0xffff) * (et0aat18  & 0xffff)
2282int32323232 z4x5x4t213 += (t4t0bbt14  & 0xffff) * (et0aat19  & 0xffff)
2283int32323232 z4x5x4t214 += (t4t0bbt16  & 0xffff) * (et0aat18  & 0xffff)
2284int32323232 z4x5x4t215 += (t4t0bbt16  & 0xffff) * (et0aat19  & 0xffff)
2285int32323232 z4x5x4t216 += (t4t0bbt16  & 0xffff) * (et0aat110 & 0xffff)
2286int32323232 z4x5x4t217 += (t4t0bbt16  & 0xffff) * (et0aat111 & 0xffff)
2287int32323232 z4x5x4t218 += (t4t0bbt17  & 0xffff) * (et0aat111 & 0xffff)
2288int32323232 z4x5x4t212 += (t4t0bbt18  & 0xffff) * (et0aat14  & 0xffff)
2289int32323232 z4x5x4t213 += (t4t0bbt15  & 0xffff) * (et0aat18  & 0xffff)
2290int32323232 z4x5x4t214 += (t4t0bbt18  & 0xffff) * (et0aat16  & 0xffff)
2291int32323232 z4x5x4t215 += (t4t0bbt17  & 0xffff) * (et0aat18  & 0xffff)
2292int32323232 z4x5x4t216 += (t4t0bbt17  & 0xffff) * (et0aat19  & 0xffff)
2293int32323232 z4x5x4t217 += (t4t0bbt17  & 0xffff) * (et0aat110 & 0xffff)
2294int32323232 z4x5x4t218 += (t4t0bbt111 & 0xffff) * (et0aat17  & 0xffff)
2295int32323232 z4x5x4t212 += (t4t0bbt112 & 0xffff) * (et0aat10  & 0xffff)
2296int32323232 z4x5x4t213 += (t4t0bbt18  & 0xffff) * (et0aat15  & 0xffff)
2297int32323232 z4x5x4t214 += (t4t0bbt19  & 0xffff) * (et0aat15  & 0xffff)
2298int32323232 z4x5x4t215 += (t4t0bbt18  & 0xffff) * (et0aat17  & 0xffff)
2299int32323232 z4x5x4t216 += (t4t0bbt19  & 0xffff) * (et0aat17  & 0xffff)
2300int32323232 z4x5x4t217 += (t4t0bbt110 & 0xffff) * (et0aat17  & 0xffff)
2301int32323232 z4x5x4t218 += (t4t0bbt115 & 0xffff) * (et0aat13  & 0xffff)
2302int32323232 z4x5x4t219  = (t4t0bbt10  & 0xffff) * (et0aat119 & 0xffff)
2303int32323232 z4x5x4t213 += (t4t0bbt19  & 0xffff) * (et0aat14  & 0xffff)
2304int32323232 z4x5x4t214 += (t4t0bbt110 & 0xffff) * (et0aat14  & 0xffff)
2305int32323232 z4x5x4t215 += (t4t0bbt19  & 0xffff) * (et0aat16  & 0xffff)
2306int32323232 z4x5x4t216 += (t4t0bbt110 & 0xffff) * (et0aat16  & 0xffff)
2307int32323232 z4x5x4t217 += (t4t0bbt111 & 0xffff) * (et0aat16  & 0xffff)
2308int32323232 z4x5x4t218 <<= 1
2309int32323232 z4x5x4t219 += (t4t0bbt11  & 0xffff) * (et0aat118 & 0xffff)
2310int32323232 z4x5x4t213 += (t4t0bbt112 & 0xffff) * (et0aat11  & 0xffff)
2311int32323232 z4x5x4t214 += (t4t0bbt112 & 0xffff) * (et0aat12  & 0xffff)
2312int32323232 z4x5x4t215 += (t4t0bbt110 & 0xffff) * (et0aat15  & 0xffff)
2313int32323232 z4x5x4t216 += (t4t0bbt111 & 0xffff) * (et0aat15  & 0xffff)
2314int32323232 z4x5x4t217 += (t4t0bbt114 & 0xffff) * (et0aat13  & 0xffff)
2315int32323232 z4x5x4t218 += (t4t0bbt10  & 0xffff) * (et0aat118 & 0xffff)
2316int32323232 z4x5x4t219 += (t4t0bbt12  & 0xffff) * (et0aat117 & 0xffff)
2317int32323232 z4x5x4t213 += (t4t0bbt113 & 0xffff) * (et0aat10  & 0xffff)
2318int32323232 z4x5x4t214 += (t4t0bbt113 & 0xffff) * (et0aat11  & 0xffff)
2319int32323232 z4x5x4t215 += (t4t0bbt111 & 0xffff) * (et0aat14  & 0xffff)
2320int32323232 z4x5x4t216 += (t4t0bbt113 & 0xffff) * (et0aat13  & 0xffff)
2321int32323232 z4x5x4t217 += (t4t0bbt115 & 0xffff) * (et0aat12  & 0xffff)
2322int32323232 z4x5x4t218 += (t4t0bbt11  & 0xffff) * (et0aat117 & 0xffff)
2323int32323232 z4x5x4t219 += (t4t0bbt13  & 0xffff) * (et0aat116 & 0xffff)
2324int32323232 z4x5x4t220  = (t4t0bbt11  & 0xffff) * (et0aat119 & 0xffff)
2325int32323232 z4x5x4t214 += (t4t0bbt114 & 0xffff) * (et0aat10  & 0xffff)
2326int32323232 z4x5x4t215 += (t4t0bbt112 & 0xffff) * (et0aat13  & 0xffff)
2327int32323232 z4x5x4t216 += (t4t0bbt114 & 0xffff) * (et0aat12  & 0xffff)
2328int32323232 z4x5x4t217 <<= 1
2329int32323232 z4x5x4t218 += (t4t0bbt12  & 0xffff) * (et0aat116 & 0xffff)
2330int32323232 z4x5x4t219 += (t4t0bbt14  & 0xffff) * (et0aat115 & 0xffff)
2331int32323232 z4x5x4t220 += (t4t0bbt12  & 0xffff) * (et0aat118 & 0xffff)
2332int32323232 z4x5x4t221  = (t4t0bbt12  & 0xffff) * (et0aat119 & 0xffff)
2333int32323232 z4x5x4t215 += (t4t0bbt113 & 0xffff) * (et0aat12  & 0xffff)
2334int32323232 z4x5x4t216 += (t4t0bbt115 & 0xffff) * (et0aat11  & 0xffff)
2335int32323232 z4x5x4t217 += (t4t0bbt10  & 0xffff) * (et0aat117 & 0xffff)
2336int32323232 z4x5x4t218 += (t4t0bbt14  & 0xffff) * (et0aat114 & 0xffff)
2337int32323232 z4x5x4t219 += (t4t0bbt15  & 0xffff) * (et0aat114 & 0xffff)
2338int32323232 z4x5x4t220 += (t4t0bbt13  & 0xffff) * (et0aat117 & 0xffff)
2339int32323232 z4x5x4t221 += (t4t0bbt13  & 0xffff) * (et0aat118 & 0xffff)
2340int32323232 z4x5x4t215 += (t4t0bbt114 & 0xffff) * (et0aat11  & 0xffff)
2341int32323232 z4x5x4t216 <<= 1
2342int32323232 z4x5x4t217 += (t4t0bbt11  & 0xffff) * (et0aat116 & 0xffff)
2343int32323232 z4x5x4t218 += (t4t0bbt15  & 0xffff) * (et0aat113 & 0xffff)
2344int32323232 z4x5x4t219 += (t4t0bbt16  & 0xffff) * (et0aat113 & 0xffff)
2345int32323232 z4x5x4t220 += (t4t0bbt15  & 0xffff) * (et0aat115 & 0xffff)
2346int32323232 z4x5x4t221 += (t4t0bbt16  & 0xffff) * (et0aat115 & 0xffff)
2347int32323232 z4x5x4t215 += (t4t0bbt115 & 0xffff) * (et0aat10  & 0xffff)
2348int32323232 z4x5x4t216 += (t4t0bbt10  & 0xffff) * (et0aat116 & 0xffff)
2349int32323232 z4x5x4t217 += (t4t0bbt14  & 0xffff) * (et0aat113 & 0xffff)
2350int32323232 z4x5x4t218 += (t4t0bbt16  & 0xffff) * (et0aat112 & 0xffff)
2351int32323232 z4x5x4t219 += (t4t0bbt17  & 0xffff) * (et0aat112 & 0xffff)
2352int32323232 z4x5x4t220 += (t4t0bbt16  & 0xffff) * (et0aat114 & 0xffff)
2353int32323232 z4x5x4t221 += (t4t0bbt17  & 0xffff) * (et0aat114 & 0xffff)
2354int32323232 z4x5x4t222  = (t4t0bbt13  & 0xffff) * (et0aat119 & 0xffff)
2355int32323232 z4x5x4t216 += (t4t0bbt14  & 0xffff) * (et0aat112 & 0xffff)
2356int32323232 z4x5x4t217 += (t4t0bbt15  & 0xffff) * (et0aat112 & 0xffff)
2357int32323232 z4x5x4t218 += (t4t0bbt18  & 0xffff) * (et0aat110 & 0xffff)
2358int32323232 z4x5x4t219 += (t4t0bbt18  & 0xffff) * (et0aat111 & 0xffff)
2359int32323232 z4x5x4t220 += (t4t0bbt17  & 0xffff) * (et0aat113 & 0xffff)
2360int32323232 z4x5x4t221 += (t4t0bbt110 & 0xffff) * (et0aat111 & 0xffff)
2361int32323232 z4x5x4t222 += (t4t0bbt17  & 0xffff) * (et0aat115 & 0xffff)
2362int32323232 z4x5x4t216 += (t4t0bbt18  & 0xffff) * (et0aat18  & 0xffff)
2363int32323232 z4x5x4t217 += (t4t0bbt18  & 0xffff) * (et0aat19  & 0xffff)
2364int32323232 z4x5x4t218 += (t4t0bbt19  & 0xffff) * (et0aat19  & 0xffff)
2365int32323232 z4x5x4t219 += (t4t0bbt19  & 0xffff) * (et0aat110 & 0xffff)
2366int32323232 z4x5x4t220 += (t4t0bbt19  & 0xffff) * (et0aat111 & 0xffff)
2367int32323232 z4x5x4t221 += (t4t0bbt111 & 0xffff) * (et0aat110 & 0xffff)
2368int32323232 z4x5x4t222 += (t4t0bbt111 & 0xffff) * (et0aat111 & 0xffff)
2369int32323232 z4x5x4t216 += (t4t0bbt112 & 0xffff) * (et0aat14  & 0xffff)
2370int32323232 z4x5x4t217 += (t4t0bbt19  & 0xffff) * (et0aat18  & 0xffff)
2371int32323232 z4x5x4t218 += (t4t0bbt110 & 0xffff) * (et0aat18  & 0xffff)
2372int32323232 z4x5x4t219 += (t4t0bbt110 & 0xffff) * (et0aat19  & 0xffff)
2373int32323232 z4x5x4t220 += (t4t0bbt110 & 0xffff) * (et0aat110 & 0xffff)
2374int32323232 z4x5x4t221 += (t4t0bbt114 & 0xffff) * (et0aat17  & 0xffff)
2375int32323232 z4x5x4t222 += (t4t0bbt115 & 0xffff) * (et0aat17  & 0xffff)
2376int32323232 z4x5x4t216 += (t4t0bbt116 & 0xffff) * (et0aat10  & 0xffff)
2377int32323232 z4x5x4t217 += (t4t0bbt112 & 0xffff) * (et0aat15  & 0xffff)
2378int32323232 z4x5x4t218 += (t4t0bbt112 & 0xffff) * (et0aat16  & 0xffff)
2379int32323232 z4x5x4t219 += (t4t0bbt111 & 0xffff) * (et0aat18  & 0xffff)
2380int32323232 z4x5x4t220 += (t4t0bbt111 & 0xffff) * (et0aat19  & 0xffff)
2381int32323232 z4x5x4t221 += (t4t0bbt115 & 0xffff) * (et0aat16  & 0xffff)
2382int32323232 z4x5x4t222 += (t4t0bbt119 & 0xffff) * (et0aat13  & 0xffff)
2383int32323232 z4x5x4t223  = (t4t0bbt14  & 0xffff) * (et0aat119 & 0xffff)
2384int32323232 z4x5x4t217 += (t4t0bbt113 & 0xffff) * (et0aat14  & 0xffff)
2385int32323232 z4x5x4t218 += (t4t0bbt113 & 0xffff) * (et0aat15  & 0xffff)
2386int32323232 z4x5x4t219 += (t4t0bbt112 & 0xffff) * (et0aat17  & 0xffff)
2387int32323232 z4x5x4t220 += (t4t0bbt113 & 0xffff) * (et0aat17  & 0xffff)
2388int32323232 z4x5x4t221 += (t4t0bbt118 & 0xffff) * (et0aat13  & 0xffff)
2389int32323232 z4x5x4t222 <<= 1
2390int32323232 z4x5x4t223 += (t4t0bbt15  & 0xffff) * (et0aat118 & 0xffff)
2391int32323232 z4x5x4t217 += (t4t0bbt116 & 0xffff) * (et0aat11  & 0xffff)
2392int32323232 z4x5x4t218 += (t4t0bbt114 & 0xffff) * (et0aat14  & 0xffff)
2393int32323232 z4x5x4t219 += (t4t0bbt113 & 0xffff) * (et0aat16  & 0xffff)
2394int32323232 z4x5x4t220 += (t4t0bbt114 & 0xffff) * (et0aat16  & 0xffff)
2395int32323232 z4x5x4t221 += (t4t0bbt119 & 0xffff) * (et0aat12  & 0xffff)
2396int32323232 z4x5x4t222 += (t4t0bbt14  & 0xffff) * (et0aat118 & 0xffff)
2397int32323232 z4x5x4t223 += (t4t0bbt16  & 0xffff) * (et0aat117 & 0xffff)
2398int32323232 z4x5x4t217 += (t4t0bbt117 & 0xffff) * (et0aat10  & 0xffff)
2399int32323232 z4x5x4t218 += (t4t0bbt116 & 0xffff) * (et0aat12  & 0xffff)
2400int32323232 z4x5x4t219 += (t4t0bbt114 & 0xffff) * (et0aat15  & 0xffff)
2401int32323232 z4x5x4t220 += (t4t0bbt115 & 0xffff) * (et0aat15  & 0xffff)
2402int32323232 z4x5x4t221 <<= 1
2403int32323232 z4x5x4t222 += (t4t0bbt15  & 0xffff) * (et0aat117 & 0xffff)
2404int32323232 z4x5x4t223 += (t4t0bbt17  & 0xffff) * (et0aat116 & 0xffff)
2405int32323232 z4x5x4t224  = (t4t0bbt15  & 0xffff) * (et0aat119  & 0xffff)
2406int32323232 z4x5x4t218 += (t4t0bbt117 & 0xffff) * (et0aat11  & 0xffff)
2407int32323232 z4x5x4t219 += (t4t0bbt115 & 0xffff) * (et0aat14  & 0xffff)
2408int32323232 z4x5x4t220 += (t4t0bbt117 & 0xffff) * (et0aat13  & 0xffff)
2409int32323232 z4x5x4t221 += (t4t0bbt14  & 0xffff) * (et0aat117  & 0xffff)
2410int32323232 z4x5x4t222 += (t4t0bbt16  & 0xffff) * (et0aat116 & 0xffff)
2411int32323232 z4x5x4t223 += (t4t0bbt18  & 0xffff) * (et0aat115 & 0xffff)
2412int32323232 z4x5x4t224 += (t4t0bbt16  & 0xffff) * (et0aat118  & 0xffff)
2413int32323232 z4x5x4t218 += (t4t0bbt118 & 0xffff) * (et0aat10  & 0xffff)
2414int32323232 z4x5x4t219 += (t4t0bbt116 & 0xffff) * (et0aat13  & 0xffff)
2415int32323232 z4x5x4t220 += (t4t0bbt118 & 0xffff) * (et0aat12  & 0xffff)
2416int32323232 z4x5x4t221 += (t4t0bbt15  & 0xffff) * (et0aat116  & 0xffff)
2417int32323232 z4x5x4t222 += (t4t0bbt18  & 0xffff) * (et0aat114 & 0xffff)
2418int32323232 z4x5x4t223 += (t4t0bbt19  & 0xffff) * (et0aat114 & 0xffff)
2419int32323232 z4x5x4t224 += (t4t0bbt17  & 0xffff) * (et0aat117  & 0xffff)
2420int32323232 z4x5x4t225  = (t4t0bbt16  & 0xffff) * (et0aat119 & 0xffff)
2421int32323232 z4x5x4t219 += (t4t0bbt117 & 0xffff) * (et0aat12  & 0xffff)
2422int32323232 z4x5x4t220 += (t4t0bbt119 & 0xffff) * (et0aat11  & 0xffff)
2423int32323232 z4x5x4t221 += (t4t0bbt18  & 0xffff) * (et0aat113  & 0xffff)
2424int32323232 z4x5x4t222 += (t4t0bbt19  & 0xffff) * (et0aat113 & 0xffff)
2425int32323232 z4x5x4t223 += (t4t0bbt110 & 0xffff) * (et0aat113 & 0xffff)
2426int32323232 z4x5x4t224 += (t4t0bbt19  & 0xffff) * (et0aat115  & 0xffff)
2427int32323232 z4x5x4t225 += (t4t0bbt17  & 0xffff) * (et0aat118 & 0xffff)
2428int32323232 z4x5x4t219 += (t4t0bbt118 & 0xffff) * (et0aat11  & 0xffff)
2429int32323232 z4x5x4t220 <<= 1
2430int32323232 z4x5x4t221 += (t4t0bbt19  & 0xffff) * (et0aat112  & 0xffff)
2431int32323232 z4x5x4t222 += (t4t0bbt110 & 0xffff) * (et0aat112 & 0xffff)
2432int32323232 z4x5x4t223 += (t4t0bbt111 & 0xffff) * (et0aat112 & 0xffff)
2433int32323232 z4x5x4t224 += (t4t0bbt110 & 0xffff) * (et0aat114  & 0xffff)
2434int32323232 z4x5x4t225 += (t4t0bbt110 & 0xffff) * (et0aat115 & 0xffff)
2435int32323232 z4x5x4t219 += (t4t0bbt119 & 0xffff) * (et0aat10  & 0xffff)
2436int32323232 z4x5x4t220 += (t4t0bbt14  & 0xffff) * (et0aat116 & 0xffff)
2437int32323232 z4x5x4t221 += (t4t0bbt112 & 0xffff) * (et0aat19  & 0xffff)
2438int32323232 z4x5x4t222 += (t4t0bbt112 & 0xffff) * (et0aat110 & 0xffff)
2439int32323232 z4x5x4t223 += (t4t0bbt112 & 0xffff) * (et0aat111 & 0xffff)
2440int32323232 z4x5x4t224 += (t4t0bbt111 & 0xffff) * (et0aat113  & 0xffff)
2441int32323232 z4x5x4t225 += (t4t0bbt111 & 0xffff) * (et0aat114 & 0xffff)
2442int32323232 z4x5x4t226  = (t4t0bbt17  & 0xffff) * (et0aat119 & 0xffff)
2443int32323232 z4x5x4t220 += (t4t0bbt18  & 0xffff) * (et0aat112 & 0xffff)
2444int32323232 z4x5x4t221 += (t4t0bbt113 & 0xffff) * (et0aat18  & 0xffff)
2445int32323232 z4x5x4t222 += (t4t0bbt113 & 0xffff) * (et0aat19  & 0xffff)
2446int32323232 z4x5x4t223 += (t4t0bbt113 & 0xffff) * (et0aat110 & 0xffff)
2447int32323232 z4x5x4t224 += (t4t0bbt113 & 0xffff) * (et0aat111  & 0xffff)
2448int32323232 z4x5x4t225 += (t4t0bbt114 & 0xffff) * (et0aat111 & 0xffff)
2449int32323232 z4x5x4t226 += (t4t0bbt111 & 0xffff) * (et0aat115 & 0xffff)
2450int32323232 z4x5x4t220 += (t4t0bbt112 & 0xffff) * (et0aat18  & 0xffff)
2451int32323232 z4x5x4t221 += (t4t0bbt116 & 0xffff) * (et0aat15  & 0xffff)
2452int32323232 z4x5x4t222 += (t4t0bbt114 & 0xffff) * (et0aat18  & 0xffff)
2453int32323232 z4x5x4t223 += (t4t0bbt114 & 0xffff) * (et0aat19  & 0xffff)
2454int32323232 z4x5x4t224 += (t4t0bbt114 & 0xffff) * (et0aat110  & 0xffff)
2455int32323232 z4x5x4t225 += (t4t0bbt115 & 0xffff) * (et0aat110 & 0xffff)
2456int32323232 z4x5x4t226 += (t4t0bbt115 & 0xffff) * (et0aat111 & 0xffff)
2457int32323232 z4x5x4t220 += (t4t0bbt116 & 0xffff) * (et0aat14  & 0xffff)
2458int32323232 z4x5x4t221 += (t4t0bbt117 & 0xffff) * (et0aat14  & 0xffff)
2459int32323232 z4x5x4t222 += (t4t0bbt116 & 0xffff) * (et0aat16  & 0xffff)
2460int32323232 z4x5x4t223 += (t4t0bbt115 & 0xffff) * (et0aat18  & 0xffff)
2461int32323232 z4x5x4t224 += (t4t0bbt115 & 0xffff) * (et0aat19  & 0xffff)
2462int32323232 z4x5x4t225 += (t4t0bbt118 & 0xffff) * (et0aat17  & 0xffff)
2463int32323232 z4x5x4t226 += (t4t0bbt119 & 0xffff) * (et0aat17  & 0xffff)
2464int32323232 z4x5x4t227  = (t4t0bbt18  & 0xffff) * (et0aat119 & 0xffff)
2465int32323232 z4x5x4t228  = (t4t0bbt19  & 0xffff) * (et0aat119 & 0xffff)
2466int32323232 z4x5x4t222 += (t4t0bbt117 & 0xffff) * (et0aat15  & 0xffff)
2467int32323232 z4x5x4t223 += (t4t0bbt116 & 0xffff) * (et0aat17  & 0xffff)
2468int32323232 z4x5x4t224 += (t4t0bbt117 & 0xffff) * (et0aat17  & 0xffff)
2469int32323232 z4x5x4t225 += (t4t0bbt119 & 0xffff) * (et0aat16  & 0xffff)
2470int32323232 z4x5x4t226 <<= 1
2471int32323232 z4x5x4t227 += (t4t0bbt19  & 0xffff) * (et0aat118 & 0xffff)
2472int32323232 z4x5x4t228 += (t4t0bbt110 & 0xffff) * (et0aat118 & 0xffff)
2473int32323232 z4x5x4t222 += (t4t0bbt118 & 0xffff) * (et0aat14  & 0xffff)
2474int32323232 z4x5x4t223 += (t4t0bbt117 & 0xffff) * (et0aat16  & 0xffff)
2475int32323232 z4x5x4t224 += (t4t0bbt118 & 0xffff) * (et0aat16  & 0xffff)
2476int32323232 z4x5x4t225 <<= 1
2477int32323232 z4x5x4t226 += (t4t0bbt18  & 0xffff) * (et0aat118 & 0xffff)
2478int32323232 z4x5x4t227 += (t4t0bbt110 & 0xffff) * (et0aat117 & 0xffff)
2479int32323232 z4x5x4t228 += (t4t0bbt111 & 0xffff) * (et0aat117 & 0xffff)
2480int32323232 z4x5x4t229  = (t4t0bbt110 & 0xffff) * (et0aat119 & 0xffff)
2481int32323232 z4x5x4t223 += (t4t0bbt118 & 0xffff) * (et0aat15  & 0xffff)
2482int32323232 z4x5x4t224 += (t4t0bbt119 & 0xffff) * (et0aat15  & 0xffff)
2483int32323232 z4x5x4t225 += (t4t0bbt18  & 0xffff) * (et0aat117 & 0xffff)
2484int32323232 z4x5x4t226 += (t4t0bbt19  & 0xffff) * (et0aat117 & 0xffff)
2485int32323232 z4x5x4t227 += (t4t0bbt111 & 0xffff) * (et0aat116 & 0xffff)
2486int32323232 z4x5x4t228 += (t4t0bbt113 & 0xffff) * (et0aat115 & 0xffff)
2487int32323232 z4x5x4t229 += (t4t0bbt111 & 0xffff) * (et0aat118 & 0xffff)
2488int32323232 z4x5x4t223 += (t4t0bbt119 & 0xffff) * (et0aat14  & 0xffff)
2489int32323232 z4x5x4t224 <<= 1
2490int32323232 z4x5x4t225 += (t4t0bbt19  & 0xffff) * (et0aat116 & 0xffff)
2491int32323232 z4x5x4t226 += (t4t0bbt110 & 0xffff) * (et0aat116 & 0xffff)
2492int32323232 z4x5x4t227 += (t4t0bbt112 & 0xffff) * (et0aat115 & 0xffff)
2493int32323232 z4x5x4t228 += (t4t0bbt114 & 0xffff) * (et0aat114 & 0xffff)
2494int32323232 z4x5x4t229 += (t4t0bbt114 & 0xffff) * (et0aat115 & 0xffff)
2495int32323232 z4x5x4t230  = (t4t0bbt111 & 0xffff) * (et0aat119 & 0xffff)
2496int32323232 z4x5x4t224 += (t4t0bbt18  & 0xffff) * (et0aat116 & 0xffff)
2497int32323232 z4x5x4t225 += (t4t0bbt112 & 0xffff) * (et0aat113 & 0xffff)
2498int32323232 z4x5x4t226 += (t4t0bbt112 & 0xffff) * (et0aat114 & 0xffff)
2499int32323232 z4x5x4t227 += (t4t0bbt113 & 0xffff) * (et0aat114 & 0xffff)
2500int32323232 z4x5x4t228 += (t4t0bbt115 & 0xffff) * (et0aat113 & 0xffff)
2501int32323232 z4x5x4t229 += (t4t0bbt115 & 0xffff) * (et0aat114 & 0xffff)
2502int32323232 z4x5x4t230 += (t4t0bbt115 & 0xffff) * (et0aat115 & 0xffff)
2503int32323232 z4x5x4t224 += (t4t0bbt112 & 0xffff) * (et0aat112 & 0xffff)
2504int32323232 z4x5x4t225 += (t4t0bbt113 & 0xffff) * (et0aat112 & 0xffff)
2505int32323232 z4x5x4t226 += (t4t0bbt113 & 0xffff) * (et0aat113 & 0xffff)
2506int32323232 z4x5x4t227 += (t4t0bbt114 & 0xffff) * (et0aat113 & 0xffff)
2507int32323232 z4x5x4t228 += (t4t0bbt117 & 0xffff) * (et0aat111 & 0xffff)
2508int32323232 z4x5x4t229 += (t4t0bbt118 & 0xffff) * (et0aat111 & 0xffff)
2509int32323232 z4x5x4t230 += (t4t0bbt119 & 0xffff) * (et0aat111 & 0xffff)
2510int32323232 z4x5x4t224 += (t4t0bbt116 & 0xffff) * (et0aat18  & 0xffff)
2511int32323232 z4x5x4t225 += (t4t0bbt116 & 0xffff) * (et0aat19  & 0xffff)
2512int32323232 z4x5x4t226 += (t4t0bbt114 & 0xffff) * (et0aat112 & 0xffff)
2513int32323232 z4x5x4t227 += (t4t0bbt115 & 0xffff) * (et0aat112 & 0xffff)
2514int32323232 z4x5x4t228 += (t4t0bbt118 & 0xffff) * (et0aat110 & 0xffff)
2515int32323232 z4x5x4t229 += (t4t0bbt119 & 0xffff) * (et0aat110 & 0xffff)
2516int32323232 z4x5x4t230 <<= 1
2517int32323232 z4x5x4t231  = (t4t0bbt112 & 0xffff) * (et0aat119 & 0xffff)
2518int32323232 z4x5x4t225 += (t4t0bbt117 & 0xffff) * (et0aat18  & 0xffff)
2519int32323232 z4x5x4t226 += (t4t0bbt116 & 0xffff) * (et0aat110 & 0xffff)
2520int32323232 z4x5x4t227 += (t4t0bbt116 & 0xffff) * (et0aat111 & 0xffff)
2521int32323232 z4x5x4t228 += (t4t0bbt119 & 0xffff) * (et0aat19  & 0xffff)
2522int32323232 z4x5x4t229 <<= 1
2523int32323232 z4x5x4t230 += (t4t0bbt112 & 0xffff) * (et0aat118 & 0xffff)
2524int32323232 z4x5x4t231 += (t4t0bbt113 & 0xffff) * (et0aat118 & 0xffff)
2525int32323232 z4x5x4t232  = (t4t0bbt113 & 0xffff) * (et0aat119 & 0xffff)
2526int32323232 z4x5x4t226 += (t4t0bbt117 & 0xffff) * (et0aat19  & 0xffff)
2527int32323232 z4x5x4t227 += (t4t0bbt117 & 0xffff) * (et0aat110 & 0xffff)
2528int32323232 z4x5x4t228 <<= 1
2529int32323232 z4x5x4t229 += (t4t0bbt112 & 0xffff) * (et0aat117 & 0xffff)
2530int32323232 z4x5x4t230 += (t4t0bbt113 & 0xffff) * (et0aat117 & 0xffff)
2531int32323232 z4x5x4t231 += (t4t0bbt114 & 0xffff) * (et0aat117 & 0xffff)
2532int32323232 z4x5x4t232 += (t4t0bbt114 & 0xffff) * (et0aat118 & 0xffff)
2533int32323232 z4x5x4t226 += (t4t0bbt118 & 0xffff) * (et0aat18  & 0xffff)
2534int32323232 z4x5x4t227 += (t4t0bbt118 & 0xffff) * (et0aat19  & 0xffff)
2535int32323232 z4x5x4t228 += (t4t0bbt112 & 0xffff) * (et0aat116 & 0xffff)
2536int32323232 z4x5x4t229 += (t4t0bbt113 & 0xffff) * (et0aat116 & 0xffff)
2537int32323232 z4x5x4t230 += (t4t0bbt114 & 0xffff) * (et0aat116 & 0xffff)
2538int32323232 z4x5x4t231 += (t4t0bbt115 & 0xffff) * (et0aat116 & 0xffff)
2539int32323232 z4x5x4t232 += (t4t0bbt115 & 0xffff) * (et0aat117 & 0xffff)
2540int32323232 z4x5x4t233  = (t4t0bbt114 & 0xffff) * (et0aat119 & 0xffff)
2541int32323232 z4x5x4t227 += (t4t0bbt119 & 0xffff) * (et0aat18  & 0xffff)
2542int32323232 z4x5x4t228 += (t4t0bbt116 & 0xffff) * (et0aat112 & 0xffff)
2543int32323232 z4x5x4t229 += (t4t0bbt116 & 0xffff) * (et0aat113 & 0xffff)
2544int32323232 z4x5x4t230 += (t4t0bbt116 & 0xffff) * (et0aat114 & 0xffff)
2545int32323232 z4x5x4t231 += (t4t0bbt116 & 0xffff) * (et0aat115 & 0xffff)
2546int32323232 z4x5x4t232 += (t4t0bbt117 & 0xffff) * (et0aat115 & 0xffff)
2547int32323232 z4x5x4t233 += (t4t0bbt115 & 0xffff) * (et0aat118 & 0xffff)
2548int32323232 z4x5x4t234 = (t4t0bbt115 & 0xffff) * (et0aat119 & 0xffff)
2549int32323232 z4x5x4t235 = (t4t0bbt116 & 0xffff) * (et0aat119 & 0xffff)
2550int32323232 z4x5x4t229 += (t4t0bbt117 & 0xffff) * (et0aat112 & 0xffff)
2551int32323232 z4x5x4t230 += (t4t0bbt117 & 0xffff) * (et0aat113 & 0xffff)
2552int32323232 z4x5x4t231 += (t4t0bbt117 & 0xffff) * (et0aat114 & 0xffff)
2553int32323232 z4x5x4t232 += (t4t0bbt118 & 0xffff) * (et0aat114 & 0xffff)
2554int32323232 z4x5x4t233 += (t4t0bbt118 & 0xffff) * (et0aat115 & 0xffff)
2555int32323232 z4x5x4t234 += (t4t0bbt119 & 0xffff) * (et0aat115 & 0xffff)
2556int32323232 z4x5x4t235 += (t4t0bbt117 & 0xffff) * (et0aat118 & 0xffff)
2557int32323232 z4x5x4t236 = (t4t0bbt117 & 0xffff) * (et0aat119 & 0xffff)
2558int32323232 z4x5x4t230 += (t4t0bbt118 & 0xffff) * (et0aat112 & 0xffff)
2559int32323232 z4x5x4t231 += (t4t0bbt118 & 0xffff) * (et0aat113 & 0xffff)
2560int32323232 z4x5x4t232 += (t4t0bbt119 & 0xffff) * (et0aat113 & 0xffff)
2561int32323232 z4x5x4t233 += (t4t0bbt119 & 0xffff) * (et0aat114 & 0xffff)
2562int32323232 z4x5x4t234 <<= 1
2563int32323232 z4x5x4t235 += (t4t0bbt118 & 0xffff) * (et0aat117 & 0xffff)
2564int32323232 z4x5x4t236 += (t4t0bbt118 & 0xffff) * (et0aat118 & 0xffff)
2565int32323232 z4x5x4t237 = (t4t0bbt118 & 0xffff) * (et0aat119 & 0xffff)
2566int32323232 z4x5x4t231 += (t4t0bbt119 & 0xffff) * (et0aat112 & 0xffff)
2567int32323232 z4x5x4t232 <<= 1
2568int32323232 z4x5x4t233 <<= 1
2569int32323232 z4x5x4t234 += (t4t0bbt116 & 0xffff) * (et0aat118 & 0xffff)
2570int32323232 z4x5x4t235 += (t4t0bbt119 & 0xffff) * (et0aat116 & 0xffff)
2571int32323232 z4x5x4t236 += (t4t0bbt119 & 0xffff) * (et0aat117 & 0xffff)
2572int32323232 z4x5x4t237 += (t4t0bbt119 & 0xffff) * (et0aat118 & 0xffff)
2573int32323232 z4x5x4t238 = (t4t0bbt119 & 0xffff) * (et0aat119 & 0xffff)
2574int32323232 z4x5x4t232 += (t4t0bbt116 & 0xffff) * (et0aat116 & 0xffff)
2575int32323232 z4x5x4t233 += (t4t0bbt116 & 0xffff) * (et0aat117 & 0xffff)
2576int32323232 z4x5x4t234 += (t4t0bbt117 & 0xffff) * (et0aat117 & 0xffff)
2577int32323232 z4x5x4t236 <<= 1
2578int32323232 z4x5x4t237 <<= 1
2579int32323232 z4x5x4t238 <<= 1
2580int32323232 z4x5x4t233 += (t4t0bbt117 & 0xffff) * (et0aat116 & 0xffff)
2581int32323232 z4x5x4t234 += (t4t0bbt118 & 0xffff) * (et0aat116 & 0xffff)
2582
2583
2584## Reduction
# Carry propagation over the upper product words z4x5x4t220..z4x5x4t238,
# done before those words are folded into the lower words further down.
#
# Four carry chains are advanced in parallel, one word per step. A step
#   1. shifts the current word right to obtain its carry,
#   2. adds that carry to the next higher word,
#   3. masks the current word.
# Words whose index is 3 mod 4 (23, 27, 31, 35) are shifted by 12 and masked
# with mask12; all other words are shifted by 13 and masked with mask13.
# NOTE(review): mask12 / mask13 are defined outside this section; presumably
# they keep the low 12 / 13 bits of every 32-bit lane -- confirm.
2585
# Step 1: words 20, 24, 28, 32 -> 21, 25, 29, 33.
2586uint32323232 carry0 = z4x5x4t220 >> 13
2587uint32323232 carry1 = z4x5x4t224 >> 13
2588uint32323232 carry2 = z4x5x4t228 >> 13
2589uint32323232 carry3 = z4x5x4t232 >> 13
2590
2591int32323232 z4x5x4t221 += carry0
2592z4x5x4t220 &= mask13
2593int32323232 z4x5x4t225 += carry1
2594z4x5x4t224 &= mask13
2595int32323232 z4x5x4t229 += carry2
2596z4x5x4t228 &= mask13
2597int32323232 z4x5x4t233 += carry3
2598z4x5x4t232 &= mask13
2599
# Step 2: words 21, 25, 29, 33 -> 22, 26, 30, 34.
2600uint32323232 carry0 = z4x5x4t221 >> 13
2601uint32323232 carry1 = z4x5x4t225 >> 13
2602uint32323232 carry2 = z4x5x4t229 >> 13
2603uint32323232 carry3 = z4x5x4t233 >> 13
2604
2605int32323232 z4x5x4t222 += carry0
2606z4x5x4t221 &= mask13
2607int32323232 z4x5x4t226 += carry1
2608z4x5x4t225 &= mask13
2609int32323232 z4x5x4t230 += carry2
2610z4x5x4t229 &= mask13
2611int32323232 z4x5x4t234 += carry3
2612z4x5x4t233 &= mask13
2613
# Step 3: words 22, 26, 30, 34 -> 23, 27, 31, 35.
2614uint32323232 carry0 = z4x5x4t222 >> 13
2615uint32323232 carry1 = z4x5x4t226 >> 13
2616uint32323232 carry2 = z4x5x4t230 >> 13
2617uint32323232 carry3 = z4x5x4t234 >> 13
2618
2619int32323232 z4x5x4t223 += carry0
2620z4x5x4t222 &= mask13
2621int32323232 z4x5x4t227 += carry1
2622z4x5x4t226 &= mask13
2623int32323232 z4x5x4t231 += carry2
2624z4x5x4t230 &= mask13
2625int32323232 z4x5x4t235 += carry3
2626z4x5x4t234 &= mask13
2627
# Step 4: words 23, 27, 31, 35 -> 24, 28, 32, 36 (12-bit shift and mask12).
2628uint32323232 carry0 = z4x5x4t223 >> 12
2629uint32323232 carry1 = z4x5x4t227 >> 12
2630uint32323232 carry2 = z4x5x4t231 >> 12
2631uint32323232 carry3 = z4x5x4t235 >> 12
2632
2633int32323232 z4x5x4t224 += carry0
2634z4x5x4t223 &= mask12
2635int32323232 z4x5x4t228 += carry1
2636z4x5x4t227 &= mask12
2637int32323232 z4x5x4t232 += carry2
2638z4x5x4t231 &= mask12
2639int32323232 z4x5x4t236 += carry3
2640z4x5x4t235 &= mask12
2641
# Second pass. The chain that started at word 20 ended at word 23 above, so
# from here on the carries are named carry1..carry4 and the chains begin at
# words 24, 28, 32 and 36.
# Step 5: words 24, 28, 32, 36 -> 25, 29, 33, 37.
2642uint32323232 carry1 = z4x5x4t224 >> 13
2643uint32323232 carry2 = z4x5x4t228 >> 13
2644uint32323232 carry3 = z4x5x4t232 >> 13
2645uint32323232 carry4 = z4x5x4t236 >> 13
2646
2647int32323232 z4x5x4t225 += carry1
2648z4x5x4t224 &= mask13
2649int32323232 z4x5x4t229 += carry2
2650z4x5x4t228 &= mask13
2651int32323232 z4x5x4t233 += carry3
2652z4x5x4t232 &= mask13
2653int32323232 z4x5x4t237 += carry4
2654z4x5x4t236 &= mask13
2655
# Step 6: words 25, 29, 33, 37 -> 26, 30, 34, 38.
2656uint32323232 carry1 = z4x5x4t225 >> 13
2657uint32323232 carry2 = z4x5x4t229 >> 13
2658uint32323232 carry3 = z4x5x4t233 >> 13
2659uint32323232 carry4 = z4x5x4t237 >> 13
2660
2661int32323232 z4x5x4t226 += carry1
2662z4x5x4t225 &= mask13
2663int32323232 z4x5x4t230 += carry2
2664z4x5x4t229 &= mask13
2665int32323232 z4x5x4t234 += carry3
2666z4x5x4t233 &= mask13
2667int32323232 z4x5x4t238 += carry4
2668z4x5x4t237 &= mask13
2669
# Step 7: words 26, 30, 34 -> 27, 31, 35. The carry out of word 38 is not
# added to an existing word; it is stored as the new word z4x5x4t239.
2670uint32323232 carry1 = z4x5x4t226 >> 13
2671uint32323232 carry2 = z4x5x4t230 >> 13
2672uint32323232 carry3 = z4x5x4t234 >> 13
2673uint32323232 z4x5x4t239 = z4x5x4t238 >> 13
2674
2675int32323232 z4x5x4t227 += carry1
2676z4x5x4t226 &= mask13
2677int32323232 z4x5x4t231 += carry2
2678z4x5x4t230 &= mask13
2679int32323232 z4x5x4t235 += carry3
2680z4x5x4t234 &= mask13
2681
2682
# Step 8: words 27, 31, 35 -> 28, 32, 36 (12-bit shift and mask12). The mask
# of word 38, whose carry was already taken in step 7, is interleaved here.
# Words 28, 32 and 36 receive a carry in this step and are not carried or
# masked again within this section.
2683uint32323232 carry1 = z4x5x4t227 >> 12
2684z4x5x4t238 &= mask13
2685uint32323232 carry2 = z4x5x4t231 >> 12
2686uint32323232 carry3 = z4x5x4t235 >> 12
2687
2688int32323232 z4x5x4t228 += carry1
2689z4x5x4t227 &= mask12
2690int32323232 z4x5x4t232 += carry2
2691z4x5x4t231 &= mask12
2692int32323232 z4x5x4t236 += carry3
2693z4x5x4t235 &= mask12
2694
# Fold the upper words into the lower ones:
#   z4x5x4t2<k> += z4x5x4t2<20+k> * vec19   for k = 0..19.
# Only the low 16 bits of each lane of both operands enter the product
# (the "& 0xffff" on either side).
# NOTE(review): vec19 is defined outside this section; presumably it holds
# the constant 19 in every lane, which would match a reduction modulo
# 2^255 - 19 -- confirm.
2695int32323232 z4x5x4t20  += (z4x5x4t220 & 0xffff) * (vec19 & 0xffff)
2696int32323232 z4x5x4t21  += (z4x5x4t221 & 0xffff) * (vec19 & 0xffff)
2697int32323232 z4x5x4t22  += (z4x5x4t222 & 0xffff) * (vec19 & 0xffff)
2698int32323232 z4x5x4t23  += (z4x5x4t223 & 0xffff) * (vec19 & 0xffff)
2699int32323232 z4x5x4t24  += (z4x5x4t224 & 0xffff) * (vec19 & 0xffff)
2700int32323232 z4x5x4t25  += (z4x5x4t225 & 0xffff) * (vec19 & 0xffff)
2701int32323232 z4x5x4t26  += (z4x5x4t226 & 0xffff) * (vec19 & 0xffff)
2702int32323232 z4x5x4t27  += (z4x5x4t227 & 0xffff) * (vec19 & 0xffff)
2703int32323232 z4x5x4t28  += (z4x5x4t228 & 0xffff) * (vec19 & 0xffff)
2704int32323232 z4x5x4t29  += (z4x5x4t229 & 0xffff) * (vec19 & 0xffff)
2705int32323232 z4x5x4t210 += (z4x5x4t230 & 0xffff) * (vec19 & 0xffff)
2706int32323232 z4x5x4t211 += (z4x5x4t231 & 0xffff) * (vec19 & 0xffff)
2707int32323232 z4x5x4t212 += (z4x5x4t232 & 0xffff) * (vec19 & 0xffff)
2708int32323232 z4x5x4t213 += (z4x5x4t233 & 0xffff) * (vec19 & 0xffff)
2709int32323232 z4x5x4t214 += (z4x5x4t234 & 0xffff) * (vec19 & 0xffff)
2710int32323232 z4x5x4t215 += (z4x5x4t235 & 0xffff) * (vec19 & 0xffff)
2711int32323232 z4x5x4t216 += (z4x5x4t236 & 0xffff) * (vec19 & 0xffff)
2712int32323232 z4x5x4t217 += (z4x5x4t237 & 0xffff) * (vec19 & 0xffff)
2713int32323232 z4x5x4t218 += (z4x5x4t238 & 0xffff) * (vec19 & 0xffff)
2714int32323232 z4x5x4t219 += (z4x5x4t239 & 0xffff) * (vec19 & 0xffff)
2715
# Sequential carry through the top four words (16 -> 17 -> 18 -> 19). The
# source words are not masked inside the chain; their masks follow below.
# The last shift (word 19, by 12) yields the carry that wraps around to
# word 0.
2716uint32323232 carry = z4x5x4t216 >> 13
2717int32323232 z4x5x4t217 += carry
2718uint32323232 carry = z4x5x4t217 >> 13
2719int32323232 z4x5x4t218 += carry
2720uint32323232 carry = z4x5x4t218 >> 13
2721int32323232 z4x5x4t219 += carry
2722uint32323232 carry = z4x5x4t219 >> 12
2723#Multiply carry by 19
# red = 16*carry + carry + carry + carry = 19*carry, built from a shift and
# three additions.
2724int32323232 red = carry << 4
2725int32323232 red += carry
2726int32323232 red += carry
2727int32323232 red += carry
2728
# Add the wrapped carry into word 0 and drop the carried-out bits of word 19.
2729int32323232 z4x5x4t20 += red
2730z4x5x4t219 &= mask12
2731
# Mask the words whose carries were taken in the chain above.
# NOTE(review): z4x5x4t219 was already masked with mask12 two statements
# earlier and is not modified in between, so the last mask below repeats it.
2732z4x5x4t216 &= mask13
2733z4x5x4t217 &= mask13
2734z4x5x4t218 &= mask13
2735z4x5x4t219 &= mask12
2736
# Carry propagation over the lower words z4x5x4t20..z4x5x4t219, interleaved
# with the extraction of t20..t218 from words that have received their last
# update.
#
# Four carry chains are advanced in parallel, one word per step: shift the
# current word right to get its carry, add the carry to the next word, mask
# the current word. Words whose index is 3 mod 4 (3, 7, 11, 15) use a shift
# of 12 and mask12; all other words use 13 and mask13.
# NOTE(review): mask12 / mask13 and selw3333 are defined outside this
# section. Presumably the masks keep the low 12 / 13 bits of each lane and
# selw3333 is a byte-selection pattern that replicates one 32-bit word of
# the source across the result -- confirm.
#
# Step 1: words 0, 4, 8, 12 -> 1, 5, 9, 13.
2737uint32323232 carry0 = z4x5x4t20  >> 13
2738uint32323232 carry1 = z4x5x4t24  >> 13
2739uint32323232 carry2 = z4x5x4t28  >> 13
2740uint32323232 carry3 = z4x5x4t212 >> 13
2741
2742int32323232 z4x5x4t21  += carry0
2743z4x5x4t20  &= mask13
2744int32323232 z4x5x4t25  += carry1
2745z4x5x4t24  &= mask13
2746int32323232 z4x5x4t29  += carry2
2747z4x5x4t28  &= mask13
2748int32323232 z4x5x4t213 += carry3
2749z4x5x4t212 &= mask13
2750
# Step 2: words 1, 5, 9, 13 -> 2, 6, 10, 14.
2751uint32323232 carry0 = z4x5x4t21  >> 13
2752uint32323232 carry1 = z4x5x4t25  >> 13
2753uint32323232 carry2 = z4x5x4t29  >> 13
2754uint32323232 carry3 = z4x5x4t213 >> 13
2755
2756int32323232 z4x5x4t22  += carry0
2757z4x5x4t21  &= mask13
2758int32323232 z4x5x4t26  += carry1
2759z4x5x4t25  &= mask13
2760int32323232 z4x5x4t210 += carry2
2761z4x5x4t29  &= mask13
2762int32323232 z4x5x4t214 += carry3
2763z4x5x4t213 &= mask13
2764
# Step 3: words 2, 6, 10, 14 -> 3, 7, 11, 15.
2765uint32323232 carry0 = z4x5x4t22  >> 13
2766uint32323232 carry1 = z4x5x4t26  >> 13
2767uint32323232 carry2 = z4x5x4t210 >> 13
2768uint32323232 carry3 = z4x5x4t214 >> 13
2769
2770int32323232 z4x5x4t23  += carry0
2771z4x5x4t22  &= mask13
2772int32323232 z4x5x4t27  += carry1
2773z4x5x4t26  &= mask13
2774int32323232 z4x5x4t211 += carry2
2775z4x5x4t210 &= mask13
2776int32323232 z4x5x4t215 += carry3
2777z4x5x4t214 &= mask13
2778
# Step 4: words 3, 7, 11, 15 -> 4, 8, 12, 16 (12-bit shift and mask12).
# From here on the masks are issued before the carry additions, and the
# extraction of t20..t23 is interleaved; words 0..3 are not modified after
# this point in the section.
2779uint32323232 carry0 = z4x5x4t23  >> 12
2780uint32323232 carry1 = z4x5x4t27  >> 12
2781uint32323232 carry2 = z4x5x4t211 >> 12
2782uint32323232 carry3 = z4x5x4t215 >> 12
2783
2784z4x5x4t23  &= mask12
2785z4x5x4t27  &= mask12
2786z4x5x4t211 &= mask12
2787t20 = select bytes from z4x5x4t20 by selw3333
2788z4x5x4t215 &= mask12
2789t21 = select bytes from z4x5x4t21 by selw3333
2790
2791int32323232 z4x5x4t24  += carry0
2792t22 = select bytes from z4x5x4t22 by selw3333
2793int32323232 z4x5x4t28  += carry1
2794t23 = select bytes from z4x5x4t23 by selw3333
2795int32323232 z4x5x4t212 += carry2
2796int32323232 z4x5x4t216 += carry3
2797
# Step 5: words 4, 8, 12, 16 -> 5, 9, 13, 17. The carries are now named
# carry1..carry4. t24, t28, t212 and t216 are extracted once their source
# words have been masked.
2798uint32323232 carry1 = z4x5x4t24  >> 13
2799uint32323232 carry2 = z4x5x4t28  >> 13
2800uint32323232 carry3 = z4x5x4t212 >> 13
2801uint32323232 carry4 = z4x5x4t216 >> 13
2802
2803z4x5x4t24  &= mask13
2804z4x5x4t28  &= mask13
2805z4x5x4t212 &= mask13
2806t24 = select bytes from z4x5x4t24 by selw3333
2807z4x5x4t216 &= mask13
2808t28 = select bytes from z4x5x4t28 by selw3333
2809
2810int32323232 z4x5x4t25  += carry1
2811t212 = select bytes from z4x5x4t212 by selw3333
2812int32323232 z4x5x4t29  += carry2
2813t216 = select bytes from z4x5x4t216 by selw3333
2814int32323232 z4x5x4t213 += carry3
2815int32323232 z4x5x4t217 += carry4
2816
# Step 6: words 5, 9, 13, 17 -> 6, 10, 14, 18; extract t25, t29, t213, t217.
2817uint32323232 carry1 = z4x5x4t25  >> 13
2818uint32323232 carry2 = z4x5x4t29  >> 13
2819uint32323232 carry3 = z4x5x4t213 >> 13
2820uint32323232 carry4 = z4x5x4t217 >> 13
2821
2822z4x5x4t25  &= mask13
2823z4x5x4t29  &= mask13
2824z4x5x4t213 &= mask13
2825t25 = select bytes from z4x5x4t25 by selw3333
2826z4x5x4t217 &= mask13
2827t29 = select bytes from z4x5x4t29 by selw3333
2828
2829int32323232 z4x5x4t26  += carry1
2830t213 = select bytes from z4x5x4t213 by selw3333
2831int32323232 z4x5x4t210 += carry2
2832t217 = select bytes from z4x5x4t217 by selw3333
2833int32323232 z4x5x4t214 += carry3
2834int32323232 z4x5x4t218 += carry4
2835
# Step 7: words 6, 10, 14, 18 -> 7, 11, 15, 19; extract t26, t210, t214,
# t218.
2836uint32323232 carry1 = z4x5x4t26  >> 13
2837uint32323232 carry2 = z4x5x4t210 >> 13
2838uint32323232 carry3 = z4x5x4t214 >> 13
2839uint32323232 carry4 = z4x5x4t218  >> 13
2840
2841z4x5x4t26  &= mask13
2842z4x5x4t210 &= mask13
2843z4x5x4t214 &= mask13
2844t26 = select bytes from z4x5x4t26 by selw3333
2845z4x5x4t218  &= mask13
2846t210 = select bytes from z4x5x4t210 by selw3333
2847
# The carries added to words 7, 11, 15 and 19 below are not followed by a
# further shift or mask of those words within this section.
2848int32323232 z4x5x4t27  += carry1
2849t214 = select bytes from z4x5x4t214 by selw3333
2850int32323232 z4x5x4t211 += carry2
2851t218 = select bytes from z4x5x4t218 by selw3333
2852int32323232 z4x5x4t215 += carry3
2853int32323232 z4x5x4t219 += carry4
2854
2855
2856###############################################################
2857#		Multiplications by t20..t23	      ###
2858###############################################################
# Starts the product of t20..t219 with x1_03..x1_1619, accumulated into
# z3_03..z3_2023. Two independent instruction streams are interleaved here:
#
#  (a) multiply / realign / accumulate:
#        z3_*  = t20 * x1_*
#        tmp1* = t21 * x1_*,  tmp2* = t22 * x1_*,  tmp3* = t23 * x1_*
#      Each tmp row is realigned before it is added to z3:
#        - tmp<r>0b is tmp<r>0 shifted right by 4*r bytes,
#        - tmp<r>kb (k = 1..4) is a "combine" of the doubled previous vector
#          (tmp<r>(k-1)a = tmp<r>(k-1) << 1) with the undoubled tmp<r>k,
#        - tmp<r>4a is tmp<r>4 shifted left by 16 - 4*r bytes and then
#          doubled; it starts (r = 1) or is added to (r = 2, 3) z3_2023.
#      NOTE(review): comb13 / comb22 / comb31 are defined outside this
#      section; presumably they take 1 / 2 / 3 words from the first operand
#      and the remaining words from the second -- confirm. The shifts written
#      without a lane type (">> (8 * 4)" etc.) presumably act on the whole
#      128-bit register -- confirm.
#
#  (b) assembling x2_*, z2_* and x3_* from the reduced words
#      z4x5x4t20..z4x5x4t219, four consecutive words per output vector, via
#      the shuf2_*, shuf0_* and shuf1_* patterns (two words first, then the
#      third, then the fourth).
#      NOTE(review): the shuf* patterns are defined outside this section;
#      which part of each source word they pick is not visible here.
#
# The first rows also finish extracting t27, t211, t215 and t219, whose
# source words received their final carry just before this section.
2859
2860int32323232 tmp10 = (t21 & 0xffff) * (x1_03 & 0xffff)
2861t27 = select bytes from z4x5x4t27 by selw3333
2862int32323232 tmp11 = (t21 & 0xffff) * (x1_47 & 0xffff)
2863t211 = select bytes from z4x5x4t211 by selw3333
2864int32323232 tmp12 = (t21 & 0xffff) * (x1_811 & 0xffff)
2865t215 = select bytes from z4x5x4t215 by selw3333
2866int32323232 tmp13 = (t21 & 0xffff) * (x1_1215 & 0xffff)
2867t219 = select bytes from z4x5x4t219 by selw3333
2868int32323232 tmp14 = (t21 & 0xffff) * (x1_1619 & 0xffff)
2869x2_03 = combine z4x5x4t20 and z4x5x4t21 by shuf2_01
2870
2871
# Initialise z3 with the t20 row; start x2 with its first two words each.
2872int32323232 z3_03 = (t20 & 0xffff) * (x1_03 & 0xffff)
2873x2_47 = combine z4x5x4t24 and z4x5x4t25 by shuf2_01
2874int32323232 z3_47 = (t20 & 0xffff) * (x1_47 & 0xffff)
2875x2_811 = combine z4x5x4t28 and z4x5x4t29 by shuf2_01
2876int32323232 z3_811 = (t20 & 0xffff) * (x1_811 & 0xffff)
2877x2_1215 = combine z4x5x4t212 and z4x5x4t213 by shuf2_01
2878int32323232 z3_1215 = (t20 & 0xffff) * (x1_1215 & 0xffff)
2879x2_1619 = combine z4x5x4t216 and z4x5x4t217 by shuf2_01
2880int32323232 z3_1619 = (t20 & 0xffff) * (x1_1619 & 0xffff)
2881
# Realign the t21 row (4-byte offset) and accumulate it; compute the t22
# row; add the third and fourth words to x2.
2882tmp10b = tmp10 >> (8 * 4)
2883int32323232 tmp10a = tmp10 << 1
2884x2_03 = combine x2_03 and z4x5x4t22 by shuf2_2
2885int32323232 tmp11a = tmp11 << 1
2886x2_47 = combine x2_47 and z4x5x4t26 by shuf2_2
2887int32323232 tmp12a = tmp12 << 1
2888x2_811 = combine x2_811 and z4x5x4t210 by shuf2_2
2889int32323232 tmp13a = tmp13 << 1
2890tmp14a = tmp14 << (8 * 12)
2891int32323232 tmp20 = (t22 & 0xffff) * (x1_03 & 0xffff)
2892tmp11b = combine tmp10a and tmp11 by comb13
2893int32323232 tmp21 = (t22 & 0xffff) * (x1_47 & 0xffff)
2894tmp12b = combine tmp11a and tmp12 by comb13
2895int32323232 tmp22 = (t22 & 0xffff) * (x1_811 & 0xffff)
2896tmp13b = combine tmp12a and tmp13 by comb13
2897int32323232 tmp23 = (t22 & 0xffff) * (x1_1215 & 0xffff)
2898tmp14b = combine tmp13a and tmp14 by comb13
2899int32323232 tmp24 = (t22 & 0xffff) * (x1_1619 & 0xffff)
2900x2_1215 = combine x2_1215 and z4x5x4t214 by shuf2_2
2901int32323232 z3_03 += tmp10b
2902x2_1619 = combine x2_1619 and z4x5x4t218 by shuf2_2
2903int32323232 z3_47 += tmp11b
2904x2_03 = combine x2_03 and z4x5x4t23 by shuf2_3
2905int32323232 z3_811 += tmp12b
2906x2_47 = combine x2_47 and z4x5x4t27 by shuf2_3
2907int32323232 z3_1215 += tmp13b
2908x2_811 = combine x2_811 and z4x5x4t211 by shuf2_3
2909int32323232 z3_1619 += tmp14b
2910x2_1215 = combine x2_1215 and z4x5x4t215 by shuf2_3
# z3_2023 is created here from the doubled overflow of the t21 row.
2911int32323232 z3_2023 = tmp14a << 1
2912
# Realign the t22 row (8-byte offset) and accumulate it; compute the t23
# row; finish x2 and assemble z2.
2913tmp20b = tmp20 >> (8 * 8)
2914int32323232 tmp20a = tmp20 << 1
2915x2_1619 = combine x2_1619 and z4x5x4t219 by shuf2_3
2916int32323232 tmp21a = tmp21 << 1
2917z2_03 = combine z4x5x4t20 and z4x5x4t21 by shuf0_01
2918int32323232 tmp22a = tmp22 << 1
2919tmp24a = tmp24 << (8 * 8)
2920int32323232 tmp23a = tmp23 << 1
2921z2_47 = combine z4x5x4t24 and z4x5x4t25 by shuf0_01
2922int32323232 tmp30 = (t23 & 0xffff) * (x1_03 & 0xffff)
2923z2_811 = combine z4x5x4t28 and z4x5x4t29 by shuf0_01
2924int32323232 tmp31 = (t23 & 0xffff) * (x1_47 & 0xffff)
2925tmp21b = combine tmp20a and tmp21 by comb22
2926int32323232 tmp32 = (t23 & 0xffff) * (x1_811 & 0xffff)
2927tmp22b = combine tmp21a and tmp22 by comb22
2928int32323232 tmp33 = (t23 & 0xffff) * (x1_1215 & 0xffff)
2929tmp23b = combine tmp22a and tmp23 by comb22
2930int32323232 tmp34 = (t23 & 0xffff) * (x1_1619 & 0xffff)
2931tmp24b = combine tmp23a and tmp24 by comb22
2932int32323232 tmp24a <<= 1
2933z2_1215 = combine z4x5x4t212 and z4x5x4t213 by shuf0_01
2934int32323232 z3_03 += tmp20b
2935z2_1619 = combine z4x5x4t216 and z4x5x4t217 by shuf0_01
2936int32323232 z3_47 += tmp21b
2937z2_03 = combine z2_03 and z4x5x4t22 by shuf0_2
2938int32323232 z3_811 += tmp22b
2939z2_47 = combine z2_47 and z4x5x4t26 by shuf0_2
2940int32323232 z3_1215 += tmp23b
2941z2_811 = combine z2_811 and z4x5x4t210 by shuf0_2
2942int32323232 z3_1619 += tmp24b
2943z2_1215 = combine z2_1215 and z4x5x4t214 by shuf0_2
2944int32323232 z3_2023 += tmp24a
2945
# Realign the t23 row (12-byte offset) and accumulate it. tmp00..tmp04
# (t24 * x1_*) are computed here but not consumed within this section.
# z2 is finished and x3 is started.
2946tmp30b = tmp30 >> (8 * 12)
2947int32323232 tmp30a = tmp30 << 1
2948z2_1619 = combine z2_1619 and z4x5x4t218 by shuf0_2
2949int32323232 tmp31a = tmp31 << 1
2950z2_03 = combine z2_03 and z4x5x4t23 by shuf0_3
2951int32323232 tmp32a = tmp32 << 1
2952tmp34a = tmp34 << (8 * 4)
2953int32323232 tmp33a = tmp33 << 1
2954z2_47 = combine z2_47 and z4x5x4t27 by shuf0_3
2955int32323232 tmp00 = (t24 & 0xffff) * (x1_03 & 0xffff)
2956z2_811 = combine z2_811 and z4x5x4t211 by shuf0_3
2957int32323232 tmp01 = (t24 & 0xffff) * (x1_47 & 0xffff)
2958tmp31b = combine tmp30a and tmp31 by comb31
2959int32323232 tmp02 = (t24 & 0xffff) * (x1_811 & 0xffff)
2960tmp32b = combine tmp31a and tmp32 by comb31
2961int32323232 tmp03 = (t24 & 0xffff) * (x1_1215 & 0xffff)
2962tmp33b = combine tmp32a and tmp33 by comb31
2963int32323232 tmp04 = (t24 & 0xffff) * (x1_1619 & 0xffff)
2964tmp34b = combine tmp33a and tmp34 by comb31
2965int32323232 tmp34a <<= 1
2966z2_1215 = combine z2_1215 and z4x5x4t215 by shuf0_3
2967int32323232 z3_03 += tmp30b
2968z2_1619 = combine z2_1619 and z4x5x4t219 by shuf0_3
2969int32323232 z3_47 += tmp31b
2970x3_03 = combine z4x5x4t20 and z4x5x4t21 by shuf1_01
2971
2972int32323232 z3_811 += tmp32b
2973x3_47 = combine z4x5x4t24 and z4x5x4t25 by shuf1_01
2974int32323232 z3_1215 += tmp33b
2975x3_811 = combine z4x5x4t28 and z4x5x4t29 by shuf1_01
2976int32323232 z3_1619 += tmp34b
2977x3_1215 = combine z4x5x4t212 and z4x5x4t213 by shuf1_01
2978int32323232 z3_2023 += tmp34a
2979x3_1619 = combine z4x5x4t216 and z4x5x4t217 by shuf1_01
2980
2981#################################################################
2982###		Multiplications by t24..t27	      ###
2983#################################################################
# Continues the product of t2* with x1_*, one vector position higher than
# the rows for t20..t23: results are accumulated into z3_47..z3_2023, and
# z3_2427 is created.
#
#   tmp0* : read here before being reassigned further down, so the values
#           added to z3_47..z3_2023 come from the code preceding this
#           section; they are added without realignment.
#   tmp1* = t25 * x1_*,  tmp2* = t26 * x1_*,  tmp3* = t27 * x1_*
# Each tmp<r> row is realigned before it is added:
#   - tmp<r>0b is tmp<r>0 shifted right by 4*r bytes,
#   - tmp<r>kb (k = 1..4) is a "combine" of the doubled previous vector
#     (tmp<r>(k-1)a = tmp<r>(k-1) << 1) with the undoubled tmp<r>k,
#   - tmp<r>4a is tmp<r>4 shifted left by 16 - 4*r bytes and then doubled;
#     it starts (r = 1) or is added to (r = 2, 3) z3_2427.
# NOTE(review): comb13 / comb22 / comb31 are defined outside this section;
# presumably they take 1 / 2 / 3 words from the first operand and the
# remaining words from the second -- confirm.
#
# The first rows are interleaved with the remaining steps of assembling
# x3_* from z4x5x4t2* via the shuf1_2 and shuf1_3 patterns.
2984
2985int32323232 tmp10 = (t25 & 0xffff) * (x1_03 & 0xffff)
2986x3_03 = combine x3_03 and z4x5x4t22 by shuf1_2
2987int32323232 tmp11 = (t25 & 0xffff) * (x1_47 & 0xffff)
2988x3_47 = combine x3_47 and z4x5x4t26 by shuf1_2
2989int32323232 tmp12 = (t25 & 0xffff) * (x1_811 & 0xffff)
2990x3_811 = combine x3_811 and z4x5x4t210 by shuf1_2
2991int32323232 tmp13 = (t25 & 0xffff) * (x1_1215 & 0xffff)
2992x3_1215 = combine x3_1215 and z4x5x4t214 by shuf1_2
2993int32323232 tmp14 = (t25 & 0xffff) * (x1_1619 & 0xffff)
2994x3_1619 = combine x3_1619 and z4x5x4t218 by shuf1_2
2995
# Accumulate the pending tmp0* row, one vector position up from z3_03.
2996int32323232 z3_47 += tmp00
2997x3_03 = combine x3_03 and z4x5x4t23 by shuf1_3
2998int32323232 z3_811 += tmp01
2999x3_47 = combine x3_47 and z4x5x4t27 by shuf1_3
3000int32323232 z3_1215 += tmp02
3001x3_811 = combine x3_811 and z4x5x4t211 by shuf1_3
3002int32323232 z3_1619 += tmp03
3003tmp10b = tmp10 >> (8 * 4)
3004int32323232 z3_2023 += tmp04
3005x3_1215 = combine x3_1215 and z4x5x4t215 by shuf1_3
3006
# Realign the t25 row (4-byte offset) and accumulate it; compute the t26
# row.
3007int32323232 tmp10a = tmp10 << 1
3008x3_1619 = combine x3_1619 and z4x5x4t219 by shuf1_3
3009int32323232 tmp11a = tmp11 << 1
3010int32323232 tmp12a = tmp12 << 1
3011int32323232 tmp13a = tmp13 << 1
3012tmp14a = tmp14 << (8 * 12)
3013int32323232 tmp20 = (t26 & 0xffff) * (x1_03 & 0xffff)
3014tmp11b = combine tmp10a and tmp11 by comb13
3015int32323232 tmp21 = (t26 & 0xffff) * (x1_47 & 0xffff)
3016tmp12b = combine tmp11a and tmp12 by comb13
3017int32323232 tmp22 = (t26 & 0xffff) * (x1_811 & 0xffff)
3018tmp13b = combine tmp12a and tmp13 by comb13
3019int32323232 tmp23 = (t26 & 0xffff) * (x1_1215 & 0xffff)
3020tmp14b = combine tmp13a and tmp14 by comb13
3021int32323232 tmp24 = (t26 & 0xffff) * (x1_1619 & 0xffff)
3022int32323232 z3_47 += tmp10b
3023int32323232 z3_811 += tmp11b
3024int32323232 z3_1215 += tmp12b
3025int32323232 z3_1619 += tmp13b
3026int32323232 z3_2023 += tmp14b
# z3_2427 is created here from the doubled overflow of the t25 row.
3027int32323232 z3_2427 = tmp14a << 1
3028
# Realign the t26 row (8-byte offset) and accumulate it; compute the t27
# row.
3029tmp20b = tmp20 >> (8 * 8)
3030int32323232 tmp20a = tmp20 << 1
3031int32323232 tmp21a = tmp21 << 1
3032int32323232 tmp22a = tmp22 << 1
3033tmp24a = tmp24 << (8 * 8)
3034int32323232 tmp23a = tmp23 << 1
3035int32323232 tmp30 = (t27 & 0xffff) * (x1_03 & 0xffff)
3036int32323232 tmp31 = (t27 & 0xffff) * (x1_47 & 0xffff)
3037tmp21b = combine tmp20a and tmp21 by comb22
3038int32323232 tmp32 = (t27 & 0xffff) * (x1_811 & 0xffff)
3039tmp22b = combine tmp21a and tmp22 by comb22
3040int32323232 tmp33 = (t27 & 0xffff) * (x1_1215 & 0xffff)
3041tmp23b = combine tmp22a and tmp23 by comb22
3042int32323232 tmp34 = (t27 & 0xffff) * (x1_1619 & 0xffff)
3043tmp24b = combine tmp23a and tmp24 by comb22
3044int32323232 tmp24a <<= 1
3045int32323232 z3_47 += tmp20b
3046int32323232 z3_811 += tmp21b
3047int32323232 z3_1215 += tmp22b
3048int32323232 z3_1619 += tmp23b
3049int32323232 z3_2023 += tmp24b
3050int32323232 z3_2427 += tmp24a
3051
# Realign the t27 row (12-byte offset) and accumulate it. tmp00..tmp04 are
# reassigned to t28 * x1_* here but not consumed within this section.
3052tmp30b = tmp30 >> (8 * 12)
3053int32323232 tmp30a = tmp30 << 1
3054int32323232 tmp31a = tmp31 << 1
3055int32323232 tmp32a = tmp32 << 1
3056tmp34a = tmp34 << (8 * 4)
3057int32323232 tmp33a = tmp33 << 1
3058int32323232 tmp00 = (t28 & 0xffff) * (x1_03 & 0xffff)
3059int32323232 tmp01 = (t28 & 0xffff) * (x1_47 & 0xffff)
3060tmp31b = combine tmp30a and tmp31 by comb31
3061int32323232 tmp02 = (t28 & 0xffff) * (x1_811 & 0xffff)
3062tmp32b = combine tmp31a and tmp32 by comb31
3063int32323232 tmp03 = (t28 & 0xffff) * (x1_1215 & 0xffff)
3064tmp33b = combine tmp32a and tmp33 by comb31
3065int32323232 tmp04 = (t28 & 0xffff) * (x1_1619 & 0xffff)
3066tmp34b = combine tmp33a and tmp34 by comb31
3067int32323232 tmp34a <<= 1
3068int32323232 z3_47 += tmp30b
3069int32323232 z3_811 += tmp31b
3070int32323232 z3_1215 += tmp32b
3071int32323232 z3_1619 += tmp33b
3072int32323232 z3_2023 += tmp34b
3073int32323232 z3_2427 += tmp34a
3074
3075#################################################################
3076###		Multiplications by t28..t211	      ###
3077#################################################################
3078
# This group accumulates the product rows of t28, t29, t210 and t211 with
# x1_03..x1_1619 into z3_811..z3_2831.
# Row naming: tmpK0..tmpK4 are the lane-wise products of the low 16 bits of
# one t2 vector with the low 16 bits of the five x1 vectors; K cycles
# 0,1,2,3 for t28,t29,t210,t211.
#   K = 0: the five products are added to z3 unchanged.
#   K = 1,2,3: the row is re-aligned first:
#       tmpK0b = tmpK0 >> (8*4*K)                    (no lane prefix)
#       tmpKjb = combine (tmpK(j-1) << 1 per lane) and tmpKj by comb13/22/31
#       tmpK4a = (tmpK4 << (8*(16-4*K))) << 1 per lane
#     and the resulting six vectors are added to z3.
# The comb* patterns are defined outside this excerpt.
# NOTE(review): shifts/combines are interleaved with the multiplies of the
# following row, presumably for instruction scheduling; keep the order.

# Products for t29.
3079int32323232 tmp10 = (t29 & 0xffff) * (x1_03 & 0xffff)
3080int32323232 tmp11 = (t29 & 0xffff) * (x1_47 & 0xffff)
3081int32323232 tmp12 = (t29 & 0xffff) * (x1_811 & 0xffff)
3082int32323232 tmp13 = (t29 & 0xffff) * (x1_1215 & 0xffff)
3083int32323232 tmp14 = (t29 & 0xffff) * (x1_1619 & 0xffff)
3084
# Add the unshifted row tmp00..tmp04 (computed before this section).
3085int32323232 z3_811 += tmp00
3086int32323232 z3_1215 += tmp01
3087int32323232 z3_1619 += tmp02
3088int32323232 z3_2023 += tmp03
3089int32323232 z3_2427 += tmp04
3090
# Re-align the t29 row (4-byte offset, comb13) while computing the t210 products.
3091tmp10b = tmp10 >> (8 * 4)
3092int32323232 tmp10a = tmp10 << 1
3093int32323232 tmp11a = tmp11 << 1
3094int32323232 tmp12a = tmp12 << 1
3095tmp14a = tmp14 << (8 * 12)
3096int32323232 tmp13a = tmp13 << 1
3097int32323232 tmp20 = (t210 & 0xffff) * (x1_03 & 0xffff)
3098int32323232 tmp21 = (t210 & 0xffff) * (x1_47 & 0xffff)
3099tmp11b = combine tmp10a and tmp11 by comb13
3100int32323232 tmp22 = (t210 & 0xffff) * (x1_811 & 0xffff)
3101tmp12b = combine tmp11a and tmp12 by comb13
3102int32323232 tmp23 = (t210 & 0xffff) * (x1_1215 & 0xffff)
3103tmp13b = combine tmp12a and tmp13 by comb13
3104int32323232 tmp24 = (t210 & 0xffff) * (x1_1619 & 0xffff)
3105tmp14b = combine tmp13a and tmp14 by comb13
3106int32323232 z3_811 += tmp10b
3107int32323232 z3_1215 += tmp11b
3108int32323232 z3_1619 += tmp12b
3109int32323232 z3_2023 += tmp13b
3110int32323232 z3_2427 += tmp14b
3111tmp20b = tmp20 >> (8 * 8)
# z3_2831 is assigned with '=' (not '+='), so its previous content is overwritten.
3112int32323232 z3_2831 = tmp14a << 1
3113
# Re-align the t210 row (8-byte offset, comb22) while computing the t211 products.
3114int32323232 tmp20a = tmp20 << 1
3115int32323232 tmp21a = tmp21 << 1
3116int32323232 tmp22a = tmp22 << 1
3117int32323232 tmp23a = tmp23 << 1
3118tmp24a = tmp24 << (8 * 8)
3119int32323232 tmp30 = (t211 & 0xffff) * (x1_03 & 0xffff)
3120tmp21b = combine tmp20a and tmp21 by comb22
3121int32323232 tmp31 = (t211 & 0xffff) * (x1_47 & 0xffff)
3122tmp22b = combine tmp21a and tmp22 by comb22
3123int32323232 tmp32 = (t211 & 0xffff) * (x1_811 & 0xffff)
3124tmp23b = combine tmp22a and tmp23 by comb22
3125int32323232 tmp33 = (t211 & 0xffff) * (x1_1215 & 0xffff)
3126tmp24b = combine tmp23a and tmp24 by comb22
3127int32323232 tmp34 = (t211 & 0xffff) * (x1_1619 & 0xffff)
3128int32323232 tmp24a <<= 1
3129int32323232 z3_811 += tmp20b
3130int32323232 z3_1215 += tmp21b
3131int32323232 z3_1619 += tmp22b
3132int32323232 z3_2023 += tmp23b
3133int32323232 z3_2427 += tmp24b
3134tmp30b = tmp30 >> (8 * 12)
3135int32323232 z3_2831 += tmp24a
3136
# Re-align the t211 row (12-byte offset, comb31) while computing the t212
# products tmp00..tmp04, which are not consumed inside this section.
3137int32323232 tmp30a = tmp30 << 1
3138int32323232 tmp31a = tmp31 << 1
3139int32323232 tmp32a = tmp32 << 1
3140int32323232 tmp33a = tmp33 << 1
3141tmp34a = tmp34 << (8 * 4)
3142int32323232 tmp00 = (t212 & 0xffff) * (x1_03 & 0xffff)
3143tmp31b = combine tmp30a and tmp31 by comb31
3144int32323232 tmp01 = (t212 & 0xffff) * (x1_47 & 0xffff)
3145tmp32b = combine tmp31a and tmp32 by comb31
3146int32323232 tmp02 = (t212 & 0xffff) * (x1_811 & 0xffff)
3147tmp33b = combine tmp32a and tmp33 by comb31
3148int32323232 tmp03 = (t212 & 0xffff) * (x1_1215 & 0xffff)
3149tmp34b = combine tmp33a and tmp34 by comb31
3150int32323232 tmp04 = (t212 & 0xffff) * (x1_1619 & 0xffff)
3151int32323232 tmp34a <<= 1
3152int32323232 z3_811 += tmp30b
3153int32323232 z3_1215 += tmp31b
3154int32323232 z3_1619 += tmp32b
3155int32323232 z3_2023 += tmp33b
3156int32323232 z3_2427 += tmp34b
3157int32323232 z3_2831 += tmp34a
3158
3159#################################################################
3160###		Multiplications by t212..t215	      ###
3161#################################################################
3162
# This group accumulates the product rows of t212, t213, t214 and t215 with
# x1_03..x1_1619 into z3_1215..z3_3235.
# tmpK0..tmpK4 are lane-wise products of the low 16 bits of one t2 vector with
# the low 16 bits of the five x1 vectors (K = 0,1,2,3 for t212..t215).
# The K = 0 row is added unchanged. Rows K = 1,2,3 are first re-aligned:
# the first product is shifted right by 8*4*K bits, the last one left by
# 8*(16-4*K) bits and doubled per lane, and each neighbouring pair is merged
# by comb13 / comb22 / comb31 after doubling the lower one per lane.
# The comb* patterns are defined outside this excerpt.
# NOTE(review): statement order appears hand-scheduled; do not reorder.

# Products for t213.
3163int32323232 tmp10 = (t213 & 0xffff) * (x1_03 & 0xffff)
3164int32323232 tmp11 = (t213 & 0xffff) * (x1_47 & 0xffff)
3165int32323232 tmp12 = (t213 & 0xffff) * (x1_811 & 0xffff)
3166int32323232 tmp13 = (t213 & 0xffff) * (x1_1215 & 0xffff)
3167int32323232 tmp14 = (t213 & 0xffff) * (x1_1619 & 0xffff)
3168
# Add the unshifted row tmp00..tmp04 (computed before this section).
3169int32323232 z3_1215 += tmp00
3170int32323232 z3_1619 += tmp01
3171int32323232 z3_2023 += tmp02
3172int32323232 z3_2427 += tmp03
3173tmp10b = tmp10 >> (8 * 4)
3174int32323232 z3_2831 += tmp04
3175
# Re-align the t213 row (4-byte offset, comb13) while computing the t214 products.
3176int32323232 tmp10a = tmp10 << 1
3177int32323232 tmp11a = tmp11 << 1
3178int32323232 tmp12a = tmp12 << 1
3179int32323232 tmp13a = tmp13 << 1
3180tmp14a = tmp14 << (8 * 12)
3181int32323232 tmp20 = (t214 & 0xffff) * (x1_03 & 0xffff)
3182tmp11b = combine tmp10a and tmp11 by comb13
3183int32323232 tmp21 = (t214 & 0xffff) * (x1_47 & 0xffff)
3184tmp12b = combine tmp11a and tmp12 by comb13
3185int32323232 tmp22 = (t214 & 0xffff) * (x1_811 & 0xffff)
3186tmp13b = combine tmp12a and tmp13 by comb13
3187int32323232 tmp23 = (t214 & 0xffff) * (x1_1215 & 0xffff)
3188tmp14b = combine tmp13a and tmp14 by comb13
3189int32323232 tmp24 = (t214 & 0xffff) * (x1_1619 & 0xffff)
3190int32323232 z3_1215 += tmp10b
3191int32323232 z3_1619 += tmp11b
3192int32323232 z3_2023 += tmp12b
3193int32323232 z3_2427 += tmp13b
3194int32323232 z3_2831 += tmp14b
# z3_3235 is assigned with '=' (not '+='), so its previous content is overwritten.
3195int32323232 z3_3235 = tmp14a << 1
3196
# Re-align the t214 row (8-byte offset, comb22) while computing the t215 products.
3197tmp20b = tmp20 >> (8 * 8)
3198int32323232 tmp20a = tmp20 << 1
3199int32323232 tmp21a = tmp21 << 1
3200int32323232 tmp22a = tmp22 << 1
3201tmp24a = tmp24 << (8 * 8)
3202int32323232 tmp23a = tmp23 << 1
3203int32323232 tmp30 = (t215 & 0xffff) * (x1_03 & 0xffff)
3204int32323232 tmp31 = (t215 & 0xffff) * (x1_47 & 0xffff)
3205tmp21b = combine tmp20a and tmp21 by comb22
3206int32323232 tmp32 = (t215 & 0xffff) * (x1_811 & 0xffff)
3207tmp22b = combine tmp21a and tmp22 by comb22
3208int32323232 tmp33 = (t215 & 0xffff) * (x1_1215 & 0xffff)
3209tmp23b = combine tmp22a and tmp23 by comb22
3210int32323232 tmp34 = (t215 & 0xffff) * (x1_1619 & 0xffff)
3211tmp24b = combine tmp23a and tmp24 by comb22
3212int32323232 tmp24a <<= 1
3213int32323232 z3_1215 += tmp20b
3214int32323232 z3_1619 += tmp21b
3215int32323232 z3_2023 += tmp22b
3216int32323232 z3_2427 += tmp23b
3217int32323232 z3_2831 += tmp24b
3218int32323232 z3_3235 += tmp24a
3219
# Re-align the t215 row (12-byte offset, comb31) while computing the t216
# products tmp00..tmp04, which are not consumed inside this section.
3220tmp30b = tmp30 >> (8 * 12)
3221int32323232 tmp30a = tmp30 << 1
3222int32323232 tmp31a = tmp31 << 1
3223int32323232 tmp32a = tmp32 << 1
3224tmp34a = tmp34 << (8 * 4)
3225int32323232 tmp33a = tmp33 << 1
3226int32323232 tmp00 = (t216 & 0xffff) * (x1_03 & 0xffff)
3227int32323232 tmp01 = (t216 & 0xffff) * (x1_47 & 0xffff)
3228tmp31b = combine tmp30a and tmp31 by comb31
3229int32323232 tmp02 = (t216 & 0xffff) * (x1_811 & 0xffff)
3230tmp32b = combine tmp31a and tmp32 by comb31
3231int32323232 tmp03 = (t216 & 0xffff) * (x1_1215 & 0xffff)
3232tmp33b = combine tmp32a and tmp33 by comb31
3233int32323232 tmp04 = (t216 & 0xffff) * (x1_1619 & 0xffff)
3234tmp34b = combine tmp33a and tmp34 by comb31
3235int32323232 tmp34a <<= 1
3236int32323232 z3_1215 += tmp30b
3237int32323232 z3_1619 += tmp31b
3238int32323232 z3_2023 += tmp32b
3239int32323232 z3_2427 += tmp33b
3240int32323232 z3_2831 += tmp34b
3241int32323232 z3_3235 += tmp34a
3242
3243#################################################################
3244###		Multiplications by t216..t219	      ###
3245#################################################################
3246
# Last group: accumulates the product rows of t216, t217, t218 and t219 with
# x1_03..x1_1619 into z3_1619..z3_3639.
# tmpK0..tmpK4 are lane-wise products of the low 16 bits of one t2 vector with
# the low 16 bits of the five x1 vectors (K = 0,1,2,3 for t216..t219).
# The K = 0 row is added unchanged. Rows K = 1,2,3 are re-aligned by shifting
# the first product right by 8*4*K bits, shifting the last one left by
# 8*(16-4*K) bits and doubling it per lane, and merging neighbouring pairs
# with comb13 / comb22 / comb31 (patterns defined outside this excerpt).
# No further products are started here; the t219 re-alignment is interleaved
# with the accumulation of the t218 row instead.
# NOTE(review): statement order appears hand-scheduled; do not reorder.

# Products for t217.
3247int32323232 tmp10 = (t217 & 0xffff) * (x1_03 & 0xffff)
3248int32323232 tmp11 = (t217 & 0xffff) * (x1_47 & 0xffff)
3249int32323232 tmp12 = (t217 & 0xffff) * (x1_811 & 0xffff)
3250int32323232 tmp13 = (t217 & 0xffff) * (x1_1215 & 0xffff)
3251int32323232 tmp14 = (t217 & 0xffff) * (x1_1619 & 0xffff)
3252
# Add the unshifted row tmp00..tmp04 (computed before this section).
3253int32323232 z3_1619 += tmp00
3254int32323232 z3_2023 += tmp01
3255int32323232 z3_2427 += tmp02
3256int32323232 z3_2831 += tmp03
3257int32323232 z3_3235 += tmp04
3258
# Re-align the t217 row (4-byte offset, comb13) while computing the t218 products.
3259tmp10b = tmp10 >> (8 * 4)
3260int32323232 tmp10a = tmp10 << 1
3261int32323232 tmp11a = tmp11 << 1
3262int32323232 tmp12a = tmp12 << 1
3263tmp14a = tmp14 << (8 * 12)
3264int32323232 tmp13a = tmp13 << 1
3265int32323232 tmp20 = (t218 & 0xffff) * (x1_03 & 0xffff)
3266int32323232 tmp21 = (t218 & 0xffff) * (x1_47 & 0xffff)
3267tmp11b = combine tmp10a and tmp11 by comb13
3268int32323232 tmp22 = (t218 & 0xffff) * (x1_811 & 0xffff)
3269tmp12b = combine tmp11a and tmp12 by comb13
3270int32323232 tmp23 = (t218 & 0xffff) * (x1_1215 & 0xffff)
3271tmp13b = combine tmp12a and tmp13 by comb13
3272int32323232 tmp24 = (t218 & 0xffff) * (x1_1619 & 0xffff)
3273tmp14b = combine tmp13a and tmp14 by comb13
3274int32323232 z3_1619 += tmp10b
3275int32323232 z3_2023 += tmp11b
3276int32323232 z3_2427 += tmp12b
3277int32323232 z3_2831 += tmp13b
3278int32323232 z3_3235 += tmp14b
3279tmp20b = tmp20 >> (8 * 8)
# z3_3639 is assigned with '=' (not '+='), so its previous content is overwritten.
3280int32323232 z3_3639 = tmp14a << 1
3281
# Re-align the t218 row (8-byte offset, comb22) while computing the t219 products.
3282int32323232 tmp20a = tmp20 << 1
3283int32323232 tmp21a = tmp21 << 1
3284int32323232 tmp22a = tmp22 << 1
3285int32323232 tmp23a = tmp23 << 1
3286tmp24a = tmp24 << (8 * 8)
3287int32323232 tmp30 = (t219 & 0xffff) * (x1_03 & 0xffff)
3288tmp21b = combine tmp20a and tmp21 by comb22
3289int32323232 tmp31 = (t219 & 0xffff) * (x1_47 & 0xffff)
3290tmp22b = combine tmp21a and tmp22 by comb22
3291int32323232 tmp32 = (t219 & 0xffff) * (x1_811 & 0xffff)
3292tmp23b = combine tmp22a and tmp23 by comb22
3293int32323232 tmp33 = (t219 & 0xffff) * (x1_1215 & 0xffff)
3294tmp24b = combine tmp23a and tmp24 by comb22
3295int32323232 tmp34 = (t219 & 0xffff) * (x1_1619 & 0xffff)
3296int32323232 tmp24a <<= 1
# Accumulate the t218 row, interleaved with the re-alignment of the t219 row
# (12-byte offset, comb31).
3297int32323232 z3_1619 += tmp20b
3298int32323232 z3_2023 += tmp21b
3299int32323232 tmp30a = tmp30 << 1
3300tmp30b = tmp30 >> (8 * 12)
3301int32323232 tmp31a = tmp31 << 1
3302int32323232 tmp32a = tmp32 << 1
3303int32323232 tmp33a = tmp33 << 1
3304tmp34a = tmp34 << (8 * 4)
3305
3306int32323232 z3_2427 += tmp22b
3307tmp31b = combine tmp30a and tmp31 by comb31
3308int32323232 z3_2831 += tmp23b
3309tmp32b = combine tmp31a and tmp32 by comb31
3310int32323232 z3_3235 += tmp24b
3311tmp33b = combine tmp32a and tmp33 by comb31
3312int32323232 z3_3639 += tmp24a
3313tmp34b = combine tmp33a and tmp34 by comb31
3314
# Accumulate the t219 row.
3315int32323232 tmp34a <<= 1
3316int32323232 z3_1619 += tmp30b
3317int32323232 z3_2023 += tmp31b
3318int32323232 z3_2427 += tmp32b
3319int32323232 z3_2831 += tmp33b
3320int32323232 z3_3235 += tmp34b
3321int32323232 z3_3639 += tmp34a
3322
3323# Reduce coefficients
# Carry propagation over the upper vectors z3_2023..z3_3639.
# Each step selects bytes of a vector with a sel* pattern (defined outside this
# excerpt), shifts every 32-bit lane right by 13 (sel01, sel12, sel23) or by 12
# (sel30), and adds the result back.
# Presumably selXY moves lane X into the position of lane Y so that the bits
# above the coefficient width are added to the next coefficient - TODO confirm
# against the sel* definitions.
# The masking with redcoeffmask happens only after the sel30 carries have been
# extracted; do not move the '&=' lines above the corresponding selects.

# Pass 1, step sel01 on z3_2023..z3_3235.
3324carry0 = select bytes from z3_2023 by sel01
3325carry1 = select bytes from z3_2427 by sel01
3326carry2 = select bytes from z3_2831 by sel01
3327carry3 = select bytes from z3_3235 by sel01
3328uint32323232 carry0 >>= 13
3329uint32323232 carry1 >>= 13
3330uint32323232 carry2 >>= 13
3331uint32323232 carry3 >>= 13
3332int32323232 z3_2023 += carry0
3333int32323232 z3_2427 += carry1
3334int32323232 z3_2831 += carry2
3335int32323232 z3_3235 += carry3
3336
# Pass 1, step sel12.
3337carry0 = select bytes from z3_2023 by sel12
3338carry1 = select bytes from z3_2427 by sel12
3339carry2 = select bytes from z3_2831 by sel12
3340carry3 = select bytes from z3_3235 by sel12
3341uint32323232 carry0 >>= 13
3342uint32323232 carry1 >>= 13
3343uint32323232 carry2 >>= 13
3344uint32323232 carry3 >>= 13
3345int32323232 z3_2023 += carry0
3346int32323232 z3_2427 += carry1
3347int32323232 z3_2831 += carry2
3348int32323232 z3_3235 += carry3
3349
# Pass 1, step sel23.
3350carry0 = select bytes from z3_2023 by sel23
3351carry1 = select bytes from z3_2427 by sel23
3352carry2 = select bytes from z3_2831 by sel23
3353carry3 = select bytes from z3_3235 by sel23
3354uint32323232 carry0 >>= 13
3355uint32323232 carry1 >>= 13
3356uint32323232 carry2 >>= 13
3357uint32323232 carry3 >>= 13
3358int32323232 z3_2023 += carry0
3359int32323232 z3_2427 += carry1
3360int32323232 z3_2831 += carry2
3361int32323232 z3_3235 += carry3
3362
# Pass 1, step sel30: shift by 12 instead of 13, mask the source vectors with
# redcoeffmask, then add each carry to the NEXT vector.
3363carry0 = select bytes from z3_2023 by sel30
3364carry1 = select bytes from z3_2427 by sel30
3365carry2 = select bytes from z3_2831 by sel30
3366carry3 = select bytes from z3_3235 by sel30
3367uint32323232 carry0 >>= 12
3368uint32323232 carry1 >>= 12
3369uint32323232 carry2 >>= 12
3370uint32323232 carry3 >>= 12
3371z3_2023 &= redcoeffmask
3372z3_2427 &= redcoeffmask
3373z3_2831 &= redcoeffmask
3374z3_3235 &= redcoeffmask
3375
3376int32323232 z3_2427 += carry0
3377int32323232 z3_2831 += carry1
3378int32323232 z3_3235 += carry2
3379int32323232 z3_3639 += carry3
3380
3381
# Pass 2 over z3_2427..z3_3639 (z3_2023 is not touched again in this block).
3382carry1 = select bytes from z3_2427 by sel01
3383carry2 = select bytes from z3_2831 by sel01
3384carry3 = select bytes from z3_3235 by sel01
3385carry4 = select bytes from z3_3639 by sel01
3386uint32323232 carry1 >>= 13
3387uint32323232 carry2 >>= 13
3388uint32323232 carry3 >>= 13
3389uint32323232 carry4 >>= 13
3390int32323232 z3_2427 += carry1
3391int32323232 z3_2831 += carry2
3392int32323232 z3_3235 += carry3
3393int32323232 z3_3639 += carry4
3394
3395carry1 = select bytes from z3_2427 by sel12
3396carry2 = select bytes from z3_2831 by sel12
3397carry3 = select bytes from z3_3235 by sel12
3398carry4 = select bytes from z3_3639 by sel12
3399uint32323232 carry1 >>= 13
3400uint32323232 carry2 >>= 13
3401uint32323232 carry3 >>= 13
3402uint32323232 carry4 >>= 13
3403int32323232 z3_2427 += carry1
3404int32323232 z3_2831 += carry2
3405int32323232 z3_3235 += carry3
3406int32323232 z3_3639 += carry4
3407
3408carry1 = select bytes from z3_2427 by sel23
3409carry2 = select bytes from z3_2831 by sel23
3410carry3 = select bytes from z3_3235 by sel23
3411carry4 = select bytes from z3_3639 by sel23
3412uint32323232 carry1 >>= 13
3413uint32323232 carry2 >>= 13
3414uint32323232 carry3 >>= 13
3415uint32323232 carry4 >>= 13
3416int32323232 z3_2427 += carry1
3417int32323232 z3_2831 += carry2
3418int32323232 z3_3235 += carry3
3419int32323232 z3_3639 += carry4
3420
# Pass 2, step sel30: only z3_2427..z3_3235 hand a carry to the next vector;
# no sel30 carry is taken out of z3_3639 here. z3_3639 is masked with
# redcoeffmaskend, the others with redcoeffmask (both defined outside this
# excerpt).
3421carry1 = select bytes from z3_2427 by sel30
3422carry2 = select bytes from z3_2831 by sel30
3423carry3 = select bytes from z3_3235 by sel30
3424uint32323232 carry1 >>= 12
3425uint32323232 carry2 >>= 12
3426uint32323232 carry3 >>= 12
3427z3_2427 &= redcoeffmask
3428z3_2831 &= redcoeffmask
3429z3_3235 &= redcoeffmask
3430z3_3639 &= redcoeffmaskend
3431
3432int32323232 z3_2831 += carry1
3433int32323232 z3_3235 += carry2
3434int32323232 z3_3639 += carry3
3435
3436# Reduce polynomial
# Folds the five upper vectors into the five lower ones: every lane of
# z3_2023..z3_3639 is multiplied by 19 (only its low 16 bits enter the
# multiply) and added to the lane at the same position in z3_03..z3_1619.
# Presumably this is the reduction modulo 2^255 - 19, with the upper vectors
# holding coefficients 20..39 - TODO confirm against the representation
# defined earlier in the file.
3437
3438uint32323232 red0 = (z3_2023 & 0xffff) * 19
3439uint32323232 red4 = (z3_3639 & 0xffff) * 19
3440uint32323232 red1 = (z3_2427 & 0xffff) * 19
3441uint32323232 red2 = (z3_2831 & 0xffff) * 19
3442uint32323232 red3 = (z3_3235 & 0xffff) * 19
3443
# red4 (from z3_3639) goes into z3_1619; it is added second in the sequence
# below, and z3_1619 is the first vector the following carry pass reads.
3444int32323232 z3_03 += red0
3445int32323232 z3_1619 += red4
3446int32323232 z3_47 += red1
3447int32323232 z3_811 += red2
3448int32323232 z3_1215 += red3
3449
3450# Reduce coefficients ctd.
# Carry propagation over the lower vectors z3_03..z3_1619, using the same
# select / shift / add pattern as before (shift 13 for sel01, sel12, sel23 and
# 12 for sel30; sel* and the masks are defined outside this excerpt).

# Step 1: carry chain inside z3_1619 alone.
3451carry = select bytes from z3_1619 by sel01
3452uint32323232 carry >>= 13
3453int32323232 z3_1619 += carry
3454
3455carry = select bytes from z3_1619 by sel12
3456uint32323232 carry >>= 13
3457int32323232 z3_1619 += carry
3458
3459carry = select bytes from z3_1619 by sel23
3460uint32323232 carry >>= 13
3461int32323232 z3_1619 += carry
3462
# The sel30 carry out of z3_1619 is multiplied by 19 and added to z3_03.
# The multiply is done as (carry << 4) + carry + carry + carry = 19 * carry,
# replacing the commented-out multiply instruction below.
3463carry = select bytes from z3_1619 by sel30
3464uint32323232 carry >>= 12
3465#int32323232 red = (carry & 0xffff) * 19
3466int32323232 red = carry << 4
3467int32323232 red = red + carry
3468int32323232 red = red + carry
3469int32323232 red = red + carry
3470
3471int32323232 z3_03 += red
3472
3473z3_1619 &= redcoeffmask
3474
3475
# Step 2: full carry pass over z3_03..z3_1215.
3476carry0 = select bytes from z3_03 by sel01
3477carry1 = select bytes from z3_47 by sel01
3478carry2 = select bytes from z3_811 by sel01
3479carry3 = select bytes from z3_1215 by sel01
3480uint32323232 carry0 >>= 13
3481uint32323232 carry1 >>= 13
3482uint32323232 carry2 >>= 13
3483uint32323232 carry3 >>= 13
3484int32323232 z3_03 += carry0
3485int32323232 z3_47 += carry1
3486int32323232 z3_811 += carry2
3487int32323232 z3_1215 += carry3
3488
3489carry0 = select bytes from z3_03 by sel12
3490carry1 = select bytes from z3_47 by sel12
3491carry2 = select bytes from z3_811 by sel12
3492carry3 = select bytes from z3_1215 by sel12
3493uint32323232 carry0 >>= 13
3494uint32323232 carry1 >>= 13
3495uint32323232 carry2 >>= 13
3496uint32323232 carry3 >>= 13
3497int32323232 z3_03 += carry0
3498int32323232 z3_47 += carry1
3499int32323232 z3_811 += carry2
3500int32323232 z3_1215 += carry3
3501
3502carry0 = select bytes from z3_03 by sel23
3503carry1 = select bytes from z3_47 by sel23
3504carry2 = select bytes from z3_811 by sel23
3505carry3 = select bytes from z3_1215 by sel23
3506uint32323232 carry0 >>= 13
3507uint32323232 carry1 >>= 13
3508uint32323232 carry2 >>= 13
3509uint32323232 carry3 >>= 13
3510int32323232 z3_03 += carry0
3511int32323232 z3_47 += carry1
3512int32323232 z3_811 += carry2
3513int32323232 z3_1215 += carry3
3514
# sel30 carries are extracted before masking and are added to the NEXT vector.
3515carry0 = select bytes from z3_03 by sel30
3516carry1 = select bytes from z3_47 by sel30
3517carry2 = select bytes from z3_811 by sel30
3518carry3 = select bytes from z3_1215 by sel30
3519uint32323232 carry0 >>= 12
3520uint32323232 carry1 >>= 12
3521uint32323232 carry2 >>= 12
3522uint32323232 carry3 >>= 12
3523z3_03 &= redcoeffmask
3524z3_47 &= redcoeffmask
3525z3_811 &= redcoeffmask
3526z3_1215 &= redcoeffmask
3527int32323232 z3_47 += carry0
3528int32323232 z3_811 += carry1
3529int32323232 z3_1215 += carry2
3530int32323232 z3_1619 += carry3
3531
3532
# Step 3: second pass over z3_47..z3_1619 with the sel01, sel12 and sel23
# steps only; there is no sel30 step and z3_03 is not touched again.
3533carry1 = select bytes from z3_47 by sel01
3534carry2 = select bytes from z3_811 by sel01
3535carry3 = select bytes from z3_1215 by sel01
3536carry4 = select bytes from z3_1619 by sel01
3537uint32323232 carry1 >>= 13
3538uint32323232 carry2 >>= 13
3539uint32323232 carry3 >>= 13
3540uint32323232 carry4 >>= 13
3541int32323232 z3_47 += carry1
3542int32323232 z3_811 += carry2
3543int32323232 z3_1215 += carry3
3544int32323232 z3_1619 += carry4
3545
3546carry1 = select bytes from z3_47 by sel12
3547carry2 = select bytes from z3_811 by sel12
3548carry3 = select bytes from z3_1215 by sel12
3549carry4 = select bytes from z3_1619 by sel12
3550uint32323232 carry1 >>= 13
3551uint32323232 carry2 >>= 13
3552uint32323232 carry3 >>= 13
3553uint32323232 carry4 >>= 13
3554int32323232 z3_47 += carry1
3555int32323232 z3_811 += carry2
3556int32323232 z3_1215 += carry3
3557int32323232 z3_1619 += carry4
3558
3559carry1 = select bytes from z3_47 by sel23
3560carry2 = select bytes from z3_811 by sel23
3561carry3 = select bytes from z3_1215 by sel23
3562carry4 = select bytes from z3_1619 by sel23
3563uint32323232 carry1 >>= 13
3564uint32323232 carry2 >>= 13
3565uint32323232 carry3 >>= 13
3566uint32323232 carry4 >>= 13
3567int32323232 z3_47 += carry1
3568int32323232 z3_811 += carry2
3569int32323232 z3_1215 += carry3
3570int32323232 z3_1619 += carry4
3571
# Final masking of z3_47..z3_1619 with redcoeffmaskveryend (defined outside
# this excerpt); z3_03 keeps the redcoeffmask masking applied in step 2.
3572z3_47 &= redcoeffmaskveryend
3573z3_811 &= redcoeffmaskveryend
3574z3_1215 &= redcoeffmaskveryend
3575z3_1619 &= redcoeffmaskveryend
3576
3577###################################################################################
3578##########################          Write Result      ##############################
3579####################################################################################
3580#
3581##*(vec128 *) ((retp + 0) & ~15) = x2_03
3582##*(vec128 *) ((retp + 16) & ~15) = x2_47
3583##*(vec128 *) ((retp + 32) & ~15) = x2_811
3584##*(vec128 *) ((retp + 48) & ~15) = x2_1215
3585##*(vec128 *) ((retp + 64) & ~15) = x2_1619
3586##*(vec128 *) ((retp + 80) & ~15) = z2_03
3587##*(vec128 *) ((retp + 96) & ~15) = z2_47
3588##*(vec128 *) ((retp + 112) & ~15) = z2_811
3589##*(vec128 *) ((retp + 128) & ~15) = z2_1215
3590##*(vec128 *) ((retp + 144) & ~15) = z2_1619
3591##*(vec128 *) ((retp + 160) & ~15) = x3_03
3592##*(vec128 *) ((retp + 176) & ~15) = x3_47
3593##*(vec128 *) ((retp + 192) & ~15) = x3_811
3594##*(vec128 *) ((retp + 208) & ~15) = x3_1215
3595##*(vec128 *) ((retp + 224) & ~15) = x3_1619
3596##*(vec128 *) ((retp + 240) & ~15) = z3_03
3597##*(vec128 *) ((retp + 256) & ~15) = z3_47
3598##*(vec128 *) ((retp + 272) & ~15) = z3_811
3599##*(vec128 *) ((retp + 288) & ~15) = z3_1215
3600##*(vec128 *) ((retp + 304) & ~15) = z3_1619
3601
# Loop control. The four 32-bit words of loopmask are OR-ed together; while
# any bit is still set, execution continues at `loop` (label above this
# excerpt).
3602uint32323232 check = loopmask[0] | loopmask[1] | loopmask[2] | loopmask[3]
3603goto loop if (check & 0xffffffff)
3604
# loopmask is exhausted. If `done` is already set, this was the last pass.
3605goto end if (done & 0xffff)
3606
# Otherwise set up another pass: load the 16 bytes at skp + 0 (address forced
# to 16-byte alignment), byte-reorder them with swapendian, and rebuild
# loopmask from 1 shifted left by 8*15 and then by 7 % 8 = 7 bits.
# Presumably this leaves a single bit in the top position of the register and
# the first pass handled the other 16 bytes of the scalar - TODO confirm
# against the loop head.
# prevextbit is copied to prevextbit_stack and back around the reload; the
# reason is not visible here (possibly register pressure) - TODO confirm.
3607prevextbit_stack = prevextbit
3608sk = *(vec128 *) ((skp + 0) & ~15)
3609uint32323232 loopmask = 1
3610loopmask <<= (8 * 15)
3611loopmask <<= (7 % 8)
3612sk = select bytes from sk by swapendian
3613prevextbit = prevextbit_stack
# Setting done makes the `goto end` test above succeed the next time loopmask
# runs out.
3614int32323232 done = 1
3615goto loop
3616
3617####################################################################################
3618#######################   Conditionally swap P2 and P3   ###########################
3619####################################################################################
3620
3621end:
3622
# Branch-free conditional exchange of (x2, z2) with (x3, z3), one 128-bit
# vector at a time:
#   new2 = (v2 & nflip) ^ (v3 & flip)
#   new3 = (v2 & flip)  ^ (v3 & nflip)
# With flip all-ones the two values are exchanged, with flip all-zero they are
# kept. This assumes extbit is such a full-width mask and `zero` is all-zero;
# both are set outside this excerpt - TODO confirm.
# NOTE(review): the exchanged x3_* / z3_* values are neither stored nor read
# again in the visible code; only x2_* / z2_* are written to memory below.
3623flip = extbit
# nflip = bitwise complement of (flip | zero).
3624nflip = ~(flip | zero)
3625
3626tmp0 = x2_03   & nflip
3627tmp1 = x3_03   & flip
3628tmp2 = x2_03   & flip
3629tmp3 = x3_03   & nflip
3630x2_03 = tmp0 ^ tmp1
3631x3_03 = tmp2 ^ tmp3
3632
3633tmp0 = x2_47   & nflip
3634tmp1 = x3_47   & flip
3635tmp2 = x2_47   & flip
3636tmp3 = x3_47   & nflip
3637x2_47 = tmp0 ^ tmp1
3638x3_47 = tmp2 ^ tmp3
3639
3640tmp0 = x2_811  & nflip
3641tmp1 = x3_811  & flip
3642tmp2 = x2_811  & flip
3643tmp3 = x3_811  & nflip
3644x2_811 = tmp0 ^ tmp1
3645x3_811 = tmp2 ^ tmp3
3646
3647tmp0 = x2_1215 & nflip
3648tmp1 = x3_1215 & flip
3649tmp2 = x2_1215 & flip
3650tmp3 = x3_1215 & nflip
3651x2_1215 = tmp0 ^ tmp1
3652x3_1215 = tmp2 ^ tmp3
3653
3654tmp0 = x2_1619 & nflip
3655tmp1 = x3_1619 & flip
3656tmp2 = x2_1619 & flip
3657tmp3 = x3_1619 & nflip
3658x2_1619 = tmp0 ^ tmp1
3659x3_1619 = tmp2 ^ tmp3
3660
# Same exchange for the z vectors.
3661tmp0 = z2_03   & nflip
3662tmp1 = z3_03   & flip
3663tmp2 = z2_03   & flip
3664tmp3 = z3_03   & nflip
3665z2_03 = tmp0 ^ tmp1
3666z3_03 = tmp2 ^ tmp3
3667
3668tmp0 = z2_47   & nflip
3669tmp1 = z3_47   & flip
3670tmp2 = z2_47   & flip
3671tmp3 = z3_47   & nflip
3672z2_47 = tmp0 ^ tmp1
3673z3_47 = tmp2 ^ tmp3
3674
3675tmp0 = z2_811  & nflip
3676tmp1 = z3_811  & flip
3677tmp2 = z2_811  & flip
3678tmp3 = z3_811  & nflip
3679z2_811 = tmp0 ^ tmp1
3680z3_811 = tmp2 ^ tmp3
3681
3682tmp0 = z2_1215 & nflip
3683tmp1 = z3_1215 & flip
3684tmp2 = z2_1215 & flip
3685tmp3 = z3_1215 & nflip
3686z2_1215 = tmp0 ^ tmp1
3687z3_1215 = tmp2 ^ tmp3
3688
3689tmp0 = z2_1619 & nflip
3690tmp1 = z3_1619 & flip
3691tmp2 = z2_1619 & flip
3692tmp3 = z3_1619 & nflip
3693z2_1619 = tmp0 ^ tmp1
3694z3_1619 = tmp2 ^ tmp3
3695
3696
# Write the result: the five x2 vectors go to retp + 0 .. retp + 64 and the
# five z2 vectors to retp + 80 .. retp + 144. Every address is masked with
# ~15, i.e. forced to a 16-byte boundary.
3697*(vec128 *) ((retp + 0) & ~15) = x2_03
3698*(vec128 *) ((retp + 16) & ~15) = x2_47
3699*(vec128 *) ((retp + 32) & ~15) = x2_811
3700*(vec128 *) ((retp + 48) & ~15) = x2_1215
3701*(vec128 *) ((retp + 64) & ~15) = x2_1619
3702*(vec128 *) ((retp + 80) & ~15) = z2_03
3703*(vec128 *) ((retp + 96) & ~15) = z2_47
3704*(vec128 *) ((retp + 112) & ~15) = z2_811
3705*(vec128 *) ((retp + 128) & ~15) = z2_1215
3706*(vec128 *) ((retp + 144) & ~15) = z2_1619
3707
# Epilogue: reload call0..call47 from their *_stack copies, then leave the
# function. Presumably these are the caller-preserved registers that were
# saved to the stack at function entry (the saves are above this excerpt) -
# TODO confirm.
3708call0 = call0_stack
3709call1 = call1_stack
3710call2 = call2_stack
3711call3 = call3_stack
3712call4 = call4_stack
3713call5 = call5_stack
3714call6 = call6_stack
3715call7 = call7_stack
3716call8 = call8_stack
3717call9 = call9_stack
3718call10 = call10_stack
3719call11 = call11_stack
3720call12 = call12_stack
3721call13 = call13_stack
3722call14 = call14_stack
3723call15 = call15_stack
3724call16 = call16_stack
3725call17 = call17_stack
3726call18 = call18_stack
3727call19 = call19_stack
3728call20 = call20_stack
3729call21 = call21_stack
3730call22 = call22_stack
3731call23 = call23_stack
3732call24 = call24_stack
3733call25 = call25_stack
3734call26 = call26_stack
3735call27 = call27_stack
3736call28 = call28_stack
3737call29 = call29_stack
3738call30 = call30_stack
3739call31 = call31_stack
3740call32 = call32_stack
3741call33 = call33_stack
3742call34 = call34_stack
3743call35 = call35_stack
3744call36 = call36_stack
3745call37 = call37_stack
3746call38 = call38_stack
3747call39 = call39_stack
3748call40 = call40_stack
3749call41 = call41_stack
3750call42 = call42_stack
3751call43 = call43_stack
3752call44 = call44_stack
3753call45 = call45_stack
3754call46 = call46_stack
3755call47 = call47_stack
3756
3757
3758leave
3759