# Scratch 32-bit value. ECRYPT_keystream_bytes sets it to 0 and stores it
# through `out` once per byte to clear the output buffer.
1int32 a
2
# Four 32-bit stack arguments. The entry points read them as
#   arg1 -> x, arg2 -> m, arg3 -> out, arg4 -> bytes
# except ECRYPT_keystream_bytes, which reads arg2 -> m (= out), arg3 -> bytes.
3stack32 arg1
4stack32 arg2
5stack32 arg3
6stack32 arg4
7input arg1
8input arg2
9input arg3
10input arg4
11
# Registers declared `caller`. Every entry point copies them into the
# matching *_stack slots declared further down before doing anything else.
# NOTE(review): presumably restored at the `done` label, which is not in this
# part of the file - confirm.
12int32 eax
13int32 ebx
14int32 esi
15int32 edi
16int32 ebp
17caller eax
18caller ebx
19caller esi
20caller edi
21caller ebp
22
23
# k, kbits and iv are not referenced by the code visible in this part of the
# file. NOTE(review): likely used by key/IV setup routines elsewhere.
24int32 k
25int32 kbits
26int32 iv
27
# Loop counter: byte count for the copy/clear loops, round counter (starts at
# 20) for mainloop1 / mainloop2.
28int32 i
29
# x     base address of the 64-byte state (read at x+0 .. x+48, counter words
#       updated at x+32 and x+52)
# m     input pointer
# out   output pointer
# bytes remaining length; spilled to bytes_backup while a batch is processed
30int32 x
31int32 m
32int32 out
33stack32 bytes_backup
34int32 bytes
35
# Save slots for the `caller` registers above.
36stack32 eax_stack
37stack32 ebx_stack
38stack32 esi_stack
39stack32 edi_stack
40stack32 ebp_stack
41
# The four 16-byte rows of the state as loaded from x+0, x+16, x+32, x+48
# for mainloop2. The lane contents are described in the comment that follows.
42int6464 diag0
43int6464 diag1
44int6464 diag2
45int6464 diag3
46# situation at beginning of first round:
47# diag0: x0 x5 x10 x15
48# diag1: x12 x1 x6 x11
49# diag2: x8 x13 x2 x7
50# diag3: x4 x9 x14 x3
51# situation at beginning of second round:
52# diag0: x0 x5 x10 x15
53# diag1: x1 x6 x11 x12
54# diag2: x2 x7 x8 x13
55# diag3: x3 x4 x9 x14
56
# a0..a7 / b0..b7: temporaries of mainloop2. Each aN receives a sum that is
# shifted left, bN receives a copy that is shifted right; both are XORed into
# a diag row, which together forms a 32-bit rotate.
57int6464 a0
58int6464 a1
59int6464 a2
60int6464 a3
61int6464 a4
62int6464 a5
63int6464 a6
64int6464 a7
65int6464 b0
66int6464 b1
67int6464 b2
68int6464 b3
69int6464 b4
70int6464 b5
71int6464 b6
72int6464 b7
73
# z0..z15: one value per state word for the 256-byte path. Each holds that
# word for four blocks, one block per 32-bit lane.
74int6464 z0
75int6464 z1
76int6464 z2
77int6464 z3
78int6464 z4
79int6464 z5
80int6464 z6
81int6464 z7
82int6464 z8
83int6464 z9
84int6464 z10
85int6464 z11
86int6464 z12
87int6464 z13
88int6464 z14
89int6464 z15
90
# Stack copies of the working state. mainloop1 loads four of them at a time,
# updates them and writes them back.
91stack128 z0_stack
92stack128 z1_stack
93stack128 z2_stack
94stack128 z3_stack
95stack128 z4_stack
96stack128 z5_stack
97stack128 z6_stack
98stack128 z7_stack
99stack128 z8_stack
100stack128 z9_stack
101stack128 z10_stack
102stack128 z11_stack
103stack128 z12_stack
104stack128 z13_stack
105stack128 z14_stack
106stack128 z15_stack
107
# Input state for the current batch of four blocks. It seeds the working
# state before mainloop1 and is added back to the result afterwards.
# orig8 / orig9 carry a different block counter value in each lane.
108stack128 orig0
109stack128 orig1
110stack128 orig2
111stack128 orig3
112stack128 orig4
113stack128 orig5
114stack128 orig6
115stack128 orig7
116stack128 orig8
117stack128 orig9
118stack128 orig10
119stack128 orig11
120stack128 orig12
121stack128 orig13
122stack128 orig14
123stack128 orig15
124
# Temporaries of mainloop1 (first of the two interleaved instruction streams).
# Only p, q, r, s, t, u are referenced in the visible code.
125int6464 p
126int6464 q
127int6464 r
128int6464 s
129int6464 t
130int6464 u
131int6464 v
132int6464 w
133
# Temporaries of mainloop1 (second interleaved stream).
# Only mp, mq, mr, ms, mt, mu are referenced in the visible code.
134int6464 mp
135int6464 mq
136int6464 mr
137int6464 ms
138int6464 mt
139int6464 mu
140int6464 mv
141int6464 mw
142
# 32-bit scratch words. in8 / in9 also hold the two block counter words while
# the counter is advanced; all sixteen are used to XOR one output word at a
# time with the message.
143int32 in0
144int32 in1
145int32 in2
146int32 in3
147int32 in4
148int32 in5
149int32 in6
150int32 in7
151int32 in8
152int32 in9
153int32 in10
154int32 in11
155int32 in12
156int32 in13
157int32 in14
158int32 in15
159
# 512-bit (64-byte) stack buffer. When fewer than 64 bytes remain, the input
# is copied here and both m and out are pointed at it.
160stack512 tmp
161
# Holds the real output pointer while `out` is redirected to tmp.
162stack32 ctarget
163
164
# Entry point ECRYPT_keystream_bytes.
#   arg1 -> x      base address of the state
#   arg2 -> m/out  buffer, used as both input and output
#   arg3 -> bytes  length in bytes
# Clears the buffer to zero and then joins the shared path at `start`, which
# XORs the generated stream into it - so the buffer ends up holding the raw
# keystream.
165enter ECRYPT_keystream_bytes
166
# Save the caller's registers.
167eax_stack = eax
168ebx_stack = ebx
169esi_stack = esi
170edi_stack = edi
171ebp_stack = ebp
172
173x = arg1
174m = arg2
175out = m
176bytes = arg3
177
# Nothing to do for a zero length; `done` lies outside this part of the file.
178              unsigned>? bytes - 0
179goto done if !unsigned>
180
# Zero-fill `bytes` bytes at out, then rewind out to the start of the buffer.
181a = 0
182i = bytes
183while (i) { *out++ = a; --i }
184out -= bytes
185
186goto start
187
188
# Entry point ECRYPT_decrypt_bytes.
#   arg1 -> x      base address of the state
#   arg2 -> m      input pointer
#   arg3 -> out    output pointer
#   arg4 -> bytes  length in bytes
# Takes exactly the same path as ECRYPT_encrypt_bytes: it jumps to `start`.
189enter ECRYPT_decrypt_bytes
190
# Save the caller's registers.
191eax_stack = eax
192ebx_stack = ebx
193esi_stack = esi
194edi_stack = edi
195ebp_stack = ebp
196
197x = arg1
198m = arg2
199out = arg3
200bytes = arg4
201
# Nothing to do for a zero length; `done` lies outside this part of the file.
202              unsigned>? bytes - 0
203goto done if !unsigned>
204
205goto start
206
207
# Entry point ECRYPT_encrypt_bytes.
#   arg1 -> x      base address of the state
#   arg2 -> m      input pointer
#   arg3 -> out    output pointer
#   arg4 -> bytes  length in bytes
# There is no jump at the end: execution falls through into `start`.
208enter ECRYPT_encrypt_bytes
209
# Save the caller's registers.
210eax_stack = eax
211ebx_stack = ebx
212esi_stack = esi
213edi_stack = edi
214ebp_stack = ebp
215
216x = arg1
217m = arg2
218out = arg3
219bytes = arg4
220
# Nothing to do for a zero length; `done` lies outside this part of the file.
221              unsigned>? bytes - 0
222goto done if !unsigned>
223
224
# start: shared body of all three entry points.
# With fewer than 256 bytes to process, go straight to the one-block path.
# Otherwise set up the four-blocks-at-a-time path: every state word except
# words 8 and 9 is broadcast into all four 32-bit lanes of an XMM value and
# saved in orig<N>. Words 8 and 9 are filled in per batch at `bytesatleast256`.
# Lane selection follows the memory layout given in the diag0..diag3 comment:
#   x+0 : x0  x5  x10 x15      x+32: x8  x13 x2  x7
#   x+16: x12 x1  x6  x11      x+48: x4  x9  x14 x3
225start:
226
227                              unsigned<? bytes - 256
228  goto bytesbetween1and255 if unsigned<
229
# Row at x+0: words 0, 5, 10, 15. z0 is overwritten last because it is the
# source of the other three broadcasts.
230  z0 = *(int128 *) (x + 0)
231  z5 = z0[1,1,1,1]
232  z10 = z0[2,2,2,2]
233  z15 = z0[3,3,3,3]
234  z0 = z0[0,0,0,0]
235  orig5 = z5
236  orig10 = z10
237  orig15 = z15
238  orig0 = z0
239
# Row at x+16: words 12, 1, 6, 11.
240  z1 = *(int128 *) (x + 16)
241  z6 = z1[2,2,2,2]
242  z11 = z1[3,3,3,3]
243  z12 = z1[0,0,0,0]
244  z1 = z1[1,1,1,1]
245  orig6 = z6
246  orig11 = z11
247  orig12 = z12
248  orig1 = z1
249
# Row at x+32: words 13, 2, 7 (lane 0, word 8, is skipped here).
250  z2 = *(int128 *) (x + 32)
251  z7 = z2[3,3,3,3]
252  z13 = z2[1,1,1,1]
253  z2 = z2[2,2,2,2]
254  orig7 = z7
255  orig13 = z13
256  orig2 = z2
257
# Row at x+48: words 4, 14, 3 (lane 1, word 9, is skipped here).
258  z3 = *(int128 *) (x + 48)
259  z4 = z3[0,0,0,0]
260  z14 = z3[2,2,2,2]
261  z3 = z3[3,3,3,3]
262  orig4 = z4
263  orig14 = z14
264  orig3 = z3
265
# bytesatleast256: start of one batch of four 64-byte blocks.
# Re-entered from the bottom of the output code while at least 256 bytes
# remain.
266bytesatleast256:
267
# in8 (at x+32) and in9 (at x+52) are treated as the low and high half of a
# 64-bit counter. Lane k of orig8/orig9 receives counter + k, with the carry
# out of the low word propagated into the high word. After the fourth
# increment the counter (now +4) is written back to the state.
268  in8 = *(uint32 *) (x + 32)
269  in9 = *(uint32 *) (x + 52)
270  ((uint32 *) &orig8)[0] = in8
271  ((uint32 *) &orig9)[0] = in9
272  carry? in8 += 1
273  in9 += 0 + carry
274  ((uint32 *) &orig8)[1] = in8
275  ((uint32 *) &orig9)[1] = in9
276  carry? in8 += 1
277  in9 += 0 + carry
278  ((uint32 *) &orig8)[2] = in8
279  ((uint32 *) &orig9)[2] = in9
280  carry? in8 += 1
281  in9 += 0 + carry
282  ((uint32 *) &orig8)[3] = in8
283  ((uint32 *) &orig9)[3] = in9
284  carry? in8 += 1
285  in9 += 0 + carry
286  *(uint32 *) (x + 32) = in8
287  *(uint32 *) (x + 52) = in9
288
# bytes is parked on the stack for the duration of the batch and reloaded
# after the 256 output bytes have been written.
289  bytes_backup = bytes
290
# Round counter; mainloop1 subtracts 2 per pass.
291i = 20
292
# Copy the input state orig<N> into the working state z<N>_stack.
293  z5 = orig5
294  z10 = orig10
295  z15 = orig15
296  z14 = orig14
297  z3 = orig3
298  z6 = orig6
299  z11 = orig11
300  z1 = orig1
301
302  z5_stack = z5
303  z10_stack = z10
304  z15_stack = z15
305  z14_stack = z14
306  z3_stack = z3
307  z6_stack = z6
308  z11_stack = z11
309  z1_stack = z1
310
# Words 0, 12, 4 and 8 are loaded into p, t, q, r: besides being stored to
# their stack slots they stay live as the operands of the first group of
# mainloop1.
311  z7 = orig7
312  z13 = orig13
313  z2 = orig2
314  z9 = orig9
315                  p = orig0
316                  t = orig12
317                  q = orig4
318                  r = orig8
319
320  z7_stack = z7
321  z13_stack = z13
322  z2_stack = z2
323  z9_stack = z9
324  z0_stack = p
325  z12_stack = t
326  z4_stack = q
327  z8_stack = r
328
329
# mainloop1: round loop of the four-block path. One pass runs eight
# quarter-round groups (four on words taken 4 apart, then four on words
# taken from the same row of four) and then subtracts 2 from i.
# All arithmetic is on four independent 32-bit lanes (one lane per block).
#
# Group 1, words (0, 4, 8, 12) with p = x0, q = x4, r = x8, t = x12
# (already loaded before the label, or at the bottom of the previous pass):
#   x4  ^= (x0  + x12) <<< 7
#   x8  ^= (x4  + x0 ) <<< 9
#   x12 ^= (x8  + x4 ) <<< 13
#   x0  ^= (x12 + x8 ) <<< 18
# Each rotate is built as: right shift by (32 - n), left shift by n, both
# XORed into the target. s keeps the original x12 while t is consumed.
# The indented mt/mp/mq/mr loads are the operands of group 2, issued early.
330mainloop1:
331
332		  assign xmm0 to p
333		  assign xmm1 to r
334		  assign xmm2 to t
335		  assign xmm3 to q
336
337                  s = t
338uint32323232      t += p
339                  u = t
340uint32323232      t >>= 25
341                  q ^= t
342uint32323232      u <<= 7
343                  q ^= u
344                  z4_stack = q
345
346                  t = p
347uint32323232      t += q
348                  u = t
349uint32323232      t >>= 23
350                  r ^= t
351uint32323232      u <<= 9
352                  r ^= u
353                  z8_stack = r
354
355uint32323232      q += r
356                  u = q
357uint32323232      q >>= 19
358                  s ^= q
359uint32323232      u <<= 13
360                  s ^= u
361                                  mt = z1_stack
362                                  mp = z5_stack
363                                  mq = z9_stack
364                                  mr = z13_stack
365
366                  z12_stack = s
367
368uint32323232      r += s
369                  u = r
370uint32323232      r >>= 14
371                  p ^= r
372uint32323232      u <<= 18
373                  p ^= u
374                  z0_stack = p
375
# Group 2 of mainloop1, words (5, 9, 13, 1) with mt = x1, mp = x5, mq = x9,
# mr = x13 (loaded from z1_stack / z5_stack / z9_stack / z13_stack during
# the previous group):
#   x9  ^= (x5  + x1 ) <<< 7
#   x13 ^= (x9  + x5 ) <<< 9
#   x1  ^= (x13 + x9 ) <<< 13
#   x5  ^= (x1  + x13) <<< 18
# ms keeps the original x1. The indented t/p/q/r loads are the operands of
# group 3, issued early.
376		  		  assign xmm2 to mt
377		  		  assign xmm3 to mq
378		  		  assign xmm4 to mp
379		  		  assign xmm5 to mr
380
381                                  ms = mt
382uint32323232                      mt += mp
383                                  mu = mt
384uint32323232                      mt >>= 25
385                                  mq ^= mt
386uint32323232                      mu <<= 7
387                                  mq ^= mu
388                                  z9_stack = mq
389
390                                  mt = mp
391uint32323232                      mt += mq
392                                  mu = mt
393uint32323232                      mt >>= 23
394                                  mr ^= mt
395uint32323232                      mu <<= 9
396                                  mr ^= mu
397                                  z13_stack = mr
398
399uint32323232                      mq += mr
400                                  mu = mq
401uint32323232                      mq >>= 19
402                                  ms ^= mq
403uint32323232                      mu <<= 13
404                                  ms ^= mu
405                                                  t = z6_stack
406                                                  p = z10_stack
407                                                  q = z14_stack
408                                                  r = z2_stack
409
410                                  z1_stack = ms
411
412uint32323232                      mr += ms
413                                  mu = mr
414uint32323232                      mr >>= 14
415                                  mp ^= mr
416uint32323232                      mu <<= 18
417                                  mp ^= mu
418                                  z5_stack = mp
419
# Group 3 of mainloop1, words (10, 14, 2, 6) with t = x6, p = x10, q = x14,
# r = x2 (loaded from z6_stack / z10_stack / z14_stack / z2_stack during the
# previous group):
#   x14 ^= (x10 + x6 ) <<< 7
#   x2  ^= (x14 + x10) <<< 9
#   x6  ^= (x2  + x14) <<< 13
#   x10 ^= (x6  + x2 ) <<< 18
# s keeps the original x6. The indented mt/mp/mq/mr loads are the operands
# of group 4, issued early.
420		  				  assign xmm0 to p
421						  assign xmm1 to r
422						  assign xmm2 to t
423						  assign xmm3 to q
424
425                                                  s = t
426uint32323232                                      t += p
427                                                  u = t
428uint32323232                                      t >>= 25
429                                                  q ^= t
430uint32323232                                      u <<= 7
431                                                  q ^= u
432                                                  z14_stack = q
433
434                                                  t = p
435uint32323232                                      t += q
436                                                  u = t
437uint32323232                                      t >>= 23
438                                                  r ^= t
439uint32323232                                      u <<= 9
440                                                  r ^= u
441                                                  z2_stack = r
442
443uint32323232                                      q += r
444                                                  u = q
445uint32323232                                      q >>= 19
446                                                  s ^= q
447uint32323232                                      u <<= 13
448                                                  s ^= u
449                                                                  mt = z11_stack
450                                                                  mp = z15_stack
451                                                                  mq = z3_stack
452                                                                  mr = z7_stack
453
454                                                  z6_stack = s
455
456uint32323232                                      r += s
457                                                  u = r
458uint32323232                                      r >>= 14
459                                                  p ^= r
460uint32323232                                      u <<= 18
461                                                  p ^= u
462                                                  z10_stack = p
463
# Group 4 of mainloop1, words (15, 3, 7, 11) with mt = x11, mp = x15,
# mq = x3, mr = x7 (loaded from z11_stack / z15_stack / z3_stack / z7_stack
# during the previous group):
#   x3  ^= (x15 + x11) <<< 7
#   x7  ^= (x3  + x15) <<< 9
#   x11 ^= (x7  + x3 ) <<< 13
#   x15 ^= (x11 + x7 ) <<< 18
# ms keeps the original x11. The t/p/q/r loads near the end fetch words
# 3, 0, 1, 2 for group 5; z3_stack is read only after this group has
# stored its new value.
464		  						  assign xmm2 to mt
465						  		  assign xmm3 to mq
466						  		  assign xmm4 to mp
467						  		  assign xmm5 to mr
468
469                                                                  ms = mt
470uint32323232                                                      mt += mp
471                                                                  mu = mt
472uint32323232                                                      mt >>= 25
473                                                                  mq ^= mt
474uint32323232                                                      mu <<= 7
475                                                                  mq ^= mu
476                                                                  z3_stack = mq
477
478                                                                  mt = mp
479uint32323232                                                      mt += mq
480                                                                  mu = mt
481uint32323232                                                      mt >>= 23
482                                                                  mr ^= mt
483uint32323232                                                      mu <<= 9
484                                                                  mr ^= mu
485                                                                  z7_stack = mr
486
487uint32323232                                                      mq += mr
488                                                                  mu = mq
489uint32323232                                                      mq >>= 19
490                                                                  ms ^= mq
491uint32323232                                                      mu <<= 13
492                                                                  ms ^= mu
493                  t = z3_stack
494                  p = z0_stack
495                  q = z1_stack
496                  r = z2_stack
497
498                                                                  z11_stack = ms
499
500uint32323232                                                      mr += ms
501                                                                  mu = mr
502uint32323232                                                      mr >>= 14
503                                                                  mp ^= mr
504uint32323232                                                      mu <<= 18
505                                                                  mp ^= mu
506                                                                  z15_stack = mp
507
# Group 5 of mainloop1 (first group of the second half of the pass),
# words (0, 1, 2, 3) with t = x3, p = x0, q = x1, r = x2 (loaded from
# z3_stack / z0_stack / z1_stack / z2_stack during the previous group):
#   x1 ^= (x0 + x3) <<< 7
#   x2 ^= (x1 + x0) <<< 9
#   x3 ^= (x2 + x1) <<< 13
#   x0 ^= (x3 + x2) <<< 18
# s keeps the original x3. The indented mt/mp/mq/mr loads are the operands
# of group 6, issued early.
508		  assign xmm0 to p
509		  assign xmm1 to r
510		  assign xmm2 to t
511		  assign xmm3 to q
512
513                  s = t
514uint32323232      t += p
515                  u = t
516uint32323232      t >>= 25
517                  q ^= t
518uint32323232      u <<= 7
519                  q ^= u
520                  z1_stack = q
521
522                  t = p
523uint32323232      t += q
524                  u = t
525uint32323232      t >>= 23
526                  r ^= t
527uint32323232      u <<= 9
528                  r ^= u
529                  z2_stack = r
530
531uint32323232      q += r
532                  u = q
533uint32323232      q >>= 19
534                  s ^= q
535uint32323232      u <<= 13
536                  s ^= u
537                                  mt = z4_stack
538                                  mp = z5_stack
539                                  mq = z6_stack
540                                  mr = z7_stack
541
542                  z3_stack = s
543
544uint32323232      r += s
545                  u = r
546uint32323232      r >>= 14
547                  p ^= r
548uint32323232      u <<= 18
549                  p ^= u
550                  z0_stack = p
551
# Group 6 of mainloop1, words (5, 6, 7, 4) with mt = x4, mp = x5, mq = x6,
# mr = x7 (loaded from z4_stack / z5_stack / z6_stack / z7_stack during the
# previous group):
#   x6 ^= (x5 + x4) <<< 7
#   x7 ^= (x6 + x5) <<< 9
#   x4 ^= (x7 + x6) <<< 13
#   x5 ^= (x4 + x7) <<< 18
# ms keeps the original x4. The indented t/p/q/r loads are the operands of
# group 7, issued early.
552		  		  assign xmm2 to mt
553		  		  assign xmm3 to mq
554		  		  assign xmm4 to mp
555		  		  assign xmm5 to mr
556
557                                  ms = mt
558uint32323232                      mt += mp
559                                  mu = mt
560uint32323232                      mt >>= 25
561                                  mq ^= mt
562uint32323232                      mu <<= 7
563                                  mq ^= mu
564                                  z6_stack = mq
565
566                                  mt = mp
567uint32323232                      mt += mq
568                                  mu = mt
569uint32323232                      mt >>= 23
570                                  mr ^= mt
571uint32323232                      mu <<= 9
572                                  mr ^= mu
573                                  z7_stack = mr
574
575uint32323232                      mq += mr
576                                  mu = mq
577uint32323232                      mq >>= 19
578                                  ms ^= mq
579uint32323232                      mu <<= 13
580                                  ms ^= mu
581                                                  t = z9_stack
582                                                  p = z10_stack
583                                                  q = z11_stack
584                                                  r = z8_stack
585
586                                  z4_stack = ms
587
588uint32323232                      mr += ms
589                                  mu = mr
590uint32323232                      mr >>= 14
591                                  mp ^= mr
592uint32323232                      mu <<= 18
593                                  mp ^= mu
594                                  z5_stack = mp
595
# Group 7 of mainloop1, words (10, 11, 8, 9) with t = x9, p = x10, q = x11,
# r = x8 (loaded from z9_stack / z10_stack / z11_stack / z8_stack during the
# previous group):
#   x11 ^= (x10 + x9 ) <<< 7
#   x8  ^= (x11 + x10) <<< 9
#   x9  ^= (x8  + x11) <<< 13
#   x10 ^= (x9  + x8 ) <<< 18
# s keeps the original x9. The indented mt/mp/mq/mr loads are the operands
# of group 8, issued early.
596		  				  assign xmm0 to p
597						  assign xmm1 to r
598						  assign xmm2 to t
599						  assign xmm3 to q
600
601                                                  s = t
602uint32323232                                      t += p
603                                                  u = t
604uint32323232                                      t >>= 25
605                                                  q ^= t
606uint32323232                                      u <<= 7
607                                                  q ^= u
608                                                  z11_stack = q
609
610                                                  t = p
611uint32323232                                      t += q
612                                                  u = t
613uint32323232                                      t >>= 23
614                                                  r ^= t
615uint32323232                                      u <<= 9
616                                                  r ^= u
617                                                  z8_stack = r
618
619uint32323232                                      q += r
620                                                  u = q
621uint32323232                                      q >>= 19
622                                                  s ^= q
623uint32323232                                      u <<= 13
624                                                  s ^= u
625                                                                  mt = z14_stack
626                                                                  mp = z15_stack
627                                                                  mq = z12_stack
628                                                                  mr = z13_stack
629
630                                                  z9_stack = s
631
632uint32323232                                      r += s
633                                                  u = r
634uint32323232                                      r >>= 14
635                                                  p ^= r
636uint32323232                                      u <<= 18
637                                                  p ^= u
638                                                  z10_stack = p
639
# Group 8 of mainloop1, words (15, 12, 13, 14) with mt = x14, mp = x15,
# mq = x12, mr = x13 (loaded from z14_stack / z15_stack / z12_stack /
# z13_stack during the previous group):
#   x12 ^= (x15 + x14) <<< 7
#   x13 ^= (x12 + x15) <<< 9
#   x14 ^= (x13 + x12) <<< 13
#   x15 ^= (x14 + x13) <<< 18
# ms keeps the original x14. The t/p/q/r loads near the end fetch words
# 12, 0, 4, 8 for group 1 of the next pass; z12_stack is read only after
# this group has stored its new value.
640		  						  assign xmm2 to mt
641						  		  assign xmm3 to mq
642						  		  assign xmm4 to mp
643						  		  assign xmm5 to mr
644
645                                                                  ms = mt
646uint32323232                                                      mt += mp
647                                                                  mu = mt
648uint32323232                                                      mt >>= 25
649                                                                  mq ^= mt
650uint32323232                                                      mu <<= 7
651                                                                  mq ^= mu
652                                                                  z12_stack = mq
653
654                                                                  mt = mp
655uint32323232                                                      mt += mq
656                                                                  mu = mt
657uint32323232                                                      mt >>= 23
658                                                                  mr ^= mt
659uint32323232                                                      mu <<= 9
660                                                                  mr ^= mu
661                                                                  z13_stack = mr
662
663uint32323232                                                      mq += mr
664                                                                  mu = mq
665uint32323232                                                      mq >>= 19
666                                                                  ms ^= mq
667uint32323232                                                      mu <<= 13
668                                                                  ms ^= mu
669                  t = z12_stack
670                  p = z0_stack
671                  q = z4_stack
672                  r = z8_stack
673
674                                                                  z14_stack = ms
675
676uint32323232                                                      mr += ms
677                                                                  mu = mr
678uint32323232                                                      mr >>= 14
679                                                                  mp ^= mr
680uint32323232                                                      mu <<= 18
681                                                                  mp ^= mu
682                                                                  z15_stack = mp
683
# Two rounds' worth of groups done: i counts 20, 18, ... and the loop repeats
# while the decremented value is still above zero (10 passes in total).
684                  unsigned>? i -= 2
685goto mainloop1 if unsigned>
686
# Output of state words 0..3 for the four blocks of this batch.
# The saved input words orig0..orig3 are added lane-wise to the round output.
# Then, four times over: the low 32 bits of each register are XORed with the
# input at m and stored at out, and `<<<= 96` rotates the 128-bit register so
# that another lane becomes the low word.
# The four passes write to offsets 0, 64, 128 and 192 (+ 4 * word index),
# i.e. one 64-byte block per lane.
# NOTE(review): the lane order is expected to be 0, 1, 2, 3, matching the
# order in which the counters were placed into orig8/orig9 - confirm against
# the qhasm definition of `<<<= 96`.
687  z0 = z0_stack
688  z1 = z1_stack
689  z2 = z2_stack
690  z3 = z3_stack
691  uint32323232 z0 += orig0
692  uint32323232 z1 += orig1
693  uint32323232 z2 += orig2
694  uint32323232 z3 += orig3
# First pass: bytes 0..15 of the block at offset 0.
695  in0 = z0
696  in1 = z1
697  in2 = z2
698  in3 = z3
699  z0 <<<= 96
700  z1 <<<= 96
701  z2 <<<= 96
702  z3 <<<= 96
703  in0 ^= *(uint32 *) (m + 0)
704  in1 ^= *(uint32 *) (m + 4)
705  in2 ^= *(uint32 *) (m + 8)
706  in3 ^= *(uint32 *) (m + 12)
707  *(uint32 *) (out + 0) = in0
708  *(uint32 *) (out + 4) = in1
709  *(uint32 *) (out + 8) = in2
710  *(uint32 *) (out + 12) = in3
# Second pass: bytes 0..15 of the block at offset 64.
711  in0 = z0
712  in1 = z1
713  in2 = z2
714  in3 = z3
715  z0 <<<= 96
716  z1 <<<= 96
717  z2 <<<= 96
718  z3 <<<= 96
719  in0 ^= *(uint32 *) (m + 64)
720  in1 ^= *(uint32 *) (m + 68)
721  in2 ^= *(uint32 *) (m + 72)
722  in3 ^= *(uint32 *) (m + 76)
723  *(uint32 *) (out + 64) = in0
724  *(uint32 *) (out + 68) = in1
725  *(uint32 *) (out + 72) = in2
726  *(uint32 *) (out + 76) = in3
# Third pass: bytes 0..15 of the block at offset 128.
727  in0 = z0
728  in1 = z1
729  in2 = z2
730  in3 = z3
731  z0 <<<= 96
732  z1 <<<= 96
733  z2 <<<= 96
734  z3 <<<= 96
735  in0 ^= *(uint32 *) (m + 128)
736  in1 ^= *(uint32 *) (m + 132)
737  in2 ^= *(uint32 *) (m + 136)
738  in3 ^= *(uint32 *) (m + 140)
739  *(uint32 *) (out + 128) = in0
740  *(uint32 *) (out + 132) = in1
741  *(uint32 *) (out + 136) = in2
742  *(uint32 *) (out + 140) = in3
# Fourth pass: bytes 0..15 of the block at offset 192 (no further rotate
# needed).
743  in0 = z0
744  in1 = z1
745  in2 = z2
746  in3 = z3
747  in0 ^= *(uint32 *) (m + 192)
748  in1 ^= *(uint32 *) (m + 196)
749  in2 ^= *(uint32 *) (m + 200)
750  in3 ^= *(uint32 *) (m + 204)
751  *(uint32 *) (out + 192) = in0
752  *(uint32 *) (out + 196) = in1
753  *(uint32 *) (out + 200) = in2
754  *(uint32 *) (out + 204) = in3
755
# Output of state words 4..7 for the four blocks of this batch.
# orig4..orig7 are added lane-wise to the round output; then the low word of
# each register is XORed with the input and stored, with `<<<= 96` bringing a
# new lane into the low position between passes.
# The four passes write bytes 16..31 of the blocks at offsets 0, 64, 128, 192.
756  z4 = z4_stack
757  z5 = z5_stack
758  z6 = z6_stack
759  z7 = z7_stack
760  uint32323232 z4 += orig4
761  uint32323232 z5 += orig5
762  uint32323232 z6 += orig6
763  uint32323232 z7 += orig7
# First pass: block at offset 0.
764  in4 = z4
765  in5 = z5
766  in6 = z6
767  in7 = z7
768  z4 <<<= 96
769  z5 <<<= 96
770  z6 <<<= 96
771  z7 <<<= 96
772  in4 ^= *(uint32 *) (m + 16)
773  in5 ^= *(uint32 *) (m + 20)
774  in6 ^= *(uint32 *) (m + 24)
775  in7 ^= *(uint32 *) (m + 28)
776  *(uint32 *) (out + 16) = in4
777  *(uint32 *) (out + 20) = in5
778  *(uint32 *) (out + 24) = in6
779  *(uint32 *) (out + 28) = in7
# Second pass: block at offset 64.
780  in4 = z4
781  in5 = z5
782  in6 = z6
783  in7 = z7
784  z4 <<<= 96
785  z5 <<<= 96
786  z6 <<<= 96
787  z7 <<<= 96
788  in4 ^= *(uint32 *) (m + 80)
789  in5 ^= *(uint32 *) (m + 84)
790  in6 ^= *(uint32 *) (m + 88)
791  in7 ^= *(uint32 *) (m + 92)
792  *(uint32 *) (out + 80) = in4
793  *(uint32 *) (out + 84) = in5
794  *(uint32 *) (out + 88) = in6
795  *(uint32 *) (out + 92) = in7
# Third pass: block at offset 128.
796  in4 = z4
797  in5 = z5
798  in6 = z6
799  in7 = z7
800  z4 <<<= 96
801  z5 <<<= 96
802  z6 <<<= 96
803  z7 <<<= 96
804  in4 ^= *(uint32 *) (m + 144)
805  in5 ^= *(uint32 *) (m + 148)
806  in6 ^= *(uint32 *) (m + 152)
807  in7 ^= *(uint32 *) (m + 156)
808  *(uint32 *) (out + 144) = in4
809  *(uint32 *) (out + 148) = in5
810  *(uint32 *) (out + 152) = in6
811  *(uint32 *) (out + 156) = in7
# Fourth pass: block at offset 192 (no further rotate needed).
812  in4 = z4
813  in5 = z5
814  in6 = z6
815  in7 = z7
816  in4 ^= *(uint32 *) (m + 208)
817  in5 ^= *(uint32 *) (m + 212)
818  in6 ^= *(uint32 *) (m + 216)
819  in7 ^= *(uint32 *) (m + 220)
820  *(uint32 *) (out + 208) = in4
821  *(uint32 *) (out + 212) = in5
822  *(uint32 *) (out + 216) = in6
823  *(uint32 *) (out + 220) = in7
824
# Output of state words 8..11 for the four blocks of this batch.
# orig8..orig11 are added lane-wise to the round output (orig8 / orig9 hold a
# different counter value per lane); then the low word of each register is
# XORed with the input and stored, with `<<<= 96` bringing a new lane into
# the low position between passes.
# The four passes write bytes 32..47 of the blocks at offsets 0, 64, 128, 192.
825  z8 = z8_stack
826  z9 = z9_stack
827  z10 = z10_stack
828  z11 = z11_stack
829  uint32323232 z8 += orig8
830  uint32323232 z9 += orig9
831  uint32323232 z10 += orig10
832  uint32323232 z11 += orig11
# First pass: block at offset 0.
833  in8 = z8
834  in9 = z9
835  in10 = z10
836  in11 = z11
837  z8 <<<= 96
838  z9 <<<= 96
839  z10 <<<= 96
840  z11 <<<= 96
841  in8 ^= *(uint32 *) (m + 32)
842  in9 ^= *(uint32 *) (m + 36)
843  in10 ^= *(uint32 *) (m + 40)
844  in11 ^= *(uint32 *) (m + 44)
845  *(uint32 *) (out + 32) = in8
846  *(uint32 *) (out + 36) = in9
847  *(uint32 *) (out + 40) = in10
848  *(uint32 *) (out + 44) = in11
# Second pass: block at offset 64.
849  in8 = z8
850  in9 = z9
851  in10 = z10
852  in11 = z11
853  z8 <<<= 96
854  z9 <<<= 96
855  z10 <<<= 96
856  z11 <<<= 96
857  in8 ^= *(uint32 *) (m + 96)
858  in9 ^= *(uint32 *) (m + 100)
859  in10 ^= *(uint32 *) (m + 104)
860  in11 ^= *(uint32 *) (m + 108)
861  *(uint32 *) (out + 96) = in8
862  *(uint32 *) (out + 100) = in9
863  *(uint32 *) (out + 104) = in10
864  *(uint32 *) (out + 108) = in11
# Third pass: block at offset 128.
865  in8 = z8
866  in9 = z9
867  in10 = z10
868  in11 = z11
869  z8 <<<= 96
870  z9 <<<= 96
871  z10 <<<= 96
872  z11 <<<= 96
873  in8 ^= *(uint32 *) (m + 160)
874  in9 ^= *(uint32 *) (m + 164)
875  in10 ^= *(uint32 *) (m + 168)
876  in11 ^= *(uint32 *) (m + 172)
877  *(uint32 *) (out + 160) = in8
878  *(uint32 *) (out + 164) = in9
879  *(uint32 *) (out + 168) = in10
880  *(uint32 *) (out + 172) = in11
# Fourth pass: block at offset 192 (no further rotate needed).
881  in8 = z8
882  in9 = z9
883  in10 = z10
884  in11 = z11
885  in8 ^= *(uint32 *) (m + 224)
886  in9 ^= *(uint32 *) (m + 228)
887  in10 ^= *(uint32 *) (m + 232)
888  in11 ^= *(uint32 *) (m + 236)
889  *(uint32 *) (out + 224) = in8
890  *(uint32 *) (out + 228) = in9
891  *(uint32 *) (out + 232) = in10
892  *(uint32 *) (out + 236) = in11
893
# Output of state words 12..15 for the four blocks of this batch, followed by
# the batch loop control.
# orig12..orig15 are added lane-wise to the round output; then the low word
# of each register is XORed with the input and stored, with `<<<= 96`
# bringing a new lane into the low position between passes.
# The four passes write bytes 48..63 of the blocks at offsets 0, 64, 128, 192.
894  z12 = z12_stack
895  z13 = z13_stack
896  z14 = z14_stack
897  z15 = z15_stack
898  uint32323232 z12 += orig12
899  uint32323232 z13 += orig13
900  uint32323232 z14 += orig14
901  uint32323232 z15 += orig15
# First pass: block at offset 0.
902  in12 = z12
903  in13 = z13
904  in14 = z14
905  in15 = z15
906  z12 <<<= 96
907  z13 <<<= 96
908  z14 <<<= 96
909  z15 <<<= 96
910  in12 ^= *(uint32 *) (m + 48)
911  in13 ^= *(uint32 *) (m + 52)
912  in14 ^= *(uint32 *) (m + 56)
913  in15 ^= *(uint32 *) (m + 60)
914  *(uint32 *) (out + 48) = in12
915  *(uint32 *) (out + 52) = in13
916  *(uint32 *) (out + 56) = in14
917  *(uint32 *) (out + 60) = in15
# Second pass: block at offset 64.
918  in12 = z12
919  in13 = z13
920  in14 = z14
921  in15 = z15
922  z12 <<<= 96
923  z13 <<<= 96
924  z14 <<<= 96
925  z15 <<<= 96
926  in12 ^= *(uint32 *) (m + 112)
927  in13 ^= *(uint32 *) (m + 116)
928  in14 ^= *(uint32 *) (m + 120)
929  in15 ^= *(uint32 *) (m + 124)
930  *(uint32 *) (out + 112) = in12
931  *(uint32 *) (out + 116) = in13
932  *(uint32 *) (out + 120) = in14
933  *(uint32 *) (out + 124) = in15
# Third pass: block at offset 128.
934  in12 = z12
935  in13 = z13
936  in14 = z14
937  in15 = z15
938  z12 <<<= 96
939  z13 <<<= 96
940  z14 <<<= 96
941  z15 <<<= 96
942  in12 ^= *(uint32 *) (m + 176)
943  in13 ^= *(uint32 *) (m + 180)
944  in14 ^= *(uint32 *) (m + 184)
945  in15 ^= *(uint32 *) (m + 188)
946  *(uint32 *) (out + 176) = in12
947  *(uint32 *) (out + 180) = in13
948  *(uint32 *) (out + 184) = in14
949  *(uint32 *) (out + 188) = in15
# Fourth pass: block at offset 192 (no further rotate needed).
950  in12 = z12
951  in13 = z13
952  in14 = z14
953  in15 = z15
954  in12 ^= *(uint32 *) (m + 240)
955  in13 ^= *(uint32 *) (m + 244)
956  in14 ^= *(uint32 *) (m + 248)
957  in15 ^= *(uint32 *) (m + 252)
958  *(uint32 *) (out + 240) = in12
959  *(uint32 *) (out + 244) = in13
960  *(uint32 *) (out + 248) = in14
961  *(uint32 *) (out + 252) = in15
962
# Reload the length that was parked on the stack at the top of the batch.
963  bytes = bytes_backup
964
# 256 bytes consumed: advance both pointers and run another four-block batch
# while at least 256 bytes remain.
965  bytes -= 256
966  m += 256
967  out += 256
968                           unsigned<? bytes - 256
969  goto bytesatleast256 if !unsigned<
970
# Exactly zero bytes left: finished. Otherwise fall through to the
# one-block path for the remaining 1..255 bytes.
971                unsigned>? bytes - 0
972  goto done if !unsigned>
973
# bytesbetween1and255: one-block path, entered with 1..255 bytes remaining.
# A full block (64 bytes or more remaining) is processed in place, so the
# copy below is skipped. For a short final block the remaining input bytes
# are copied into the 64-byte stack buffer tmp, and both m and out are
# redirected to tmp; the real destination is remembered in ctarget.
# NOTE(review): copying the result from tmp back to ctarget presumably
# happens after the block has been generated, outside this part of the file.
974bytesbetween1and255:
975
976                  unsigned<? bytes - 64
977  goto nocopy if !unsigned<
978
979    ctarget = out
980
# Byte-wise copy of `bytes` bytes from m to tmp; out is reset afterwards
# because the copy loop advanced it.
981    out = &tmp
982    i = bytes
983    while (i) { *out++ = *m++; --i }
984    out = &tmp
985    m = &tmp
986
987  nocopy:
988
# Park the length on the stack while the block is computed.
989  bytes_backup = bytes
990
991
992
# Set-up for mainloop2 (single-block path): load the four 16-byte rows of the
# state at x into diag0..diag3. The lane contents are the ones listed in the
# "situation at beginning of first round" comment near the declarations.
993diag0 = *(int128 *) (x + 0)
994diag1 = *(int128 *) (x + 16)
995diag2 = *(int128 *) (x + 32)
996diag3 = *(int128 *) (x + 48)
997
998
# a0 is pre-loaded with diag1 because the first statement of mainloop2 adds
# diag0 to it; the end of each pass re-seeds a0 the same way.
# i is the round counter for mainloop2.
999                    a0 = diag1
1000i = 20
1001
1002mainloop2:
1003
1004uint32323232        a0 += diag0
1005                                a1 = diag0
1006                    b0 = a0
1007uint32323232        a0 <<= 7
1008uint32323232        b0 >>= 25
1009                diag3 ^= a0
1010
1011                diag3 ^= b0
1012
1013uint32323232                        a1 += diag3
1014                                                a2 = diag3
1015                                    b1 = a1
1016uint32323232                        a1 <<= 9
1017uint32323232                        b1 >>= 23
1018                                diag2 ^= a1
1019                diag3 <<<= 32
1020                                diag2 ^= b1
1021
1022uint32323232                                        a2 += diag2
1023                                                                a3 = diag2
1024                                                    b2 = a2
1025uint32323232                                        a2 <<= 13
1026uint32323232                                        b2 >>= 19
1027                                                diag1 ^= a2
1028                                diag2 <<<= 64
1029                                                diag1 ^= b2
1030
1031uint32323232                                                        a3 += diag1
1032                a4 = diag3
1033                                                                    b3 = a3
1034uint32323232                                                        a3 <<= 18
1035uint32323232                                                        b3 >>= 14
1036                                                                diag0 ^= a3
1037                                                diag1 <<<= 96
1038                                                                diag0 ^= b3
1039
1040uint32323232        a4 += diag0
1041                                a5 = diag0
1042                    b4 = a4
1043uint32323232        a4 <<= 7
1044uint32323232        b4 >>= 25
1045                diag1 ^= a4
1046
1047                diag1 ^= b4
1048
1049uint32323232                        a5 += diag1
1050                                                a6 = diag1
1051                                    b5 = a5
1052uint32323232                        a5 <<= 9
1053uint32323232                        b5 >>= 23
1054                                diag2 ^= a5
1055                diag1 <<<= 32
1056                                diag2 ^= b5
1057
1058uint32323232                                        a6 += diag2
1059                                                                a7 = diag2
1060                                                    b6 = a6
1061uint32323232                                        a6 <<= 13
1062uint32323232                                        b6 >>= 19
1063                                                diag3 ^= a6
1064                                diag2 <<<= 64
1065                                                diag3 ^= b6
1066
1067uint32323232                                                        a7 += diag3
1068                a0 = diag1
1069                                                                    b7 = a7
1070uint32323232                                                        a7 <<= 18
1071uint32323232                                                        b7 >>= 14
1072                                                                diag0 ^= a7
1073                                                diag3 <<<= 96
1074                                                                diag0 ^= b7
1075
1076
1077uint32323232        a0 += diag0
1078                                a1 = diag0
1079                    b0 = a0
1080uint32323232        a0 <<= 7
1081uint32323232        b0 >>= 25
1082                diag3 ^= a0
1083
1084                diag3 ^= b0
1085
1086uint32323232                        a1 += diag3
1087                                                a2 = diag3
1088                                    b1 = a1
1089uint32323232                        a1 <<= 9
1090uint32323232                        b1 >>= 23
1091                                diag2 ^= a1
1092                diag3 <<<= 32
1093                                diag2 ^= b1
1094
1095uint32323232                                        a2 += diag2
1096                                                                a3 = diag2
1097                                                    b2 = a2
1098uint32323232                                        a2 <<= 13
1099uint32323232                                        b2 >>= 19
1100                                                diag1 ^= a2
1101                                diag2 <<<= 64
1102                                                diag1 ^= b2
1103
1104uint32323232                                                        a3 += diag1
1105                a4 = diag3
1106                                                                    b3 = a3
1107uint32323232                                                        a3 <<= 18
1108uint32323232                                                        b3 >>= 14
1109                                                                diag0 ^= a3
1110                                                diag1 <<<= 96
1111                                                                diag0 ^= b3
1112
1113uint32323232        a4 += diag0
1114                                a5 = diag0
1115                    b4 = a4
1116uint32323232        a4 <<= 7
1117uint32323232        b4 >>= 25
1118                diag1 ^= a4
1119
1120                diag1 ^= b4
1121
1122uint32323232                        a5 += diag1
1123                                                a6 = diag1
1124                                    b5 = a5
1125uint32323232                        a5 <<= 9
1126uint32323232                        b5 >>= 23
1127                                diag2 ^= a5
1128                diag1 <<<= 32
1129                                diag2 ^= b5
1130
1131uint32323232                                        a6 += diag2
1132                                                                a7 = diag2
1133                                                    b6 = a6
1134uint32323232                                        a6 <<= 13
1135uint32323232                                        b6 >>= 19
1136                                                diag3 ^= a6
1137                                diag2 <<<= 64
1138                                                diag3 ^= b6
1139                 unsigned>? i -= 4
1140uint32323232                                                        a7 += diag3
1141                a0 = diag1
1142                                                                    b7 = a7
1143uint32323232                                                        a7 <<= 18
1144                b0 = 0
1145uint32323232                                                        b7 >>= 14
1146                                                                diag0 ^= a7
1147                                                diag3 <<<= 96
1148                                                                diag0 ^= b7
1149goto mainloop2 if unsigned>
1150
1151
1152uint32323232 diag0 += *(int128 *) (x + 0)
1153uint32323232 diag1 += *(int128 *) (x + 16)
1154uint32323232 diag2 += *(int128 *) (x + 32)
1155uint32323232 diag3 += *(int128 *) (x + 48)
1156
1157
1158in0 = diag0
1159in12 = diag1
1160in8 = diag2
1161in4 = diag3
1162diag0 <<<= 96
1163diag1 <<<= 96
1164diag2 <<<= 96
1165diag3 <<<= 96
1166in0 ^= *(uint32 *) (m + 0)
1167in12 ^= *(uint32 *) (m + 48)
1168in8 ^= *(uint32 *) (m + 32)
1169in4 ^= *(uint32 *) (m + 16)
1170*(uint32 *) (out + 0) = in0
1171*(uint32 *) (out + 48) = in12
1172*(uint32 *) (out + 32) = in8
1173*(uint32 *) (out + 16) = in4
1174
1175in5 = diag0
1176in1 = diag1
1177in13 = diag2
1178in9 = diag3
1179diag0 <<<= 96
1180diag1 <<<= 96
1181diag2 <<<= 96
1182diag3 <<<= 96
1183in5 ^= *(uint32 *) (m + 20)
1184in1 ^= *(uint32 *) (m + 4)
1185in13 ^= *(uint32 *) (m + 52)
1186in9 ^= *(uint32 *) (m + 36)
1187*(uint32 *) (out + 20) = in5
1188*(uint32 *) (out + 4) = in1
1189*(uint32 *) (out + 52) = in13
1190*(uint32 *) (out + 36) = in9
1191
1192in10 = diag0
1193in6 = diag1
1194in2 = diag2
1195in14 = diag3
1196diag0 <<<= 96
1197diag1 <<<= 96
1198diag2 <<<= 96
1199diag3 <<<= 96
1200in10 ^= *(uint32 *) (m + 40)
1201in6 ^= *(uint32 *) (m + 24)
1202in2 ^= *(uint32 *) (m + 8)
1203in14 ^= *(uint32 *) (m + 56)
1204*(uint32 *) (out + 40) = in10
1205*(uint32 *) (out + 24) = in6
1206*(uint32 *) (out + 8) = in2
1207*(uint32 *) (out + 56) = in14
1208
1209in15 = diag0
1210in11 = diag1
1211in7 = diag2
1212in3 = diag3
1213in15 ^= *(uint32 *) (m + 60)
1214in11 ^= *(uint32 *) (m + 44)
1215in7 ^= *(uint32 *) (m + 28)
1216in3 ^= *(uint32 *) (m + 12)
1217*(uint32 *) (out + 60) = in15
1218*(uint32 *) (out + 44) = in11
1219*(uint32 *) (out + 28) = in7
1220*(uint32 *) (out + 12) = in3
1221
1222
1223  bytes = bytes_backup
1224
1225  in8 = *(uint32 *) (x + 32)
1226  in9 = *(uint32 *) (x + 52)
1227  carry? in8 += 1
1228  in9 += 0 + carry
1229  *(uint32 *) (x + 32) = in8
1230  *(uint32 *) (x + 52) = in9
1231
1232                         unsigned>? unsigned<? bytes - 64
1233  goto bytesatleast65 if unsigned>
1234
1235    goto bytesatleast64 if !unsigned<
1236      m = out
1237      out = ctarget
1238      i = bytes
1239      while (i) { *out++ = *m++; --i }
1240    bytesatleast64:
1241    done:
1242
1243    eax = eax_stack
1244    ebx = ebx_stack
1245    esi = esi_stack
1246    edi = edi_stack
1247    ebp = ebp_stack
1248
1249    leave
1250
1251  bytesatleast65:
1252
1253  bytes -= 64
1254  out += 64
1255  m += 64
1256goto bytesbetween1and255
1257
1258
# ECRYPT_init: entry point with an empty body - there are no statements
# between enter and leave, so the routine performs no work of its own.
1259enter ECRYPT_init
1260leave
1261
1262
# ECRYPT_keysetup: fill the key words and constant words of the 64-byte block at x.
#   arg1 -> x      block to fill; stores go to byte offsets 0 through 20, 28, 36, 40, 48, 56, 60
#   arg2 -> k      key; bytes 0-15 are always read, bytes 16-31 only on the kbits256 path
#   arg3 -> kbits  compared (unsigned) against 256 to choose the path
# arg4 is not read. Offsets 24, 32, 44 and 52 of x are not written here;
# ECRYPT_ivsetup, further down in this file, is the routine that stores to them.
1263enter ECRYPT_keysetup
1264
# Copy the five registers declared `caller` into stack slots; they are copied
# back from the same slots just before leave.
1265  eax_stack = eax
1266  ebx_stack = ebx
1267  esi_stack = esi
1268  edi_stack = edi
1269  ebp_stack = ebp
1270
1271  k = arg2
1272  kbits = arg3
1273  x = arg1
1274
1275# situation at beginning of first round:
1276# diag0: x0 x5 x10 x15
1277# diag1: x12 x1 x6 x11
1278# diag2: x8 x13 x2 x7
1279# diag3: x4 x9 x14 x3
# Each row above is one 16-byte quarter of x (diag0 at x+0, diag1 at x+16,
# diag2 at x+32, diag3 at x+48), so word xN sits at byte offset
# 16*row + 4*column; e.g. x1 is row diag1, column 1 -> x + 20.
1280
# Key words 0..3 go to positions x1, x2, x3, x4 (done for both key sizes).
1281  in1 = *(uint32 *) (k + 0)
1282  in2 = *(uint32 *) (k + 4)
1283  in3 = *(uint32 *) (k + 8)
1284  in4 = *(uint32 *) (k + 12)
1285  *(uint32 *) (x + 20) = in1
1286  *(uint32 *) (x + 40) = in2
1287  *(uint32 *) (x + 60) = in3
1288  *(uint32 *) (x + 48) = in4
1289
# Any kbits below 256 (unsigned compare) takes the kbits128 path;
# 256 or more falls through into kbits256.
1290                   unsigned<? kbits - 256
1291  goto kbits128 if unsigned<
1292
1293  kbits256:
1294
# Key words 4..7 go to positions x11, x12, x13, x14.
1295    in11 = *(uint32 *) (k + 16)
1296    in12 = *(uint32 *) (k + 20)
1297    in13 = *(uint32 *) (k + 24)
1298    in14 = *(uint32 *) (k + 28)
1299    *(uint32 *) (x + 28) = in11
1300    *(uint32 *) (x + 16) = in12
1301    *(uint32 *) (x + 36) = in13
1302    *(uint32 *) (x + 56) = in14
1303
# Constants for x0, x5, x10, x15: 0x61707865, 0x3320646e, 0x79622d32, 0x6b206574.
# Taken least-significant byte first they are the ASCII text "expand 32-byte k".
1304    in0 = 1634760805
1305    in5 = 857760878
1306    in10 = 2036477234
1307    in15 = 1797285236
1308    *(uint32 *) (x + 0) = in0
1309    *(uint32 *) (x + 4) = in5
1310    *(uint32 *) (x + 8) = in10
1311    *(uint32 *) (x + 12) = in15
1312
1313  goto keysetupdone
1314
1315  kbits128:
1316
# Short key: key words 0..3 are loaded a second time and stored to positions
# x11..x14, so the same 16 key bytes fill both key areas of the block.
1317    in11 = *(uint32 *) (k + 0)
1318    in12 = *(uint32 *) (k + 4)
1319    in13 = *(uint32 *) (k + 8)
1320    in14 = *(uint32 *) (k + 12)
1321    *(uint32 *) (x + 28) = in11
1322    *(uint32 *) (x + 16) = in12
1323    *(uint32 *) (x + 36) = in13
1324    *(uint32 *) (x + 56) = in14
1325
# Constants for x0, x5, x10, x15: 0x61707865, 0x3120646e, 0x79622d36, 0x6b206574.
# Taken least-significant byte first they are the ASCII text "expand 16-byte k";
# only the x5 and x10 words differ from the kbits256 constants.
1326    in0 = 1634760805
1327    in5 = 824206446
1328    in10 = 2036477238
1329    in15 = 1797285236
1330    *(uint32 *) (x + 0) = in0
1331    *(uint32 *) (x + 4) = in5
1332    *(uint32 *) (x + 8) = in10
1333    *(uint32 *) (x + 12) = in15
1334
1335  keysetupdone:
1336
# Both paths join here: copy the saved registers back from their stack slots.
1337  eax = eax_stack
1338  ebx = ebx_stack
1339  esi = esi_stack
1340  edi = edi_stack
1341  ebp = ebp_stack
1342
1343leave
1344
1345
# ECRYPT_ivsetup: store two words read from iv, plus two zero words, into the block at x.
#   arg1 -> x   block to update; only byte offsets 24, 32, 44 and 52 are written
#   arg2 -> iv  8 bytes are read (two 32-bit words at iv + 0 and iv + 4)
# arg3 and arg4 are not read.
1346enter ECRYPT_ivsetup
1347
# Copy the five registers declared `caller` into stack slots; they are copied
# back from the same slots just before leave.
1348  eax_stack = eax
1349  ebx_stack = ebx
1350  esi_stack = esi
1351  edi_stack = edi
1352  ebp_stack = ebp
1353
1354  iv = arg2
1355  x = arg1
1356
1357# situation at beginning of first round:
1358# diag0: x0 x5 x10 x15
1359# diag1: x12 x1 x6 x11
1360# diag2: x8 x13 x2 x7
1361# diag3: x4 x9 x14 x3
# Each row above is one 16-byte quarter of x, so word xN sits at byte offset
# 16*row + 4*column: x6 -> 24, x7 -> 44, x8 -> 32, x9 -> 52.
1362
# iv words 0 and 1 go to positions x6 and x7. Positions x8 and x9 are set to
# zero; they are the same two words that the block-output code earlier in this
# file increments as one 64-bit value (add 1 at x + 32, carry into x + 52).
1363  in6 = *(uint32 *) (iv + 0)
1364  in7 = *(uint32 *) (iv + 4)
1365  in8 = 0
1366  in9 = 0
1367  *(uint32 *) (x + 24) = in6
1368  *(uint32 *) (x + 44) = in7
1369  *(uint32 *) (x + 32) = in8
1370  *(uint32 *) (x + 52) = in9
1371
# Copy the saved registers back from their stack slots.
1372  eax = eax_stack
1373  ebx = ebx_stack
1374  esi = esi_stack
1375  edi = edi_stack
1376  ebp = ebp_stack
1377
1378leave
1379