1;/*---------------------------------------------------------------------*
2; * The following (piece of) code, (part of) the 2xSaI engine,          *
3; * copyright (c) 2001 by Derek Liauw Kie Fa.                           *
4; * Non-Commercial use of the engine is allowed and is encouraged,      *
5; * provided that appropriate credit be given and that this copyright   *
6; * notice will not be removed under any circumstance.                  *
7; * You may freely modify this code, but I request                      *
8; * that any improvements to the engine be submitted to me, so          *
9; * that I can implement these improvements in newer versions of        *
10; * the engine.                                                         *
11; * If you need more information, have any comments or suggestions,     *
12; * you can e-mail me. My e-mail: DerekL666@yahoo.com                   *
13; *---------------------------------------------------------------------*/
14; modified by Spacy to compile with yasm [2006-06-20]
15
16%include "macros.mac"
17
18;----------------------
19; 2xSaI, Super2xSaI, SuperEagle .. FINAL. no versioning anymore..
20;----------------------
21
BITS 32

SECTION .text

;----------------------------------------------------------------------
; Stack-argument offsets.  They are applied to ebp after the routines
; below have executed "push ebp / mov ebp, esp".
;----------------------------------------------------------------------
srcPtr          equ  8
deltaPtr        equ 12
srcPitch        equ 16
width           equ 20
dstOffset       equ 24
dstPitch        equ 28
dstSegment      equ 32

;----------------------------------------------------------------------
; Byte offsets of the neighbourhood pixels inside one source row.
; Pixels are 16-bit words, so consecutive columns are 2 bytes apart.
; Each group below names the columns of one row; the row itself is
; selected by the base/pitch part of the address at the point of use.
;----------------------------------------------------------------------

; row "B"
colorB0         equ -2
colorB1         equ  0
colorB2         equ  2
colorB3         equ  4

; row "7 8 9"
color7          equ -2
color8          equ  0
color9          equ  2

; row "4 5 6 S2"
color4          equ -2
color5          equ  0
color6          equ  2
colorS2         equ  4

; row "1 2 3 S1"
color1          equ -2
color2          equ  0
color3          equ  2
colorS1         equ  4

; row "A"
colorA0         equ -2
colorA1         equ  0
colorA2         equ  2
colorA3         equ  4
57
58
;----------------------------------------------------------------------
; Helper macros used by _2xSaISuper2xSaILine.
;----------------------------------------------------------------------

; S2XSAI_INTERPOLATE
;   mm0 = ((mm0 & colorMask) >> 1) + ((mm1 & colorMask) >> 1)
;         + (mm0 & mm1 & lowPixelMask)            (per 16-bit lane)
;   Clobbers mm1, mm2, mm3.
%macro S2XSAI_INTERPOLATE 0
         movq mm2, mm0
         movq mm3, mm1
         pand mm0, [colorMask]
         pand mm1, [colorMask]
         psrlw mm0, 1
         psrlw mm1, 1
         pand mm3, [lowPixelMask]
         paddw mm0, mm1
         pand mm3, mm2
         paddw mm0, mm3
%endmacro

; S2XSAI_VOTE  mem1, mem2
;   In:  mm4, mm5 = the two candidate pixels,
;        mm6      = per-lane weight (non-zero only in lanes that vote),
;        mm7      = running vote sum.
;   For the two neighbour pixels at mem1/mem2:
;        mm7 += mm6  in lanes where fewer than two of them equal mm4
;        mm7 -= mm6  in lanes where fewer than two of them equal mm5
;   Clobbers mm0-mm3.
%macro S2XSAI_VOTE 2
         movq mm0, %1
         movq mm1, %2
         movq mm2, mm0
         movq mm3, mm1
         pcmpeqw mm0, mm4
         pcmpeqw mm1, mm4
         pcmpeqw mm2, mm5
         pcmpeqw mm3, mm5
         pand mm0, mm6
         pand mm1, mm6
         pand mm2, mm6
         pand mm3, mm6
         paddw mm0, mm1                ; number of matches with mm4
         paddw mm2, mm3                ; number of matches with mm5

         pxor mm3, mm3
         pcmpgtw mm0, mm6              ; set where both neighbours matched
         pcmpgtw mm2, mm6
         pcmpeqw mm0, mm3              ; invert: set where at most one matched
         pcmpeqw mm2, mm3
         pand mm0, mm6
         pand mm2, mm6
         paddw mm7, mm0
         psubw mm7, mm2
%endmacro

;----------------------------------------------------------------------
; _2xSaISuper2xSaILine
;
; Scales one line of 16-bit pixels by 2x in both directions.  Each loop
; iteration reads 4 source pixels (plus their neighbours on four source
; rows) and writes 8 pixels on each of two destination rows.
;
; Stack arguments (offsets from ebp, see the equates above):
;   srcPtr     - source line; the row one pitch above it is the "B" row
;   deltaPtr   - buffer compared against the source to detect unchanged
;                pixels; it is addressed with the source pitch.  The
;                argument slot itself is advanced by 8 per iteration.
;   srcPitch   - bytes between source rows
;   width      - number of pixels to process (handled 4 at a time)
;   dstOffset  - first destination row
;   dstPitch   - bytes between destination rows
;   dstSegment - selector loaded into fs when __DJGPP__ is defined
;
; Register roles inside the loop:
;   eax = "B" row pointer        ebx = source pitch
;   esi = eax + ebx              edx = destination pointer
;   ecx, edi = scratch           (loop counter is kept on the stack)
;
; All general registers are preserved (pushad/popad); emms is executed
; before returning.  The caller removes the arguments (plain ret).
; Scratch variables (I56Pixel, Mask26, final1a, ...) and the constants
; colorMask, lowPixelMask and ONE are defined outside this routine.
;----------------------------------------------------------------------
NEWSYM _2xSaISuper2xSaILine
         push ebp
         mov ebp, esp
         pushad

; Prepare the destination
%ifdef __DJGPP__
         ; Set the selector
         mov eax, [ebp+dstSegment]
         mov fs, ax
%endif
         mov edx, [ebp+dstOffset]         ; edx points to the screen

; Prepare the source
         mov eax, [ebp+srcPtr]
         mov ebx, [ebp+srcPitch]          ; ebx = source pitch
         mov ecx, [ebp+width]             ; ecx = pixels left to process
         sub eax, ebx                     ; eax = row above srcPtr ("B" row)

; Main Loop
.Loop:   push ecx                         ; keep the pixel counter on the stack

         lea esi, [eax+ebx]               ; esi+2*ebx addresses the "A" row

         ;-----Check Delta------------------
         mov ecx, [ebp+deltaPtr]
         lea edi, [ecx+ebx]               ; edi+2*ebx = "A" row of the delta buffer

         ; load the source neighbourhood (two quads per row)
         movq mm0, [eax+colorB0]
         movq mm1, [eax+colorB3]
         movq mm2, [eax+ebx+color4]
         movq mm3, [eax+ebx+colorS2]
         movq mm4, [eax+ebx+ebx+color1]
         movq mm5, [eax+ebx+ebx+colorS1]
         movq mm6, [esi+ebx+ebx+colorA0]
         movq mm7, [esi+ebx+ebx+colorA3]

         ; compare against the delta buffer
         pcmpeqw mm0, [ecx+2+colorB0]
         pcmpeqw mm1, [ecx+2+colorB3]
         pcmpeqw mm2, [ecx+ebx+2+color4]
         pcmpeqw mm3, [ecx+ebx+2+colorS2]
         pcmpeqw mm4, [ecx+ebx+ebx+2+color1]
         pcmpeqw mm5, [ecx+ebx+ebx+2+colorS1]
         pcmpeqw mm6, [edi+ebx+ebx+2+colorA0]
         pcmpeqw mm7, [edi+ebx+ebx+2+colorA3]

         ; combine: a lane of mm0 stays set only if all eight compares matched
         pand mm0, mm1
         pand mm2, mm3
         pand mm4, mm5
         pand mm6, mm7
         pand mm0, mm2
         pand mm4, mm6
         pxor mm7, mm7
         pand mm0, mm4
         movq mm6, [eax+colorB0]
         pcmpeqw mm7, mm0                 ; mm7 lane set = something differed

         movq [ecx+2+colorB0], mm6        ; refresh the "B" row of the delta buffer

         packsswb mm7, mm7
         movd ecx, mm7
         test ecx, ecx
         jz near .SKIP_PROCESS            ; nothing differed: skip these pixels

         ;End Delta

         ;---------------------------------
         ; Interpolations of color5/color6
         movq mm0, [eax+ebx+color5]
         movq mm1, [eax+ebx+color6]
         movq mm4, mm0                    ; mm4 = color5
         movq mm5, mm1                    ; mm5 = color6
         S2XSAI_INTERPOLATE               ; mm0 = I(5,6)
         movq [I56Pixel], mm0
         movq mm7, mm0

         movq mm1, mm4                    ; 5,5,5,6
         S2XSAI_INTERPOLATE               ; mm0 = I(I56, 5)
         movq [I5556Pixel], mm0

         movq mm0, mm7
         movq mm1, mm5                    ; 6,6,6,5
         S2XSAI_INTERPOLATE               ; mm0 = I(I56, 6)
         movq [I5666Pixel], mm0

         ;-------------------------
         ; Interpolations of color2/color3
         movq mm0, [eax+ebx+ebx+color2]
         movq mm1, [eax+ebx+ebx+color3]
         movq mm4, mm0                    ; mm4 = color2
         movq mm5, mm1                    ; mm5 = color3
         S2XSAI_INTERPOLATE               ; mm0 = I(2,3)
         movq [I23Pixel], mm0
         movq mm7, mm0

         movq mm1, mm4                    ; 2,2,2,3
         S2XSAI_INTERPOLATE               ; mm0 = I(I23, 2)
         movq [I2223Pixel], mm0

         movq mm0, mm7
         movq mm1, mm5                    ; 3,3,3,2
         S2XSAI_INTERPOLATE               ; mm0 = I(I23, 3)
         movq [I2333Pixel], mm0

;////////////////////////////////
; Decide which "branch" to take
;--------------------------------
         movq mm0, [eax+ebx+color5]
         movq mm1, [eax+ebx+color6]
         movq mm6, mm0
         movq mm7, mm1
         pcmpeqw mm0, [eax+ebx+ebx+color3]   ; color5 == color3
         pcmpeqw mm1, [eax+ebx+ebx+color2]   ; color6 == color2
         pcmpeqw mm6, mm7                    ; color5 == color6

         movq mm2, mm0
         movq mm3, mm0

         pand mm0, mm1       ; color5 == color3 && color6 == color2
         pxor mm7, mm7

         pcmpeqw mm2, mm7    ; color5 != color3
         pand mm6, mm0       ; both diagonals equal and color5 == color6
         pand mm2, mm1       ; color5 != color3 && color6 == color2

         pcmpeqw mm1, mm7    ; color6 != color2

         pand mm1, mm3       ; color5 == color3 && color6 != color2
         pxor mm0, mm6       ; both diagonals equal but color5 != color6
         por mm1, mm6
         movq mm7, mm0
         movq [Mask26], mm2
         packsswb mm7, mm7
         movq [Mask35], mm1

         movd ecx, mm7
         test ecx, ecx
         jz near .SKIP_GUESS ; no lane needs the neighbour vote

;---------------------------------------------
; Neighbour vote for the lanes left in mm0.
; NOTE(review): colorA..colorO are not among the equates directly above
; this routine; presumably they are defined elsewhere in the file.
         movq mm6, mm0
         movq mm4, [eax+ebx+colorA]
         movq mm5, [eax+ebx+colorB]
         pxor mm7, mm7                    ; vote sum starts at 0
         pand mm6, [ONE]

         S2XSAI_VOTE [eax+colorE],         [eax+ebx+colorG]
         S2XSAI_VOTE [eax+colorF],         [eax+ebx+colorK]
         S2XSAI_VOTE [eax+ebx+ebx+colorH], [esi+ebx+ebx+colorN]
         S2XSAI_VOTE [eax+ebx+ebx+colorL], [esi+ebx+ebx+colorO]

         movq mm1, mm7
         pxor mm0, mm0
         pcmpgtw mm7, mm0                 ; vote sum > 0
         pcmpgtw mm0, mm1                 ; vote sum < 0

         por mm7, [Mask35]
         por mm0, [Mask26]
         movq [Mask35], mm7
         movq [Mask26], mm0

.SKIP_GUESS:

         ; Compose all the results together to form the final image.

         movq mm0, [eax+ebx+color5]
         movq mm1, [eax+ebx+ebx+color2]
         S2XSAI_INTERPOLATE               ; mm0 = I(5,2), kept for 1a and 2a

         ;---------------------------
         ; Reference (C):
         ;
         ; if (color5 == color3 && color2 != color6 && color4 == color5 && color5 != colorA2)
         ;    product2a = INTERPOLATE (color2, color5);
         ; else
         ; if (color5 == color1 && color6 == color5 && color4 != color2 && color5 != colorA0)
         ;    product2a = INTERPOLATE(color2, color5);
         ; else
         ;    product2a = color2;
         ;
         ; if (color2 == color6 && color5 != color3 && color1 == color2 && color2 != colorB2)
         ;    product1a = INTERPOLATE (color2, color5);
         ; else
         ; if (color4 == color2 && color3 == color2 && color1 != color5 && color2 != colorB0)
         ;    product1a = INTERPOLATE(color2, color5);
         ; else
         ;    product1a = color5;

         ;---- product1a ----
         movq mm7, [Mask26]
         movq mm6, [eax+colorB2]
         movq mm5, [eax+ebx+ebx+color2]
         movq mm4, [eax+ebx+ebx+color1]
         pcmpeqw mm4, mm5                 ; color1 == color2
         pcmpeqw mm6, mm5                 ; colorB2 == color2
         pxor mm5, mm5
         pand mm7, mm4
         pcmpeqw mm6, mm5                 ; color2 != colorB2
         pand mm7, mm6                    ; first condition

         movq mm6, [eax+ebx+ebx+color3]
         movq mm5, [eax+ebx+ebx+color2]
         movq mm4, [eax+ebx+ebx+color1]
         movq mm2, [eax+ebx+color5]
         movq mm1, [eax+ebx+color4]
         movq mm3, [eax+colorB0]

         pcmpeqw mm2, mm4                 ; color5 == color1
         pcmpeqw mm6, mm5                 ; color3 == color2
         pcmpeqw mm1, mm5                 ; color4 == color2
         pcmpeqw mm3, mm5                 ; colorB0 == color2
         pxor mm5, mm5
         pcmpeqw mm2, mm5                 ; color1 != color5
         pcmpeqw mm3, mm5                 ; color2 != colorB0
         pand mm6, mm1
         pand mm2, mm3
         pand mm6, mm2                    ; second condition
         por mm7, mm6

         movq mm6, mm7
         pcmpeqw mm6, mm5                 ; lanes where neither condition holds
         pand mm7, mm0                    ; -> INTERPOLATE(color2, color5)

         movq mm1, [eax+ebx+color5]
         pand mm6, mm1                    ; -> color5
         por mm7, mm6
         movq [final1a], mm7              ; finished 1a

         ;---- product2a ----
         movq mm7, [Mask35]
         movq mm6, [esi+ebx+ebx+colorA2]
         movq mm5, [eax+ebx+color5]
         movq mm4, [eax+ebx+color4]
         pcmpeqw mm4, mm5                 ; color4 == color5
         pcmpeqw mm6, mm5                 ; colorA2 == color5
         pxor mm5, mm5
         pand mm7, mm4
         pcmpeqw mm6, mm5                 ; color5 != colorA2
         pand mm7, mm6                    ; first condition

         movq mm6, [eax+ebx+color6]
         movq mm5, [eax+ebx+color5]
         movq mm4, [eax+ebx+color4]
         movq mm2, [eax+ebx+ebx+color2]
         movq mm1, [eax+ebx+ebx+color1]
         movq mm3, [esi+ebx+ebx+colorA0]

         pcmpeqw mm2, mm4                 ; color2 == color4
         pcmpeqw mm6, mm5                 ; color6 == color5
         pcmpeqw mm1, mm5                 ; color1 == color5
         pcmpeqw mm3, mm5                 ; colorA0 == color5
         pxor mm5, mm5
         pcmpeqw mm2, mm5                 ; color4 != color2
         pcmpeqw mm3, mm5                 ; color5 != colorA0
         pand mm6, mm1
         pand mm2, mm3
         pand mm6, mm2                    ; second condition
         por mm7, mm6

         movq mm6, mm7
         pcmpeqw mm6, mm5                 ; lanes where neither condition holds
         pand mm7, mm0                    ; -> INTERPOLATE(color2, color5)

         movq mm1, [eax+ebx+ebx+color2]
         pand mm6, mm1                    ; -> color2
         por mm7, mm6
         movq [final2a], mm7              ; finished 2a

         ;--------------------------------------------
         ; Reference (C):
         ;
         ; if (color6 == color3 && color3 == colorA1 && color2 != colorA2 && color3 != colorA0)
         ;    product2b = Q_INTERPOLATE (color3, color3, color3, color2);
         ; else
         ; if (color5 == color2 && color2 == colorA2 && colorA1 != color3 && color2 != colorA3)
         ;    product2b = Q_INTERPOLATE (color2, color2, color2, color3);
         ; else
         ;    product2b = INTERPOLATE (color2, color3);
         ;
         ; if (color6 == color3 && color6 == colorB1 && color5 != colorB2 && color6 != colorB0)
         ;    product1b = Q_INTERPOLATE (color6, color6, color6, color5);
         ; else
         ; if (color5 == color2 && color5 == colorB2 && colorB1 != color6 && color5 != colorB3)
         ;    product1b = Q_INTERPOLATE (color6, color5, color5, color5);
         ; else
         ;    product1b = INTERPOLATE (color5, color6);

         ;---- product2b ----
         pxor mm7, mm7
         movq mm0, [esi+ebx+ebx+colorA0]
         movq mm1, [esi+ebx+ebx+colorA1]
         movq mm2, [esi+ebx+ebx+colorA2]
         movq mm3, [esi+ebx+ebx+colorA3]
         movq mm4, [eax+ebx+ebx+color2]
         movq mm5, [eax+ebx+ebx+color3]
         movq mm6, [eax+ebx+color6]

         pcmpeqw mm6, mm5                 ; color6 == color3
         pcmpeqw mm1, mm5                 ; colorA1 == color3
         pcmpeqw mm4, mm2                 ; color2 == colorA2
         pcmpeqw mm0, mm5                 ; colorA0 == color3
         pcmpeqw mm4, mm7                 ; color2 != colorA2
         pcmpeqw mm0, mm7                 ; color3 != colorA0
         pand mm0, mm4
         pand mm6, mm1
         pand mm0, mm6                    ; mm0 = first condition

         movq mm4, [eax+ebx+color5]
         movq mm5, [eax+ebx+ebx+color2]

         pcmpeqw mm5, mm4                 ; color2 == color5
         pcmpeqw mm2, mm4                 ; colorA2 == color5
         pcmpeqw mm3, mm4                 ; colorA3 == color5
         ; mm1 still holds (colorA1 == color3) from above; inverting it
         ; yields the "colorA1 != color3" term.  (It must not be compared
         ; with the pixel again: it is a mask here, not colorA1.)
         pcmpeqw mm1, mm7                 ; colorA1 != color3
         pcmpeqw mm3, mm7                 ; color5 != colorA3
         pand mm2, mm5
         pand mm1, mm3
         pand mm1, mm2                    ; mm1 = second condition

         movq mm2, mm0
         movq mm7, [I2333Pixel]
         movq mm6, [I2223Pixel]
         movq mm5, [I23Pixel]
         movq mm4, [Mask35]
         movq mm3, [Mask26]

         por mm2, mm4
         pand mm4, [eax+ebx+ebx+color3]   ; Mask35 lanes take color3
         por mm2, mm3
         pand mm3, [eax+ebx+ebx+color2]   ; Mask26 lanes take color2
         por mm2, mm1
         pand mm0, mm7                    ; first condition  -> I2333
         pand mm1, mm6                    ; second condition -> I2223
         pxor mm7, mm7
         pcmpeqw mm2, mm7                 ; lanes selected by nothing above
         por mm0, mm1
         por mm3, mm4
         pand mm2, mm5                    ; -> I23
         por mm0, mm3
         por mm0, mm2
         movq [final2b], mm0

         ;---- product1b ----
         pxor mm7, mm7
         movq mm0, [eax+colorB0]
         movq mm1, [eax+colorB1]
         movq mm2, [eax+colorB2]
         movq mm3, [eax+colorB3]
         movq mm4, [eax+ebx+color5]
         movq mm5, [eax+ebx+color6]
         movq mm6, [eax+ebx+ebx+color3]

         pcmpeqw mm6, mm5                 ; color3 == color6
         pcmpeqw mm1, mm5                 ; colorB1 == color6
         pcmpeqw mm4, mm2                 ; color5 == colorB2
         pcmpeqw mm0, mm5                 ; colorB0 == color6
         pcmpeqw mm4, mm7                 ; color5 != colorB2
         pcmpeqw mm0, mm7                 ; color6 != colorB0
         pand mm0, mm4
         pand mm6, mm1
         pand mm0, mm6                    ; mm0 = first condition

         movq mm4, [eax+ebx+color5]
         movq mm5, [eax+ebx+ebx+color2]

         pcmpeqw mm5, mm4                 ; color2 == color5
         pcmpeqw mm2, mm4                 ; colorB2 == color5
         pcmpeqw mm3, mm4                 ; colorB3 == color5
         ; mm1 still holds (colorB1 == color6); invert it for the
         ; "colorB1 != color6" term (see product2b above).
         pcmpeqw mm1, mm7                 ; colorB1 != color6
         pcmpeqw mm3, mm7                 ; color5 != colorB3
         pand mm2, mm5
         pand mm1, mm3
         pand mm1, mm2                    ; mm1 = second condition

         movq mm2, mm0
         movq mm7, [I5666Pixel]
         movq mm6, [I5556Pixel]
         movq mm5, [I56Pixel]
         movq mm4, [Mask35]
         movq mm3, [Mask26]

         por mm2, mm4
         pand mm4, [eax+ebx+color5]       ; Mask35 lanes take color5
         por mm2, mm3
         pand mm3, [eax+ebx+color6]       ; Mask26 lanes take color6
         por mm2, mm1
         pand mm0, mm7                    ; first condition  -> I5666
         pand mm1, mm6                    ; second condition -> I5556
         pxor mm7, mm7
         pcmpeqw mm2, mm7                 ; lanes selected by nothing above
         por mm0, mm1
         por mm3, mm4
         pand mm2, mm5                    ; -> I56
         por mm0, mm3
         por mm0, mm2
         movq [final1b], mm0

         ;---------
         ; Interleave the a/b results into two rows of 8 output pixels.
         movq mm0, [final1a]
         movq mm4, [final2a]
         movq mm2, [final1b]
         movq mm6, [final2b]

         movq mm1, mm0
         movq mm5, mm4

         punpcklwd mm0, mm2
         punpckhwd mm1, mm2

         punpcklwd mm4, mm6
         punpckhwd mm5, mm6

         mov ecx, edx
         add ecx, [ebp+dstPitch]          ; ecx = second destination row

         ; Same condition as the selector set-up in the prologue, so fs is
         ; only used when it has actually been loaded.
%ifdef __DJGPP__
         movq [fs:edx], mm0
         movq [fs:edx+8], mm1
         movq [fs:ecx], mm4
         movq [fs:ecx+8], mm5
%else
         movq [es:edx], mm0
         movq [es:edx+8], mm1
         movq [es:ecx], mm4
         movq [es:ecx+8], mm5
%endif

.SKIP_PROCESS:
         add dword [ebp+deltaPtr], 8      ; advance the delta pointer by 4 pixels
         add edx, 16                      ; 8 destination pixels
         add eax, 8                       ; 4 source pixels

         pop ecx
         sub ecx, 4
         jg  near .Loop                   ; loop while pixels remain

; Restore the saved registers and leave MMX state
         popad
         mov esp, ebp
         pop ebp
         emms
         ret
747
748
749;-------------------------------------------------------------------------
750;-------------------------------------------------------------------------
751;-------------------------------------------------------------------------
752;-------------------------------------------------------------------------
753;-------------------------------------------------------------------------
754;-------------------------------------------------------------------------
755;-------------------------------------------------------------------------
756
757
758
759
760
761NEWSYM _2xSaISuperEagleLine
762; Store some stuff
763	 push ebp
764	 mov ebp, esp
765         pushad
766
767; Prepare the destination
768%ifdef __DJGPP__
769         ; Set the selector
770         mov eax, [ebp+dstSegment]
771         mov fs, ax
772%endif
773         mov edx, [ebp+dstOffset]         ; edx points to the screen
774; Prepare the source
775         ; eax points to colorA
776         mov eax, [ebp+srcPtr]
777         mov ebx, [ebp+srcPitch]
778         mov ecx, [ebp+width]
779         ; eax now points to colorB1
780         sub eax, ebx
781
782; Main Loop
783.Loop:   push ecx
784
785         ;-----Check Delta------------------
786         mov ecx, [ebp+deltaPtr]
787
788         movq mm0, [eax+colorB0]
789         movq mm1, [eax+colorB3]
790         movq mm2, [eax+ebx+color4]
791         movq mm3, [eax+ebx+colorS2]
792         movq mm4, [eax+ebx+ebx+color1]
793         movq mm5, [eax+ebx+ebx+colorS1]
794         push eax
795         add eax, ebx
796         movq mm6, [eax+ebx+ebx+colorA0]
797         movq mm7, [eax+ebx+ebx+colorA3]
798         pop eax
799
800         pcmpeqw mm0, [ecx+2+colorB0]
801         pcmpeqw mm1, [ecx+2+colorB3]
802         pcmpeqw mm2, [ecx+ebx+2+color4]
803         pcmpeqw mm3, [ecx+ebx+2+colorS2]
804         pcmpeqw mm4, [ecx+ebx+ebx+2+color1]
805         pcmpeqw mm5, [ecx+ebx+ebx+2+colorS1]
806         add ecx, ebx
807         pcmpeqw mm6, [ecx+ebx+ebx+2+colorA0]
808         pcmpeqw mm7, [ecx+ebx+ebx+2+colorA3]
809         sub ecx, ebx
810
811
812         pand mm0, mm1
813         pand mm2, mm3
814         pand mm4, mm5
815         pand mm6, mm7
816         pand mm0, mm2
817         pand mm4, mm6
818         pxor mm7, mm7
819         pand mm0, mm4
820         movq mm6, [eax+colorB0]
821         pcmpeqw mm7, mm0
822
823         movq [ecx+2+colorB0], mm6
824
825         packsswb mm7, mm7
826         movd ecx, mm7
827         test ecx, ecx
828         jz near .SKIP_PROCESS
829
830         ;End Delta
831
832         ;---------------------------------
833         movq mm0, [eax+ebx+color5]
834         movq mm1, [eax+ebx+color6]
835         movq mm2, mm0
836         movq mm3, mm1
837         movq mm4, mm0
838         movq mm5, mm1
839
840         pand mm0, [colorMask]
841         pand mm1, [colorMask]
842
843         psrlw mm0, 1
844         psrlw mm1, 1
845
846         pand mm3, [lowPixelMask]
847         paddw mm0, mm1
848
849         pand mm3, mm2
850         paddw mm0, mm3                ;mm0 contains the interpolated values
851         movq [I56Pixel], mm0
852         movq mm7, mm0
853
854         ;-------------------
855         movq mm0, mm7
856         movq mm1, mm4  ;5,5,5,6
857         movq mm2, mm0
858         movq mm3, mm1
859
860         pand mm0, [colorMask]
861         pand mm1, [colorMask]
862
863         psrlw mm0, 1
864         psrlw mm1, 1
865
866         pand mm3, [lowPixelMask]
867         paddw mm0, mm1
868
869         pand mm3, mm2
870         paddw mm0, mm3                ;mm0 contains the interpolated values
871         movq [product1a], mm0
872         ;--------------------
873
874         movq mm0, mm7
875         movq mm1, mm5  ;6,6,6,5
876         movq mm2, mm0
877         movq mm3, mm1
878
879         pand mm0, [colorMask]
880         pand mm1, [colorMask]
881
882         psrlw mm0, 1
883         psrlw mm1, 1
884
885         pand mm3, [lowPixelMask]
886         paddw mm0, mm1
887
888         pand mm3, mm2
889         paddw mm0, mm3
890         movq [product1b], mm0
891
892         ;-------------------------
893         ;-------------------------
894         movq mm0, [eax+ebx+ebx+color2]
895         movq mm1, [eax+ebx+ebx+color3]
896         movq mm2, mm0
897         movq mm3, mm1
898         movq mm4, mm0
899         movq mm5, mm1
900
901         pand mm0, [colorMask]
902         pand mm1, [colorMask]
903
904         psrlw mm0, 1
905         psrlw mm1, 1
906
907         pand mm3, [lowPixelMask]
908         paddw mm0, mm1
909
910         pand mm3, mm2
911         paddw mm0, mm3
912         movq [I23Pixel], mm0
913         movq mm7, mm0
914
915         ;---------------------
916         movq mm0, mm7
917         movq mm1, mm4  ;2,2,2,3
918         movq mm2, mm0
919         movq mm3, mm1
920
921         pand mm0, [colorMask]
922         pand mm1, [colorMask]
923
924         psrlw mm0, 1
925         psrlw mm1, 1
926
927         pand mm3, [lowPixelMask]
928         paddw mm0, mm1
929
930         pand mm3, mm2
931         paddw mm0, mm3
932         movq [product2a], mm0
933
934         ;----------------------
935         movq mm0, mm7
936         movq mm1, mm5  ;3,3,3,2
937         movq mm2, mm0
938         movq mm3, mm1
939
940         pand mm0, [colorMask]
941         pand mm1, [colorMask]
942
943         psrlw mm0, 1
944         psrlw mm1, 1
945
946         pand mm3, [lowPixelMask]
947         paddw mm0, mm1
948
949         pand mm3, mm2
950         paddw mm0, mm3
951         movq [product2b], mm0
952
953
954         ;////////////////////////////////
955         ; Decide which "branch" to take
956         ;--------------------------------
957         movq mm4, [eax+ebx+color5]
958         movq mm5, [eax+ebx+color6]
959         movq mm6, [eax+ebx+ebx+color3]
960         movq mm7, [eax+ebx+ebx+color2]
961
962         pxor mm3, mm3
963         movq mm0, mm4
964         movq mm1, mm5
965
966         pcmpeqw mm0, mm6
967         pcmpeqw mm1, mm7
968         pcmpeqw mm1, mm3
969         pand mm0, mm1
970         movq [Mask35], mm0
971
972         movq mm0, [eax+ebx+ebx+colorS1]
973         movq mm1, [eax+ebx+color4]
974         push eax
975         add eax, ebx
976         movq mm2, [eax+ebx+ebx+colorA2]
977         pop eax
978         movq mm3, [eax+colorB1]
979         pcmpeqw mm0, mm4
980         pcmpeqw mm1, mm4
981         pcmpeqw mm2, mm4
982         pcmpeqw mm3, mm4
983         pand mm0, mm1
984         pand mm2, mm3
985         por mm0, mm2
986         pand mm0, [Mask35]
987         movq [Mask35b], mm0
988
989         ;-----------
990         pxor mm3, mm3
991         movq mm0, mm4
992         movq mm1, mm5
993
994         pcmpeqw mm0, mm6
995         pcmpeqw mm1, mm7
996         pcmpeqw mm0, mm3
997         pand mm0, mm1
998         movq [Mask26], mm0
999
1000         movq mm0, [eax+ebx+ebx+color1]
1001         movq mm1, [eax+ebx+colorS2]
1002         push eax
1003         add eax, ebx
1004         movq mm2, [eax+ebx+ebx+colorA1]
1005         pop eax
1006         movq mm3, [eax+colorB2]
1007         pcmpeqw mm0, mm5
1008         pcmpeqw mm1, mm5
1009         pcmpeqw mm2, mm5
1010         pcmpeqw mm3, mm5
1011         pand mm0, mm1
1012         pand mm2, mm3
1013         por mm0, mm2
1014         pand mm0, [Mask26]
1015         movq [Mask26b], mm0
1016
1017         ;--------------------
1018         movq mm0, mm4
1019         movq mm1, mm5
1020         movq mm2, mm0
1021
1022         pcmpeqw mm2, mm1
1023         pcmpeqw mm0, mm6
1024         pcmpeqw mm1, mm7
1025         pand mm0, mm1
1026         pand mm2, mm0
1027         pxor mm0, mm2
1028         movq mm7, mm0
1029
1030         ;------------------
1031         packsswb mm7, mm7
1032         movd ecx, mm7
1033         test ecx, ecx
1034         jz near .SKIP_GUESS
1035
1036;---------------------------------------------
1037; Map of the pixels:                    I|E F|J
1038;                                       G|A B|K
1039;                                       H|C D|L
1040;                                       M|N O|P
1041         movq mm6, mm0
1042         movq mm4, [eax+ebx+color5]
1043         movq mm5, [eax+ebx+color6]
1044         pxor mm7, mm7
1045         pand mm6, [ONE]
1046
1047         movq mm0, [eax+colorB1]
1048         movq mm1, [eax+ebx+color4]
1049         movq mm2, mm0
1050         movq mm3, mm1
1051         pcmpeqw mm0, mm4
1052         pcmpeqw mm1, mm4
1053         pcmpeqw mm2, mm5
1054         pcmpeqw mm3, mm5
1055         pand mm0, mm6
1056         pand mm1, mm6
1057         pand mm2, mm6
1058         pand mm3, mm6
1059         paddw mm0, mm1
1060         paddw mm2, mm3
1061
1062         pxor mm3, mm3
1063         pcmpgtw mm0, mm6
1064         pcmpgtw mm2, mm6
1065         pcmpeqw mm0, mm3
1066         pcmpeqw mm2, mm3
1067         pand mm0, mm6
1068         pand mm2, mm6
1069         paddw mm7, mm0
1070         psubw mm7, mm2
1071
1072         movq mm0, [eax+colorB2]
1073         movq mm1, [eax+ebx+colorS2]
1074         movq mm2, mm0
1075         movq mm3, mm1
1076         pcmpeqw mm0, mm4
1077         pcmpeqw mm1, mm4
1078         pcmpeqw mm2, mm5
1079         pcmpeqw mm3, mm5
1080         pand mm0, mm6
1081         pand mm1, mm6
1082         pand mm2, mm6
1083         pand mm3, mm6
1084         paddw mm0, mm1
1085         paddw mm2, mm3
1086
1087         pxor mm3, mm3
1088         pcmpgtw mm0, mm6
1089         pcmpgtw mm2, mm6
1090         pcmpeqw mm0, mm3
1091         pcmpeqw mm2, mm3
1092         pand mm0, mm6
1093         pand mm2, mm6
1094         paddw mm7, mm0
1095         psubw mm7, mm2
1096
1097         push eax
1098         add eax, ebx
1099         movq mm0, [eax+ebx+color1]
1100         movq mm1, [eax+ebx+ebx+colorA1]
1101         movq mm2, mm0
1102         movq mm3, mm1
1103         pcmpeqw mm0, mm4
1104         pcmpeqw mm1, mm4
1105         pcmpeqw mm2, mm5
1106         pcmpeqw mm3, mm5
1107         pand mm0, mm6
1108         pand mm1, mm6
1109         pand mm2, mm6
1110         pand mm3, mm6
1111         paddw mm0, mm1
1112         paddw mm2, mm3
1113
1114         pxor mm3, mm3
1115         pcmpgtw mm0, mm6
1116         pcmpgtw mm2, mm6
1117         pcmpeqw mm0, mm3
1118         pcmpeqw mm2, mm3
1119         pand mm0, mm6
1120         pand mm2, mm6
1121         paddw mm7, mm0
1122         psubw mm7, mm2
1123
1124         movq mm0, [eax+ebx+colorS1]
1125         movq mm1, [eax+ebx+ebx+colorA2]
1126         movq mm2, mm0
1127         movq mm3, mm1
1128         pcmpeqw mm0, mm4
1129         pcmpeqw mm1, mm4
1130         pcmpeqw mm2, mm5
1131         pcmpeqw mm3, mm5
1132         pand mm0, mm6
1133         pand mm1, mm6
1134         pand mm2, mm6
1135         pand mm3, mm6
1136         paddw mm0, mm1
1137         paddw mm2, mm3
1138
1139         pxor mm3, mm3
1140         pcmpgtw mm0, mm6
1141         pcmpgtw mm2, mm6
1142         pcmpeqw mm0, mm3
1143         pcmpeqw mm2, mm3
1144         pand mm0, mm6
1145         pand mm2, mm6
1146         paddw mm7, mm0
1147         psubw mm7, mm2
1148
1149         pop eax
1150         movq mm1, mm7
1151         pxor mm0, mm0
1152         pcmpgtw mm7, mm0
1153         pcmpgtw mm0, mm1
1154
1155         por mm7, [Mask35]
1156         por mm1, [Mask26]
1157         movq [Mask35], mm7
1158         movq [Mask26], mm1
1159
1160.SKIP_GUESS:
1161         ;Start the ASSEMBLY !!!
1162
1163         movq mm4, [Mask35]
1164         movq mm5, [Mask26]
1165         movq mm6, [Mask35b]
1166         movq mm7, [Mask26b]
1167
1168         movq mm0, [eax+ebx+color5]
1169         movq mm1, [eax+ebx+color6]
1170         movq mm2, [eax+ebx+ebx+color2]
1171         movq mm3, [eax+ebx+ebx+color3]
1172         pcmpeqw mm0, mm2
1173         pcmpeqw mm1, mm3
1174         movq mm2, mm4
1175         movq mm3, mm5
1176         por mm0, mm1
1177         por mm2, mm3
1178         pand mm2, mm0
1179         pxor mm0, mm2
1180         movq mm3, mm0
1181
1182         movq mm2, mm0
1183         pxor mm0, mm0
1184         por mm2, mm4
1185         pxor mm4, mm6
1186         por mm2, mm5
1187         pxor mm5, mm7
1188         pcmpeqw mm2, mm0
1189         ;----------------
1190
1191         movq mm0, [eax+ebx+color5]
1192         movq mm1, mm3
1193         por mm1, mm4
1194         por mm1, mm6
1195         pand mm0, mm1
1196         movq mm1, mm5
1197         pand mm1, [I56Pixel]
1198         por mm0, mm1
1199         movq mm1, mm7
1200         pand mm1, [product1b]
1201         por mm0, mm1
1202         movq mm1, mm2
1203         pand mm1, [product1a]
1204         por mm0, mm1
1205         movq [final1a], mm0
1206
1207         movq mm0, [eax+ebx+color6]
1208         movq mm1, mm3
1209         por mm1, mm5
1210         por mm1, mm7
1211         pand mm0, mm1
1212         movq mm1, mm4
1213         pand mm1, [I56Pixel]
1214         por mm0, mm1
1215         movq mm1, mm6
1216         pand mm1, [product1a]
1217         por mm0, mm1
1218         movq mm1, mm2
1219         pand mm1, [product1b]
1220         por mm0, mm1
1221         movq [final1b], mm0
1222
1223         movq mm0, [eax+ebx+ebx+color2]
1224         movq mm1, mm3
1225         por mm1, mm5
1226         por mm1, mm7
1227         pand mm0, mm1
1228         movq mm1, mm4
1229         pand mm1, [I23Pixel]
1230         por mm0, mm1
1231         movq mm1, mm6
1232         pand mm1, [product2b]
1233         por mm0, mm1
1234         movq mm1, mm2
1235         pand mm1, [product2a]
1236         por mm0, mm1
1237         movq [final2a], mm0
1238
1239         movq mm0, [eax+ebx+ebx+color3]
1240         movq mm1, mm3
1241         por mm1, mm4
1242         por mm1, mm6
1243         pand mm0, mm1
1244         movq mm1, mm5
1245         pand mm1, [I23Pixel]
1246         por mm0, mm1
1247         movq mm1, mm7
1248         pand mm1, [product2a]
1249         por mm0, mm1
1250         movq mm1, mm2
1251         pand mm1, [product2b]
1252         por mm0, mm1
1253         movq [final2b], mm0
1254
1255
1256         movq mm0, [final1a]
1257         movq mm2, [final1b]
1258         movq mm1, mm0
1259         movq mm4, [final2a]
1260         movq mm6, [final2b]
1261         movq mm5, mm4
1262         punpcklwd mm0, mm2
1263         punpckhwd mm1, mm2
1264         punpcklwd mm4, mm6
1265         punpckhwd mm5, mm6
1266
1267
1268
1269
1270%ifdef __DJGPP__
1271         movq [fs:edx], mm0
1272         movq [fs:edx+8], mm1
1273         push edx
1274         add edx, [ebp+dstPitch]
1275         movq [fs:edx], mm4
1276         movq [fs:edx+8], mm5
1277         pop edx
1278%else
1279         movq [es:edx], mm0
1280         movq [es:edx+8], mm1
1281         push edx
1282         add edx, [ebp+dstPitch]
1283         movq [es:edx], mm4
1284         movq [es:edx+8], mm5
1285         pop edx
1286%endif
1287.SKIP_PROCESS:
1288         mov ecx, [ebp+deltaPtr]
1289         add ecx, 8
1290         mov [ebp+deltaPtr], ecx
1291         add edx, 16
1292         add eax, 8
1293
1294         pop ecx
1295         sub ecx, 4
1296         cmp ecx, 0
1297         jg  near .Loop
1298
1299; Restore some stuff
1300         popad
1301         mov esp, ebp
1302         pop ebp
1303         emms
1304         ret
1305
1306
1307;-------------------------------------------------------------------------
1308;-------------------------------------------------------------------------
1309;-------------------------------------------------------------------------
1310;-------------------------------------------------------------------------
1311;-------------------------------------------------------------------------
1312;-------------------------------------------------------------------------
1313;-------------------------------------------------------------------------
1314
1315
;-------------------------------------------------------------------------
; 2xSaI: byte offsets of the neighbourhood pixels inside their source row.
; Pixels are 16 bits wide, so neighbouring columns are 2 bytes apart and
; offset 0 is the column of the pixel being scaled (A).
;
;        column:   -2    0   +2   +4
;                   I |  E    F |  J
;                   G |  A    B |  K
;                   H |  C    D |  L
;                   M |  N    O |  P
;-------------------------------------------------------------------------
colorI          equ     -2
colorE          equ      0
colorF          equ      2
colorJ          equ      4

colorG          equ     -2
colorA          equ      0
colorB          equ      2
colorK          equ      4

colorH          equ     -2
colorC          equ      0
colorD          equ      2
colorL          equ      4

colorM          equ     -2
colorN          equ      0
colorO          equ      2
colorP          equ      4
1336
;-------------------------------------------------------------------------
; SAI_VOTE_PAIR  mem1, mem2
;
; One step of the "is this diagonal more like A or more like B" vote used
; by _2xSaILine.  mem1/mem2 are the two neighbour qwords to inspect.
;   In : mm4 = colorA, mm5 = colorB,
;        mm6 = 0x0001 in every still-undecided lane, 0 elsewhere,
;        mm7 = running score (signed words)
;   Out: in undecided lanes only,
;        mm7 += 1 where fewer than two of the neighbours equal colorA,
;        mm7 -= 1 where fewer than two of the neighbours equal colorB
;   Clobbers mm0-mm3.
;-------------------------------------------------------------------------
%macro SAI_VOTE_PAIR 2
         movq mm0, %1
         movq mm1, %2
         movq mm2, mm0
         movq mm3, mm1
         pcmpeqw mm0, mm4              ; neighbour1 == A
         pcmpeqw mm1, mm4              ; neighbour2 == A
         pcmpeqw mm2, mm5              ; neighbour1 == B
         pcmpeqw mm3, mm5              ; neighbour2 == B
         pand mm0, mm6                 ; turn each match into 0/1
         pand mm1, mm6
         pand mm2, mm6
         pand mm3, mm6
         paddw mm0, mm1                ; mm0 = matches with A (0..2)
         paddw mm2, mm3                ; mm2 = matches with B (0..2)

         pxor mm3, mm3
         pcmpgtw mm0, mm6              ; both neighbours matched A ?
         pcmpgtw mm2, mm6              ; both neighbours matched B ?
         pcmpeqw mm0, mm3              ; invert: fewer than two matches
         pcmpeqw mm2, mm3
         pand mm0, mm6                 ; back to 0/1, undecided lanes only
         pand mm2, mm6
         paddw mm7, mm0
         psubw mm7, mm2
%endmacro

;-------------------------------------------------------------------------
; _2xSaILine
;
; Scales one source row with the 2xSaI filter.  Every loop iteration reads
; four 16-bit pixels and writes eight pixels (16 bytes) to each of two
; destination rows.
;
; Stack arguments (ebp-relative offsets, equates at the top of the file):
;   srcPtr     - address of the first pixel to scale (colorA)
;   deltaPtr   - address of the delta buffer the source is compared with;
;                the argument slot itself is advanced by 8 per iteration
;   srcPitch   - byte distance between source rows; the same value steps
;                between rows of the delta buffer
;   width      - loop counter, reduced by 4 per iteration (the loop body
;                always runs at least once)
;   dstOffset  - address of the first destination row
;   dstPitch   - byte distance from the first to the second destination row
;   dstSegment - selector loaded into fs (DJGPP builds only)
;
; Register roles inside the loop:
;   eax = source pointer, one row above the row being scaled (row of E)
;   ebx = srcPitch      edx = destination pointer      ecx = scratch
;
; All general-purpose registers are preserved (pushad/popad) and the MMX
; state is released with emms before returning.
;
; Map of the pixels:      I|E F|J      row eax
;                         G|A B|K      row eax+ebx
;                         H|C D|L      row eax+ebx*2
;                         M|N O|P      row eax+ebx*3
;-------------------------------------------------------------------------
NEWSYM _2xSaILine
; Store some stuff
         push ebp
         mov ebp, esp
         pushad

; Prepare the destination
%ifdef __DJGPP__
         ; Set the selector
         mov eax, [ebp+dstSegment]
         mov fs, ax
%endif
         mov edx, [ebp+dstOffset]         ; edx points to the screen
; Prepare the source
         ; eax points to colorA
         mov eax, [ebp+srcPtr]
         mov ebx, [ebp+srcPitch]
         mov ecx, [ebp+width]
         ; eax now points to colorE
         sub eax, ebx


; Main Loop
.Loop:   push ecx                         ; remaining width lives on the stack

         ;-----Check Delta------------------
         ; Compare the left (offset -2) and right (offset +4) qwords of all
         ; four rows with the delta buffer.  The delta copy of a source
         ; qword at eax+off is read from ecx+2+off.
         mov ecx, [ebp+deltaPtr]

         movq mm0, [eax+colorI]
         movq mm1, [eax+colorJ]
         movq mm2, [eax+ebx+colorG]
         movq mm3, [eax+ebx+colorK]
         movq mm4, [eax+ebx+ebx+colorH]
         movq mm5, [eax+ebx+ebx+colorL]
         push eax
         add eax, ebx                     ; reach the fourth row (M..P)
         movq mm6, [eax+ebx+ebx+colorM]
         movq mm7, [eax+ebx+ebx+colorP]
         pop eax

         pcmpeqw mm0, [ecx+2+colorI]
         pcmpeqw mm1, [ecx+2+colorJ]
         pcmpeqw mm2, [ecx+ebx+2+colorG]
         pcmpeqw mm3, [ecx+ebx+2+colorK]
         pcmpeqw mm4, [ecx+ebx+ebx+2+colorH]
         pcmpeqw mm5, [ecx+ebx+ebx+2+colorL]
         add ecx, ebx
         pcmpeqw mm6, [ecx+ebx+ebx+2+colorM]
         pcmpeqw mm7, [ecx+ebx+ebx+2+colorP]
         sub ecx, ebx


         pand mm0, mm1
         pand mm2, mm3
         pand mm4, mm5
         pand mm6, mm7
         pand mm0, mm2
         pand mm4, mm6
         pxor mm7, mm7
         pand mm0, mm4                    ; lane = FFFF when all eight compares matched
         movq mm6, [eax+colorI]
         pcmpeqw mm7, mm0                 ; lane = FFFF when something differed

         movq [ecx+2+colorI], mm6         ; store the top-row qword into the delta buffer

         packsswb mm7, mm7                ; one byte per lane -> fits in ecx
         movd ecx, mm7
         test ecx, ecx
         jz near .SKIP_PROCESS            ; nothing differed: leave the destination untouched

         ;End Delta

         ;---------------------------------


;1
         ;if ((colorA == colorD) && (colorB != colorC) && (colorA == colorE) && (colorB == colorL)
         movq mm0, [eax+ebx+colorA]        ;mm0 and mm1 contain colorA
         movq mm2, [eax+ebx+colorB]        ;mm2 and mm3 contain colorB

         movq mm1, mm0
         movq mm3, mm2

         pcmpeqw mm0, [eax+ebx+ebx+colorD]
         pcmpeqw mm1, [eax+colorE]
         pcmpeqw mm2, [eax+ebx+ebx+colorL]
         pcmpeqw mm3, [eax+ebx+ebx+colorC]

         pand mm0, mm1
         pxor mm1, mm1
         pand mm0, mm2
         pcmpeqw mm3, mm1              ;compare with zero = logical NOT
         pand mm0, mm3                 ;result in mm0

         ;if ((colorA == colorC) && (colorB != colorE) && (colorA == colorF) && (colorB == colorJ)
         movq mm4, [eax+ebx+colorA]        ;mm4 and mm5 contain colorA
         movq mm6, [eax+ebx+colorB]        ;mm6 and mm7 contain colorB
         movq mm5, mm4
         movq mm7, mm6

         pcmpeqw mm4, [eax+ebx+ebx+colorC]
         pcmpeqw mm5, [eax+colorF]
         pcmpeqw mm6, [eax+colorJ]
         pcmpeqw mm7, [eax+colorE]

         pand mm4, mm5
         pxor mm5, mm5
         pand mm4, mm6
         pcmpeqw mm7, mm5
         pand mm4, mm7                 ;result in mm4

         por mm0, mm4                  ;combine the masks
         movq [Mask1], mm0             ;Mask1: lanes that take colorA

         ;--------------------------------------------

;2
         ;if ((colorB == colorC) && (colorA != colorD) && (colorB == colorF) && (colorA == colorH)
         movq mm0, [eax+ebx+colorB]        ;mm0 and mm1 contain colorB
         movq mm2, [eax+ebx+colorA]        ;mm2 and mm3 contain colorA
         movq mm1, mm0
         movq mm3, mm2

         pcmpeqw mm0, [eax+ebx+ebx+colorC]
         pcmpeqw mm1, [eax+colorF]
         pcmpeqw mm2, [eax+ebx+ebx+colorH]
         pcmpeqw mm3, [eax+ebx+ebx+colorD]

         pand mm0, mm1
         pxor mm1, mm1
         pand mm0, mm2
         pcmpeqw mm3, mm1
         pand mm0, mm3                 ;result in mm0

         ;if ((colorB == colorE) && (colorB == colorD) && (colorA != colorF) && (colorA == colorI)
         movq mm4, [eax+ebx+colorB]        ;mm4 and mm5 contain colorB
         movq mm6, [eax+ebx+colorA]        ;mm6 and mm7 contain colorA
         movq mm5, mm4
         movq mm7, mm6

         pcmpeqw mm4, [eax+ebx+ebx+colorD]
         pcmpeqw mm5, [eax+colorE]
         pcmpeqw mm6, [eax+colorI]
         pcmpeqw mm7, [eax+colorF]

         pand mm4, mm5
         pxor mm5, mm5
         pand mm4, mm6
         pcmpeqw mm7, mm5
         pand mm4, mm7                 ;result in mm4

         por mm0, mm4                  ;combine the masks
         movq [Mask2], mm0             ;Mask2: lanes that take colorB


;interpolate colorA and colorB
;  (A & colorMask)/2 + (B & colorMask)/2 + (A & B & lowPixelMask)
         movq mm0, [eax+ebx+colorA]
         movq mm1, [eax+ebx+colorB]

         movq mm2, mm0
         movq mm3, mm1

         pand mm0, [colorMask]
         pand mm1, [colorMask]

         psrlw mm0, 1
         psrlw mm1, 1

         pand mm3, [lowPixelMask]
         paddw mm0, mm1

         pand mm3, mm2
         paddw mm0, mm3                ;mm0 contains the interpolated values

         ;assemble the pixels
         ;  colorA where Mask1, colorB where Mask2, the interpolation elsewhere
         movq mm1, [eax+ebx+colorA]
         movq mm2, [eax+ebx+colorB]

         movq mm3, [Mask1]
         movq mm5, mm1                 ;mm5/mm6 keep colorA for the interleave
         movq mm4, [Mask2]
         movq mm6, mm1

         pand mm1, mm3
         por mm3, mm4
         pxor mm7, mm7
         pand mm2, mm4

         pcmpeqw mm3, mm7              ;lanes where neither mask is set
         por mm1, mm2
         pand mm0, mm3

         por mm0, mm1

         punpcklwd mm5, mm0            ;A0, new0, A1, new1
         punpckhwd mm6, mm0            ;A2, new2, A3, new3

%ifdef __DJGPP__
         movq [fs:edx], mm5
         movq [fs:edx+8], mm6
%else
         movq [es:edx], mm5
         movq [es:edx+8], mm6
%endif

;------------------------------------------------
;        Create the Nextline
;------------------------------------------------
;3       ;if ((colorA == colorD) && (colorB != colorC) && (colorA == colorG) && (colorC == colorO)
         movq mm0, [eax+ebx+colorA]        ;mm0 and mm1 contain colorA
         movq mm2, [eax+ebx+ebx+colorC]        ;mm2 and mm3 contain colorC
         movq mm1, mm0
         movq mm3, mm2

         push eax
         add eax, ebx                  ;eax = row of A, so row M..P is reachable
         pcmpeqw mm0, [eax+ebx+colorD]
         pcmpeqw mm1, [eax+colorG]
         pcmpeqw mm2, [eax+ebx+ebx+colorO]
         pcmpeqw mm3, [eax+colorB]
         pop eax

         pand mm0, mm1
         pxor mm1, mm1
         pand mm0, mm2
         pcmpeqw mm3, mm1
         pand mm0, mm3                 ;result in mm0

         ;if ((colorA == colorB) && (colorG != colorC) && (colorA == colorH) && (colorC == colorM)
         movq mm4, [eax+ebx+colorA]        ;mm4 and mm5 contain colorA
         movq mm6, [eax+ebx+ebx+colorC]        ;mm6 and mm7 contain colorC
         movq mm5, mm4
         movq mm7, mm6

         push eax
         add eax, ebx
         pcmpeqw mm4, [eax+ebx+colorH]
         pcmpeqw mm5, [eax+colorB]
         pcmpeqw mm6, [eax+ebx+ebx+colorM]
         pcmpeqw mm7, [eax+colorG]
         pop eax

         pand mm4, mm5
         pxor mm5, mm5
         pand mm4, mm6
         pcmpeqw mm7, mm5
         pand mm4, mm7                 ;result in mm4

         por mm0, mm4                  ;combine the masks
         movq [Mask1], mm0             ;Mask1: lanes that take colorA
         ;--------------------------------------------

;4
         ;if ((colorB == colorC) && (colorA != colorD) && (colorC == colorH) && (colorA == colorF)
         movq mm0, [eax+ebx+ebx+colorC]        ;mm0 and mm1 contain colorC
         movq mm2, [eax+ebx+colorA]        ;mm2 and mm3 contain colorA
         movq mm1, mm0
         movq mm3, mm2

         pcmpeqw mm0, [eax+ebx+colorB]
         pcmpeqw mm1, [eax+ebx+ebx+colorH]
         pcmpeqw mm2, [eax+colorF]
         pcmpeqw mm3, [eax+ebx+ebx+colorD]

         pand mm0, mm1
         pxor mm1, mm1
         pand mm0, mm2
         pcmpeqw mm3, mm1
         pand mm0, mm3                 ;result in mm0

         ;if ((colorC == colorG) && (colorC == colorD) && (colorA != colorH) && (colorA == colorI)
         movq mm4, [eax+ebx+ebx+colorC]        ;mm4 and mm5 contain colorC
         movq mm6, [eax+ebx+colorA]        ;mm6 and mm7 contain colorA
         movq mm5, mm4
         movq mm7, mm6

         pcmpeqw mm4, [eax+ebx+ebx+colorD]
         pcmpeqw mm5, [eax+ebx+colorG]
         pcmpeqw mm6, [eax+colorI]
         pcmpeqw mm7, [eax+ebx+ebx+colorH]

         pand mm4, mm5
         pxor mm5, mm5
         pand mm4, mm6
         pcmpeqw mm7, mm5
         pand mm4, mm7                 ;result in mm4

         por mm0, mm4                  ;combine the masks
         movq [Mask2], mm0             ;Mask2: lanes that take colorC
         ;----------------------------------------------

;interpolate colorA and colorC
         movq mm0, [eax+ebx+colorA]
         movq mm1, [eax+ebx+ebx+colorC]

         movq mm2, mm0
         movq mm3, mm1

         pand mm0, [colorMask]
         pand mm1, [colorMask]

         psrlw mm0, 1
         psrlw mm1, 1

         pand mm3, [lowPixelMask]
         paddw mm0, mm1

         pand mm3, mm2
         paddw mm0, mm3                ;mm0 contains the interpolated values
         ;-------------

         ;assemble the pixels
         ;  colorA where Mask1, colorC where Mask2, the interpolation elsewhere
         movq mm1, [eax+ebx+colorA]
         movq mm2, [eax+ebx+ebx+colorC]

         movq mm3, [Mask1]
         movq mm4, [Mask2]

         pand mm1, mm3
         pand mm2, mm4

         por mm3, mm4
         pxor mm7, mm7
         por mm1, mm2

         pcmpeqw mm3, mm7
         pand mm0, mm3
         por mm0, mm1
         movq [ACPixel], mm0           ;left pixels of the second output row

;////////////////////////////////
; Decide which "branch" to take
;--------------------------------
         movq mm0, [eax+ebx+colorA]
         movq mm1, [eax+ebx+colorB]
         movq mm6, mm0
         movq mm7, mm1
         pcmpeqw mm0, [eax+ebx+ebx+colorD]
         pcmpeqw mm1, [eax+ebx+ebx+colorC]
         pcmpeqw mm6, mm7              ;colorA == colorB

         movq mm2, mm0
         movq mm3, mm0

         pand mm0, mm1       ;colorA == colorD && colorB == colorC
         pxor mm7, mm7

         pcmpeqw mm2, mm7
         pand mm6, mm0       ;all four of A, B, C, D are equal
         pand mm2, mm1       ;colorA != colorD && colorB == colorC

         pcmpeqw mm1, mm7

         pand mm1, mm3       ;colorA == colorD && colorB != colorC
         pxor mm0, mm6       ;both diagonals match but A != B: undecided lanes
         por mm1, mm6
         movq mm7, mm0
         movq [Mask2], mm2   ;Mask2: diagonal pixel takes colorB
         packsswb mm7, mm7
         movq [Mask1], mm1   ;Mask1: diagonal pixel takes colorA

         movd ecx, mm7
         test ecx, ecx
         jz near .SKIP_GUESS ;no undecided lane, the vote is not needed
;---------------------------------------------
; Map of the pixels:                    I|E F|J
;                                       G|A B|K
;                                       H|C D|L
;                                       M|N O|P
; Vote over the eight pixels around the 2x2 core, two at a time; the sign
; of the score accumulated in mm7 settles the undecided lanes.
         movq mm6, mm0
         movq mm4, [eax+ebx+colorA]
         movq mm5, [eax+ebx+colorB]
         pxor mm7, mm7                 ;score = 0
         pand mm6, [ONE]               ;1 in every undecided lane

         SAI_VOTE_PAIR [eax+colorE], [eax+ebx+colorG]
         SAI_VOTE_PAIR [eax+colorF], [eax+ebx+colorK]

         push eax
         add eax, ebx                  ;eax = row of A, so row M..P is reachable
         SAI_VOTE_PAIR [eax+ebx+colorH], [eax+ebx+ebx+colorN]
         SAI_VOTE_PAIR [eax+ebx+colorL], [eax+ebx+ebx+colorO]
         pop eax

         movq mm1, mm7
         pxor mm0, mm0
         pcmpgtw mm7, mm0              ;mm7 = lanes with score > 0 -> colorA
         pcmpgtw mm0, mm1              ;mm0 = lanes with score < 0 -> colorB

         ; Merge the full-lane masks.  Mask2 must receive mm0, not the raw
         ; score still held in mm1: a raw score is not all-ones/all-zeros
         ; and would leave stray bits in the mask.
         por mm7, [Mask1]
         por mm0, [Mask2]
         movq [Mask1], mm7
         movq [Mask2], mm0

.SKIP_GUESS:
         ;----------------------------
         ;interpolate A, B, C and D
         ;  sum of (X & qcolorMask)/4 for X in A,B,C,D, plus
         ;  ((sum of X & qlowpixelMask)/4) & qlowpixelMask
         movq mm0, [eax+ebx+colorA]
         movq mm1, [eax+ebx+colorB]
         movq mm4, mm0
         movq mm2, [eax+ebx+ebx+colorC]
         movq mm5, mm1
         movq mm3, [qcolorMask]
         movq mm6, mm2
         movq mm7, [qlowpixelMask]

         pand mm0, mm3
         pand mm1, mm3
         pand mm2, mm3
         pand mm3, [eax+ebx+ebx+colorD]

         psrlw mm0, 2
         pand mm4, mm7
         psrlw mm1, 2
         pand mm5, mm7
         psrlw mm2, 2
         pand mm6, mm7
         psrlw mm3, 2
         pand mm7, [eax+ebx+ebx+colorD]

         paddw mm0, mm1
         paddw mm2, mm3

         paddw mm4, mm5
         paddw mm6, mm7

         paddw mm4, mm6
         paddw mm0, mm2
         psrlw mm4, 2
         pand mm4, [qlowpixelMask]
         paddw mm0, mm4      ;mm0 contains the interpolated value of A, B, C and D

;\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
         ;assemble the pixels
         ;  colorA where Mask1, colorB where Mask2, the 4-way blend elsewhere
         movq mm1, [Mask1]
         movq mm2, [Mask2]
         movq mm4, [eax+ebx+colorA]
         movq mm5, [eax+ebx+colorB]
         pand mm4, mm1
         pand mm5, mm2

         pxor mm7, mm7
         por mm1, mm2
         por mm4, mm5
         pcmpeqw mm1, mm7
         pand mm0, mm1
         por mm4, mm0        ;mm4 contains the diagonal pixels

         movq mm0, [ACPixel]
         movq mm1, mm0
         punpcklwd mm0, mm4            ;AC0, diag0, AC1, diag1
         punpckhwd mm1, mm4            ;AC2, diag2, AC3, diag3

         push edx
         add edx, [ebp+dstPitch]       ;second destination row

%ifdef __DJGPP__
         movq [fs:edx], mm0
         movq [fs:edx+8], mm1
%else
         movq [es:edx], mm0
         movq [es:edx+8], mm1
%endif
         pop edx

.SKIP_PROCESS:
         mov ecx, [ebp+deltaPtr]
         add ecx, 8
         mov [ebp+deltaPtr], ecx       ;advance the delta pointer by 4 pixels
         add edx, 16                   ;8 destination pixels
         add eax, 8                    ;4 source pixels

         pop ecx                       ;remaining width
         sub ecx, 4
         cmp ecx, 0
         jg  near .Loop

; Restore some stuff
         popad
         mov esp, ebp
         pop ebp
         emms
         ret
1914
1915;-------------------------------------------------------------------------
1916;-------------------------------------------------------------------------
1917;-------------------------------------------------------------------------
1918;-------------------------------------------------------------------------
1919;-------------------------------------------------------------------------
1920;-------------------------------------------------------------------------
1921;-------------------------------------------------------------------------
1922
;-------------------------------------------------------------------------
; SAI_FILL_MASK  var, pattern
; Writes the 32-bit pattern into both halves of the 8-byte variable var.
;-------------------------------------------------------------------------
%macro SAI_FILL_MASK 2
         mov dword [%1], %2
         mov dword [%1+4], %2
%endmacro

;-------------------------------------------------------------------------
; _Init_2xSaIMMX
;
; Loads the four interpolation masks (colorMask, lowPixelMask, qcolorMask,
; qlowpixelMask) for the requested 16-bit pixel layout.
;   In : [ebp+8] = PixelFormat, 555 or 565
;   Out: eax = 0 when the masks were written,
;        eax = 1 for any other PixelFormat (no mask is touched)
; No register other than eax is modified.
;-------------------------------------------------------------------------
NEWSYM _Init_2xSaIMMX
         push ebp
         mov ebp, esp

; MMX detection through CPUID is disabled; the original author's note reads
; "Damn thing doesn't work".  CPUID overwrites eax, ebx, ecx and edx, so
; re-enabling it requires preserving ebx, ecx and edx first.
;        mov eax, 1
;        cpuid
;        test edx, 0x00800000     ;test bit 23
;        jz .unsupported          ;bit not set => no MMX detected

         mov eax, [ebp+8]                 ; PixelFormat
         cmp eax, 555
         je .format555
         cmp eax, 565
         je .format565

         mov eax, 1                       ; unknown pixel format
         jmp .done

.format555:
         SAI_FILL_MASK colorMask,     0x7BDE7BDE
         SAI_FILL_MASK lowPixelMask,  0x04210421
         SAI_FILL_MASK qcolorMask,    0x739C739C
         SAI_FILL_MASK qlowpixelMask, 0x0C630C63
         mov eax, 0
         jmp .done

.format565:
         SAI_FILL_MASK colorMask,     0xF7DEF7DE
         SAI_FILL_MASK lowPixelMask,  0x08210821
         SAI_FILL_MASK qcolorMask,    0xE79CE79C
         SAI_FILL_MASK qlowpixelMask, 0x18631863
         mov eax, 0

.done:
         pop ebp                          ; nothing was pushed after the frame setup
         ret
1987
1988
1989;-------------------------------------------------------------------------
1990;-------------------------------------------------------------------------
1991;-------------------------------------------------------------------------
1992;-------------------------------------------------------------------------
1993;-------------------------------------------------------------------------
1994;-------------------------------------------------------------------------
1995;-------------------------------------------------------------------------
1996
SECTION .data
; Constants, each one 16-bit pattern repeated over the four pixel lanes of
; an MMX register (8 bytes per symbol).
;
; The four interpolation masks start out with the 565 patterns and are
; rewritten by _Init_2xSaIMMX, which is why they sit in a writable section.
colorMask     times 4 dw 0xF7DE        ; every colour field with its lowest bit cleared
lowPixelMask  times 4 dw 0x0821        ; the lowest bit of every colour field

qcolorMask    times 4 dw 0xE79C        ; every colour field with its two lowest bits cleared
qlowpixelMask times 4 dw 0x1863        ; the two lowest bits of every colour field

FALSE         times 4 dw 0x0000        ; all lanes clear
TRUE          times 4 dw 0xFFFF        ; all lanes set
ONE           times 4 dw 0x0001        ; the value 1 in every lane
2009
SECTION .bss
; Scratch storage, one quadword (one MMX register) per symbol.

; Used by _2xSaILine
ACPixel       resq 1                   ; A/C result kept for the second output row
Mask1         resq 1                   ; pixel-selection masks
Mask2         resq 1

; Used by _2xSaISuper2xSaILine
I56Pixel      resq 1
I23Pixel      resq 1
I5556Pixel    resq 1                   ; NOTE(review): users of the next four slots were not
I2223Pixel    resq 1                   ; checked; presumably the Super2xSaI/SuperEagle code
I5666Pixel    resq 1
I2333Pixel    resq 1
Mask26        resq 1
Mask35        resq 1
Mask26b       resq 1
Mask35b       resq 1
product1a     resq 1
product1b     resq 1
product2a     resq 1
product2b     resq 1
final1a       resq 1
final1b       resq 1
final2a       resq 1
final2b       resq 1
2033