;SuperScale by ElSemi
;
; Based on the original idea of 2xScale of AdvanceMAME.
; AdvanceMAME code is GPLed and I didn't get permission to use it, so this code is
; my own implementation of that effect,
; similar to Kawaks' KScale effect.
;
;It expands the central pixel of a 3x3 matrix to 2x2 pixels, resulting in a 2x scale
;on both axes.
;
;A B C      E0 E1
;D E F  ->  E2 E3
;G H I
;
;
;Original algorithm (AdvanceMAME)
;
;E0 = (D == B && B != F && D != H) ? D : E;
;E1 = (B == F && B != D && F != H) ? F : E;
;E2 = (D == H && D != B && H != F) ? D : E;
;E3 = (H == F && D != H && B != F) ? F : E;
;
;
;(I'll only work through the first 2 equations, as the 3rd and 4th
;are symmetrical, ABC<->GHI)
;
;Reduce variable usage
;
;D==B => E0=D -> E0=B
;        D!=H -> B!=H
;
;B==F => E1=F -> E1=B
;        F!=H -> B!=H
;
;Group and reorder
;
;E0=(B==D && B!=F && B!=H)?B:E;
;E1=(B!=D && B==F && B!=H)?B:E;
;
;As you can see, there are only 3 "real" conditions (B==D, B==F, and B!=H). The other
;2 can be obtained by negating the first 2 conditions (B!=D, B!=F), which makes the
;code suitable for pcmpeqw, pand, pandn.
;
;-----------------------------------------------------------------------------
; Syntax: NASM, 32-bit x86 with MMX. Pixels are 16 bits wide (word compares,
; +/-2 byte neighbour offsets).
;
; Both routines take their arguments on the stack, addressed through EBP:
;   [ebp+08h]  pointer to line -1      (A B C)
;   [ebp+0Ch]  pointer to current line (D E F)
;   [ebp+10h]  pointer to line +1      (G H I)
;   [ebp+14h]  destination pointer
;   [ebp+18h]  source pixel count; count/4 groups of 4 pixels are processed
;   [ebp+1Ch]  (superscale_line_75 only) pointer to a 64-bit mask
;
; Each call writes only the E0/E1 output row: 8 destination pixels (16 bytes)
; per 4 source pixels.
; NOTE(review): E2/E3 are presumably obtained by calling again with the
; line -1 / line +1 pointers swapped - confirm against the caller.
;
; The current line is read from 2 bytes before the pointer up to 2 bytes past
; each 8-byte group, so one readable pixel is needed on either side.
;
; Every integer register used (eax, ebx, ecx, esi, edi, ebp) is saved and
; restored. MMX registers mm0-mm5 (and mm7 in superscale_line_75) are
; overwritten, and no EMMS is executed here; the caller has to issue EMMS
; before running x87 floating-point code.
;-----------------------------------------------------------------------------

        BITS 32
        SECTION .text align=32

        GLOBAL _superscale_line
        GLOBAL _superscale_line_75
        GLOBAL superscale_line
        GLOBAL superscale_line_75

; Stack-argument offsets relative to EBP after the standard frame setup.
%define ARG_ABOVE       08h             ; ABC, line -1
%define ARG_CURRENT     0Ch             ; DEF, current line
%define ARG_BELOW       10h             ; GHI, line +1
%define ARG_DST         14h             ; destination
%define ARG_COUNT       18h             ; source pixel count
%define ARG_MASK        1Ch             ; pointer to 64-bit mask (75% variant)

; Pad with NOPs up to the next 32-byte boundary of the section.
%macro ALIGN32 0
        times ($$-$)&31 nop
%endmacro

; Expand the 4 source pixels at [esi] into 8 destination pixels.
;   In:   eax -> line -1, esi -> current line, ebx -> line +1
;   Out:  mm4 = E0,E1 pairs for source pixels 0-1 (first 8 output bytes)
;         mm0 = E0,E1 pairs for source pixels 2-3 (next 8 output bytes)
;   Uses: mm0-mm5
; The lane comments describe a single pixel; all 4 lanes are handled alike.
%macro SUPERSCALE_4PX 0
        movq    mm0,[eax]               ; B  (pixel above)
        movq    mm2,[ebx]               ; H  (pixel below)
        movq    mm3,[esi-2]             ; D  (left neighbour: one pixel back)
        movq    mm1,[esi]               ; E  (centre pixel)
        movq    mm4,[esi+2]             ; F  (right neighbour: one pixel on)

        ; Basic comparisons, all against B
        pcmpeqw mm2,mm0                 ; mm2 = (B==H)
        pcmpeqw mm3,mm0                 ; mm3 = (B==D)
        pcmpeqw mm4,mm0                 ; mm4 = (B==F)
        movq    mm5,mm2                 ; (B==H) is needed for both masks

        ; pandn dst,src computes (~dst) & src
        pandn   mm2,mm3                 ; mm2 = (B==D) && (B!=H)
        pandn   mm5,mm4                 ; mm5 = (B==F) && (B!=H)
        pandn   mm4,mm2                 ; mm4 = (B==D) && (B!=H) && (B!=F) -> E0 mask
        pandn   mm3,mm5                 ; mm3 = (B==F) && (B!=H) && (B!=D) -> E1 mask

        ; Select B where the mask is set, E elsewhere
        movq    mm2,mm4                 ; keep copies: the masks are overwritten below
        movq    mm5,mm3
        pand    mm4,mm0                 ; B where the E0 mask is set
        pand    mm3,mm0                 ; B where the E1 mask is set
        pandn   mm2,mm1                 ; E where the E0 mask is clear
        pandn   mm5,mm1                 ; E where the E1 mask is clear
        por     mm4,mm2                 ; mm4 = E0 for the 4 pixels
        por     mm3,mm5                 ; mm3 = E1 for the 4 pixels

        ; Interleave E0/E1 into output order
        movq    mm0,mm4
        punpcklwd mm4,mm3               ; E0,E1 of source pixels 0 and 1
        punpckhwd mm0,mm3               ; E0,E1 of source pixels 2 and 3
%endmacro

;-----------------------------------------------------------------------------
; superscale_line / _superscale_line
; Writes the E0/E1 output row for count/4 groups of 4 source pixels.
; Does nothing when count is below 4.
;-----------------------------------------------------------------------------
_superscale_line:
superscale_line:
        push    ebp
        mov     ebp,esp
        push    eax
        push    esi
        push    ebx
        push    edi
        push    ecx

        mov     ecx,[ebp+ARG_COUNT]
        shr     ecx,2                   ; /4: each iteration handles 4 source pixels
        jz      .done                   ; no full group: avoid wrapping the counter

        mov     eax,[ebp+ARG_ABOVE]
        mov     esi,[ebp+ARG_CURRENT]
        mov     ebx,[ebp+ARG_BELOW]
        mov     edi,[ebp+ARG_DST]
        ALIGN32
.loop:
        SUPERSCALE_4PX

        movq    [edi],mm4
        movq    [edi+8],mm0
        add     eax,8                   ; 4 source pixels consumed per line
        add     esi,8
        add     ebx,8
        add     edi,16                  ; 8 destination pixels produced
        dec     ecx
        jnz     .loop

.done:
        pop     ecx
        pop     edi
        pop     ebx
        pop     esi
        pop     eax
        pop     ebp
        ret

        ALIGN32

;-----------------------------------------------------------------------------
; superscale_line_75 / _superscale_line_75
; Same as superscale_line, but every output word is darkened to roughly 75%:
;   half    = (pixel >> 1) & mask
;   quarter = (half  >> 1) & mask
;   result  = half + quarter
; The 64-bit mask is loaded from the pointer at [ebp+1Ch]; it is applied after
; each 1-bit right shift.
; NOTE(review): the mask presumably clears the bit shifted in from the
; neighbouring colour field (e.g. 7BEFh per word for RGB565) - confirm
; against the caller.
; Does nothing (and does not read the mask) when count is below 4.
;-----------------------------------------------------------------------------
_superscale_line_75:
superscale_line_75:
        push    ebp
        mov     ebp,esp
        push    eax
        push    esi
        push    ebx
        push    edi
        push    ecx

        mov     ecx,[ebp+ARG_COUNT]
        shr     ecx,2                   ; /4: each iteration handles 4 source pixels
        jz      .done                   ; no full group: avoid wrapping the counter

        mov     eax,[ebp+ARG_MASK]
        movq    mm7,[eax]               ; mm7 = mask, constant for the whole line

        mov     eax,[ebp+ARG_ABOVE]
        mov     esi,[ebp+ARG_CURRENT]
        mov     ebx,[ebp+ARG_BELOW]
        mov     edi,[ebp+ARG_DST]
        ALIGN32
.loop:
        SUPERSCALE_4PX

        ; Reduce brightness to 75% using shift/mask
        psrlw   mm4,1
        psrlw   mm0,1
        pand    mm4,mm7                 ; mm4/mm0 = half
        pand    mm0,mm7
        movq    mm2,mm4
        movq    mm1,mm0
        psrlw   mm2,1
        psrlw   mm1,1
        pand    mm2,mm7                 ; mm2/mm1 = quarter
        pand    mm1,mm7
        paddw   mm4,mm2                 ; half + quarter
        paddw   mm0,mm1

        movq    [edi],mm4
        movq    [edi+8],mm0
        add     eax,8                   ; 4 source pixels consumed per line
        add     esi,8
        add     ebx,8
        add     edi,16                  ; 8 destination pixels produced
        dec     ecx
        jnz     near .loop              ; loop body is longer than a short jump reaches

.done:
        pop     ecx
        pop     edi
        pop     ebx
        pop     esi
        pop     eax
        pop     ebp
        ret