1 // This file has been modified from Ken Silverman's original release
2 // by Jonathon Fowler (jf@jonof.id.au)
3 
4 
5 #ifndef __pragmas_h__
6 #define __pragmas_h__
7 
8 #ifdef __cplusplus
9 extern "C" {
10 #endif
11 
// Declaration only: no definition of dmval is visible in this part of the file.
// NOTE(review): the name suggests it holds a remainder produced by a
// div/mod helper -- confirm at the definition site.
extern int dmval;
13 
14 #if defined(__GNUC__) && defined(__i386__) && USE_ASM
15 
16 //
17 // GCC Inline Assembler version
18 //
19 
20 //{{{
21 
// Helpers that are declared here rather than expanded inline. No definitions
// are visible in this part of the file, so they are presumably supplied by a
// separate source file (TODO confirm).
// Original author's note: maybe one day these will be turned into macros.
extern int  boundmulscale(int a, int b, int c);
extern void clearbufbyte(void *D, int c, int a);
extern void copybufbyte(void *S, void *D, int c);
extern void copybufreverse(void *S, void *D, int c);
27 
28 
// sqr(a): square of a 32-bit int, computed with one two-operand imull.
// The argument is evaluated exactly once. The value lives in one of the
// byte-addressable registers ("q") and is updated in place, so only the low
// 32 bits of the product are kept.
#define sqr(a) \
	({ int __sq = (a); \
	   __asm__ __volatile__ ( \
		"imull %0, %0" \
		: "+q" (__sq) \
		: \
		: "cc"); \
	   __sq; })
36 
// scale(a,d,c): (a * d) / c with a 64-bit intermediate product.
// imull leaves the product of eax and edx in edx:eax; idivl then divides that
// register pair by ecx. The quotient (eax) is the macro's value; the
// remainder left in edx is discarded. Arguments are evaluated once each, in
// the order a, d, c.
// As with any idivl, a zero divisor or a quotient that does not fit in
// 32 bits raises a divide fault.
#define scale(a,d,c) \
	({ int __num = (a), __mul = (d); \
	   const int __den = (c); \
	   __asm__ __volatile__ ( \
		"imull %%edx; " \
		"idivl %%ecx" \
		: "+a" (__num), "+d" (__mul) \
		: "c" (__den) \
		: "cc"); \
	   __num; })
43 
// mulscale(a,d,c): (a * d) >> c, with the multiply done at 64-bit precision.
// imull puts the product in edx:eax; shrdl then shifts eax right by cl bits,
// filling from edx, so the result is the low 32 bits of the shifted product.
// The shift count is taken from cl at run time (shrdl only honours the low
// five bits of cl, so c is expected to be in 0..31).
// Arguments are evaluated once each, in the order a, d, c.
#define mulscale(a,d,c) \
	({ int __lo = (a), __hi = (d); \
	   const int __sh = (c); \
	   __asm__ __volatile__ ( \
		"imull %%edx; " \
		"shrdl %%cl, %%edx, %%eax" \
		: "+a" (__lo), "+d" (__hi) \
		: "c" (__sh) \
		: "cc"); \
	   __lo; })
// mulscale1 .. mulscale32: fixed-shift variants of mulscale().
//
// mulscaleN(a,d) multiplies a by d with imull (64-bit product in edx:eax)
// and yields the low 32 bits of (product >> N). Each argument is evaluated
// exactly once, a before d.
//
// The 31 shifting variants used to be 31 hand-copied asm blocks that differed
// only in the shrdl immediate. They now share one helper that pastes the
// shift count into the instruction text via stringification, so the
// assembler input is the same as before. The count must be written as a
// plain decimal literal (it is stringified, not evaluated).
#define PRAGMAS_MULSCALE_IMM(a,d,n) \
	({ int __a=(a), __d=(d); \
	   __asm__ __volatile__ ("imull %%edx; shrdl $" #n ", %%edx, %%eax" \
		: "=a" (__a), "=d" (__d) \
		: "a" (__a), "d" (__d) : "cc"); \
	 __a; })

#define mulscale1(a,d)  PRAGMAS_MULSCALE_IMM(a,d,1)
#define mulscale2(a,d)  PRAGMAS_MULSCALE_IMM(a,d,2)
#define mulscale3(a,d)  PRAGMAS_MULSCALE_IMM(a,d,3)
#define mulscale4(a,d)  PRAGMAS_MULSCALE_IMM(a,d,4)
#define mulscale5(a,d)  PRAGMAS_MULSCALE_IMM(a,d,5)
#define mulscale6(a,d)  PRAGMAS_MULSCALE_IMM(a,d,6)
#define mulscale7(a,d)  PRAGMAS_MULSCALE_IMM(a,d,7)
#define mulscale8(a,d)  PRAGMAS_MULSCALE_IMM(a,d,8)
#define mulscale9(a,d)  PRAGMAS_MULSCALE_IMM(a,d,9)
#define mulscale10(a,d) PRAGMAS_MULSCALE_IMM(a,d,10)
#define mulscale11(a,d) PRAGMAS_MULSCALE_IMM(a,d,11)
#define mulscale12(a,d) PRAGMAS_MULSCALE_IMM(a,d,12)
#define mulscale13(a,d) PRAGMAS_MULSCALE_IMM(a,d,13)
#define mulscale14(a,d) PRAGMAS_MULSCALE_IMM(a,d,14)
#define mulscale15(a,d) PRAGMAS_MULSCALE_IMM(a,d,15)
#define mulscale16(a,d) PRAGMAS_MULSCALE_IMM(a,d,16)
#define mulscale17(a,d) PRAGMAS_MULSCALE_IMM(a,d,17)
#define mulscale18(a,d) PRAGMAS_MULSCALE_IMM(a,d,18)
#define mulscale19(a,d) PRAGMAS_MULSCALE_IMM(a,d,19)
#define mulscale20(a,d) PRAGMAS_MULSCALE_IMM(a,d,20)
#define mulscale21(a,d) PRAGMAS_MULSCALE_IMM(a,d,21)
#define mulscale22(a,d) PRAGMAS_MULSCALE_IMM(a,d,22)
#define mulscale23(a,d) PRAGMAS_MULSCALE_IMM(a,d,23)
#define mulscale24(a,d) PRAGMAS_MULSCALE_IMM(a,d,24)
#define mulscale25(a,d) PRAGMAS_MULSCALE_IMM(a,d,25)
#define mulscale26(a,d) PRAGMAS_MULSCALE_IMM(a,d,26)
#define mulscale27(a,d) PRAGMAS_MULSCALE_IMM(a,d,27)
#define mulscale28(a,d) PRAGMAS_MULSCALE_IMM(a,d,28)
#define mulscale29(a,d) PRAGMAS_MULSCALE_IMM(a,d,29)
#define mulscale30(a,d) PRAGMAS_MULSCALE_IMM(a,d,30)
#define mulscale31(a,d) PRAGMAS_MULSCALE_IMM(a,d,31)

// A shift of 32 needs no shrdl at all: the answer is simply the high half of
// the product, which imull leaves in edx.
#define mulscale32(a,d) \
	({ int __a=(a), __d=(d); \
	   __asm__ __volatile__ ("imull %%edx" \
		: "=a" (__a), "=d" (__d) \
		: "a" (__a), "d" (__d) : "cc"); \
	 __d; })
242 
// dmulscale(a,d,S,D,c): (a*d + S*D) >> c, summed at 64-bit precision.
// The first product is parked in esi:ebx while the second is computed, the
// two are added with add/adc, and shrdl extracts the low 32 bits of the sum
// shifted right by cl. Arguments are evaluated once each, in the order
// a, d, S, D, c.
// ebx is used as scratch and is therefore listed as a clobber.
// NOTE(review): on i386 PIC builds ebx commonly holds the GOT pointer and
// some GCC versions reject an explicit ebx clobber -- confirm against the
// project's build flags.
#define dmulscale(a,d,S,D,c) \
	({ int __lo = (a), __hi = (d), __src = (S); \
	   const int __mul = (D), __sh = (c); \
	   __asm__ __volatile__ ( \
		"imull %%edx; "              /* edx:eax = a * d           */ \
		"movl %%eax, %%ebx; "        /* stash low half in ebx     */ \
		"movl %%esi, %%eax; "        /* eax = S                   */ \
		"movl %%edx, %%esi; "        /* stash high half in esi    */ \
		"imull %%edi; "              /* edx:eax = S * D           */ \
		"addl %%ebx, %%eax; "        /* add low halves            */ \
		"adcl %%esi, %%edx; "        /* add high halves + carry   */ \
		"shrdl %%cl, %%edx, %%eax"   /* eax = low 32 of (sum>>cl) */ \
		: "+a" (__lo), "+d" (__hi), "+S" (__src) \
		: "D" (__mul), "c" (__sh) \
		: "ebx", "cc"); \
	   __lo; })
// dmulscale1 .. dmulscale32: fixed-shift variants of dmulscale().
//
// dmulscaleN(a,d,S,D) computes a*d + S*D as a 64-bit sum (two imull results
// combined with add/adc) and yields the low 32 bits of (sum >> N). Arguments
// are evaluated once each, in the order a, d, S, D.
//
// The 31 shifting variants were previously 31 copies of the same eight
// instructions differing only in the shrdl immediate. They now share one
// helper that stringifies the shift count into the instruction text, so the
// assembler input is unchanged. The count must be a plain decimal literal.
//
// ebx is used as scratch and is listed as a clobber; esi is overwritten and
// is therefore declared as an output.
// NOTE(review): on i386 PIC builds ebx commonly holds the GOT pointer and
// some GCC versions reject an explicit ebx clobber -- confirm against the
// project's build flags.
#define PRAGMAS_DMULSCALE_IMM(a,d,S,D,n) \
	({ int __a=(a), __d=(d), __S=(S), __D=(D); \
	   __asm__ __volatile__ ("imull %%edx; movl %%eax, %%ebx; movl %%esi, %%eax; movl %%edx, %%esi; " \
				"imull %%edi; addl %%ebx, %%eax; adcl %%esi, %%edx; shrdl $" #n ", %%edx, %%eax" \
		: "=a" (__a), "=d" (__d), "=S" (__S) \
		: "a" (__a), "d" (__d), "S" (__S), "D" (__D) : "ebx", "cc"); \
	 __a; })

#define dmulscale1(a,d,S,D)  PRAGMAS_DMULSCALE_IMM(a,d,S,D,1)
#define dmulscale2(a,d,S,D)  PRAGMAS_DMULSCALE_IMM(a,d,S,D,2)
#define dmulscale3(a,d,S,D)  PRAGMAS_DMULSCALE_IMM(a,d,S,D,3)
#define dmulscale4(a,d,S,D)  PRAGMAS_DMULSCALE_IMM(a,d,S,D,4)
#define dmulscale5(a,d,S,D)  PRAGMAS_DMULSCALE_IMM(a,d,S,D,5)
#define dmulscale6(a,d,S,D)  PRAGMAS_DMULSCALE_IMM(a,d,S,D,6)
#define dmulscale7(a,d,S,D)  PRAGMAS_DMULSCALE_IMM(a,d,S,D,7)
#define dmulscale8(a,d,S,D)  PRAGMAS_DMULSCALE_IMM(a,d,S,D,8)
#define dmulscale9(a,d,S,D)  PRAGMAS_DMULSCALE_IMM(a,d,S,D,9)
#define dmulscale10(a,d,S,D) PRAGMAS_DMULSCALE_IMM(a,d,S,D,10)
#define dmulscale11(a,d,S,D) PRAGMAS_DMULSCALE_IMM(a,d,S,D,11)
#define dmulscale12(a,d,S,D) PRAGMAS_DMULSCALE_IMM(a,d,S,D,12)
#define dmulscale13(a,d,S,D) PRAGMAS_DMULSCALE_IMM(a,d,S,D,13)
#define dmulscale14(a,d,S,D) PRAGMAS_DMULSCALE_IMM(a,d,S,D,14)
#define dmulscale15(a,d,S,D) PRAGMAS_DMULSCALE_IMM(a,d,S,D,15)
#define dmulscale16(a,d,S,D) PRAGMAS_DMULSCALE_IMM(a,d,S,D,16)
#define dmulscale17(a,d,S,D) PRAGMAS_DMULSCALE_IMM(a,d,S,D,17)
#define dmulscale18(a,d,S,D) PRAGMAS_DMULSCALE_IMM(a,d,S,D,18)
#define dmulscale19(a,d,S,D) PRAGMAS_DMULSCALE_IMM(a,d,S,D,19)
#define dmulscale20(a,d,S,D) PRAGMAS_DMULSCALE_IMM(a,d,S,D,20)
#define dmulscale21(a,d,S,D) PRAGMAS_DMULSCALE_IMM(a,d,S,D,21)
#define dmulscale22(a,d,S,D) PRAGMAS_DMULSCALE_IMM(a,d,S,D,22)
#define dmulscale23(a,d,S,D) PRAGMAS_DMULSCALE_IMM(a,d,S,D,23)
#define dmulscale24(a,d,S,D) PRAGMAS_DMULSCALE_IMM(a,d,S,D,24)
#define dmulscale25(a,d,S,D) PRAGMAS_DMULSCALE_IMM(a,d,S,D,25)
#define dmulscale26(a,d,S,D) PRAGMAS_DMULSCALE_IMM(a,d,S,D,26)
#define dmulscale27(a,d,S,D) PRAGMAS_DMULSCALE_IMM(a,d,S,D,27)
#define dmulscale28(a,d,S,D) PRAGMAS_DMULSCALE_IMM(a,d,S,D,28)
#define dmulscale29(a,d,S,D) PRAGMAS_DMULSCALE_IMM(a,d,S,D,29)
#define dmulscale30(a,d,S,D) PRAGMAS_DMULSCALE_IMM(a,d,S,D,30)
#define dmulscale31(a,d,S,D) PRAGMAS_DMULSCALE_IMM(a,d,S,D,31)

// A shift of 32 needs no shrdl: the answer is the high half of the 64-bit
// sum, which the add/adc pair leaves in edx.
#define dmulscale32(a,d,S,D) \
	({ int __a=(a), __d=(d), __S=(S), __D=(D); \
	   __asm__ __volatile__ ("imull %%edx; movl %%eax, %%ebx; movl %%esi, %%eax; movl %%edx, %%esi; " \
				"imull %%edi; addl %%ebx, %%eax; adcl %%esi, %%edx" \
		: "=a" (__a), "=d" (__d), "=S" (__S) \
		: "a" (__a), "d" (__d), "S" (__S), "D" (__D) : "ebx", "cc"); \
	 __d; })
474 
475 #define tmulscale1(a,d,b,c,S,D) \
476 	({ int __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
477 	   __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
478 				"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
479 				"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $1, %%edx, %%eax" \
480 		: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
481 		: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
482 	 __a; })
483 #define tmulscale2(a,d,b,c,S,D) \
484 	({ int __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
485 	   __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
486 				"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
487 				"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $2, %%edx, %%eax" \
488 		: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
489 		: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
490 	 __a; })
491 #define tmulscale3(a,d,b,c,S,D) \
492 	({ int __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
493 	   __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
494 				"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
495 				"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $3, %%edx, %%eax" \
496 		: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
497 		: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
498 	 __a; })
499 #define tmulscale4(a,d,b,c,S,D) \
500 	({ int __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
501 	   __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
502 				"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
503 				"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $4, %%edx, %%eax" \
504 		: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
505 		: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
506 	 __a; })
507 #define tmulscale5(a,d,b,c,S,D) \
508 	({ int __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
509 	   __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
510 				"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
511 				"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $5, %%edx, %%eax" \
512 		: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
513 		: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
514 	 __a; })
515 #define tmulscale6(a,d,b,c,S,D) \
516 	({ int __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
517 	   __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
518 				"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
519 				"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $6, %%edx, %%eax" \
520 		: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
521 		: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
522 	 __a; })
523 #define tmulscale7(a,d,b,c,S,D) \
524 	({ int __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
525 	   __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
526 				"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
527 				"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $7, %%edx, %%eax" \
528 		: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
529 		: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
530 	 __a; })
531 #define tmulscale8(a,d,b,c,S,D) \
532 	({ int __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
533 	   __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
534 				"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
535 				"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $8, %%edx, %%eax" \
536 		: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
537 		: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
538 	 __a; })
539 #define tmulscale9(a,d,b,c,S,D) \
540 	({ int __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
541 	   __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
542 				"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
543 				"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $9, %%edx, %%eax" \
544 		: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
545 		: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
546 	 __a; })
547 #define tmulscale10(a,d,b,c,S,D) \
548 	({ int __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
549 	   __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
550 				"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
551 				"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $10, %%edx, %%eax" \
552 		: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
553 		: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
554 	 __a; })
555 #define tmulscale11(a,d,b,c,S,D) \
556 	({ int __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
557 	   __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
558 				"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
559 				"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $11, %%edx, %%eax" \
560 		: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
561 		: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
562 	 __a; })
563 #define tmulscale12(a,d,b,c,S,D) \
564 	({ int __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
565 	   __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
566 				"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
567 				"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $12, %%edx, %%eax" \
568 		: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
569 		: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
570 	 __a; })
571 #define tmulscale13(a,d,b,c,S,D) \
572 	({ int __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
573 	   __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
574 				"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
575 				"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $13, %%edx, %%eax" \
576 		: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
577 		: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
578 	 __a; })
579 #define tmulscale14(a,d,b,c,S,D) \
580 	({ int __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
581 	   __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
582 				"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
583 				"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $14, %%edx, %%eax" \
584 		: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
585 		: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
586 	 __a; })
587 #define tmulscale15(a,d,b,c,S,D) \
588 	({ int __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
589 	   __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
590 				"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
591 				"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $15, %%edx, %%eax" \
592 		: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
593 		: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
594 	 __a; })
595 #define tmulscale16(a,d,b,c,S,D) \
596 	({ int __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
597 	   __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
598 				"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
599 				"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $16, %%edx, %%eax" \
600 		: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
601 		: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
602 	 __a; })
603 #define tmulscale17(a,d,b,c,S,D) \
604 	({ int __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
605 	   __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
606 				"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
607 				"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $17, %%edx, %%eax" \
608 		: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
609 		: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
610 	 __a; })
611 #define tmulscale18(a,d,b,c,S,D) \
612 	({ int __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
613 	   __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
614 				"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
615 				"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $18, %%edx, %%eax" \
616 		: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
617 		: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
618 	 __a; })
619 #define tmulscale19(a,d,b,c,S,D) \
620 	({ int __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
621 	   __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
622 				"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
623 				"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $19, %%edx, %%eax" \
624 		: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
625 		: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
626 	 __a; })
627 #define tmulscale20(a,d,b,c,S,D) \
628 	({ int __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
629 	   __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
630 				"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
631 				"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $20, %%edx, %%eax" \
632 		: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
633 		: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
634 	 __a; })
// tmulscale21(a,d,b,c,S,D): signed 64-bit sum a*d + b*c + S*D, shifted right
// by 21 bits; the low 32 bits of the shifted value are returned.
#define tmulscale21(a,d,b,c,S,D) \
	({ int __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
	   __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
				"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
				"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $21, %%edx, %%eax" \
		: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
		: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
	 __a; })
// tmulscale22(a,d,b,c,S,D): signed 64-bit sum a*d + b*c + S*D, shifted right
// by 22 bits; the low 32 bits of the shifted value are returned.
#define tmulscale22(a,d,b,c,S,D) \
	({ int __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
	   __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
				"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
				"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $22, %%edx, %%eax" \
		: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
		: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
	 __a; })
// tmulscale23(a,d,b,c,S,D): signed 64-bit sum a*d + b*c + S*D, shifted right
// by 23 bits; the low 32 bits of the shifted value are returned.
#define tmulscale23(a,d,b,c,S,D) \
	({ int __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
	   __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
				"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
				"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $23, %%edx, %%eax" \
		: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
		: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
	 __a; })
// tmulscale24(a,d,b,c,S,D): signed 64-bit sum a*d + b*c + S*D, shifted right
// by 24 bits; the low 32 bits of the shifted value are returned.
#define tmulscale24(a,d,b,c,S,D) \
	({ int __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
	   __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
				"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
				"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $24, %%edx, %%eax" \
		: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
		: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
	 __a; })
// tmulscale25(a,d,b,c,S,D): signed 64-bit sum a*d + b*c + S*D, shifted right
// by 25 bits; the low 32 bits of the shifted value are returned.
#define tmulscale25(a,d,b,c,S,D) \
	({ int __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
	   __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
				"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
				"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $25, %%edx, %%eax" \
		: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
		: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
	 __a; })
// tmulscale26(a,d,b,c,S,D): signed 64-bit sum a*d + b*c + S*D, shifted right
// by 26 bits; the low 32 bits of the shifted value are returned.
#define tmulscale26(a,d,b,c,S,D) \
	({ int __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
	   __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
				"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
				"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $26, %%edx, %%eax" \
		: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
		: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
	 __a; })
// tmulscale27(a,d,b,c,S,D): signed 64-bit sum a*d + b*c + S*D, shifted right
// by 27 bits; the low 32 bits of the shifted value are returned.
#define tmulscale27(a,d,b,c,S,D) \
	({ int __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
	   __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
				"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
				"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $27, %%edx, %%eax" \
		: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
		: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
	 __a; })
// tmulscale28(a,d,b,c,S,D): signed 64-bit sum a*d + b*c + S*D, shifted right
// by 28 bits; the low 32 bits of the shifted value are returned.
#define tmulscale28(a,d,b,c,S,D) \
	({ int __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
	   __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
				"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
				"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $28, %%edx, %%eax" \
		: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
		: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
	 __a; })
// tmulscale29(a,d,b,c,S,D): signed 64-bit sum a*d + b*c + S*D, shifted right
// by 29 bits; the low 32 bits of the shifted value are returned.
#define tmulscale29(a,d,b,c,S,D) \
	({ int __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
	   __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
				"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
				"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $29, %%edx, %%eax" \
		: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
		: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
	 __a; })
// tmulscale30(a,d,b,c,S,D): signed 64-bit sum a*d + b*c + S*D, shifted right
// by 30 bits; the low 32 bits of the shifted value are returned.
#define tmulscale30(a,d,b,c,S,D) \
	({ int __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
	   __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
				"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
				"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $30, %%edx, %%eax" \
		: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
		: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
	 __a; })
// tmulscale31(a,d,b,c,S,D): signed 64-bit sum a*d + b*c + S*D, shifted right
// by 31 bits; the low 32 bits of the shifted value are returned.
#define tmulscale31(a,d,b,c,S,D) \
	({ int __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
	   __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
				"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
				"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $31, %%edx, %%eax" \
		: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
		: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
	 __a; })
// tmulscale32(a,d,b,c,S,D): signed 64-bit sum a*d + b*c + S*D; no shift is
// issued, the upper 32 bits of the sum (edx, i.e. __d) are returned instead.
#define tmulscale32(a,d,b,c,S,D) \
	({ int __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
	   __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
				"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
				"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx" \
		: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
		: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
	 __d; })
731 
// divscale(a,b,c): returns the signed quotient (a << c) / b. The 64-bit
// dividend is built in edx:eax (eax = a << c, edx = a >> (32 - c)) and then
// divided by b with idivl.
// The shift count is negated in place (negb %cl; sarl only uses the low five
// bits, i.e. 32 - c), so ecx is declared as a read-write operand: the compiler
// must not assume it still holds c after the asm.
// NOTE(review): with c == 0 the second shift is also by 0, which leaves
// edx == a instead of the sign extension of a; pass 1..31 (or confirm callers).
#define divscale(a,b,c) \
	({ int __a=(a), __b=(b), __c=(c); \
	   __asm__ __volatile__ ("movl %%eax, %%edx; shll %%cl, %%eax; negb %%cl; sarl %%cl, %%edx; idivl %%ebx" \
		: "+a" (__a), "+c" (__c) : "b" (__b) : "edx", "cc"); \
	 __a; })
// divscale1(a,b): signed (a << 1) / b. addl doubles eax and leaves the old
// sign bit in CF; sbbl turns that carry into the high dword (0 or -1) for idivl.
#define divscale1(a,b) \
	({ int __a=(a), __b=(b); \
	   __asm__ __volatile__ ("addl %%eax, %%eax; sbbl %%edx, %%edx; idivl %%ebx" \
		: "=a" (__a) : "a" (__a), "b" (__b) : "edx", "cc"); \
	 __a; })
// divscale2(a,b): signed (a << 2) / b. edx = a >> 30 and eax = a * 4 (via lea)
// form the 64-bit dividend for idivl.
#define divscale2(a,b) \
	({ int __a=(a), __b=(b); \
	   __asm__ __volatile__ ("movl %%eax, %%edx; sarl $30, %%edx; leal (,%%eax,4), %%eax; idivl %%ebx" \
		: "=a" (__a) : "a" (__a), "b" (__b) : "edx", "cc"); \
	 __a; })
// divscale3(a,b): signed (a << 3) / b. edx = a >> 29 and eax = a * 8 (via lea)
// form the 64-bit dividend for idivl.
#define divscale3(a,b) \
	({ int __a=(a), __b=(b); \
	   __asm__ __volatile__ ("movl %%eax, %%edx; sarl $29, %%edx; leal (,%%eax,8), %%eax; idivl %%ebx" \
		: "=a" (__a) : "a" (__a), "b" (__b) : "edx", "cc"); \
	 __a; })
// divscale4(a,b): signed (a << 4) / b; edx = a >> 28, eax = a << 4 form the
// 64-bit dividend for idivl.
#define divscale4(a,b) \
	({ int __a=(a), __b=(b); \
	   __asm__ __volatile__ ("movl %%eax, %%edx; sarl $28, %%edx; shll $4, %%eax; idivl %%ebx" \
		: "=a" (__a) : "a" (__a), "b" (__b) : "edx", "cc"); \
	 __a; })
// divscale5(a,b): signed (a << 5) / b; edx = a >> 27, eax = a << 5 form the
// 64-bit dividend for idivl.
#define divscale5(a,b) \
	({ int __a=(a), __b=(b); \
	   __asm__ __volatile__ ("movl %%eax, %%edx; sarl $27, %%edx; shll $5, %%eax; idivl %%ebx" \
		: "=a" (__a) : "a" (__a), "b" (__b) : "edx", "cc"); \
	 __a; })
// divscale6(a,b): signed (a << 6) / b; edx = a >> 26, eax = a << 6 form the
// 64-bit dividend for idivl.
#define divscale6(a,b) \
	({ int __a=(a), __b=(b); \
	   __asm__ __volatile__ ("movl %%eax, %%edx; sarl $26, %%edx; shll $6, %%eax; idivl %%ebx" \
		: "=a" (__a) : "a" (__a), "b" (__b) : "edx", "cc"); \
	 __a; })
// divscale7(a,b): signed (a << 7) / b; edx = a >> 25, eax = a << 7 form the
// 64-bit dividend for idivl.
#define divscale7(a,b) \
	({ int __a=(a), __b=(b); \
	   __asm__ __volatile__ ("movl %%eax, %%edx; sarl $25, %%edx; shll $7, %%eax; idivl %%ebx" \
		: "=a" (__a) : "a" (__a), "b" (__b) : "edx", "cc"); \
	 __a; })
// divscale8(a,b): signed (a << 8) / b; edx = a >> 24, eax = a << 8 form the
// 64-bit dividend for idivl.
#define divscale8(a,b) \
	({ int __a=(a), __b=(b); \
	   __asm__ __volatile__ ("movl %%eax, %%edx; sarl $24, %%edx; shll $8, %%eax; idivl %%ebx" \
		: "=a" (__a) : "a" (__a), "b" (__b) : "edx", "cc"); \
	 __a; })
// divscale9(a,b): signed (a << 9) / b; edx = a >> 23, eax = a << 9 form the
// 64-bit dividend for idivl.
#define divscale9(a,b) \
	({ int __a=(a), __b=(b); \
	   __asm__ __volatile__ ("movl %%eax, %%edx; sarl $23, %%edx; shll $9, %%eax; idivl %%ebx" \
		: "=a" (__a) : "a" (__a), "b" (__b) : "edx", "cc"); \
	 __a; })
// divscale10(a,b): signed (a << 10) / b; edx = a >> 22, eax = a << 10 form the
// 64-bit dividend for idivl.
#define divscale10(a,b) \
	({ int __a=(a), __b=(b); \
	   __asm__ __volatile__ ("movl %%eax, %%edx; sarl $22, %%edx; shll $10, %%eax; idivl %%ebx" \
		: "=a" (__a) : "a" (__a), "b" (__b) : "edx", "cc"); \
	 __a; })
// divscale11(a,b): signed (a << 11) / b; edx = a >> 21, eax = a << 11 form the
// 64-bit dividend for idivl.
#define divscale11(a,b) \
	({ int __a=(a), __b=(b); \
	   __asm__ __volatile__ ("movl %%eax, %%edx; sarl $21, %%edx; shll $11, %%eax; idivl %%ebx" \
		: "=a" (__a) : "a" (__a), "b" (__b) : "edx", "cc"); \
	 __a; })
// divscale12(a,b): signed (a << 12) / b; edx = a >> 20, eax = a << 12 form the
// 64-bit dividend for idivl.
#define divscale12(a,b) \
	({ int __a=(a), __b=(b); \
	   __asm__ __volatile__ ("movl %%eax, %%edx; sarl $20, %%edx; shll $12, %%eax; idivl %%ebx" \
		: "=a" (__a) : "a" (__a), "b" (__b) : "edx", "cc"); \
	 __a; })
// divscale13(a,b): signed (a << 13) / b; edx = a >> 19, eax = a << 13 form the
// 64-bit dividend for idivl.
#define divscale13(a,b) \
	({ int __a=(a), __b=(b); \
	   __asm__ __volatile__ ("movl %%eax, %%edx; sarl $19, %%edx; shll $13, %%eax; idivl %%ebx" \
		: "=a" (__a) : "a" (__a), "b" (__b) : "edx", "cc"); \
	 __a; })
// divscale14(a,b): signed (a << 14) / b; edx = a >> 18, eax = a << 14 form the
// 64-bit dividend for idivl.
#define divscale14(a,b) \
	({ int __a=(a), __b=(b); \
	   __asm__ __volatile__ ("movl %%eax, %%edx; sarl $18, %%edx; shll $14, %%eax; idivl %%ebx" \
		: "=a" (__a) : "a" (__a), "b" (__b) : "edx", "cc"); \
	 __a; })
// divscale15(a,b): signed (a << 15) / b; edx = a >> 17, eax = a << 15 form the
// 64-bit dividend for idivl.
#define divscale15(a,b) \
	({ int __a=(a), __b=(b); \
	   __asm__ __volatile__ ("movl %%eax, %%edx; sarl $17, %%edx; shll $15, %%eax; idivl %%ebx" \
		: "=a" (__a) : "a" (__a), "b" (__b) : "edx", "cc"); \
	 __a; })
// divscale16(a,b): signed (a << 16) / b; edx = a >> 16, eax = a << 16 form the
// 64-bit dividend for idivl.
#define divscale16(a,b) \
	({ int __a=(a), __b=(b); \
	   __asm__ __volatile__ ("movl %%eax, %%edx; sarl $16, %%edx; shll $16, %%eax; idivl %%ebx" \
		: "=a" (__a) : "a" (__a), "b" (__b) : "edx", "cc"); \
	 __a; })
// divscale17(a,b): signed (a << 17) / b; edx = a >> 15, eax = a << 17 form the
// 64-bit dividend for idivl.
#define divscale17(a,b) \
	({ int __a=(a), __b=(b); \
	   __asm__ __volatile__ ("movl %%eax, %%edx; sarl $15, %%edx; shll $17, %%eax; idivl %%ebx" \
		: "=a" (__a) : "a" (__a), "b" (__b) : "edx", "cc"); \
	 __a; })
// divscale18(a,b): signed (a << 18) / b; edx = a >> 14, eax = a << 18 form the
// 64-bit dividend for idivl.
#define divscale18(a,b) \
	({ int __a=(a), __b=(b); \
	   __asm__ __volatile__ ("movl %%eax, %%edx; sarl $14, %%edx; shll $18, %%eax; idivl %%ebx" \
		: "=a" (__a) : "a" (__a), "b" (__b) : "edx", "cc"); \
	 __a; })
// divscale19(a,b): signed (a << 19) / b; edx = a >> 13, eax = a << 19 form the
// 64-bit dividend for idivl.
#define divscale19(a,b) \
	({ int __a=(a), __b=(b); \
	   __asm__ __volatile__ ("movl %%eax, %%edx; sarl $13, %%edx; shll $19, %%eax; idivl %%ebx" \
		: "=a" (__a) : "a" (__a), "b" (__b) : "edx", "cc"); \
	 __a; })
// divscale20(a,b): signed (a << 20) / b; edx = a >> 12, eax = a << 20 form the
// 64-bit dividend for idivl.
#define divscale20(a,b) \
	({ int __a=(a), __b=(b); \
	   __asm__ __volatile__ ("movl %%eax, %%edx; sarl $12, %%edx; shll $20, %%eax; idivl %%ebx" \
		: "=a" (__a) : "a" (__a), "b" (__b) : "edx", "cc"); \
	 __a; })
// divscale21(a,b): signed (a << 21) / b; edx = a >> 11, eax = a << 21 form the
// 64-bit dividend for idivl.
#define divscale21(a,b) \
	({ int __a=(a), __b=(b); \
	   __asm__ __volatile__ ("movl %%eax, %%edx; sarl $11, %%edx; shll $21, %%eax; idivl %%ebx" \
		: "=a" (__a) : "a" (__a), "b" (__b) : "edx", "cc"); \
	 __a; })
// divscale22(a,b): signed (a << 22) / b; edx = a >> 10, eax = a << 22 form the
// 64-bit dividend for idivl.
#define divscale22(a,b) \
	({ int __a=(a), __b=(b); \
	   __asm__ __volatile__ ("movl %%eax, %%edx; sarl $10, %%edx; shll $22, %%eax; idivl %%ebx" \
		: "=a" (__a) : "a" (__a), "b" (__b) : "edx", "cc"); \
	 __a; })
// divscale23(a,b): signed (a << 23) / b; edx = a >> 9, eax = a << 23 form the
// 64-bit dividend for idivl.
#define divscale23(a,b) \
	({ int __a=(a), __b=(b); \
	   __asm__ __volatile__ ("movl %%eax, %%edx; sarl $9, %%edx; shll $23, %%eax; idivl %%ebx" \
		: "=a" (__a) : "a" (__a), "b" (__b) : "edx", "cc"); \
	 __a; })
// divscale24(a,b): signed (a << 24) / b; edx = a >> 8, eax = a << 24 form the
// 64-bit dividend for idivl.
#define divscale24(a,b) \
	({ int __a=(a), __b=(b); \
	   __asm__ __volatile__ ("movl %%eax, %%edx; sarl $8, %%edx; shll $24, %%eax; idivl %%ebx" \
		: "=a" (__a) : "a" (__a), "b" (__b) : "edx", "cc"); \
	 __a; })
// divscale25(a,b): signed (a << 25) / b; edx = a >> 7, eax = a << 25 form the
// 64-bit dividend for idivl.
#define divscale25(a,b) \
	({ int __a=(a), __b=(b); \
	   __asm__ __volatile__ ("movl %%eax, %%edx; sarl $7, %%edx; shll $25, %%eax; idivl %%ebx" \
		: "=a" (__a) : "a" (__a), "b" (__b) : "edx", "cc"); \
	 __a; })
// divscale26(a,b): signed (a << 26) / b; edx = a >> 6, eax = a << 26 form the
// 64-bit dividend for idivl.
#define divscale26(a,b) \
	({ int __a=(a), __b=(b); \
	   __asm__ __volatile__ ("movl %%eax, %%edx; sarl $6, %%edx; shll $26, %%eax; idivl %%ebx" \
		: "=a" (__a) : "a" (__a), "b" (__b) : "edx", "cc"); \
	 __a; })
// divscale27(a,b): signed (a << 27) / b; edx = a >> 5, eax = a << 27 form the
// 64-bit dividend for idivl.
#define divscale27(a,b) \
	({ int __a=(a), __b=(b); \
	   __asm__ __volatile__ ("movl %%eax, %%edx; sarl $5, %%edx; shll $27, %%eax; idivl %%ebx" \
		: "=a" (__a) : "a" (__a), "b" (__b) : "edx", "cc"); \
	 __a; })
// divscale28(a,b): signed (a << 28) / b; edx = a >> 4, eax = a << 28 form the
// 64-bit dividend for idivl.
#define divscale28(a,b) \
	({ int __a=(a), __b=(b); \
	   __asm__ __volatile__ ("movl %%eax, %%edx; sarl $4, %%edx; shll $28, %%eax; idivl %%ebx" \
		: "=a" (__a) : "a" (__a), "b" (__b) : "edx", "cc"); \
	 __a; })
// divscale29(a,b): signed (a << 29) / b; edx = a >> 3, eax = a << 29 form the
// 64-bit dividend for idivl.
#define divscale29(a,b) \
	({ int __a=(a), __b=(b); \
	   __asm__ __volatile__ ("movl %%eax, %%edx; sarl $3, %%edx; shll $29, %%eax; idivl %%ebx" \
		: "=a" (__a) : "a" (__a), "b" (__b) : "edx", "cc"); \
	 __a; })
// divscale30(a,b): signed (a << 30) / b; edx = a >> 2, eax = a << 30 form the
// 64-bit dividend for idivl.
#define divscale30(a,b) \
	({ int __a=(a), __b=(b); \
	   __asm__ __volatile__ ("movl %%eax, %%edx; sarl $2, %%edx; shll $30, %%eax; idivl %%ebx" \
		: "=a" (__a) : "a" (__a), "b" (__b) : "edx", "cc"); \
	 __a; })
// divscale31(a,b): signed (a << 31) / b; edx = a >> 1, eax = a << 31 form the
// 64-bit dividend for idivl.
#define divscale31(a,b) \
	({ int __a=(a), __b=(b); \
	   __asm__ __volatile__ ("movl %%eax, %%edx; sarl $1, %%edx; shll $31, %%eax; idivl %%ebx" \
		: "=a" (__a) : "a" (__a), "b" (__b) : "edx", "cc"); \
	 __a; })
// divscale32(d,b): signed (d << 32) / b. d is loaded straight into edx (the
// high dword), eax is cleared, and the quotient in eax is returned via __r.
// edx is also an output (written back to __d) because idivl overwrites it.
#define divscale32(d,b) \
	({ int __d=(d), __b=(b), __r; \
	   __asm__ __volatile__ ("xorl %%eax, %%eax; idivl %%ebx" \
		: "=a" (__r), "=d" (__d) : "d" (__d), "b" (__b) : "cc"); \
	 __r; })
897 
// readpixel(D): returns the byte stored at address D, zero-extended to int.
// movzbl defines all 32 bits of the result (a plain movb into %al would leave
// the upper 24 bits of the returned int unspecified). The "memory" clobber
// tells the compiler the asm reads memory, so earlier stores to *D are
// completed before the load.
#define readpixel(D) \
	({ void *__D=(D); int __a; \
	   __asm__ __volatile__ ("movzbl (%1), %0" \
		: "=a" (__a) : "D" (__D) : "memory"); \
	 __a; })
// drawpixel(D,a): stores the low byte of a (%al) at address D.
// The expression evaluates to 0.
#define drawpixel(D,a) \
	({ void *__D=(D); int __a=(a); \
	   __asm__ __volatile__ ("movb %%al, (%%edi)" \
		: : "D" (__D), "a" (__a) : "memory", "cc"); \
	 0; })
// drawpixels(D,a): stores the low 16 bits of a (%ax) at address D.
// The expression evaluates to 0.
#define drawpixels(D,a) \
	({ void *__D=(D); int __a=(a); \
	   __asm__ __volatile__ ("movw %%ax, (%%edi)" \
		: : "D" (__D), "a" (__a) : "memory", "cc"); \
	 0; })
// drawpixelses(D,a): stores all 32 bits of a (%eax) at address D.
// The expression evaluates to 0.
#define drawpixelses(D,a) \
	({ void *__D=(D); int __a=(a); \
	   __asm__ __volatile__ ("movl %%eax, (%%edi)" \
		: : "D" (__D), "a" (__a) : "memory", "cc"); \
	 0; })
// clearbuf(D,c,a): fills memory at D with the 32-bit value a using "rep stosl",
// with c as the repeat count (so c counts 32-bit words, not bytes).
// edi and ecx are tied in/out operands because the instruction advances them.
// The expression evaluates to 0.
#define clearbuf(D,c,a) \
	({ void *__D=(D); int __c=(c), __a=(a); \
	   __asm__ __volatile__ ("rep; stosl" \
		: "=&D" (__D), "=&c" (__c) : "0" (__D), "1" (__c), "a" (__a) : "memory", "cc"); \
	 0; })
// copybuf(S,D,c): copies from S to D using "rep movsl", with c as the repeat
// count (so c counts 32-bit words, not bytes).
// esi, edi and ecx are tied in/out operands because the instruction advances them.
// The expression evaluates to 0.
#define copybuf(S,D,c) \
	({ void *__S=(S), *__D=(D); int __c=(c); \
	   __asm__ __volatile__ ("rep; movsl" \
		: "=&S" (__S), "=&D" (__D), "=&c" (__c) : "0" (__S), "1" (__D), "2" (__c) : "memory", "cc"); \
	 0; })
928 
// mul3(a): returns a*3, computed by one lea as a + a*2.
// Input and output share a register through the "0" matching constraint.
#define mul3(a) \
	({ int __src=(a), __dst; \
	   __asm__ __volatile__ ( \
		"lea (%1,%1,2), %0" \
		: "=r" (__dst) \
		: "0" (__src) \
		: "cc"); \
	 __dst; })
// mul5(a): returns a*5, computed by one lea as a + a*4.
// Input and output share a register through the "0" matching constraint.
#define mul5(a) \
	({ int __src=(a), __dst; \
	   __asm__ __volatile__ ( \
		"lea (%1,%1,4), %0" \
		: "=r" (__dst) \
		: "0" (__src) \
		: "cc"); \
	 __dst; })
// mul9(a): returns a*9, computed by one lea as a + a*8.
// Input and output share a register through the "0" matching constraint.
#define mul9(a) \
	({ int __src=(a), __dst; \
	   __asm__ __volatile__ ( \
		"lea (%1,%1,8), %0" \
		: "=r" (__dst) \
		: "0" (__src) \
		: "cc"); \
	 __dst; })
944 
// divmod(a,b): unsigned divide (edx is cleared, then divl).
// Returns the quotient a/b (eax) and stores the remainder a%b into dmval.
// dmval is written by the asm, so it is declared as a memory output operand;
// edx stays in the clobber list because it is zeroed and then receives the
// remainder.
#define divmod(a,b) \
	({ int __a=(a), __b=(b); \
	   __asm__ __volatile__ ("xorl %%edx, %%edx; divl %%ebx; movl %%edx, %[dmval]" \
		: "+a" (__a), [dmval] "=m" (dmval) : "b" (__b) : "edx", "memory", "cc"); \
	 __a; })
// moddiv(a,b): unsigned divide (edx is cleared, then divl).
// Returns the remainder a%b (edx) and stores the quotient a/b into dmval.
// eax is both consumed and overwritten, so it is a read-write operand rather
// than an input that also appears in the clobber list (GCC does not allow a
// clobber to overlap an operand). edx is early-clobber because it is zeroed
// before the other operands are used, and dmval is a memory output because
// the asm stores to it.
#define moddiv(a,b) \
	({ int __a=(a), __b=(b), __d; \
	   __asm__ __volatile__ ("xorl %%edx, %%edx; divl %%ebx; movl %%eax, %[dmval]" \
		: "=&d" (__d), "+a" (__a), [dmval] "=m" (dmval) : "b" (__b) : "memory", "cc"); \
	 __d; })
957 
// klabs(a): absolute value of a. The sign is tested and negl is skipped when
// the value is already non-negative.
#define klabs(a) \
	({ int __v=(a); \
	   __asm__ __volatile__ ( \
		"testl %%eax, %%eax; " \
		"jns 0f; " \
		"negl %%eax; " \
		"0:" \
		: "=a" (__v) \
		: "0" (__v) \
		: "cc"); \
	 __v; })
// ksgn(b): returns -1, 0 or 1 according to the sign of b.
//   addl ebx,ebx   -> CF = sign bit of b, ebx = 2*b
//   sbbl eax,eax   -> eax = -1 if b was negative, else 0
//   cmpl/adcb      -> adds 1 when eax < 2*b (unsigned), i.e. when b > 0
// ebx is doubled in place, so it is declared as a read-write operand instead
// of input-only; the compiler must not assume it still holds b afterwards.
#define ksgn(b) \
	({ int __b=(b), __r; \
	   __asm__ __volatile__ ("addl %%ebx, %%ebx; sbbl %%eax, %%eax; cmpl %%ebx, %%eax; adcb $0, %%al" \
		: "=a" (__r), "+b" (__b) : : "cc"); \
	 __r; })
968 
// umin(a,b): branch-free minimum of a and b compared as unsigned values.
// ecx becomes an all-ones mask when a < b (borrow from the subtract), so the
// result is b + (a - b) in that case and b otherwise.
#define umin(a,b) \
	({ int __lhs=(a), __rhs=(b); \
	   __asm__ __volatile__ ( \
		"subl %%ebx, %%eax; " \
		"sbbl %%ecx, %%ecx; " \
		"andl %%ecx, %%eax; " \
		"addl %%ebx, %%eax" \
		: "=a" (__lhs) \
		: "0" (__lhs), "b" (__rhs) \
		: "ecx", "cc"); \
	 __lhs; })
// umax(a,b): branch-free maximum of a and b compared as unsigned values.
// The borrow mask in ecx is inverted, so the result is b + (a - b) when
// a >= b and b otherwise.
#define umax(a,b) \
	({ int __lhs=(a), __rhs=(b); \
	   __asm__ __volatile__ ( \
		"subl %%ebx, %%eax; " \
		"sbbl %%ecx, %%ecx; " \
		"xorl $0xffffffff, %%ecx; " \
		"andl %%ecx, %%eax; " \
		"addl %%ebx, %%eax" \
		: "=a" (__lhs) \
		: "0" (__lhs), "b" (__rhs) \
		: "ecx", "cc"); \
	 __lhs; })
979 
// kmin(a,b): signed minimum. a is kept when a < b, otherwise b is copied
// over it.
#define kmin(a,b) \
	({ int __lhs=(a), __rhs=(b); \
	   __asm__ __volatile__ ( \
		"cmpl %%ebx, %%eax; " \
		"jl 0f; " \
		"movl %%ebx, %%eax; " \
		"0:" \
		: "=a" (__lhs) \
		: "0" (__lhs), "b" (__rhs) \
		: "cc"); \
	 __lhs; })
// kmax(a,b): signed maximum. a is kept when a > b, otherwise b is copied
// over it.
#define kmax(a,b) \
	({ int __lhs=(a), __rhs=(b); \
	   __asm__ __volatile__ ( \
		"cmpl %%ebx, %%eax; " \
		"jg 0f; " \
		"movl %%ebx, %%eax; " \
		"0:" \
		: "=a" (__lhs) \
		: "0" (__lhs), "b" (__rhs) \
		: "cc"); \
	 __lhs; })
990 
// swapchar(a,b): exchanges the single bytes at addresses a and b, using cl/ch
// as temporaries (hence the ecx clobber). The expression evaluates to 0.
#define swapchar(a,b) \
	({ void *__a=(a), *__b=(b); \
	   __asm__ __volatile__ ("movb (%%eax), %%cl; movb (%%ebx), %%ch; movb %%cl, (%%ebx); movb %%ch, (%%eax)" \
		: : "a" (__a), "b" (__b) : "ecx", "memory", "cc"); \
	 0; })
// swapshort(a,b): exchanges the 16-bit values at addresses a and b, using
// cx/dx as temporaries. The expression evaluates to 0.
#define swapshort(a,b) \
	({ void *__a=(a), *__b=(b); \
	   __asm__ __volatile__ ("movw (%%eax), %%cx; movw (%%ebx), %%dx; movw %%cx, (%%ebx); movw %%dx, (%%eax)" \
		: : "a" (__a), "b" (__b) : "ecx", "edx", "memory", "cc"); \
	 0; })
// swaplong(a,b): exchanges the 32-bit values at addresses a and b, using
// ecx/edx as temporaries. The expression evaluates to 0.
#define swaplong(a,b) \
	({ void *__a=(a), *__b=(b); \
	   __asm__ __volatile__ ("movl (%%eax), %%ecx; movl (%%ebx), %%edx; movl %%ecx, (%%ebx); movl %%edx, (%%eax)" \
		: : "a" (__a), "b" (__b) : "ecx", "edx", "memory", "cc"); \
	 0; })
// swapbuf4(a,b,c): exchanges c consecutive 32-bit words between the buffers
// at a and b, using esi/edi as temporaries. The expression evaluates to 0.
// Both pointers are advanced and the counter is decremented inside the loop,
// so all three are declared as read-write operands (they were input-only,
// which told the compiler the registers were left unchanged). The operands
// stay pinned to eax/ebx/ecx; they are referenced as %0/%1/%2 so the
// assembler picks the register width that matches the operand type.
// The loop body runs before the count is tested, so c must be at least 1.
#define swapbuf4(a,b,c) \
	({ void *__a=(a), *__b=(b); int __c=(c); \
	   __asm__ __volatile__ ("0: movl (%0), %%esi; movl (%1), %%edi; movl %%esi, (%1); " \
				"movl %%edi, (%0); add $4, %0; add $4, %1; decl %2; jnz 0b" \
		: "+a" (__a), "+b" (__b), "+c" (__c) : : "esi", "edi", "memory", "cc"); \
	 0; })
// swap64bit(a,b): exchanges the 8 bytes at a with the 8 bytes at b, done as
// two interleaved 32-bit swaps (offsets 0 and 4) through ecx/edx.
// The expression evaluates to 0.
#define swap64bit(a,b) \
	({ void *__a=(a), *__b=(b); \
	   __asm__ __volatile__ ("movl (%%eax), %%ecx; movl (%%ebx), %%edx; movl %%ecx, (%%ebx); " \
				"movl 4(%%eax), %%ecx; movl %%edx, (%%eax); movl 4(%%ebx), %%edx; " \
				"movl %%ecx, 4(%%ebx); movl %%edx, 4(%%eax)" \
		: : "a" (__a), "b" (__b) : "ecx", "edx", "memory", "cc"); \
	 0; })
1019 
//swapchar2(ptr1,ptr2,xsiz); is the same as:
//swapchar(ptr1,ptr2); swapchar(ptr1+1,ptr2+xsiz);
// The two bytes at a are read and written back as one 16-bit access; esi is
// turned into b+S in place, which is why __S is also listed as an output.
// The expression evaluates to 0.
#define swapchar2(a,b,S) \
	({ void *__a=(a), *__b=(b); int __S=(S); \
	   __asm__ __volatile__ ("addl %%ebx, %%esi; movw (%%eax), %%cx; movb (%%ebx), %%dl; " \
				"movb %%cl, (%%ebx); movb (%%esi), %%dh; movb %%ch, (%%esi); " \
				"movw %%dx, (%%eax)" \
		: "=S" (__S) : "a" (__a), "b" (__b), "S" (__S) : "ecx", "edx", "memory", "cc"); \
	 0; })
1029 
1030 
// qinterpolatedown16(a,c,d,S): writes 32-bit values to consecutive slots
// starting at a; each value is the running accumulator d shifted right
// arithmetically by 16, and d advances by S after every store.
// The loop at label 0 is unrolled to emit two values per iteration (c/2
// iterations); label 1 stores the odd leftover value.
// NOTE(review): when c/2 is zero control jumps straight to label 1, so one
// value is stored for c == 1 and also for c == 0 - confirm callers never
// pass 0. The expression evaluates to 0.
#define qinterpolatedown16(a,c,d,S) \
	({ void *__a=(void*)(a); int __c=(c), __d=(d), __S=(S); \
	   __asm__ __volatile__ ("movl %%ecx, %%ebx; shrl $1, %%ecx; jz 1f; " \
				"0: leal (%%edx,%%esi,), %%edi; sarl $16, %%edx; movl %%edx, (%%eax); " \
				"leal (%%edi,%%esi,), %%edx; sarl $16, %%edi; movl %%edi, 4(%%eax); " \
				"addl $8, %%eax; decl %%ecx; jnz 0b; testl $1, %%ebx; jz 2f; " \
				"1: sarl $16, %%edx; movl %%edx, (%%eax); 2:" \
		: "=a" (__a), "=c" (__c), "=d" (__d) : "a" (__a), "c" (__c), "d" (__d), "S" (__S) \
		: "ebx", "edi", "memory", "cc"); \
	 0; })
1041 
// qinterpolatedown16short(a,c,d,S): like qinterpolatedown16 but writes 16-bit
// values: each output is the accumulator d shifted right by 16, and d advances
// by S per output. Nothing is written when c == 0.
//  - If bit 1 of the destination address is set, one 16-bit value is stored
//    first so the following 32-bit stores start on a 4-byte boundary.
//  - Label 1 emits two outputs per iteration with a single 32-bit store: the
//    low half comes from the first value, the high half from the second.
//  - Label 2 stores a final single 16-bit value when one output remains.
// NOTE(review): in the paired path the first value is sign-extended by sarl
// before the masked second value is added, so a negative first value also
// affects the upper half of the combined store - confirm this is intended.
// The expression evaluates to 0.
#define qinterpolatedown16short(a,c,d,S) \
	({ void *__a=(void*)(a); int __c=(c), __d=(d), __S=(S); \
	   __asm__ __volatile__ ("testl %%ecx, %%ecx; jz 3f; testb $2, %%al; jz 0f; movl %%edx, %%ebx; " \
				"sarl $16, %%ebx; movw %%bx, (%%eax); addl %%esi, %%edx; addl $2, %%eax; " \
				"decl %%ecx; jz 3f; " \
				"0: subl $2, %%ecx; jc 2f; " \
				"1: movl %%edx, %%ebx; addl %%esi, %%edx; sarl $16, %%ebx; movl %%edx, %%edi; " \
				"andl $0xffff0000, %%edi; addl %%esi, %%edx; addl %%edi, %%ebx; " \
				"movl %%ebx, (%%eax); addl $4, %%eax; subl $2, %%ecx; jnc 1b; testb $1, %%cl; " \
				"jz 3f; " \
				"2: movl %%edx, %%ebx; sarl $16, %%ebx; movw %%bx, (%%eax); 3:" \
		: "=a" (__a), "=c" (__c), "=d" (__d) : "a" (__a), "c" (__c), "d" (__d), "S" (__S) \
		: "ebx", "edi", "memory", "cc"); \
	 0; })
1056 
1057 
1058 //}}}
1059 
1060 #elif defined(__WATCOMC__) && USE_ASM	// __GNUC__ && __i386__
1061 
1062 //
1063 // Watcom C inline assembler
1064 //
1065 
1066 //{{{
// sqr(a): returns a*a (low 32 bits). The argument arrives in eax and the
// result is returned in eax; no other register is modified.
int sqr(int);
#pragma aux sqr =\
	"imul eax, eax",\
	parm nomemory [eax]\
	modify exact [eax]\
	value [eax]
1073 
// scale(a,d,c): signed (a*d)/c with a 64-bit intermediate product in edx:eax.
// Arguments arrive in eax, edx, ecx; the quotient is left in eax.
int scale(int,int,int);
#pragma aux scale =\
	"imul edx",\
	"idiv ecx",\
	parm nomemory [eax][edx][ecx]\
	modify exact [eax edx]
1080 
// mulscale(a,d,c): signed 64-bit product a*d shifted right by c bits (count in
// cl); the low 32 bits of the shifted product are left in eax.
int mulscale(int,int,int);
#pragma aux mulscale =\
	"imul edx",\
	"shrd eax, edx, cl",\
	parm nomemory [eax][edx][ecx]\
	modify exact [eax edx]
1087 
// mulscale1(a,d): signed 64-bit product a*d (edx:eax) shifted right 1 bit;
// the low 32 bits of the shifted product are left in eax.
int mulscale1(int,int);
#pragma aux mulscale1 =\
	"imul edx",\
	"shrd eax, edx, 1",\
	parm nomemory [eax][edx]\
	modify exact [eax edx]
1094 
// mulscale2(a,d): signed 64-bit product a*d (edx:eax) shifted right 2 bits;
// the low 32 bits of the shifted product are left in eax.
int mulscale2(int,int);
#pragma aux mulscale2 =\
	"imul edx",\
	"shrd eax, edx, 2",\
	parm nomemory [eax][edx]\
	modify exact [eax edx]
1101 
// mulscale3(a,d): signed 64-bit product a*d (edx:eax) shifted right 3 bits;
// the low 32 bits of the shifted product are left in eax.
int mulscale3(int,int);
#pragma aux mulscale3 =\
	"imul edx",\
	"shrd eax, edx, 3",\
	parm nomemory [eax][edx]\
	modify exact [eax edx]
1108 
// mulscale4(a,d): signed 64-bit product a*d (edx:eax) shifted right 4 bits;
// the low 32 bits of the shifted product are left in eax.
int mulscale4(int,int);
#pragma aux mulscale4 =\
	"imul edx",\
	"shrd eax, edx, 4",\
	parm nomemory [eax][edx]\
	modify exact [eax edx]
1115 
// mulscale5(a,d): signed 64-bit product a*d (edx:eax) shifted right 5 bits;
// the low 32 bits of the shifted product are left in eax.
int mulscale5(int,int);
#pragma aux mulscale5 =\
	"imul edx",\
	"shrd eax, edx, 5",\
	parm nomemory [eax][edx]\
	modify exact [eax edx]
1122 
// mulscale6(a,d): signed 64-bit product a*d (edx:eax) shifted right 6 bits;
// the low 32 bits of the shifted product are left in eax.
int mulscale6(int,int);
#pragma aux mulscale6 =\
	"imul edx",\
	"shrd eax, edx, 6",\
	parm nomemory [eax][edx]\
	modify exact [eax edx]
1129 
// mulscale7(a,d): signed 64-bit product a*d (edx:eax) shifted right 7 bits;
// the low 32 bits of the shifted product are left in eax.
int mulscale7(int,int);
#pragma aux mulscale7 =\
	"imul edx",\
	"shrd eax, edx, 7",\
	parm nomemory [eax][edx]\
	modify exact [eax edx]
1136 
// mulscale8(a,d): signed 64-bit product a*d (edx:eax) shifted right 8 bits;
// the low 32 bits of the shifted product are left in eax.
int mulscale8(int,int);
#pragma aux mulscale8 =\
	"imul edx",\
	"shrd eax, edx, 8",\
	parm nomemory [eax][edx]\
	modify exact [eax edx]
1143 
// mulscale9(a,d): signed 64-bit product a*d (edx:eax) shifted right 9 bits;
// the low 32 bits of the shifted product are left in eax.
int mulscale9(int,int);
#pragma aux mulscale9 =\
	"imul edx",\
	"shrd eax, edx, 9",\
	parm nomemory [eax][edx]\
	modify exact [eax edx]
1150 
// mulscale10(a,d): signed 64-bit product a*d (edx:eax) shifted right 10 bits;
// the low 32 bits of the shifted product are left in eax.
int mulscale10(int,int);
#pragma aux mulscale10 =\
	"imul edx",\
	"shrd eax, edx, 10",\
	parm nomemory [eax][edx]\
	modify exact [eax edx]
1157 
// mulscale11(a,d): signed 64-bit product a*d (edx:eax) shifted right 11 bits;
// the low 32 bits of the shifted product are left in eax.
int mulscale11(int,int);
#pragma aux mulscale11 =\
	"imul edx",\
	"shrd eax, edx, 11",\
	parm nomemory [eax][edx]\
	modify exact [eax edx]
1164 
// mulscale12(a,d): signed 64-bit product a*d (edx:eax) shifted right 12 bits;
// the low 32 bits of the shifted product are left in eax.
int mulscale12(int,int);
#pragma aux mulscale12 =\
	"imul edx",\
	"shrd eax, edx, 12",\
	parm nomemory [eax][edx]\
	modify exact [eax edx]
1171 
// mulscale13(a,d): signed 64-bit product a*d (edx:eax) shifted right 13 bits;
// the low 32 bits of the shifted product are left in eax.
int mulscale13(int,int);
#pragma aux mulscale13 =\
	"imul edx",\
	"shrd eax, edx, 13",\
	parm nomemory [eax][edx]\
	modify exact [eax edx]
1178 
// mulscale14(a,d): signed 64-bit product a*d (edx:eax) shifted right 14 bits;
// the low 32 bits of the shifted product are left in eax.
int mulscale14(int,int);
#pragma aux mulscale14 =\
	"imul edx",\
	"shrd eax, edx, 14",\
	parm nomemory [eax][edx]\
	modify exact [eax edx]
1185 
// mulscale15(a,d): signed 64-bit product a*d (edx:eax) shifted right 15 bits;
// the low 32 bits of the shifted product are left in eax.
int mulscale15(int,int);
#pragma aux mulscale15 =\
	"imul edx",\
	"shrd eax, edx, 15",\
	parm nomemory [eax][edx]\
	modify exact [eax edx]
1192 
// mulscale16(a,d): signed 64-bit product a*d (edx:eax) shifted right 16 bits;
// the low 32 bits of the shifted product are left in eax.
int mulscale16(int,int);
#pragma aux mulscale16 =\
	"imul edx",\
	"shrd eax, edx, 16",\
	parm nomemory [eax][edx]\
	modify exact [eax edx]
1199 
// mulscale17(a,d): signed 64-bit product a*d (edx:eax) shifted right 17 bits;
// the low 32 bits of the shifted product are left in eax.
int mulscale17(int,int);
#pragma aux mulscale17 =\
	"imul edx",\
	"shrd eax, edx, 17",\
	parm nomemory [eax][edx]\
	modify exact [eax edx]
1206 
// mulscale18(a,d): signed 64-bit product a*d (edx:eax) shifted right 18 bits;
// the low 32 bits of the shifted product are left in eax.
int mulscale18(int,int);
#pragma aux mulscale18 =\
	"imul edx",\
	"shrd eax, edx, 18",\
	parm nomemory [eax][edx]\
	modify exact [eax edx]
1213 
// mulscale19(a,d): signed 64-bit product a*d (edx:eax) shifted right 19 bits;
// the low 32 bits of the shifted product are left in eax.
int mulscale19(int,int);
#pragma aux mulscale19 =\
	"imul edx",\
	"shrd eax, edx, 19",\
	parm nomemory [eax][edx]\
	modify exact [eax edx]
1220 
// mulscale20(a,d): signed 64-bit product a*d (edx:eax) shifted right 20 bits;
// the low 32 bits of the shifted product are left in eax.
int mulscale20(int,int);
#pragma aux mulscale20 =\
	"imul edx",\
	"shrd eax, edx, 20",\
	parm nomemory [eax][edx]\
	modify exact [eax edx]
1227 
// mulscale21(a,d): signed 64-bit product a*d (edx:eax) shifted right 21 bits;
// the low 32 bits of the shifted product are left in eax.
int mulscale21(int,int);
#pragma aux mulscale21 =\
	"imul edx",\
	"shrd eax, edx, 21",\
	parm nomemory [eax][edx]\
	modify exact [eax edx]
1234 
// mulscale22(a,d): signed 64-bit product a*d (edx:eax) shifted right 22 bits;
// the low 32 bits of the shifted product are left in eax.
int mulscale22(int,int);
#pragma aux mulscale22 =\
	"imul edx",\
	"shrd eax, edx, 22",\
	parm nomemory [eax][edx]\
	modify exact [eax edx]
1241 
// mulscale23(a,d): signed 64-bit product a*d (edx:eax) shifted right 23 bits;
// the low 32 bits of the shifted product are left in eax.
int mulscale23(int,int);
#pragma aux mulscale23 =\
	"imul edx",\
	"shrd eax, edx, 23",\
	parm nomemory [eax][edx]\
	modify exact [eax edx]
1248 
// mulscale24(a,d): signed 64-bit product a*d (edx:eax) shifted right 24 bits;
// the low 32 bits of the shifted product are left in eax.
int mulscale24(int,int);
#pragma aux mulscale24 =\
	"imul edx",\
	"shrd eax, edx, 24",\
	parm nomemory [eax][edx]\
	modify exact [eax edx]
1255 
// mulscale25(a,d): signed 64-bit product a*d (edx:eax) shifted right 25 bits;
// the low 32 bits of the shifted product are left in eax.
int mulscale25(int,int);
#pragma aux mulscale25 =\
	"imul edx",\
	"shrd eax, edx, 25",\
	parm nomemory [eax][edx]\
	modify exact [eax edx]
1262 
// mulscale26(a,d): signed 64-bit product a*d (edx:eax) shifted right 26 bits;
// the low 32 bits of the shifted product are left in eax.
int mulscale26(int,int);
#pragma aux mulscale26 =\
	"imul edx",\
	"shrd eax, edx, 26",\
	parm nomemory [eax][edx]\
	modify exact [eax edx]
1269 
// mulscale27(a,d): signed 64-bit product a*d (edx:eax) shifted right 27 bits;
// the low 32 bits of the shifted product are left in eax.
int mulscale27(int,int);
#pragma aux mulscale27 =\
	"imul edx",\
	"shrd eax, edx, 27",\
	parm nomemory [eax][edx]\
	modify exact [eax edx]
1276 
// mulscale28(a,d): signed 64-bit product a*d (edx:eax) shifted right 28 bits;
// the low 32 bits of the shifted product are left in eax.
int mulscale28(int,int);
#pragma aux mulscale28 =\
	"imul edx",\
	"shrd eax, edx, 28",\
	parm nomemory [eax][edx]\
	modify exact [eax edx]
1283 
// mulscale29(a,d): signed 64-bit product a*d (edx:eax) shifted right 29 bits;
// the low 32 bits of the shifted product are left in eax.
int mulscale29(int,int);
#pragma aux mulscale29 =\
	"imul edx",\
	"shrd eax, edx, 29",\
	parm nomemory [eax][edx]\
	modify exact [eax edx]
1290 
// mulscale30(a,d): signed 64-bit product a*d (edx:eax) shifted right 30 bits;
// the low 32 bits of the shifted product are left in eax.
int mulscale30(int,int);
#pragma aux mulscale30 =\
	"imul edx",\
	"shrd eax, edx, 30",\
	parm nomemory [eax][edx]\
	modify exact [eax edx]
1297 
// mulscale31(a,d): signed 64-bit product a*d (edx:eax) shifted right 31 bits;
// the low 32 bits of the shifted product are left in eax.
int mulscale31(int,int);
#pragma aux mulscale31 =\
	"imul edx",\
	"shrd eax, edx, 31",\
	parm nomemory [eax][edx]\
	modify exact [eax edx]
1304 
// mulscale32(a,d): signed 64-bit product a*d; no shift is issued, the upper
// 32 bits of the product are returned directly from edx (value [edx]).
int mulscale32(int,int);
#pragma aux mulscale32 =\
	"imul edx",\
	parm nomemory [eax][edx]\
	modify exact [eax edx]\
	value [edx]
1311 
// dmulscale(a,d,S,D,c): 64-bit sum of two signed products, a*d + S*D, shifted
// right by c bits (count in cl); the low 32 bits are left in eax.
// Arguments arrive in eax, edx, esi, edi, ecx. The first product is parked in
// esi:ebx while the second one is computed.
int dmulscale(int,int,int,int,int);
#pragma aux dmulscale =\
	"imul edx",\
	"mov ebx, eax",\
	"mov eax, esi",\
	"mov esi, edx",\
	"imul edi",\
	"add eax, ebx",\
	"adc edx, esi",\
	"shrd eax, edx, cl",\
	parm nomemory [eax][edx][esi][edi][ecx]\
	modify exact [eax ebx edx esi]
1324 
// dmulscale1(a,d,S,D): 64-bit sum a*d + S*D shifted right 1 bit; the low
// 32 bits are left in eax. Arguments arrive in eax, edx, esi, edi.
int dmulscale1(int,int,int,int);
#pragma aux dmulscale1 =\
	"imul edx",\
	"mov ebx, eax",\
	"mov eax, esi",\
	"mov esi, edx",\
	"imul edi",\
	"add eax, ebx",\
	"adc edx, esi",\
	"shrd eax, edx, 1",\
	parm nomemory [eax][edx][esi][edi]\
	modify exact [eax ebx edx esi]
1337 
// dmulscale2(a,d,S,D): 64-bit sum a*d + S*D shifted right 2 bits; the low
// 32 bits are left in eax. Arguments arrive in eax, edx, esi, edi.
int dmulscale2(int,int,int,int);
#pragma aux dmulscale2 =\
	"imul edx",\
	"mov ebx, eax",\
	"mov eax, esi",\
	"mov esi, edx",\
	"imul edi",\
	"add eax, ebx",\
	"adc edx, esi",\
	"shrd eax, edx, 2",\
	parm nomemory [eax][edx][esi][edi]\
	modify exact [eax ebx edx esi]
1350 
// dmulscale3(a,d,S,D): 64-bit sum a*d + S*D shifted right 3 bits; the low
// 32 bits are left in eax. Arguments arrive in eax, edx, esi, edi.
int dmulscale3(int,int,int,int);
#pragma aux dmulscale3 =\
	"imul edx",\
	"mov ebx, eax",\
	"mov eax, esi",\
	"mov esi, edx",\
	"imul edi",\
	"add eax, ebx",\
	"adc edx, esi",\
	"shrd eax, edx, 3",\
	parm nomemory [eax][edx][esi][edi]\
	modify exact [eax ebx edx esi]
1363 
// dmulscale4(a,d,S,D): 64-bit sum a*d + S*D shifted right 4 bits; the low
// 32 bits are left in eax. Arguments arrive in eax, edx, esi, edi.
int dmulscale4(int,int,int,int);
#pragma aux dmulscale4 =\
	"imul edx",\
	"mov ebx, eax",\
	"mov eax, esi",\
	"mov esi, edx",\
	"imul edi",\
	"add eax, ebx",\
	"adc edx, esi",\
	"shrd eax, edx, 4",\
	parm nomemory [eax][edx][esi][edi]\
	modify exact [eax ebx edx esi]
1376 
// dmulscale5(a,d,S,D): 64-bit sum a*d + S*D shifted right 5 bits; the low
// 32 bits are left in eax. Arguments arrive in eax, edx, esi, edi.
int dmulscale5(int,int,int,int);
#pragma aux dmulscale5 =\
	"imul edx",\
	"mov ebx, eax",\
	"mov eax, esi",\
	"mov esi, edx",\
	"imul edi",\
	"add eax, ebx",\
	"adc edx, esi",\
	"shrd eax, edx, 5",\
	parm nomemory [eax][edx][esi][edi]\
	modify exact [eax ebx edx esi]
1389 
// dmulscale6(a,d,S,D): 64-bit sum a*d + S*D shifted right 6 bits; the low
// 32 bits are left in eax. Arguments arrive in eax, edx, esi, edi.
int dmulscale6(int,int,int,int);
#pragma aux dmulscale6 =\
	"imul edx",\
	"mov ebx, eax",\
	"mov eax, esi",\
	"mov esi, edx",\
	"imul edi",\
	"add eax, ebx",\
	"adc edx, esi",\
	"shrd eax, edx, 6",\
	parm nomemory [eax][edx][esi][edi]\
	modify exact [eax ebx edx esi]
1402 
// dmulscale7(a,d,S,D): 64-bit sum a*d + S*D shifted right 7 bits; the low
// 32 bits are left in eax. Arguments arrive in eax, edx, esi, edi.
int dmulscale7(int,int,int,int);
#pragma aux dmulscale7 =\
	"imul edx",\
	"mov ebx, eax",\
	"mov eax, esi",\
	"mov esi, edx",\
	"imul edi",\
	"add eax, ebx",\
	"adc edx, esi",\
	"shrd eax, edx, 7",\
	parm nomemory [eax][edx][esi][edi]\
	modify exact [eax ebx edx esi]
1415 
// dmulscale8(a,d,S,D): 64-bit sum a*d + S*D shifted right 8 bits; the low
// 32 bits are left in eax. Arguments arrive in eax, edx, esi, edi.
int dmulscale8(int,int,int,int);
#pragma aux dmulscale8 =\
	"imul edx",\
	"mov ebx, eax",\
	"mov eax, esi",\
	"mov esi, edx",\
	"imul edi",\
	"add eax, ebx",\
	"adc edx, esi",\
	"shrd eax, edx, 8",\
	parm nomemory [eax][edx][esi][edi]\
	modify exact [eax ebx edx esi]
1428 
// dmulscale9(a,d,S,D): 64-bit sum a*d + S*D shifted right 9 bits; the low
// 32 bits are left in eax. Arguments arrive in eax, edx, esi, edi.
int dmulscale9(int,int,int,int);
#pragma aux dmulscale9 =\
	"imul edx",\
	"mov ebx, eax",\
	"mov eax, esi",\
	"mov esi, edx",\
	"imul edi",\
	"add eax, ebx",\
	"adc edx, esi",\
	"shrd eax, edx, 9",\
	parm nomemory [eax][edx][esi][edi]\
	modify exact [eax ebx edx esi]
1441 
// dmulscale10(a,d,S,D): 64-bit sum a*d + S*D shifted right 10 bits; the low
// 32 bits are left in eax. Arguments arrive in eax, edx, esi, edi.
int dmulscale10(int,int,int,int);
#pragma aux dmulscale10 =\
	"imul edx",\
	"mov ebx, eax",\
	"mov eax, esi",\
	"mov esi, edx",\
	"imul edi",\
	"add eax, ebx",\
	"adc edx, esi",\
	"shrd eax, edx, 10",\
	parm nomemory [eax][edx][esi][edi]\
	modify exact [eax ebx edx esi]
1454 
// dmulscale11(a,d,S,D): 64-bit sum a*d + S*D shifted right 11 bits; the low
// 32 bits are left in eax. Arguments arrive in eax, edx, esi, edi.
int dmulscale11(int,int,int,int);
#pragma aux dmulscale11 =\
	"imul edx",\
	"mov ebx, eax",\
	"mov eax, esi",\
	"mov esi, edx",\
	"imul edi",\
	"add eax, ebx",\
	"adc edx, esi",\
	"shrd eax, edx, 11",\
	parm nomemory [eax][edx][esi][edi]\
	modify exact [eax ebx edx esi]
1467 
// dmulscale12(a,d,S,D): 64-bit sum a*d + S*D shifted right 12 bits; the low
// 32 bits are left in eax. Arguments arrive in eax, edx, esi, edi.
int dmulscale12(int,int,int,int);
#pragma aux dmulscale12 =\
	"imul edx",\
	"mov ebx, eax",\
	"mov eax, esi",\
	"mov esi, edx",\
	"imul edi",\
	"add eax, ebx",\
	"adc edx, esi",\
	"shrd eax, edx, 12",\
	parm nomemory [eax][edx][esi][edi]\
	modify exact [eax ebx edx esi]
1480 
// dmulscale13(a,d,S,D): 64-bit sum a*d + S*D shifted right 13 bits; the low
// 32 bits are left in eax. Arguments arrive in eax, edx, esi, edi.
int dmulscale13(int,int,int,int);
#pragma aux dmulscale13 =\
	"imul edx",\
	"mov ebx, eax",\
	"mov eax, esi",\
	"mov esi, edx",\
	"imul edi",\
	"add eax, ebx",\
	"adc edx, esi",\
	"shrd eax, edx, 13",\
	parm nomemory [eax][edx][esi][edi]\
	modify exact [eax ebx edx esi]
1493 
// dmulscale14(a,d,S,D): 64-bit sum a*d + S*D shifted right 14 bits; the low
// 32 bits are left in eax. Arguments arrive in eax, edx, esi, edi.
int dmulscale14(int,int,int,int);
#pragma aux dmulscale14 =\
	"imul edx",\
	"mov ebx, eax",\
	"mov eax, esi",\
	"mov esi, edx",\
	"imul edi",\
	"add eax, ebx",\
	"adc edx, esi",\
	"shrd eax, edx, 14",\
	parm nomemory [eax][edx][esi][edi]\
	modify exact [eax ebx edx esi]
1506 
// dmulscale15(a,b,c,d): signed 64-bit sum a*b + c*d, shifted right 15 bits;
// the low 32 bits of the shifted value are returned in eax.
// Register args: a=eax, b=edx, c=esi, d=edi; ebx and esi serve as scratch.
int dmulscale15(int,int,int,int);
#pragma aux dmulscale15 =\
	"imul edx",\
	"mov ebx, eax",\
	"mov eax, esi",\
	"mov esi, edx",\
	"imul edi",\
	"add eax, ebx",\
	"adc edx, esi",\
	"shrd eax, edx, 15",\
	parm nomemory [eax][edx][esi][edi]\
	modify exact [eax ebx edx esi]
1519 
// dmulscale16(a,b,c,d): signed 64-bit sum a*b + c*d, shifted right 16 bits;
// the low 32 bits of the shifted value are returned in eax.
// Register args: a=eax, b=edx, c=esi, d=edi; ebx and esi serve as scratch.
int dmulscale16(int,int,int,int);
#pragma aux dmulscale16 =\
	"imul edx",\
	"mov ebx, eax",\
	"mov eax, esi",\
	"mov esi, edx",\
	"imul edi",\
	"add eax, ebx",\
	"adc edx, esi",\
	"shrd eax, edx, 16",\
	parm nomemory [eax][edx][esi][edi]\
	modify exact [eax ebx edx esi]
1532 
// dmulscale17(a,b,c,d): signed 64-bit sum a*b + c*d, shifted right 17 bits;
// the low 32 bits of the shifted value are returned in eax.
// Register args: a=eax, b=edx, c=esi, d=edi; ebx and esi serve as scratch.
int dmulscale17(int,int,int,int);
#pragma aux dmulscale17 =\
	"imul edx",\
	"mov ebx, eax",\
	"mov eax, esi",\
	"mov esi, edx",\
	"imul edi",\
	"add eax, ebx",\
	"adc edx, esi",\
	"shrd eax, edx, 17",\
	parm nomemory [eax][edx][esi][edi]\
	modify exact [eax ebx edx esi]
1545 
// dmulscale18(a,b,c,d): signed 64-bit sum a*b + c*d, shifted right 18 bits;
// the low 32 bits of the shifted value are returned in eax.
// Register args: a=eax, b=edx, c=esi, d=edi; ebx and esi serve as scratch.
int dmulscale18(int,int,int,int);
#pragma aux dmulscale18 =\
	"imul edx",\
	"mov ebx, eax",\
	"mov eax, esi",\
	"mov esi, edx",\
	"imul edi",\
	"add eax, ebx",\
	"adc edx, esi",\
	"shrd eax, edx, 18",\
	parm nomemory [eax][edx][esi][edi]\
	modify exact [eax ebx edx esi]
1558 
// dmulscale19(a,b,c,d): signed 64-bit sum a*b + c*d, shifted right 19 bits;
// the low 32 bits of the shifted value are returned in eax.
// Register args: a=eax, b=edx, c=esi, d=edi; ebx and esi serve as scratch.
int dmulscale19(int,int,int,int);
#pragma aux dmulscale19 =\
	"imul edx",\
	"mov ebx, eax",\
	"mov eax, esi",\
	"mov esi, edx",\
	"imul edi",\
	"add eax, ebx",\
	"adc edx, esi",\
	"shrd eax, edx, 19",\
	parm nomemory [eax][edx][esi][edi]\
	modify exact [eax ebx edx esi]
1571 
// dmulscale20(a,b,c,d): signed 64-bit sum a*b + c*d, shifted right 20 bits;
// the low 32 bits of the shifted value are returned in eax.
// Register args: a=eax, b=edx, c=esi, d=edi; ebx and esi serve as scratch.
int dmulscale20(int,int,int,int);
#pragma aux dmulscale20 =\
	"imul edx",\
	"mov ebx, eax",\
	"mov eax, esi",\
	"mov esi, edx",\
	"imul edi",\
	"add eax, ebx",\
	"adc edx, esi",\
	"shrd eax, edx, 20",\
	parm nomemory [eax][edx][esi][edi]\
	modify exact [eax ebx edx esi]
1584 
// dmulscale21(a,b,c,d): signed 64-bit sum a*b + c*d, shifted right 21 bits;
// the low 32 bits of the shifted value are returned in eax.
// Register args: a=eax, b=edx, c=esi, d=edi; ebx and esi serve as scratch.
int dmulscale21(int,int,int,int);
#pragma aux dmulscale21 =\
	"imul edx",\
	"mov ebx, eax",\
	"mov eax, esi",\
	"mov esi, edx",\
	"imul edi",\
	"add eax, ebx",\
	"adc edx, esi",\
	"shrd eax, edx, 21",\
	parm nomemory [eax][edx][esi][edi]\
	modify exact [eax ebx edx esi]
1597 
// dmulscale22(a,b,c,d): signed 64-bit sum a*b + c*d, shifted right 22 bits;
// the low 32 bits of the shifted value are returned in eax.
// Register args: a=eax, b=edx, c=esi, d=edi; ebx and esi serve as scratch.
int dmulscale22(int,int,int,int);
#pragma aux dmulscale22 =\
	"imul edx",\
	"mov ebx, eax",\
	"mov eax, esi",\
	"mov esi, edx",\
	"imul edi",\
	"add eax, ebx",\
	"adc edx, esi",\
	"shrd eax, edx, 22",\
	parm nomemory [eax][edx][esi][edi]\
	modify exact [eax ebx edx esi]
1610 
// dmulscale23(a,b,c,d): signed 64-bit sum a*b + c*d, shifted right 23 bits;
// the low 32 bits of the shifted value are returned in eax.
// Register args: a=eax, b=edx, c=esi, d=edi; ebx and esi serve as scratch.
int dmulscale23(int,int,int,int);
#pragma aux dmulscale23 =\
	"imul edx",\
	"mov ebx, eax",\
	"mov eax, esi",\
	"mov esi, edx",\
	"imul edi",\
	"add eax, ebx",\
	"adc edx, esi",\
	"shrd eax, edx, 23",\
	parm nomemory [eax][edx][esi][edi]\
	modify exact [eax ebx edx esi]
1623 
// dmulscale24(a,b,c,d): signed 64-bit sum a*b + c*d, shifted right 24 bits;
// the low 32 bits of the shifted value are returned in eax.
// Register args: a=eax, b=edx, c=esi, d=edi; ebx and esi serve as scratch.
int dmulscale24(int,int,int,int);
#pragma aux dmulscale24 =\
	"imul edx",\
	"mov ebx, eax",\
	"mov eax, esi",\
	"mov esi, edx",\
	"imul edi",\
	"add eax, ebx",\
	"adc edx, esi",\
	"shrd eax, edx, 24",\
	parm nomemory [eax][edx][esi][edi]\
	modify exact [eax ebx edx esi]
1636 
// dmulscale25(a,b,c,d): signed 64-bit sum a*b + c*d, shifted right 25 bits;
// the low 32 bits of the shifted value are returned in eax.
// Register args: a=eax, b=edx, c=esi, d=edi; ebx and esi serve as scratch.
int dmulscale25(int,int,int,int);
#pragma aux dmulscale25 =\
	"imul edx",\
	"mov ebx, eax",\
	"mov eax, esi",\
	"mov esi, edx",\
	"imul edi",\
	"add eax, ebx",\
	"adc edx, esi",\
	"shrd eax, edx, 25",\
	parm nomemory [eax][edx][esi][edi]\
	modify exact [eax ebx edx esi]
1649 
// dmulscale26(a,b,c,d): signed 64-bit sum a*b + c*d, shifted right 26 bits;
// the low 32 bits of the shifted value are returned in eax.
// Register args: a=eax, b=edx, c=esi, d=edi; ebx and esi serve as scratch.
int dmulscale26(int,int,int,int);
#pragma aux dmulscale26 =\
	"imul edx",\
	"mov ebx, eax",\
	"mov eax, esi",\
	"mov esi, edx",\
	"imul edi",\
	"add eax, ebx",\
	"adc edx, esi",\
	"shrd eax, edx, 26",\
	parm nomemory [eax][edx][esi][edi]\
	modify exact [eax ebx edx esi]
1662 
// dmulscale27(a,b,c,d): signed 64-bit sum a*b + c*d, shifted right 27 bits;
// the low 32 bits of the shifted value are returned in eax.
// Register args: a=eax, b=edx, c=esi, d=edi; ebx and esi serve as scratch.
int dmulscale27(int,int,int,int);
#pragma aux dmulscale27 =\
	"imul edx",\
	"mov ebx, eax",\
	"mov eax, esi",\
	"mov esi, edx",\
	"imul edi",\
	"add eax, ebx",\
	"adc edx, esi",\
	"shrd eax, edx, 27",\
	parm nomemory [eax][edx][esi][edi]\
	modify exact [eax ebx edx esi]
1675 
// dmulscale28(a,b,c,d): signed 64-bit sum a*b + c*d, shifted right 28 bits;
// the low 32 bits of the shifted value are returned in eax.
// Register args: a=eax, b=edx, c=esi, d=edi; ebx and esi serve as scratch.
int dmulscale28(int,int,int,int);
#pragma aux dmulscale28 =\
	"imul edx",\
	"mov ebx, eax",\
	"mov eax, esi",\
	"mov esi, edx",\
	"imul edi",\
	"add eax, ebx",\
	"adc edx, esi",\
	"shrd eax, edx, 28",\
	parm nomemory [eax][edx][esi][edi]\
	modify exact [eax ebx edx esi]
1688 
// dmulscale29(a,b,c,d): signed 64-bit sum a*b + c*d, shifted right 29 bits;
// the low 32 bits of the shifted value are returned in eax.
// Register args: a=eax, b=edx, c=esi, d=edi; ebx and esi serve as scratch.
int dmulscale29(int,int,int,int);
#pragma aux dmulscale29 =\
	"imul edx",\
	"mov ebx, eax",\
	"mov eax, esi",\
	"mov esi, edx",\
	"imul edi",\
	"add eax, ebx",\
	"adc edx, esi",\
	"shrd eax, edx, 29",\
	parm nomemory [eax][edx][esi][edi]\
	modify exact [eax ebx edx esi]
1701 
// dmulscale30(a,b,c,d): signed 64-bit sum a*b + c*d, shifted right 30 bits;
// the low 32 bits of the shifted value are returned in eax.
// Register args: a=eax, b=edx, c=esi, d=edi; ebx and esi serve as scratch.
int dmulscale30(int,int,int,int);
#pragma aux dmulscale30 =\
	"imul edx",\
	"mov ebx, eax",\
	"mov eax, esi",\
	"mov esi, edx",\
	"imul edi",\
	"add eax, ebx",\
	"adc edx, esi",\
	"shrd eax, edx, 30",\
	parm nomemory [eax][edx][esi][edi]\
	modify exact [eax ebx edx esi]
1714 
// dmulscale31(a,b,c,d): signed 64-bit sum a*b + c*d, shifted right 31 bits;
// the low 32 bits of the shifted value are returned in eax.
// Register args: a=eax, b=edx, c=esi, d=edi; ebx and esi serve as scratch.
int dmulscale31(int,int,int,int);
#pragma aux dmulscale31 =\
	"imul edx",\
	"mov ebx, eax",\
	"mov eax, esi",\
	"mov esi, edx",\
	"imul edi",\
	"add eax, ebx",\
	"adc edx, esi",\
	"shrd eax, edx, 31",\
	parm nomemory [eax][edx][esi][edi]\
	modify exact [eax ebx edx esi]
1727 
// dmulscale32(a,b,c,d): signed 64-bit sum a*b + c*d; the upper 32 bits of
// the sum are returned. No shift instruction is needed because the result
// is taken directly from edx (see the "value [edx]" clause).
// Register args: a=eax, b=edx, c=esi, d=edi; ebx and esi serve as scratch.
int dmulscale32(int,int,int,int);
#pragma aux dmulscale32 =\
	"imul edx",\
	"mov ebx, eax",\
	"mov eax, esi",\
	"mov esi, edx",\
	"imul edi",\
	"add eax, ebx",\
	"adc edx, esi",\
	parm nomemory [eax][edx][esi][edi]\
	modify exact [eax ebx edx esi]\
	value [edx]
1740 
// tmulscale1(a,b,c,d,e,f): signed 64-bit sum a*b + c*d + e*f, shifted right
// 1 bit; the low 32 bits of the shifted value are returned in eax.
// Register args: a=eax, b=edx, c=ebx, d=ecx, e=esi, f=edi.
int tmulscale1(int,int,int,int,int,int);
#pragma aux tmulscale1 =\
	"imul edx",\
	"xchg eax, ebx",\
	"xchg edx, ecx",\
	"imul edx",\
	"add ebx, eax",\
	"adc ecx, edx",\
	"mov eax, esi",\
	"imul edi",\
	"add eax, ebx",\
	"adc edx, ecx",\
	"shrd eax, edx, 1",\
	parm nomemory [eax][edx][ebx][ecx][esi][edi]\
	modify exact [eax ebx ecx edx]
1756 
// tmulscale2(a,b,c,d,e,f): signed 64-bit sum a*b + c*d + e*f, shifted right
// 2 bits; the low 32 bits of the shifted value are returned in eax.
// Register args: a=eax, b=edx, c=ebx, d=ecx, e=esi, f=edi.
int tmulscale2(int,int,int,int,int,int);
#pragma aux tmulscale2 =\
	"imul edx",\
	"xchg eax, ebx",\
	"xchg edx, ecx",\
	"imul edx",\
	"add ebx, eax",\
	"adc ecx, edx",\
	"mov eax, esi",\
	"imul edi",\
	"add eax, ebx",\
	"adc edx, ecx",\
	"shrd eax, edx, 2",\
	parm nomemory [eax][edx][ebx][ecx][esi][edi]\
	modify exact [eax ebx ecx edx]
1772 
// tmulscale3(a,b,c,d,e,f): signed 64-bit sum a*b + c*d + e*f, shifted right
// 3 bits; the low 32 bits of the shifted value are returned in eax.
// Register args: a=eax, b=edx, c=ebx, d=ecx, e=esi, f=edi.
int tmulscale3(int,int,int,int,int,int);
#pragma aux tmulscale3 =\
	"imul edx",\
	"xchg eax, ebx",\
	"xchg edx, ecx",\
	"imul edx",\
	"add ebx, eax",\
	"adc ecx, edx",\
	"mov eax, esi",\
	"imul edi",\
	"add eax, ebx",\
	"adc edx, ecx",\
	"shrd eax, edx, 3",\
	parm nomemory [eax][edx][ebx][ecx][esi][edi]\
	modify exact [eax ebx ecx edx]
1788 
// tmulscale4(a,b,c,d,e,f): signed 64-bit sum a*b + c*d + e*f, shifted right
// 4 bits; the low 32 bits of the shifted value are returned in eax.
// Register args: a=eax, b=edx, c=ebx, d=ecx, e=esi, f=edi.
int tmulscale4(int,int,int,int,int,int);
#pragma aux tmulscale4 =\
	"imul edx",\
	"xchg eax, ebx",\
	"xchg edx, ecx",\
	"imul edx",\
	"add ebx, eax",\
	"adc ecx, edx",\
	"mov eax, esi",\
	"imul edi",\
	"add eax, ebx",\
	"adc edx, ecx",\
	"shrd eax, edx, 4",\
	parm nomemory [eax][edx][ebx][ecx][esi][edi]\
	modify exact [eax ebx ecx edx]
1804 
// tmulscale5(a,b,c,d,e,f): signed 64-bit sum a*b + c*d + e*f, shifted right
// 5 bits; the low 32 bits of the shifted value are returned in eax.
// Register args: a=eax, b=edx, c=ebx, d=ecx, e=esi, f=edi.
int tmulscale5(int,int,int,int,int,int);
#pragma aux tmulscale5 =\
	"imul edx",\
	"xchg eax, ebx",\
	"xchg edx, ecx",\
	"imul edx",\
	"add ebx, eax",\
	"adc ecx, edx",\
	"mov eax, esi",\
	"imul edi",\
	"add eax, ebx",\
	"adc edx, ecx",\
	"shrd eax, edx, 5",\
	parm nomemory [eax][edx][ebx][ecx][esi][edi]\
	modify exact [eax ebx ecx edx]
1820 
// tmulscale6(a,b,c,d,e,f): signed 64-bit sum a*b + c*d + e*f, shifted right
// 6 bits; the low 32 bits of the shifted value are returned in eax.
// Register args: a=eax, b=edx, c=ebx, d=ecx, e=esi, f=edi.
int tmulscale6(int,int,int,int,int,int);
#pragma aux tmulscale6 =\
	"imul edx",\
	"xchg eax, ebx",\
	"xchg edx, ecx",\
	"imul edx",\
	"add ebx, eax",\
	"adc ecx, edx",\
	"mov eax, esi",\
	"imul edi",\
	"add eax, ebx",\
	"adc edx, ecx",\
	"shrd eax, edx, 6",\
	parm nomemory [eax][edx][ebx][ecx][esi][edi]\
	modify exact [eax ebx ecx edx]
1836 
// tmulscale7(a,b,c,d,e,f): signed 64-bit sum a*b + c*d + e*f, shifted right
// 7 bits; the low 32 bits of the shifted value are returned in eax.
// Register args: a=eax, b=edx, c=ebx, d=ecx, e=esi, f=edi.
int tmulscale7(int,int,int,int,int,int);
#pragma aux tmulscale7 =\
	"imul edx",\
	"xchg eax, ebx",\
	"xchg edx, ecx",\
	"imul edx",\
	"add ebx, eax",\
	"adc ecx, edx",\
	"mov eax, esi",\
	"imul edi",\
	"add eax, ebx",\
	"adc edx, ecx",\
	"shrd eax, edx, 7",\
	parm nomemory [eax][edx][ebx][ecx][esi][edi]\
	modify exact [eax ebx ecx edx]
1852 
// tmulscale8(a,b,c,d,e,f): signed 64-bit sum a*b + c*d + e*f, shifted right
// 8 bits; the low 32 bits of the shifted value are returned in eax.
// Register args: a=eax, b=edx, c=ebx, d=ecx, e=esi, f=edi.
int tmulscale8(int,int,int,int,int,int);
#pragma aux tmulscale8 =\
	"imul edx",\
	"xchg eax, ebx",\
	"xchg edx, ecx",\
	"imul edx",\
	"add ebx, eax",\
	"adc ecx, edx",\
	"mov eax, esi",\
	"imul edi",\
	"add eax, ebx",\
	"adc edx, ecx",\
	"shrd eax, edx, 8",\
	parm nomemory [eax][edx][ebx][ecx][esi][edi]\
	modify exact [eax ebx ecx edx]
1868 
// tmulscale9(a,b,c,d,e,f): signed 64-bit sum a*b + c*d + e*f, shifted right
// 9 bits; the low 32 bits of the shifted value are returned in eax.
// Register args: a=eax, b=edx, c=ebx, d=ecx, e=esi, f=edi.
int tmulscale9(int,int,int,int,int,int);
#pragma aux tmulscale9 =\
	"imul edx",\
	"xchg eax, ebx",\
	"xchg edx, ecx",\
	"imul edx",\
	"add ebx, eax",\
	"adc ecx, edx",\
	"mov eax, esi",\
	"imul edi",\
	"add eax, ebx",\
	"adc edx, ecx",\
	"shrd eax, edx, 9",\
	parm nomemory [eax][edx][ebx][ecx][esi][edi]\
	modify exact [eax ebx ecx edx]
1884 
// tmulscale10(a,b,c,d,e,f): signed 64-bit sum a*b + c*d + e*f, shifted right
// 10 bits; the low 32 bits of the shifted value are returned in eax.
// Register args: a=eax, b=edx, c=ebx, d=ecx, e=esi, f=edi.
int tmulscale10(int,int,int,int,int,int);
#pragma aux tmulscale10 =\
	"imul edx",\
	"xchg eax, ebx",\
	"xchg edx, ecx",\
	"imul edx",\
	"add ebx, eax",\
	"adc ecx, edx",\
	"mov eax, esi",\
	"imul edi",\
	"add eax, ebx",\
	"adc edx, ecx",\
	"shrd eax, edx, 10",\
	parm nomemory [eax][edx][ebx][ecx][esi][edi]\
	modify exact [eax ebx ecx edx]
1900 
// tmulscale11(a,b,c,d,e,f): signed 64-bit sum a*b + c*d + e*f, shifted right
// 11 bits; the low 32 bits of the shifted value are returned in eax.
// Register args: a=eax, b=edx, c=ebx, d=ecx, e=esi, f=edi.
int tmulscale11(int,int,int,int,int,int);
#pragma aux tmulscale11 =\
	"imul edx",\
	"xchg eax, ebx",\
	"xchg edx, ecx",\
	"imul edx",\
	"add ebx, eax",\
	"adc ecx, edx",\
	"mov eax, esi",\
	"imul edi",\
	"add eax, ebx",\
	"adc edx, ecx",\
	"shrd eax, edx, 11",\
	parm nomemory [eax][edx][ebx][ecx][esi][edi]\
	modify exact [eax ebx ecx edx]
1916 
// tmulscale12(a,b,c,d,e,f): signed 64-bit sum a*b + c*d + e*f, shifted right
// 12 bits; the low 32 bits of the shifted value are returned in eax.
// Register args: a=eax, b=edx, c=ebx, d=ecx, e=esi, f=edi.
int tmulscale12(int,int,int,int,int,int);
#pragma aux tmulscale12 =\
	"imul edx",\
	"xchg eax, ebx",\
	"xchg edx, ecx",\
	"imul edx",\
	"add ebx, eax",\
	"adc ecx, edx",\
	"mov eax, esi",\
	"imul edi",\
	"add eax, ebx",\
	"adc edx, ecx",\
	"shrd eax, edx, 12",\
	parm nomemory [eax][edx][ebx][ecx][esi][edi]\
	modify exact [eax ebx ecx edx]
1932 
// tmulscale13(a,b,c,d,e,f): signed 64-bit sum a*b + c*d + e*f, shifted right
// 13 bits; the low 32 bits of the shifted value are returned in eax.
// Register args: a=eax, b=edx, c=ebx, d=ecx, e=esi, f=edi.
int tmulscale13(int,int,int,int,int,int);
#pragma aux tmulscale13 =\
	"imul edx",\
	"xchg eax, ebx",\
	"xchg edx, ecx",\
	"imul edx",\
	"add ebx, eax",\
	"adc ecx, edx",\
	"mov eax, esi",\
	"imul edi",\
	"add eax, ebx",\
	"adc edx, ecx",\
	"shrd eax, edx, 13",\
	parm nomemory [eax][edx][ebx][ecx][esi][edi]\
	modify exact [eax ebx ecx edx]
1948 
// tmulscale14(a,b,c,d,e,f): signed 64-bit sum a*b + c*d + e*f, shifted right
// 14 bits; the low 32 bits of the shifted value are returned in eax.
// Register args: a=eax, b=edx, c=ebx, d=ecx, e=esi, f=edi.
int tmulscale14(int,int,int,int,int,int);
#pragma aux tmulscale14 =\
	"imul edx",\
	"xchg eax, ebx",\
	"xchg edx, ecx",\
	"imul edx",\
	"add ebx, eax",\
	"adc ecx, edx",\
	"mov eax, esi",\
	"imul edi",\
	"add eax, ebx",\
	"adc edx, ecx",\
	"shrd eax, edx, 14",\
	parm nomemory [eax][edx][ebx][ecx][esi][edi]\
	modify exact [eax ebx ecx edx]
1964 
// tmulscale15(a,b,c,d,e,f): signed 64-bit sum a*b + c*d + e*f, shifted right
// 15 bits; the low 32 bits of the shifted value are returned in eax.
// Register args: a=eax, b=edx, c=ebx, d=ecx, e=esi, f=edi.
int tmulscale15(int,int,int,int,int,int);
#pragma aux tmulscale15 =\
	"imul edx",\
	"xchg eax, ebx",\
	"xchg edx, ecx",\
	"imul edx",\
	"add ebx, eax",\
	"adc ecx, edx",\
	"mov eax, esi",\
	"imul edi",\
	"add eax, ebx",\
	"adc edx, ecx",\
	"shrd eax, edx, 15",\
	parm nomemory [eax][edx][ebx][ecx][esi][edi]\
	modify exact [eax ebx ecx edx]
1980 
// tmulscale16(a,b,c,d,e,f): signed 64-bit sum a*b + c*d + e*f, shifted right
// 16 bits; the low 32 bits of the shifted value are returned in eax.
// Register args: a=eax, b=edx, c=ebx, d=ecx, e=esi, f=edi.
int tmulscale16(int,int,int,int,int,int);
#pragma aux tmulscale16 =\
	"imul edx",\
	"xchg eax, ebx",\
	"xchg edx, ecx",\
	"imul edx",\
	"add ebx, eax",\
	"adc ecx, edx",\
	"mov eax, esi",\
	"imul edi",\
	"add eax, ebx",\
	"adc edx, ecx",\
	"shrd eax, edx, 16",\
	parm nomemory [eax][edx][ebx][ecx][esi][edi]\
	modify exact [eax ebx ecx edx]
1996 
// tmulscale17(a,b,c,d,e,f): signed 64-bit sum a*b + c*d + e*f, shifted right
// 17 bits; the low 32 bits of the shifted value are returned in eax.
// Register args: a=eax, b=edx, c=ebx, d=ecx, e=esi, f=edi.
int tmulscale17(int,int,int,int,int,int);
#pragma aux tmulscale17 =\
	"imul edx",\
	"xchg eax, ebx",\
	"xchg edx, ecx",\
	"imul edx",\
	"add ebx, eax",\
	"adc ecx, edx",\
	"mov eax, esi",\
	"imul edi",\
	"add eax, ebx",\
	"adc edx, ecx",\
	"shrd eax, edx, 17",\
	parm nomemory [eax][edx][ebx][ecx][esi][edi]\
	modify exact [eax ebx ecx edx]
2012 
// tmulscale18(a,b,c,d,e,f): signed 64-bit sum a*b + c*d + e*f, shifted right
// 18 bits; the low 32 bits of the shifted value are returned in eax.
// Register args: a=eax, b=edx, c=ebx, d=ecx, e=esi, f=edi.
int tmulscale18(int,int,int,int,int,int);
#pragma aux tmulscale18 =\
	"imul edx",\
	"xchg eax, ebx",\
	"xchg edx, ecx",\
	"imul edx",\
	"add ebx, eax",\
	"adc ecx, edx",\
	"mov eax, esi",\
	"imul edi",\
	"add eax, ebx",\
	"adc edx, ecx",\
	"shrd eax, edx, 18",\
	parm nomemory [eax][edx][ebx][ecx][esi][edi]\
	modify exact [eax ebx ecx edx]
2028 
// tmulscale19(a,b,c,d,e,f): signed 64-bit sum a*b + c*d + e*f, shifted right
// 19 bits; the low 32 bits of the shifted value are returned in eax.
// Register args: a=eax, b=edx, c=ebx, d=ecx, e=esi, f=edi.
int tmulscale19(int,int,int,int,int,int);
#pragma aux tmulscale19 =\
	"imul edx",\
	"xchg eax, ebx",\
	"xchg edx, ecx",\
	"imul edx",\
	"add ebx, eax",\
	"adc ecx, edx",\
	"mov eax, esi",\
	"imul edi",\
	"add eax, ebx",\
	"adc edx, ecx",\
	"shrd eax, edx, 19",\
	parm nomemory [eax][edx][ebx][ecx][esi][edi]\
	modify exact [eax ebx ecx edx]
2044 
// tmulscale20(a,b,c,d,e,f): signed 64-bit sum a*b + c*d + e*f, shifted right
// 20 bits; the low 32 bits of the shifted value are returned in eax.
// Register args: a=eax, b=edx, c=ebx, d=ecx, e=esi, f=edi.
int tmulscale20(int,int,int,int,int,int);
#pragma aux tmulscale20 =\
	"imul edx",\
	"xchg eax, ebx",\
	"xchg edx, ecx",\
	"imul edx",\
	"add ebx, eax",\
	"adc ecx, edx",\
	"mov eax, esi",\
	"imul edi",\
	"add eax, ebx",\
	"adc edx, ecx",\
	"shrd eax, edx, 20",\
	parm nomemory [eax][edx][ebx][ecx][esi][edi]\
	modify exact [eax ebx ecx edx]
2060 
// tmulscale21(a,b,c,d,e,f): signed 64-bit sum a*b + c*d + e*f, shifted right
// 21 bits; the low 32 bits of the shifted value are returned in eax.
// Register args: a=eax, b=edx, c=ebx, d=ecx, e=esi, f=edi.
int tmulscale21(int,int,int,int,int,int);
#pragma aux tmulscale21 =\
	"imul edx",\
	"xchg eax, ebx",\
	"xchg edx, ecx",\
	"imul edx",\
	"add ebx, eax",\
	"adc ecx, edx",\
	"mov eax, esi",\
	"imul edi",\
	"add eax, ebx",\
	"adc edx, ecx",\
	"shrd eax, edx, 21",\
	parm nomemory [eax][edx][ebx][ecx][esi][edi]\
	modify exact [eax ebx ecx edx]
2076 
// tmulscale22(a,b,c,d,e,f): signed 64-bit sum a*b + c*d + e*f, shifted right
// 22 bits; the low 32 bits of the shifted value are returned in eax.
// Register args: a=eax, b=edx, c=ebx, d=ecx, e=esi, f=edi.
int tmulscale22(int,int,int,int,int,int);
#pragma aux tmulscale22 =\
	"imul edx",\
	"xchg eax, ebx",\
	"xchg edx, ecx",\
	"imul edx",\
	"add ebx, eax",\
	"adc ecx, edx",\
	"mov eax, esi",\
	"imul edi",\
	"add eax, ebx",\
	"adc edx, ecx",\
	"shrd eax, edx, 22",\
	parm nomemory [eax][edx][ebx][ecx][esi][edi]\
	modify exact [eax ebx ecx edx]
2092 
// tmulscale23(a,b,c,d,e,f): signed 64-bit sum a*b + c*d + e*f, shifted right
// 23 bits; the low 32 bits of the shifted value are returned in eax.
// Register args: a=eax, b=edx, c=ebx, d=ecx, e=esi, f=edi.
int tmulscale23(int,int,int,int,int,int);
#pragma aux tmulscale23 =\
	"imul edx",\
	"xchg eax, ebx",\
	"xchg edx, ecx",\
	"imul edx",\
	"add ebx, eax",\
	"adc ecx, edx",\
	"mov eax, esi",\
	"imul edi",\
	"add eax, ebx",\
	"adc edx, ecx",\
	"shrd eax, edx, 23",\
	parm nomemory [eax][edx][ebx][ecx][esi][edi]\
	modify exact [eax ebx ecx edx]
2108 
// tmulscale24(a,b,c,d,e,f): signed 64-bit sum a*b + c*d + e*f, shifted right
// 24 bits; the low 32 bits of the shifted value are returned in eax.
// Register args: a=eax, b=edx, c=ebx, d=ecx, e=esi, f=edi.
int tmulscale24(int,int,int,int,int,int);
#pragma aux tmulscale24 =\
	"imul edx",\
	"xchg eax, ebx",\
	"xchg edx, ecx",\
	"imul edx",\
	"add ebx, eax",\
	"adc ecx, edx",\
	"mov eax, esi",\
	"imul edi",\
	"add eax, ebx",\
	"adc edx, ecx",\
	"shrd eax, edx, 24",\
	parm nomemory [eax][edx][ebx][ecx][esi][edi]\
	modify exact [eax ebx ecx edx]
2124 
// tmulscale25(a,b,c,d,e,f): signed 64-bit sum a*b + c*d + e*f, shifted right
// 25 bits; the low 32 bits of the shifted value are returned in eax.
// Register args: a=eax, b=edx, c=ebx, d=ecx, e=esi, f=edi.
int tmulscale25(int,int,int,int,int,int);
#pragma aux tmulscale25 =\
	"imul edx",\
	"xchg eax, ebx",\
	"xchg edx, ecx",\
	"imul edx",\
	"add ebx, eax",\
	"adc ecx, edx",\
	"mov eax, esi",\
	"imul edi",\
	"add eax, ebx",\
	"adc edx, ecx",\
	"shrd eax, edx, 25",\
	parm nomemory [eax][edx][ebx][ecx][esi][edi]\
	modify exact [eax ebx ecx edx]
2140 
// tmulscale26(a,b,c,d,e,f): signed 64-bit sum a*b + c*d + e*f, shifted right
// 26 bits; the low 32 bits of the shifted value are returned in eax.
// Register args: a=eax, b=edx, c=ebx, d=ecx, e=esi, f=edi.
int tmulscale26(int,int,int,int,int,int);
#pragma aux tmulscale26 =\
	"imul edx",\
	"xchg eax, ebx",\
	"xchg edx, ecx",\
	"imul edx",\
	"add ebx, eax",\
	"adc ecx, edx",\
	"mov eax, esi",\
	"imul edi",\
	"add eax, ebx",\
	"adc edx, ecx",\
	"shrd eax, edx, 26",\
	parm nomemory [eax][edx][ebx][ecx][esi][edi]\
	modify exact [eax ebx ecx edx]
2156 
// tmulscale27(a,b,c,d,e,f): signed 64-bit sum a*b + c*d + e*f, shifted right
// 27 bits; the low 32 bits of the shifted value are returned in eax.
// Register args: a=eax, b=edx, c=ebx, d=ecx, e=esi, f=edi.
int tmulscale27(int,int,int,int,int,int);
#pragma aux tmulscale27 =\
	"imul edx",\
	"xchg eax, ebx",\
	"xchg edx, ecx",\
	"imul edx",\
	"add ebx, eax",\
	"adc ecx, edx",\
	"mov eax, esi",\
	"imul edi",\
	"add eax, ebx",\
	"adc edx, ecx",\
	"shrd eax, edx, 27",\
	parm nomemory [eax][edx][ebx][ecx][esi][edi]\
	modify exact [eax ebx ecx edx]
2172 
// tmulscale28(a,b,c,d,e,f): signed 64-bit sum a*b + c*d + e*f, shifted right
// 28 bits; the low 32 bits of the shifted value are returned in eax.
// Register args: a=eax, b=edx, c=ebx, d=ecx, e=esi, f=edi.
int tmulscale28(int,int,int,int,int,int);
#pragma aux tmulscale28 =\
	"imul edx",\
	"xchg eax, ebx",\
	"xchg edx, ecx",\
	"imul edx",\
	"add ebx, eax",\
	"adc ecx, edx",\
	"mov eax, esi",\
	"imul edi",\
	"add eax, ebx",\
	"adc edx, ecx",\
	"shrd eax, edx, 28",\
	parm nomemory [eax][edx][ebx][ecx][esi][edi]\
	modify exact [eax ebx ecx edx]
2188 
// tmulscale29(a,b,c,d,e,f): signed 64-bit sum a*b + c*d + e*f, shifted right
// 29 bits; the low 32 bits of the shifted value are returned in eax.
// Register args: a=eax, b=edx, c=ebx, d=ecx, e=esi, f=edi.
int tmulscale29(int,int,int,int,int,int);
#pragma aux tmulscale29 =\
	"imul edx",\
	"xchg eax, ebx",\
	"xchg edx, ecx",\
	"imul edx",\
	"add ebx, eax",\
	"adc ecx, edx",\
	"mov eax, esi",\
	"imul edi",\
	"add eax, ebx",\
	"adc edx, ecx",\
	"shrd eax, edx, 29",\
	parm nomemory [eax][edx][ebx][ecx][esi][edi]\
	modify exact [eax ebx ecx edx]
2204 
// tmulscale30(a,b,c,d,e,f): signed 64-bit sum a*b + c*d + e*f, shifted right
// 30 bits; the low 32 bits of the shifted value are returned in eax.
// Register args: a=eax, b=edx, c=ebx, d=ecx, e=esi, f=edi.
int tmulscale30(int,int,int,int,int,int);
#pragma aux tmulscale30 =\
	"imul edx",\
	"xchg eax, ebx",\
	"xchg edx, ecx",\
	"imul edx",\
	"add ebx, eax",\
	"adc ecx, edx",\
	"mov eax, esi",\
	"imul edi",\
	"add eax, ebx",\
	"adc edx, ecx",\
	"shrd eax, edx, 30",\
	parm nomemory [eax][edx][ebx][ecx][esi][edi]\
	modify exact [eax ebx ecx edx]
2220 
// tmulscale31(a,b,c,d,e,f): signed 64-bit sum a*b + c*d + e*f, shifted right
// 31 bits; the low 32 bits of the shifted value are returned in eax.
// Register args: a=eax, b=edx, c=ebx, d=ecx, e=esi, f=edi.
int tmulscale31(int,int,int,int,int,int);
#pragma aux tmulscale31 =\
	"imul edx",\
	"xchg eax, ebx",\
	"xchg edx, ecx",\
	"imul edx",\
	"add ebx, eax",\
	"adc ecx, edx",\
	"mov eax, esi",\
	"imul edi",\
	"add eax, ebx",\
	"adc edx, ecx",\
	"shrd eax, edx, 31",\
	parm nomemory [eax][edx][ebx][ecx][esi][edi]\
	modify exact [eax ebx ecx edx]
2236 
// tmulscale32(a,b,c,d,e,f): signed 64-bit sum a*b + c*d + e*f; the upper
// 32 bits of the sum are returned directly from edx ("value [edx]"), so no
// shift instruction is needed.
// Register args: a=eax, b=edx, c=ebx, d=ecx, e=esi, f=edi.
int tmulscale32(int,int,int,int,int,int);
#pragma aux tmulscale32 =\
	"imul edx",\
	"xchg eax, ebx",\
	"xchg edx, ecx",\
	"imul edx",\
	"add ebx, eax",\
	"adc ecx, edx",\
	"mov eax, esi",\
	"imul edi",\
	"add eax, ebx",\
	"adc edx, ecx",\
	parm nomemory [eax][edx][ebx][ecx][esi][edi]\
	modify exact [eax ebx ecx edx]\
	value [edx]
2252 
// boundmulscale(a,b,c): computes the signed 64-bit product a*b shifted right
// by c bits (c in cl), saturating the 32-bit result.
// After the shift, eax holds the candidate result and edx the bits above it.
// The result is accepted only when edx is the sign extension of eax (edx and
// eax agree in sign and edx is 0 or 0xffffffff). Otherwise eax is replaced by
// 0x7fffffff when the product was non-negative or 0x80000000 when it was
// negative (sign taken from the saved high half of the product in ebx).
// Register args: a=eax, b=ebx, c=ecx.
int boundmulscale(int,int,int);
#pragma aux boundmulscale =\
	"imul ebx",\
	"mov ebx, edx",\
	"shrd eax, edx, cl",\
	"sar edx, cl",\
	"xor edx, eax",\
	"js checkit",\
	"xor edx, eax",\
	"jz skipboundit",\
	"cmp edx, 0xffffffff",\
	"je skipboundit",\
	"checkit:",\
	"mov eax, ebx",\
	"sar eax, 31",\
	"xor eax, 0x7fffffff",\
	"skipboundit:",\
	parm nomemory [eax][ebx][ecx]\
	modify exact [eax ebx edx]
2272 
// divscale(a,b,c): signed divide of the 64-bit value (a << c) by b; the
// quotient is returned in eax.
// The dividend edx:eax is built as (a >> -c):(a << c); since the CPU uses
// only the low 5 bits of cl, "neg cl" yields a shift of 32-c for c in 1..31.
// NOTE(review): for c == 0 the negated count is still 0, so edx is left equal
// to a instead of its sign extension -- callers presumably never pass 0.
// Register args: a=eax, b=ebx, c=ecx (ecx is clobbered).
int divscale(int,int,int);
#pragma aux divscale =\
	"mov edx, eax",\
	"shl eax, cl",\
	"neg cl",\
	"sar edx, cl",\
	"idiv ebx",\
	parm nomemory [eax][ebx][ecx]\
	modify exact [eax ecx edx]
2282 
// divscale1(a,b): signed divide of the 64-bit value (a << 1) by b.
// "add eax, eax" doubles a and leaves its old sign bit in the carry flag;
// "sbb edx, edx" then turns that carry into the upper dividend half (0 or -1).
// idiv leaves the quotient in eax. Register args: a=eax, b=ebx.
int divscale1(int,int);
#pragma aux divscale1 =\
	"add eax, eax",\
	"sbb edx, edx",\
	"idiv ebx",\
	parm nomemory [eax][ebx]\
	modify exact [eax edx]
2290 
// divscale2(a,b): signed divide of the 64-bit value (a << 2) by b.
// edx:eax is built as (a >> 30):(a * 4); idiv leaves the quotient in eax.
// Register args: a=eax, b=ebx.
int divscale2(int,int);
#pragma aux divscale2 =\
	"mov edx, eax",\
	"sar edx, 30",\
	"lea eax, [eax*4]",\
	"idiv ebx",\
	parm nomemory [eax][ebx]\
	modify exact [eax edx]
2299 
// divscale3(a,b): signed divide of the 64-bit value (a << 3) by b.
// edx:eax is built as (a >> 29):(a * 8); idiv leaves the quotient in eax.
// Register args: a=eax, b=ebx.
int divscale3(int,int);
#pragma aux divscale3 =\
	"mov edx, eax",\
	"sar edx, 29",\
	"lea eax, [eax*8]",\
	"idiv ebx",\
	parm nomemory [eax][ebx]\
	modify exact [eax edx]
2308 
// divscale4(a,b): signed divide of the 64-bit value (a << 4) by b.
// edx:eax is built as (a >> 28):(a << 4); idiv leaves the quotient in eax.
// Register args: a=eax, b=ebx.
int divscale4(int,int);
#pragma aux divscale4 =\
	"mov edx, eax",\
	"sar edx, 28",\
	"shl eax, 4",\
	"idiv ebx",\
	parm nomemory [eax][ebx]\
	modify exact [eax edx]
2317 
// divscale5(a,b): signed divide of the 64-bit value (a << 5) by b.
// edx:eax is built as (a >> 27):(a << 5); idiv leaves the quotient in eax.
// Register args: a=eax, b=ebx.
int divscale5(int,int);
#pragma aux divscale5 =\
	"mov edx, eax",\
	"sar edx, 27",\
	"shl eax, 5",\
	"idiv ebx",\
	parm nomemory [eax][ebx]\
	modify exact [eax edx]
2326 
// divscale6(a,b): signed divide of the 64-bit value (a << 6) by b.
// edx:eax is built as (a >> 26):(a << 6); idiv leaves the quotient in eax.
// Register args: a=eax, b=ebx.
int divscale6(int,int);
#pragma aux divscale6 =\
	"mov edx, eax",\
	"sar edx, 26",\
	"shl eax, 6",\
	"idiv ebx",\
	parm nomemory [eax][ebx]\
	modify exact [eax edx]
2335 
// divscale7(a,b): signed divide of the 64-bit value (a << 7) by b.
// edx:eax is built as (a >> 25):(a << 7); idiv leaves the quotient in eax.
// Register args: a=eax, b=ebx.
int divscale7(int,int);
#pragma aux divscale7 =\
	"mov edx, eax",\
	"sar edx, 25",\
	"shl eax, 7",\
	"idiv ebx",\
	parm nomemory [eax][ebx]\
	modify exact [eax edx]
2344 
// divscale8(a,b): signed divide of the 64-bit value (a << 8) by b.
// edx:eax is built as (a >> 24):(a << 8); idiv leaves the quotient in eax.
// Register args: a=eax, b=ebx.
int divscale8(int,int);
#pragma aux divscale8 =\
	"mov edx, eax",\
	"sar edx, 24",\
	"shl eax, 8",\
	"idiv ebx",\
	parm nomemory [eax][ebx]\
	modify exact [eax edx]
2353 
// divscale9(a,b): signed divide of the 64-bit value (a << 9) by b.
// edx:eax is built as (a >> 23):(a << 9); idiv leaves the quotient in eax.
// Register args: a=eax, b=ebx.
int divscale9(int,int);
#pragma aux divscale9 =\
	"mov edx, eax",\
	"sar edx, 23",\
	"shl eax, 9",\
	"idiv ebx",\
	parm nomemory [eax][ebx]\
	modify exact [eax edx]
2362 
// divscale10(a,b): signed divide of the 64-bit value (a << 10) by b.
// edx:eax is built as (a >> 22):(a << 10); idiv leaves the quotient in eax.
// Register args: a=eax, b=ebx.
int divscale10(int,int);
#pragma aux divscale10 =\
	"mov edx, eax",\
	"sar edx, 22",\
	"shl eax, 10",\
	"idiv ebx",\
	parm nomemory [eax][ebx]\
	modify exact [eax edx]
2371 
// divscale11(a,b): signed divide of the 64-bit value (a << 11) by b.
// edx:eax is built as (a >> 21):(a << 11); idiv leaves the quotient in eax.
// Register args: a=eax, b=ebx.
int divscale11(int,int);
#pragma aux divscale11 =\
	"mov edx, eax",\
	"sar edx, 21",\
	"shl eax, 11",\
	"idiv ebx",\
	parm nomemory [eax][ebx]\
	modify exact [eax edx]
2380 
// divscale12(a,b): signed divide of the 64-bit value (a << 12) by b.
// edx:eax is built as (a >> 20):(a << 12); idiv leaves the quotient in eax.
// Register args: a=eax, b=ebx.
int divscale12(int,int);
#pragma aux divscale12 =\
	"mov edx, eax",\
	"sar edx, 20",\
	"shl eax, 12",\
	"idiv ebx",\
	parm nomemory [eax][ebx]\
	modify exact [eax edx]
2389 
// divscale13(a,b): signed divide of the 64-bit value (a << 13) by b.
// edx:eax is built as (a >> 19):(a << 13); idiv leaves the quotient in eax.
// Register args: a=eax, b=ebx.
int divscale13(int,int);
#pragma aux divscale13 =\
	"mov edx, eax",\
	"sar edx, 19",\
	"shl eax, 13",\
	"idiv ebx",\
	parm nomemory [eax][ebx]\
	modify exact [eax edx]
2398 
// divscale14(a,b): signed divide of the 64-bit value (a << 14) by b.
// edx:eax is built as (a >> 18):(a << 14); idiv leaves the quotient in eax.
// Register args: a=eax, b=ebx.
int divscale14(int,int);
#pragma aux divscale14 =\
	"mov edx, eax",\
	"sar edx, 18",\
	"shl eax, 14",\
	"idiv ebx",\
	parm nomemory [eax][ebx]\
	modify exact [eax edx]
2407 
// divscale15(a,b): signed divide of the 64-bit value (a << 15) by b.
// edx:eax is built as (a >> 17):(a << 15); idiv leaves the quotient in eax.
// Register args: a=eax, b=ebx.
int divscale15(int,int);
#pragma aux divscale15 =\
	"mov edx, eax",\
	"sar edx, 17",\
	"shl eax, 15",\
	"idiv ebx",\
	parm nomemory [eax][ebx]\
	modify exact [eax edx]
2416 
// divscale16(a,b): signed divide of the 64-bit value (a << 16) by b.
// edx:eax is built as (a >> 16):(a << 16); idiv leaves the quotient in eax.
// Register args: a=eax, b=ebx.
int divscale16(int,int);
#pragma aux divscale16 =\
	"mov edx, eax",\
	"sar edx, 16",\
	"shl eax, 16",\
	"idiv ebx",\
	parm nomemory [eax][ebx]\
	modify exact [eax edx]
2425 
// divscale17(a,b): signed divide of the 64-bit value (a << 17) by b.
// edx:eax is built as (a >> 15):(a << 17); idiv leaves the quotient in eax.
// Register args: a=eax, b=ebx.
int divscale17(int,int);
#pragma aux divscale17 =\
	"mov edx, eax",\
	"sar edx, 15",\
	"shl eax, 17",\
	"idiv ebx",\
	parm nomemory [eax][ebx]\
	modify exact [eax edx]
2434 
// divscale18(a,b): signed divide of the 64-bit value (a << 18) by b.
// edx:eax is built as (a >> 14):(a << 18); idiv leaves the quotient in eax.
// Register args: a=eax, b=ebx.
int divscale18(int,int);
#pragma aux divscale18 =\
	"mov edx, eax",\
	"sar edx, 14",\
	"shl eax, 18",\
	"idiv ebx",\
	parm nomemory [eax][ebx]\
	modify exact [eax edx]
2443 
// divscale19(a,b): signed divide of the 64-bit value (a << 19) by b.
// edx:eax is built as (a >> 13):(a << 19); idiv leaves the quotient in eax.
// Register args: a=eax, b=ebx.
int divscale19(int,int);
#pragma aux divscale19 =\
	"mov edx, eax",\
	"sar edx, 13",\
	"shl eax, 19",\
	"idiv ebx",\
	parm nomemory [eax][ebx]\
	modify exact [eax edx]
2452 
// divscale20(a,b): signed divide of the 64-bit value (a << 20) by b.
// edx:eax is built as (a >> 12):(a << 20); idiv leaves the quotient in eax.
// Register args: a=eax, b=ebx.
int divscale20(int,int);
#pragma aux divscale20 =\
	"mov edx, eax",\
	"sar edx, 12",\
	"shl eax, 20",\
	"idiv ebx",\
	parm nomemory [eax][ebx]\
	modify exact [eax edx]
2461 
// divscale21(a,b): signed divide of the 64-bit value (a << 21) by b.
// edx:eax is built as (a >> 11):(a << 21); idiv leaves the quotient in eax.
// Register args: a=eax, b=ebx.
int divscale21(int,int);
#pragma aux divscale21 =\
	"mov edx, eax",\
	"sar edx, 11",\
	"shl eax, 21",\
	"idiv ebx",\
	parm nomemory [eax][ebx]\
	modify exact [eax edx]
2470 
// divscale22(a,b): signed divide of the 64-bit value (a << 22) by b.
// edx:eax is built as (a >> 10):(a << 22); idiv leaves the quotient in eax.
// Register args: a=eax, b=ebx.
int divscale22(int,int);
#pragma aux divscale22 =\
	"mov edx, eax",\
	"sar edx, 10",\
	"shl eax, 22",\
	"idiv ebx",\
	parm nomemory [eax][ebx]\
	modify exact [eax edx]
2479 
// divscale23(a,b): signed divide of the 64-bit value (a << 23) by b.
// edx:eax is built as (a >> 9):(a << 23); idiv leaves the quotient in eax.
// Register args: a=eax, b=ebx.
int divscale23(int,int);
#pragma aux divscale23 =\
	"mov edx, eax",\
	"sar edx, 9",\
	"shl eax, 23",\
	"idiv ebx",\
	parm nomemory [eax][ebx]\
	modify exact [eax edx]
2488 
// divscale24(a,b): signed divide of the 64-bit value (a << 24) by b.
// edx:eax is built as (a >> 8):(a << 24); idiv leaves the quotient in eax.
// Register args: a=eax, b=ebx.
int divscale24(int,int);
#pragma aux divscale24 =\
	"mov edx, eax",\
	"sar edx, 8",\
	"shl eax, 24",\
	"idiv ebx",\
	parm nomemory [eax][ebx]\
	modify exact [eax edx]
2497 
// divscale25(a,b): signed divide of the 64-bit value (a << 25) by b.
// edx:eax is built as (a >> 7):(a << 25); idiv leaves the quotient in eax.
// Register args: a=eax, b=ebx.
int divscale25(int,int);
#pragma aux divscale25 =\
	"mov edx, eax",\
	"sar edx, 7",\
	"shl eax, 25",\
	"idiv ebx",\
	parm nomemory [eax][ebx]\
	modify exact [eax edx]
2506 
// divscale26(a,b): signed divide of the 64-bit value (a << 26) by b.
// edx:eax is built as (a >> 6):(a << 26); idiv leaves the quotient in eax.
// Register args: a=eax, b=ebx.
int divscale26(int,int);
#pragma aux divscale26 =\
	"mov edx, eax",\
	"sar edx, 6",\
	"shl eax, 26",\
	"idiv ebx",\
	parm nomemory [eax][ebx]\
	modify exact [eax edx]
2515 
// divscale27(a,b): signed divide of the 64-bit value (a << 27) by b.
// edx:eax is built as (a >> 5):(a << 27); idiv leaves the quotient in eax.
// Register args: a=eax, b=ebx.
int divscale27(int,int);
#pragma aux divscale27 =\
	"mov edx, eax",\
	"sar edx, 5",\
	"shl eax, 27",\
	"idiv ebx",\
	parm nomemory [eax][ebx]\
	modify exact [eax edx]
2524 
// divscale28(a,b): signed divide of the 64-bit value (a << 28) by b.
// edx:eax is built as (a >> 4):(a << 28); idiv leaves the quotient in eax.
// Register args: a=eax, b=ebx.
int divscale28(int,int);
#pragma aux divscale28 =\
	"mov edx, eax",\
	"sar edx, 4",\
	"shl eax, 28",\
	"idiv ebx",\
	parm nomemory [eax][ebx]\
	modify exact [eax edx]
2533 
// divscale29(a,b): signed divide of the 64-bit value (a << 29) by b.
// edx:eax is built as (a >> 3):(a << 29); idiv leaves the quotient in eax.
// Register args: a=eax, b=ebx.
int divscale29(int,int);
#pragma aux divscale29 =\
	"mov edx, eax",\
	"sar edx, 3",\
	"shl eax, 29",\
	"idiv ebx",\
	parm nomemory [eax][ebx]\
	modify exact [eax edx]
2542 
// divscale30(a,b): signed divide of the 64-bit value (a << 30) by b.
// edx:eax is built as (a >> 2):(a << 30); idiv leaves the quotient in eax.
// Register args: a=eax, b=ebx.
int divscale30(int,int);
#pragma aux divscale30 =\
	"mov edx, eax",\
	"sar edx, 2",\
	"shl eax, 30",\
	"idiv ebx",\
	parm nomemory [eax][ebx]\
	modify exact [eax edx]
2551 
// divscale31(a,b): signed divide of the 64-bit value (a << 31) by b.
// edx:eax is built as (a >> 1):(a << 31); idiv leaves the quotient in eax.
// Register args: a=eax, b=ebx.
int divscale31(int,int);
#pragma aux divscale31 =\
	"mov edx, eax",\
	"sar edx, 1",\
	"shl eax, 31",\
	"idiv ebx",\
	parm nomemory [eax][ebx]\
	modify exact [eax edx]
2560 
// divscale32(a,b): signed divide of the 64-bit value (a << 32) by b.
// Unlike the other divscale variants, a arrives in edx; eax is zeroed so that
// edx:eax already equals a << 32. idiv leaves the quotient in eax.
// Register args: a=edx, b=ebx.
int divscale32(int,int);
#pragma aux divscale32 =\
	"xor eax, eax",\
	"idiv ebx",\
	parm nomemory [edx][ebx]\
	modify exact [eax edx]
2567 
// readpixel(D): returns the byte stored at address D (passed in edi),
// zero-extended to a full int.
// The load uses movzx so that all 32 bits of the returned eax are defined;
// a plain "mov al, ..." would leave the upper 24 bits holding whatever eax
// contained before the call.
// "nomemory" is intentionally not specified on the parm clause: this routine
// reads memory through D, so the compiler must not assume otherwise.
int readpixel(void*);
#pragma aux readpixel =\
	"movzx eax, byte ptr [edi]",\
	parm [edi]\
	modify exact [eax]
2573 
// drawpixel(D,a): stores the low byte of a (al) at address D (edi).
// No registers are modified and no return value is computed; eax still holds
// the second argument afterwards.
int drawpixel(void*,int);
#pragma aux drawpixel =\
	"mov byte ptr [edi], al",\
	parm [edi][eax]\
	modify exact
2579 
// drawpixels(D,a): stores the low 16 bits of a (ax) at address D (edi).
// No registers are modified and no return value is computed; eax still holds
// the second argument afterwards.
int drawpixels(void*,int);
#pragma aux drawpixels =\
	"mov word ptr [edi], ax",\
	parm [edi][eax]\
	modify exact
2585 
// drawpixelses(D,a): stores all 32 bits of a (eax) at address D (edi).
// No registers are modified and no return value is computed; eax still holds
// the second argument afterwards.
int drawpixelses(void*,int);
#pragma aux drawpixelses =\
	"mov dword ptr [edi], eax",\
	parm [edi][eax]\
	modify exact
2591 
// clearbuf(D,c,a): stores the 32-bit value a (eax) into c (ecx) consecutive
// dwords starting at D (edi) using "rep stosd".
// NOTE(review): relies on the direction flag being clear so that edi advances
// upward -- nothing in this routine sets it.
int clearbuf(void*,int,int);
#pragma aux clearbuf =\
	"rep stosd",\
	parm [edi][ecx][eax]\
	modify exact [edi ecx]
2597 
// clearbufbyte(D,c,a): fills c (ecx) bytes starting at D (edi) from eax.
// Counts below 4 are handled with an optional stosb followed by "rep stosw".
// Longer fills first store a byte and/or a word until edi is 4-byte aligned,
// then run "rep stosd" on the bulk, and finish the remaining 0-3 bytes
// (tracked in ebx) with a word and/or byte store.
// Because stosw/stosd write ax/eax, a uniform byte fill requires the caller
// to replicate the fill byte across all four bytes of a.
// NOTE(review): relies on the direction flag being clear.
int clearbufbyte(void*,int,int);
#pragma aux clearbufbyte =\
	"cmp ecx, 4",\
	"jae longcopy",\
	"test cl, 1",\
	"jz preskip",\
	"stosb",\
	"preskip: shr ecx, 1",\
	"rep stosw",\
	"jmp endit",\
	"longcopy: test edi, 1",\
	"jz skip1",\
	"stosb",\
	"dec ecx",\
	"skip1: test edi, 2",\
	"jz skip2",\
	"stosw",\
	"sub ecx, 2",\
	"skip2: mov ebx, ecx",\
	"shr ecx, 2",\
	"rep stosd",\
	"test bl, 2",\
	"jz skip3",\
	"stosw",\
	"skip3: test bl, 1",\
	"jz endit",\
	"stosb",\
	"endit:",\
	parm [edi][ecx][eax]\
	modify [ebx]
2628 
// copybuf(S,D,c): copies c (ecx) dwords from S (esi) to D (edi) using
// "rep movsd".
// NOTE(review): relies on the direction flag being clear so that both
// pointers advance upward -- nothing in this routine sets it.
int copybuf(void*,void*,int);
#pragma aux copybuf =\
	"rep movsd",\
	parm [esi][edi][ecx]\
	modify exact [ecx esi edi]
2634 
// copybufbyte(S,D,c): copies c (ecx) bytes from S (esi) to D (edi).
// Counts below 4 are handled with an optional movsb followed by "rep movsw".
// Longer copies first move a byte and/or a word until the destination edi is
// 4-byte aligned, then run "rep movsd" on the bulk, and finish the remaining
// 0-3 bytes (tracked in ebx) with a word and/or byte move.
// NOTE(review): relies on the direction flag being clear.
int copybufbyte(void*,void*,int);
#pragma aux copybufbyte =\
	"cmp ecx, 4",\
	"jae longcopy",\
	"test cl, 1",\
	"jz preskip",\
	"movsb",\
	"preskip: shr ecx, 1",\
	"rep movsw",\
	"jmp endit",\
	"longcopy: test edi, 1",\
	"jz skip1",\
	"movsb",\
	"dec ecx",\
	"skip1: test edi, 2",\
	"jz skip2",\
	"movsw",\
	"sub ecx, 2",\
	"skip2: mov ebx, ecx",\
	"shr ecx, 2",\
	"rep movsd",\
	"test bl, 2",\
	"jz skip3",\
	"movsw",\
	"skip3: test bl, 1",\
	"jz endit",\
	"movsb",\
	"endit:",\
	parm [esi][edi][ecx]\
	modify [ebx]
2665 
// copybufreverse(S,D,c): copies c (ecx) bytes in reversed order. The source
// is read downward starting at the byte S (esi) points to, and the
// destination D (edi) is written upward.
// Bit 0 of c selects a single byte copy, bit 1 a 16-bit copy whose two bytes
// are swapped with "ror ax, 8", and the remaining c/4 iterations each load a
// dword ending at esi, reverse its bytes with bswap, and store it.
// bswap is not available on the 80386; this routine needs a 486 or later.
int copybufreverse(void*,void*,int);
#pragma aux copybufreverse =\
	"shr ecx, 1",\
	"jnc skipit1",\
	"mov al, byte ptr [esi]",\
	"dec esi",\
	"mov byte ptr [edi], al",\
	"inc edi",\
	"skipit1: shr ecx, 1",\
	"jnc skipit2",\
	"mov ax, word ptr [esi-1]",\
	"sub esi, 2",\
	"ror ax, 8",\
	"mov word ptr [edi], ax",\
	"add edi, 2",\
	"skipit2: test ecx, ecx",\
	"jz endloop",\
	"begloop: mov eax, dword ptr [esi-3]",\
	"sub esi, 4",\
	"bswap eax",\
	"mov dword ptr [edi], eax",\
	"add edi, 4",\
	"dec ecx",\
	"jnz begloop",\
	"endloop:",\
	parm [esi][edi][ecx]
2692 
// qinterpolatedown16(buf,num,val,add): writes num 32-bit entries starting at
// address buf; entry i receives (val + i*add) >> 16 (arithmetic shift).
// Register args: buf=eax, num=ecx, val=edx, add=esi.
// The main loop emits two entries per iteration (num/2 iterations); a single
// trailing entry is written only when num is odd. The odd-count test is also
// reached when num/2 is zero, so a count of 0 stores nothing and a count of 1
// stores exactly one entry.
int qinterpolatedown16(int,int,int,int);
#pragma aux qinterpolatedown16 =\
	"mov ebx, ecx",\
	"shr ecx, 1",\
	"jz skipbegcalc",\
	"begqcalc: lea edi, [edx+esi]",\
	"sar edx, 16",\
	"mov dword ptr [eax], edx",\
	"lea edx, [edi+esi]",\
	"sar edi, 16",\
	"mov dword ptr [eax+4], edi",\
	"add eax, 8",\
	"dec ecx",\
	"jnz begqcalc",\
	"skipbegcalc: test ebx, 1",\
	"jz skipbegqcalc2",\
	"sar edx, 16",\
	"mov dword ptr [eax], edx",\
	"skipbegqcalc2:",\
	parm [eax][ecx][edx][esi]\
	modify exact [eax ebx ecx edx edi]
2714 
// qinterpolatedown16short(buf,num,val,add): writes num 16-bit entries
// starting at address buf; entry i receives the low 16 bits of
// (val + i*add) >> 16.
// Register args: buf=eax, num=ecx, val=edx, add=esi.
// A leading entry is written on its own when buf is not 4-byte aligned
// (bit 1 of the address set). The main loop then packs two entries into one
// dword store: the low half is the first value's upper 16 bits and the high
// half is the second value's upper 16 bits. The first value is shifted with
// "shr" (not "sar") so its upper half is zero before the two halves are
// added; a sign-extended upper half would borrow from the second entry
// whenever the first value is negative. A final odd entry is stored alone.
int qinterpolatedown16short(int,int,int,int);
#pragma aux qinterpolatedown16short =\
	"test ecx, ecx",\
	"jz endit",\
	"test al, 2",\
	"jz skipalignit",\
	"mov ebx, edx",\
	"sar ebx, 16",\
	"mov word ptr [eax], bx",\
	"add edx, esi",\
	"add eax, 2",\
	"dec ecx",\
	"jz endit",\
	"skipalignit: sub ecx, 2",\
	"jc finishit",\
	"begqcalc: mov ebx, edx",\
	"add edx, esi",\
	"shr ebx, 16",\
	"mov edi, edx",\
	"and edi, 0ffff0000h",\
	"add edx, esi",\
	"add ebx, edi",\
	"mov dword ptr [eax], ebx",\
	"add eax, 4",\
	"sub ecx, 2",\
	"jnc begqcalc",\
	"test cl, 1",\
	"jz endit",\
	"finishit: mov ebx, edx",\
	"sar ebx, 16",\
	"mov word ptr [eax], bx",\
	"endit:",\
	parm [eax][ecx][edx][esi]\
	modify exact [eax ebx ecx edx edi]
2749 
// mul3(a): returns a*3, computed with a single lea (eax + eax*2).
// The argument arrives in eax and the result is left there.
int mul3(int);
#pragma aux mul3 =\
	"lea eax, [eax+eax*2]",\
	parm nomemory [eax]
2754 
// mul5(a): returns a*5, computed with a single lea (eax + eax*4).
// The argument arrives in eax and the result is left there.
int mul5(int);
#pragma aux mul5 =\
	"lea eax, [eax+eax*4]",\
	parm nomemory [eax]
2759 
// mul9(a): returns a*9, computed with a single lea (eax + eax*8).
// The argument arrives in eax and the result is left there.
int mul9(int);
#pragma aux mul9 =\
	"lea eax, [eax+eax*8]",\
	parm nomemory [eax]
2764 
	//returns eax/ebx, dmval = eax%ebx;
	//The division is unsigned ("div" with edx zeroed first). The quotient is
	//returned in eax and the remainder is stored in the global dmval.
int divmod(int,int);
#pragma aux divmod =\
	"xor edx, edx",\
	"div ebx",\
	"mov dmval, edx",\
	parm [eax][ebx]\
	modify exact [eax edx]\
	value [eax]
2774 
	//returns eax%ebx, dmval = eax/ebx
	//(unsigned division: edx is cleared before div, so eax is zero-extended)
int moddiv(int,int);
#pragma aux moddiv =\
	"xor edx, edx",\
	"div ebx",\
	"mov dmval, eax",\
	parm [eax][ebx]\
	modify exact [eax edx]\
	value [edx]
2784 
// Watcom inline: absolute value of eax; the value is negated only when the
// sign flag set by "test eax, eax" indicates it is negative.
int klabs(int);
#pragma aux klabs =\
	"test eax, eax",\
	"jns skipnegate",\
	"neg eax",\
	"skipnegate:",\
	parm nomemory [eax]
2792 
// Watcom inline: sign of the argument (passed in ebx) as -1, 0 or 1 in eax,
// computed without branches. "add ebx, ebx" moves the sign bit into carry,
// "sbb eax, eax" turns that into 0 or -1, and cmp/adc add 1 only when eax is 0
// and the doubled argument is non-zero.
int ksgn(int);
#pragma aux ksgn =\
	"add ebx, ebx",\
	"sbb eax, eax",\
	"cmp eax, ebx",\
	"adc al, 0",\
	parm nomemory [ebx]\
	modify exact [eax ebx]
2801 
	//eax = (unsigned min)umin(eax,ebx)
	//branch-free: ecx becomes an all-ones mask when eax < ebx (borrow from
	//the sub), so (eax-ebx)&mask + ebx selects eax in that case, else ebx
int umin(int,int);
#pragma aux umin =\
	"sub eax, ebx",\
	"sbb ecx, ecx",\
	"and eax, ecx",\
	"add eax, ebx",\
	parm nomemory [eax][ebx]\
	modify exact [eax ecx]
2811 
	//eax = (unsigned max)umax(eax,ebx)
	//same trick as umin with the borrow mask inverted by the xor, so the
	//difference is kept only when eax >= ebx
int umax(int,int);
#pragma aux umax =\
	"sub eax, ebx",\
	"sbb ecx, ecx",\
	"xor ecx, 0xffffffff",\
	"and eax, ecx",\
	"add eax, ebx",\
	parm nomemory [eax][ebx]\
	modify exact [eax ecx]
2822 
// Watcom inline: signed minimum of eax and ebx (jl is a signed compare);
// eax is replaced by ebx unless eax is already the smaller value.
int kmin(int,int);
#pragma aux kmin =\
	"cmp eax, ebx",\
	"jl skipit",\
	"mov eax, ebx",\
	"skipit:",\
	parm nomemory [eax][ebx]\
	modify exact [eax]
2831 
// Watcom inline: signed maximum of eax and ebx (jg is a signed compare);
// eax is replaced by ebx unless eax is already the larger value.
int kmax(int,int);
#pragma aux kmax =\
	"cmp eax, ebx",\
	"jg skipit",\
	"mov eax, ebx",\
	"skipit:",\
	parm nomemory [eax][ebx]\
	modify exact [eax]
2840 
// Watcom inline: exchanges the single bytes at [eax] and [ebx], using cl/ch
// as temporaries.
int swapchar(void*,void*);
#pragma aux swapchar =\
	"mov cl, [eax]",\
	"mov ch, [ebx]",\
	"mov [ebx], cl",\
	"mov [eax], ch",\
	parm [eax][ebx]\
	modify exact [ecx]
2849 
// Watcom inline: exchanges the 16-bit words at [eax] and [ebx], using cx/dx
// as temporaries.
int swapshort(void*,void*);
#pragma aux swapshort =\
	"mov cx, [eax]",\
	"mov dx, [ebx]",\
	"mov [ebx], cx",\
	"mov [eax], dx",\
	parm [eax][ebx]\
	modify exact [ecx edx]
2858 
// Watcom inline: exchanges the 32-bit dwords at [eax] and [ebx], using
// ecx/edx as temporaries.
int swaplong(void*,void*);
#pragma aux swaplong =\
	"mov ecx, [eax]",\
	"mov edx, [ebx]",\
	"mov [ebx], ecx",\
	"mov [eax], edx",\
	parm [eax][ebx]\
	modify exact [ecx edx]
2867 
// Watcom inline: exchanges ecx consecutive dwords between the buffers at
// [eax] and [ebx]. The counter is only tested after each iteration
// (dec/jnz), so the loop body always runs at least once; a count of 0 is
// therefore not handled as "do nothing".
int swapbuf4(void*,void*,int);
#pragma aux swapbuf4 =\
	"begswap:",\
	"mov esi, [eax]",\
	"mov edi, [ebx]",\
	"mov [ebx], esi",\
	"mov [eax], edi",\
	"add eax, 4",\
	"add ebx, 4",\
	"dec ecx",\
	"jnz short begswap",\
	parm [eax][ebx][ecx]\
	modify exact [eax ebx ecx esi edi]
2881 
// Watcom inline: exchanges the 8 bytes at [eax] and [ebx] as two dwords
// (offsets 0 and 4), using ecx/edx as temporaries.
int swap64bit(void*,void*);
#pragma aux swap64bit =\
	"mov ecx, [eax]",\
	"mov edx, [ebx]",\
	"mov [ebx], ecx",\
	"mov ecx, [eax+4]",\
	"mov [eax], edx",\
	"mov edx, [ebx+4]",\
	"mov [ebx+4], ecx",\
	"mov [eax+4], edx",\
	parm [eax][ebx]\
	modify exact [ecx edx]
2894 
	//swapchar2(ptr1,ptr2,xsiz); is the same as:
	//swapchar(ptr1,ptr2); swapchar(ptr1+1,ptr2+xsiz);
	//The two bytes at ptr1 are read and written back as one 16-bit access;
	//esi (xsiz on entry) is turned into the address ptr2+xsiz.
int swapchar2(void*,void*,int);
#pragma aux swapchar2 =\
	"add esi, ebx",\
	"mov cx, [eax]",\
	"mov dl, [ebx]",\
	"mov [ebx], cl",\
	"mov dh, [esi]",\
	"mov [esi], ch",\
	"mov [eax], dx",\
	parm [eax][ebx][esi]\
	modify exact [ecx edx esi]
2908 //}}}
2909 
2910 #elif defined(_MSC_VER) && defined(_M_IX86) && USE_ASM	// __WATCOMC__
2911 
2912 //
2913 // Microsoft C inline assembler
2914 //
2915 
2916 //{{{
// Returns a*a (low 32 bits of the signed product). There is no return
// statement: the result is simply left in eax.
static __inline int sqr(int a)
{
	_asm {
		mov eax, a
		imul eax, eax
	}
}
2924 
// Returns (a*d)/c. The one-operand imul leaves the full 64-bit product in
// edx:eax and idiv divides that pair by c, so the intermediate product is not
// truncated to 32 bits. The quotient is left in eax.
static __inline int scale(int a, int d, int c)
{
	_asm {
		mov eax, a
		imul d
		idiv c
	}
}
2933 
// Returns the 64-bit product a*d shifted right by c bits, truncated to its
// low 32 bits. shrd shifts eax right while pulling the vacated high bits in
// from edx (the upper half of the product).
static __inline int mulscale(int a, int d, int c)
{
	_asm {
		mov ecx, c
		mov eax, a
		imul d
		shrd eax, edx, cl
	}
}
2943 
// Generator for mulscale1 .. mulscale31: the same computation as mulscale()
// with the shift count x fixed at compile time as an immediate operand.
// The helper macro is removed again once all variants are instantiated.
#define MULSCALE(x) \
static __inline int mulscale##x (int a, int d) \
{ \
	_asm mov eax, a \
	_asm imul d \
	_asm shrd eax, edx, x \
}

MULSCALE(1)	MULSCALE(2)	MULSCALE(3)	MULSCALE(4)
MULSCALE(5)	MULSCALE(6)	MULSCALE(7)	MULSCALE(8)
MULSCALE(9)	MULSCALE(10)	MULSCALE(11)	MULSCALE(12)
MULSCALE(13)	MULSCALE(14)	MULSCALE(15)	MULSCALE(16)
MULSCALE(17)	MULSCALE(18)	MULSCALE(19)	MULSCALE(20)
MULSCALE(21)	MULSCALE(22)	MULSCALE(23)	MULSCALE(24)
MULSCALE(25)	MULSCALE(26)	MULSCALE(27)	MULSCALE(28)
MULSCALE(29)	MULSCALE(30)	MULSCALE(31)
#undef MULSCALE
// Returns the upper 32 bits of the 64-bit product a*d, i.e. (a*d) >> 32.
// A shift by 32 cannot be done with shrd here, so edx is copied to eax.
static __inline int mulscale32(int a, int d)
{
	_asm {
		mov eax, a
		imul d
		mov eax, edx
	}
}
2969 
// Returns (a*d + S*D) >> c, truncated to 32 bits. Both products and their sum
// are kept as 64-bit register pairs (esi:ebx holds a*d while S*D is formed in
// edx:eax) before the final shift.
static __inline int dmulscale(int a, int d, int S, int D, int c)
{
	_asm {
		mov ecx, c
		mov eax, a
		imul d
		mov ebx, eax
		mov eax, S
		mov esi, edx
		imul D
		add eax, ebx
		adc edx, esi
		shrd eax, edx, cl
	}
}
2985 
// Generator for dmulscale1 .. dmulscale31: (a*d + S*D) >> x with the shift
// count x fixed as an immediate. Same register usage as dmulscale(). The
// helper macro is removed again once all variants are instantiated.
#define DMULSCALE(x) \
static __inline int dmulscale##x (int a, int d, int S, int D) \
{ \
	_asm mov eax, a \
	_asm imul d \
	_asm mov ebx, eax \
	_asm mov eax, S \
	_asm mov esi, edx \
	_asm imul D \
	_asm add eax, ebx \
	_asm adc edx, esi \
	_asm shrd eax, edx, x \
}

DMULSCALE(1)	DMULSCALE(2)	DMULSCALE(3)	DMULSCALE(4)
DMULSCALE(5)	DMULSCALE(6)	DMULSCALE(7)	DMULSCALE(8)
DMULSCALE(9)	DMULSCALE(10)	DMULSCALE(11)	DMULSCALE(12)
DMULSCALE(13)	DMULSCALE(14)	DMULSCALE(15)	DMULSCALE(16)
DMULSCALE(17)	DMULSCALE(18)	DMULSCALE(19)	DMULSCALE(20)
DMULSCALE(21)	DMULSCALE(22)	DMULSCALE(23)	DMULSCALE(24)
DMULSCALE(25)	DMULSCALE(26)	DMULSCALE(27)	DMULSCALE(28)
DMULSCALE(29)	DMULSCALE(30)	DMULSCALE(31)
#undef DMULSCALE
// Returns the upper 32 bits of the 64-bit sum a*d + S*D, i.e. the sum >> 32.
static __inline int dmulscale32(int a, int d, int S, int D)
{
	_asm {
		mov eax, a
		imul d
		mov ebx, eax
		mov eax, S
		mov esi, edx
		imul D
		add eax, ebx
		adc edx, esi
		mov eax, edx
	}
}
3023 
// Generator for tmulscale1 .. tmulscale31: (a*d + b*c + S*D) >> x, truncated
// to 32 bits. The xchg pairs park the first product in ecx:ebx while loading
// the operands of the second; each further product is added with add/adc so
// the running sum stays 64 bits wide until the final shrd. The helper macro is
// removed again once all variants are instantiated.
#define TMULSCALE(x) \
static __inline int tmulscale##x (int a, int d, int b, int c, int S, int D) \
{ \
	_asm mov eax, a \
	_asm mov ebx, b \
	_asm imul d \
	_asm xchg eax, ebx \
	_asm mov ecx, c \
	_asm xchg edx, ecx \
	_asm imul edx \
	_asm add ebx, eax \
	_asm adc ecx, edx \
	_asm mov eax, S \
	_asm imul D \
	_asm add eax, ebx \
	_asm adc edx, ecx \
	_asm shrd eax, edx, x \
}

TMULSCALE(1)	TMULSCALE(2)	TMULSCALE(3)	TMULSCALE(4)
TMULSCALE(5)	TMULSCALE(6)	TMULSCALE(7)	TMULSCALE(8)
TMULSCALE(9)	TMULSCALE(10)	TMULSCALE(11)	TMULSCALE(12)
TMULSCALE(13)	TMULSCALE(14)	TMULSCALE(15)	TMULSCALE(16)
TMULSCALE(17)	TMULSCALE(18)	TMULSCALE(19)	TMULSCALE(20)
TMULSCALE(21)	TMULSCALE(22)	TMULSCALE(23)	TMULSCALE(24)
TMULSCALE(25)	TMULSCALE(26)	TMULSCALE(27)	TMULSCALE(28)
TMULSCALE(29)	TMULSCALE(30)	TMULSCALE(31)
#undef TMULSCALE
// Returns the upper 32 bits of the 64-bit sum a*d + b*c + S*D.
static __inline int tmulscale32(int a, int d, int b, int c, int S, int D)
{
	_asm {
		mov eax, a
		mov ebx, b
		imul d
		xchg eax, ebx		// eax = b, ebx = low dword of a*d
		mov ecx, c
		xchg edx, ecx		// edx = c, ecx = high dword of a*d
		imul edx
		add ebx, eax
		adc ecx, edx		// ecx:ebx = a*d + b*c
		mov eax, S
		imul D
		add eax, ebx
		adc edx, ecx
		mov eax, edx
	}
}
3071 
// Returns (a*b) >> c, saturated: when the shifted product does not fit in a
// signed 32-bit value the result is 0x7fffffff for a non-negative product and
// 0x80000000 for a negative one.
static __inline int boundmulscale(int a, int b, int c)
{
	_asm {
		mov eax, a
		mov ecx, c
		imul b
		mov ebx, edx		// keep the product's high dword (its sign)
		shrd eax, edx, cl	// eax = low 32 bits of product >> c
		sar edx, cl		// edx = high bits left over after the shift
		xor edx, eax
		js checkit		// sign of high part and of result differ
		xor edx, eax		// undo the xor to get edx back
		jz skipboundit		// high part all zeros: result fits
		cmp edx, 0xffffffff
		je skipboundit		// high part all ones: result fits
	checkit:
		mov eax, ebx
		sar eax, 31
		xor eax, 0x7fffffff	// 0x7fffffff or 0x80000000 by product sign
	skipboundit:
	}
}
3094 
// Returns (a << c) / b, with the shifted dividend held as the 64-bit pair
// edx:eax so the shift does not lose the high bits.
// NOTE(review): for c == 0 the negated count is also 0, so edx is left equal
// to a instead of its sign extension -- confirm callers always pass c >= 1.
static __inline int divscale(int a, int b, int c)
{
	_asm {
		mov eax, a
		mov ecx, c
		mov edx, eax
		shl eax, cl		// low dword of a << c
		neg cl			// x86 masks shift counts to 5 bits: acts as 32-c
		sar edx, cl		// high dword of a << c, sign-extended
		idiv b
	}
}
3107 
// Returns (a << 1) / b. Doubling eax pushes the old sign bit into carry, and
// "sbb edx, edx" turns that carry into 0 or -1, giving the sign-extended high
// dword of the 64-bit dividend.
static __inline int divscale1(int a, int b)
{
	_asm {
		mov eax, a
		add eax, eax
		sbb edx, edx
		idiv b
	}
}
3117 
// Returns (a << 2) / b. edx receives the top bits of a (a >> 30, arithmetic)
// and lea forms the low dword a*4, so edx:eax is the 64-bit dividend.
static __inline int divscale2(int a, int b)
{
	_asm {
		mov eax, a
		mov edx, eax
		sar edx, 30
		lea eax, [eax*4]
		idiv b
	}
}
3128 
// Returns (a << 3) / b. edx receives the top bits of a (a >> 29, arithmetic)
// and lea forms the low dword a*8, so edx:eax is the 64-bit dividend.
static __inline int divscale3(int a, int b)
{
	_asm {
		mov eax, a
		mov edx, eax
		sar edx, 29
		lea eax, [eax*8]
		idiv b
	}
}
3139 
// Generator for divscale4 .. divscale31: (a << y) / b with a 64-bit dividend.
// The first argument x is the matching right-shift count for the high dword;
// every instantiation below uses x = 32 - y.
// NOTE(review): unlike MULSCALE/DMULSCALE/TMULSCALE, this helper macro is not
// #undef'd after use, so DIVSCALE stays defined for every file including this
// header.
#define DIVSCALE(x,y) \
static __inline int divscale##y(int a, int b) \
{ \
	_asm mov eax, a \
	_asm mov edx, eax \
	_asm sar edx, x \
	_asm shl eax, y \
	_asm idiv b \
}

DIVSCALE(28,4)	DIVSCALE(27,5)	DIVSCALE(26,6)	DIVSCALE(25,7)
DIVSCALE(24,8)	DIVSCALE(23,9)	DIVSCALE(22,10)	DIVSCALE(21,11)
DIVSCALE(20,12)	DIVSCALE(19,13)	DIVSCALE(18,14)	DIVSCALE(17,15)
DIVSCALE(16,16)	DIVSCALE(15,17)	DIVSCALE(14,18)	DIVSCALE(13,19)
DIVSCALE(12,20)	DIVSCALE(11,21)	DIVSCALE(10,22)	DIVSCALE(9,23)
DIVSCALE(8,24)	DIVSCALE(7,25)	DIVSCALE(6,26)	DIVSCALE(5,27)
DIVSCALE(4,28)	DIVSCALE(3,29)	DIVSCALE(2,30)	DIVSCALE(1,31)
3157 
// Returns (d << 32) / b: d is loaded directly as the high dword of the
// dividend and the low dword is zeroed.
static __inline int divscale32(int d, int b)
{
	_asm {
		mov edx, d
		xor eax, eax
		idiv b
	}
}
3166 
// Returns the byte stored at address d (left in al).
static __inline char readpixel(void *d)
{
	_asm {
		mov edx, d
		mov al, byte ptr [edx]
	}
}
3174 
// Stores the byte a at address d.
static __inline void drawpixel(void *d, char a)
{
	_asm {
		mov edx, d
		mov al, a
		mov byte ptr [edx], al
	}
}
3183 
// Stores the 16-bit value a at address d.
static __inline void drawpixels(void *d, short a)
{
	_asm {
		mov edx, d
		mov ax, a
		mov word ptr [edx], ax
	}
}
3192 
// Stores the 32-bit value a at address d.
static __inline void drawpixelses(void *d, int a)
{
	_asm {
		mov edx, d
		mov eax, a
		mov dword ptr [edx], eax
	}
}
3201 
// Fills c consecutive 32-bit dwords starting at d with the value a
// (rep stosd; the count is in dwords, not bytes).
static __inline void clearbuf(void *d, int c, int a)
{
	_asm {
		mov edi, d
		mov ecx, c
		mov eax, a
		rep stosd
	}
}
3211 
// Fills c bytes starting at d from the value in a, using stosb/stosw/stosd.
// Because the wider stores write ax/eax, a uniform byte fill needs that byte
// replicated through all four bytes of a.
// Counts below 4 take the short path (optional byte, then words). Longer
// fills first store a byte and/or a word until edi is 4-byte aligned, then
// whole dwords, then the 0-3 trailing bytes described by the low bits of ebx.
static __inline void clearbufbyte(void *d, int c, int a)
{
	_asm {
		mov edi, d
		mov ecx, c
		mov eax, a
		cmp ecx, 4
		jae longcopy
		test cl, 1
		jz preskip
		stosb
	preskip:
		shr ecx, 1
		rep stosw
		jmp endit
	longcopy:
		test edi, 1
		jz skip1
		stosb
		dec ecx
	skip1:
		test edi, 2
		jz skip2
		stosw
		sub ecx, 2
	skip2:
		mov ebx, ecx		// remember the remaining count for the tail
		shr ecx, 2
		rep stosd
		test bl, 2
		jz skip3
		stosw
	skip3:
		test bl, 1
		jz endit
		stosb
	endit:
	}
}
3251 
// Copies c consecutive 32-bit dwords from s to d (rep movsd; the count is in
// dwords, not bytes).
static __inline void copybuf(void *s, void *d, int c)
{
	_asm {
		mov esi, s
		mov edi, d
		mov ecx, c
		rep movsd
	}
}
3261 
// Copies c bytes from s to d using movsb/movsw/movsd.
// Counts below 4 take the short path (optional byte, then words). Longer
// copies first move a byte and/or a word until the destination edi is 4-byte
// aligned, then whole dwords, then the 0-3 trailing bytes described by the
// low bits of ebx.
static __inline void copybufbyte(void *s, void *d, int c)
{
	_asm {
		mov esi, s
		mov edi, d
		mov ecx, c
		cmp ecx, 4
		jae longcopy
		test cl, 1
		jz preskip
		movsb
	preskip:
		shr ecx, 1
		rep movsw
		jmp endit
	longcopy:
		test edi, 1
		jz skip1
		movsb
		dec ecx
	skip1:
		test edi, 2
		jz skip2
		movsw
		sub ecx, 2
	skip2:
		mov ebx, ecx		// remember the remaining count for the tail
		shr ecx, 2
		rep movsd
		test bl, 2
		jz skip3
		movsw
	skip3:
		test bl, 1
		jz endit
		movsb
	endit:
	}
}
3301 
// Copies c bytes in reversed order: s addresses the first byte to read and is
// walked downwards while d is written upwards. A leftover byte and a leftover
// word are handled first (bits 0 and 1 of c); the rest is moved four bytes at
// a time, with ror/bswap reversing the byte order inside each word/dword.
static __inline void copybufreverse(void *s, void *d, int c)
{
	_asm {
		mov esi, s
		mov edi, d
		mov ecx, c
		shr ecx, 1
		jnc skipit1		// bit 0 of c clear: no single byte
		mov al, byte ptr [esi]
		dec esi
		mov byte ptr [edi], al
		inc edi
	skipit1:
		shr ecx, 1
		jnc skipit2		// bit 1 of c clear: no single word
		mov ax, word ptr [esi-1]
		sub esi, 2
		ror ax, 8		// swap the two bytes
		mov word ptr [edi], ax
		add edi, 2
	skipit2:
		test ecx, ecx
		jz endloop
	begloop:
		mov eax, dword ptr [esi-3]
		sub esi, 4
		bswap eax		// reverse the four bytes
		mov dword ptr [edi], eax
		add edi, 4
		dec ecx
		jnz begloop
	endloop:
	}
}
3336 
// Writes c 32-bit values to a: each is the running value d shifted right
// (arithmetically) by 16, and s is added to the running value after each one.
// The main loop emits two values per iteration; a leftover element is written
// by the store at skipbegcalc.
// NOTE(review): c == 0 also jumps to skipbegcalc and still stores one value
// -- confirm callers never pass 0.
static __inline void qinterpolatedown16(void *a, int c, int d, int s)
{
	_asm {
		mov eax, a
		mov ecx, c
		mov edx, d
		mov esi, s
		mov ebx, ecx		// keep the original count to test its low bit
		shr ecx, 1
		jz skipbegcalc
	begqcalc:
		lea edi, [edx+esi]
		sar edx, 16
		mov dword ptr [eax], edx
		lea edx, [edi+esi]
		sar edi, 16
		mov dword ptr [eax+4], edi
		add eax, 8
		dec ecx
		jnz begqcalc
		test ebx, 1
		jz skipbegqcalc2
	skipbegcalc:
		sar edx, 16
		mov dword ptr [eax], edx
	skipbegqcalc2:
	}
}
3365 
// 16-bit variant of qinterpolatedown16: writes c words to a, each being the
// running value d shifted right by 16, with s added after each element.
// Flow: return at once for c == 0; if bit 1 of the destination address is
// set, emit one word first; then emit pairs, combining two results into a
// single 32-bit store; finally emit one more word if a single element remains.
static __inline void qinterpolatedown16short(void *a, int c, int d, int s)
{
	_asm {
		mov eax, a
		mov ecx, c
		mov edx, d
		mov esi, s
		test ecx, ecx
		jz endit
		test al, 2
		jz skipalignit
		mov ebx, edx
		sar ebx, 16
		mov word ptr [eax], bx
		add edx, esi
		add eax, 2
		dec ecx
		jz endit
	skipalignit:
		sub ecx, 2
		jc finishit		// fewer than two elements left
	begqcalc:
		mov ebx, edx
		add edx, esi
		sar ebx, 16
		mov edi, edx
		and edi, 0ffff0000h	// second result stays in the upper half
		add edx, esi
		add ebx, edi
		mov dword ptr [eax], ebx
		add eax, 4
		sub ecx, 2
		jnc begqcalc
		test cl, 1
		jz endit
	finishit:
		mov ebx, edx
		sar ebx, 16
		mov word ptr [eax], bx
	endit:
	}
}
3408 
// Returns a*3, computed as a + a*2 by a single lea.
static __inline int mul3(int a)
{
	_asm {
		mov eax, a
		lea eax, [eax+eax*2]
	}
}
3416 
// Returns a*5, computed as a + a*4 by a single lea.
static __inline int mul5(int a)
{
	_asm {
		mov eax, a
		lea eax, [eax+eax*4]
	}
}
3424 
// Returns a*9, computed as a + a*8 by a single lea.
static __inline int mul9(int a)
{
	_asm {
		mov eax, a
		lea eax, [eax+eax*8]
	}
}
3432 
	//returns a/b, dmval = a%b
	//(unsigned division: edx is cleared before div, so a is zero-extended)
static __inline int divmod(int a, int b)
{
	_asm {
		mov eax, a
		xor edx, edx
		div b
		mov dmval, edx
	}
}
3443 
	//returns a%b, dmval = a/b
	//(unsigned division: edx is cleared before div, so a is zero-extended)
static __inline int moddiv(int a, int b)
{
	_asm {
		mov eax, a
		xor edx, edx
		div b
		mov dmval, eax
		mov eax, edx
	}
}
3455 
// Returns the absolute value of a: eax is negated only when the sign flag
// set by "test eax, eax" shows it is negative.
static __inline int klabs(int a)
{
	_asm {
		mov eax, a
		test eax, eax
		jns skipnegate
		neg eax
	skipnegate:
	}
}
3466 
// Returns the sign of b as -1, 0 or 1, computed without branches.
static __inline int ksgn(int b)
{
	_asm {
		mov ebx, b
		add ebx, ebx		// carry = sign bit of b, ebx = 2*b
		sbb eax, eax		// eax = -1 if b was negative, else 0
		cmp eax, ebx		// carry set only when eax is 0 and 2*b != 0
		adc al, 0		// turn 0 into 1 for positive b
	}
}
3477 
	//eax = (unsigned min)umin(a,b)
	//branch-free: ecx becomes an all-ones mask when a < b (borrow from the
	//sub), so (a-b)&mask + b selects a in that case, else b
static __inline int umin(int a, int b)
{
	_asm {
		mov eax, a
		sub eax, b
		sbb ecx, ecx
		and eax, ecx
		add eax, b
	}
}
3489 
	//eax = (unsigned max)umax(a,b)
	//same trick as umin with the borrow mask inverted by the xor, so the
	//difference is kept only when a >= b
static __inline int umax(int a, int b)
{
	_asm {
		mov eax, a
		sub eax, b
		sbb ecx, ecx
		xor ecx, 0xffffffff
		and eax, ecx
		add eax, b
	}
}
3502 
// Returns the signed minimum of a and b (jl is a signed compare); eax is
// replaced by b unless a is already the smaller value.
static __inline int kmin(int a, int b)
{
	_asm {
		mov eax, a
		mov ebx, b
		cmp eax, ebx
		jl skipit
		mov eax, ebx
	skipit:
	}
}
3514 
// Returns the signed maximum of a and b (jg is a signed compare); eax is
// replaced by b unless a is already the larger value.
static __inline int kmax(int a, int b)
{
	_asm {
		mov eax, a
		mov ebx, b
		cmp eax, ebx
		jg skipit
		mov eax, ebx
	skipit:
	}
}
3526 
// Exchanges the single bytes at a and b, using cl/ch as temporaries.
static __inline void swapchar(void *a, void *b)
{
	_asm {
		mov eax, a
		mov ebx, b
		mov cl, [eax]
		mov ch, [ebx]
		mov [ebx], cl
		mov [eax], ch
	}
}
3538 
// Exchanges the 16-bit words at a and b, using cx/dx as temporaries.
static __inline void swapshort(void *a, void *b)
{
	_asm {
		mov eax, a
		mov ebx, b
		mov cx, [eax]
		mov dx, [ebx]
		mov [ebx], cx
		mov [eax], dx
	}
}
3550 
// Exchanges the 32-bit dwords at a and b, using ecx/edx as temporaries.
static __inline void swaplong(void *a, void *b)
{
	_asm {
		mov eax, a
		mov ebx, b
		mov ecx, [eax]
		mov edx, [ebx]
		mov [ebx], ecx
		mov [eax], edx
	}
}
3562 
// Exchanges c consecutive dwords between the buffers at a and b.
// The counter is only tested after each iteration (dec/jnz), so the loop body
// always runs at least once; c == 0 is not handled as "do nothing".
static __inline void swapbuf4(void *a, void *b, int c)
{
	_asm {
		mov eax, a
		mov ebx, b
		mov ecx, c
	begswap:
		mov esi, [eax]
		mov edi, [ebx]
		mov [ebx], esi
		mov [eax], edi
		add eax, 4
		add ebx, 4
		dec ecx
		jnz short begswap
	}
}
3580 
// Exchanges the 8 bytes at a and b as two dwords (offsets 0 and 4), using
// ecx/edx as temporaries.
static __inline void swap64bit(void *a, void *b)
{
	_asm {
		mov eax, a
		mov ebx, b
		mov ecx, [eax]
		mov edx, [ebx]
		mov [ebx], ecx
		mov ecx, [eax+4]
		mov [eax], edx
		mov edx, [ebx+4]
		mov [ebx+4], ecx
		mov [eax+4], edx
	}
}
3596 
	//swapchar2(ptr1,ptr2,xsiz); is the same as:
	//swapchar(ptr1,ptr2); swapchar(ptr1+1,ptr2+xsiz);
	//The two bytes at ptr1 are read and written back as one 16-bit access.
static __inline void swapchar2(void *a, void *b, int s)
{
	_asm {
		mov eax, a
		mov ebx, b
		mov esi, s
		add esi, ebx		// esi = address b + s
		mov cx, [eax]
		mov dl, [ebx]
		mov [ebx], cl
		mov dh, [esi]
		mov [esi], ch
		mov [eax], dx
	}
}
3614 //}}}
3615 
3616 #else				// _MSC_VER
3617 
3618 //
3619 // Generic C
3620 //
3621 
#define qw(x)	((int64_t)(x))		// quadword cast
#define dw(x)	((int32_t)(x))		// doubleword cast
#define wo(x)	((int16_t)(x))		// word cast
#define by(x)	((int8_t)(x))		// byte cast

// _scaler(a) generates the four fixed-shift helpers for shift count a:
//   mulscale<a>(eax,edx)                  = (eax*edx) >> a
//   divscale<a>(eax,ebx)                  = (eax * 2^a) / ebx
//   dmulscale<a>(eax,edx,esi,edi)         = (eax*edx + esi*edi) >> a
//   tmulscale<a>(eax,edx,ebx,ecx,esi,edi) = (eax*edx + ebx*ecx + esi*edi) >> a
// All intermediates are 64-bit; the result is truncated to 32 bits.
// divscale multiplies by (1 << a) rather than shifting eax itself, because
// left-shifting a negative value is undefined behaviour in C; the result is
// the same for every input the shift handled.
#define _scaler(a) \
static inline int mulscale##a(int eax, int edx) \
{ \
	return dw((qw(eax) * qw(edx)) >> a); \
} \
\
static inline int divscale##a(int eax, int ebx) \
{ \
	return dw((qw(eax) * (qw(1) << a)) / qw(ebx)); \
} \
\
static inline int dmulscale##a(int eax, int edx, int esi, int edi) \
{ \
	return dw(((qw(eax) * qw(edx)) + (qw(esi) * qw(edi))) >> a); \
} \
\
static inline int tmulscale##a(int eax, int edx, int ebx, int ecx, int esi, int edi) \
{ \
	return dw(((qw(eax) * qw(edx)) + (qw(ebx) * qw(ecx)) + (qw(esi) * qw(edi))) >> a); \
} \

_scaler(1)	_scaler(2)	_scaler(3)	_scaler(4)
_scaler(5)	_scaler(6)	_scaler(7)	_scaler(8)
_scaler(9)	_scaler(10)	_scaler(11)	_scaler(12)
_scaler(13)	_scaler(14)	_scaler(15)	_scaler(16)
_scaler(17)	_scaler(18)	_scaler(19)	_scaler(20)
_scaler(21)	_scaler(22)	_scaler(23)	_scaler(24)
_scaler(25)	_scaler(26)	_scaler(27)	_scaler(28)
_scaler(29)	_scaler(30)	_scaler(31)	_scaler(32)
3656 
// Exchanges the single bytes that a and b point at.
static inline void swapchar(void* a, void* b)
{
	int8_t *const lhs = (int8_t *)a;
	int8_t *const rhs = (int8_t *)b;
	const int8_t saved = *rhs;

	*rhs = *lhs;
	*lhs = saved;
}

// Exchanges a[0] with b[0] and a[1] with b[s] (byte offsets).
static inline void swapchar2(void* a, void* b, int s)
{
	int8_t *const lhs = (int8_t *)a;
	int8_t *const rhs = (int8_t *)b;

	swapchar(lhs, rhs);
	swapchar(lhs + 1, rhs + s);
}
// Exchanges the 16-bit values that a and b point at.
static inline void swapshort(void* a, void* b)
{
	int16_t *const lhs = (int16_t *)a;
	int16_t *const rhs = (int16_t *)b;
	const int16_t saved = *rhs;

	*rhs = *lhs;
	*lhs = saved;
}
// Exchanges the 32-bit values that a and b point at.
static inline void swaplong(void* a, void* b)
{
	int32_t *const lhs = (int32_t *)a;
	int32_t *const rhs = (int32_t *)b;
	const int32_t saved = *rhs;

	*rhs = *lhs;
	*lhs = saved;
}
// Exchanges the 64-bit values that a and b point at.
static inline void swap64bit(void* a, void* b)
{
	int64_t *const lhs = (int64_t *)a;
	int64_t *const rhs = (int64_t *)b;
	const int64_t saved = *rhs;

	*rhs = *lhs;
	*lhs = saved;
}
3662 
// Returns the signed byte stored at s.
static inline int8_t readpixel(void* s)
{
	const int8_t *const src = (int8_t *)s;

	return src[0];
}
// Stores the byte a at s.
static inline void drawpixel(void* s, int8_t a)
{
	int8_t *const dst = (int8_t *)s;

	dst[0] = a;
}
// Stores the 16-bit value a at s.
static inline void drawpixels(void* s, int16_t a)
{
	int16_t *const dst = (int16_t *)s;

	dst[0] = a;
}
// Stores the 32-bit value a at s.
static inline void drawpixelses(void* s, int32_t a)
{
	int32_t *const dst = (int32_t *)s;

	dst[0] = a;
}
3667 
// Returns a*3. The product is formed in unsigned arithmetic so that negative
// or very large inputs wrap modulo 2^32 (as the lea-based assembly versions
// do) instead of invoking undefined behaviour through a signed left shift.
static inline int mul3(int a) { return (int)((unsigned int)a * 3u); }
// Returns a*5. The product is formed in unsigned arithmetic so that negative
// or very large inputs wrap modulo 2^32 (as the lea-based assembly versions
// do) instead of invoking undefined behaviour through a signed left shift.
static inline int mul5(int a) { return (int)((unsigned int)a * 5u); }
// Returns a*9. The product is formed in unsigned arithmetic so that negative
// or very large inputs wrap modulo 2^32 (as the lea-based assembly versions
// do) instead of invoking undefined behaviour through a signed left shift.
static inline int mul9(int a) { return (int)((unsigned int)a * 9u); }
3671 
divmod(int a,int b)3672 static inline int divmod(int a, int b) { unsigned int _a=(unsigned int)a, _b=(unsigned int)b; dmval = _a%_b; return _a/_b; }
moddiv(int a,int b)3673 static inline int moddiv(int a, int b) { unsigned int _a=(unsigned int)a, _b=(unsigned int)b; dmval = _a/_b; return _a%_b; }
3674 
// Returns the absolute value of a. The negation is done in unsigned
// arithmetic so that the most negative int is returned unchanged (what the
// assembly "neg" does) instead of overflowing, which is undefined for -a.
static inline int klabs(int a)
{
	if (a < 0) return (int)(0u - (unsigned int)a);
	return a;
}
// Returns the sign of a: 1 for positive, -1 for negative, 0 for zero.
static inline int ksgn(int a)
{
	return (a > 0) - (a < 0);
}
3677 
// Returns whichever of a and b is smaller when both are compared as unsigned.
static inline int umin(int a, int b)
{
	return ((unsigned int)a < (unsigned int)b) ? a : b;
}
// Returns whichever of a and b is larger when both are compared as unsigned.
static inline int umax(int a, int b)
{
	return ((unsigned int)a < (unsigned int)b) ? b : a;
}
// Returns the smaller of a and b using a signed comparison.
static inline int kmin(int a, int b)
{
	return (a < b) ? a : b;
}
// Returns the larger of a and b using a signed comparison.
static inline int kmax(int a, int b)
{
	return (a < b) ? b : a;
}
3682 
// Returns eax squared, truncated to 32 bits. The multiplication is done in
// unsigned arithmetic so that large inputs wrap modulo 2^32 (as the imul in
// the assembly versions does) instead of causing signed-overflow undefined
// behaviour.
static inline int sqr(int eax)
{
	const unsigned int v = (unsigned int)eax;

	return (int)(v * v);
}
// Returns (eax*edx)/ecx, using a 64-bit intermediate product and truncating
// the quotient to 32 bits.
static inline int scale(int eax, int edx, int ecx)
{
	const int64_t product = qw(eax) * qw(edx);

	return dw(product / qw(ecx));
}
// Returns (eax*edx) >> ecx, using a 64-bit intermediate product and
// truncating the result to 32 bits. The shift count is first narrowed to a
// signed byte.
static inline int mulscale(int eax, int edx, int ecx)
{
	const int64_t product = qw(eax) * qw(edx);

	return dw(product >> by(ecx));
}
// Returns (eax * 2^ecx) / ebx, using a 64-bit dividend and truncating the
// quotient to 32 bits. The shift count is first narrowed to a signed byte.
// The dividend is scaled by multiplying with (1 << count) rather than by
// shifting eax itself, because left-shifting a negative value is undefined
// behaviour in C; the result is the same for every input the shift handled.
static inline int divscale(int eax, int ebx, int ecx)
{
	const int64_t factor = (int64_t)1 << (int8_t)ecx;

	return (int32_t)(((int64_t)eax * factor) / (int64_t)ebx);
}
// Returns (eax*edx + esi*edi) >> ecx, with 64-bit intermediates and the
// result truncated to 32 bits. The shift count is first narrowed to a signed
// byte.
static inline int dmulscale(int eax, int edx, int esi, int edi, int ecx)
{
	const int64_t first = qw(eax) * qw(edx);
	const int64_t second = qw(esi) * qw(edi);

	return dw((first + second) >> by(ecx));
}
3688 
// Returns (a*d) >> c computed in 64 bits and clamped to the range
// [INT_MIN, INT_MAX]. (Courtesy of Ken.)
static inline int boundmulscale(int a, int d, int c)
{
	const int64_t scaled = ((int64_t)a * (int64_t)d) >> c;

	if (scaled < INT_MIN) return INT_MIN;
	if (scaled >= INT_MAX) return INT_MAX;
	return (int)scaled;
}
3697 
3698 #undef qw
3699 #undef dw
3700 #undef wo
3701 #undef by
3702 #undef _scaler
3703 
// Out-of-line routines for the generic C build. Only prototypes appear here,
// so the definitions have to come from another translation unit.

// Presumably the C counterparts of the assembly versions above: write num
// 32-bit (resp. 16-bit) values of val >> 16 to bufptr, adding add to val
// after each element -- confirm against the implementation.
void qinterpolatedown16 (void *bufptr, int num, int val, int add);
void qinterpolatedown16short (void *bufptr, int num, int val, int add);

// In the assembly versions c counts 32-bit dwords for these three:
// clearbuf fills with a, copybuf copies s to d, swapbuf4 exchanges a and b.
void clearbuf(void* d, int c, int a);
void copybuf(void* s, void* d, int c);
void swapbuf4(void* a, void* b, int c);

// In the assembly versions c counts bytes for these three; copybufreverse
// reads its source downwards while writing the destination upwards.
void clearbufbyte(void *D, int c, int a);
void copybufbyte(void *S, void *D, int c);
void copybufreverse(void *S, void *D, int c);
3714 
3715 #endif
3716 
3717 #ifdef __cplusplus
3718 }
3719 #endif
3720 
3721 #endif // __pragmas_h__
3722 
3723