1 // This file has been modified from Ken Silverman's original release
2 // by Jonathon Fowler (jf@jonof.id.au)
3
4
5 #ifndef __pragmas_h__
6 #define __pragmas_h__
7
8 #ifdef __cplusplus
9 extern "C" {
10 #endif
11
// Declared here only; the definition lives outside this header.  None of the
// inline-assembly macros visible in this part of the file reference it.
// NOTE(review): purpose cannot be determined from this header alone -- confirm
// against the translation unit that defines it.
extern int dmval;
13
14 #if defined(__GNUC__) && defined(__i386__) && USE_ASM
15
16 //
17 // GCC Inline Assembler version
18 //
19
20 //{{{
21
// Out-of-line helpers: only prototypes are given here, the bodies live outside
// this header.  (Original author's note: "maybe one day I'll make these into
// macros".)
// NOTE(review): the per-function remarks below are inferred from the names and
// parameter letters only -- confirm against the implementations.
int boundmulscale(int a, int b, int c);        // presumably a*b scaled down by c with clamping -- verify
void clearbufbyte(void *D, int c, int a);      // presumably fills c bytes at D using value a -- verify
void copybufbyte(void *S, void *D, int c);     // presumably copies c bytes from S to D -- verify
void copybufreverse(void *S, void *D, int c);  // presumably copies c bytes from S to D in reverse order -- verify
27
28
// sqr(a): a*a computed with a 32-bit IMUL, i.e. the product truncated to
// 32 bits.  The argument is evaluated exactly once.  The temporary uses a
// double-underscore name so it cannot capture an identifier in the caller's
// expression.
#define sqr(a) \
	({ int __sq=(a); \
	   __asm__ __volatile__ ( \
		"imull %0, %0" \
		: "+q" (__sq) \
		: /* value is read and written through operand 0 */ \
		: "cc"); \
	   __sq; })
36
// scale(a,d,c): (a*d)/c, where the product is kept as a signed 64-bit value
// in EDX:EAX before the signed divide, so a*d may exceed 32 bits as long as
// the quotient fits.  Arguments are evaluated once each, in the order a, d, c.
// IDIV raises a divide error when c is zero or the quotient does not fit in
// 32 bits.
#define scale(a,d,c) \
	({ int __sc_lo=(a), __sc_hi=(d), __sc_div=(c); \
	   __asm__ __volatile__ ( \
		"imull %1; idivl %2" \
		: "+a" (__sc_lo), "+d" (__sc_hi) \
		: "c" (__sc_div) \
		: "cc"); \
	   __sc_lo; })
43
// mulscale(a,d,c): low 32 bits of the signed 64-bit product a*d shifted right
// by c.  The shift count is taken from CL, so the hardware only uses its low
// five bits.  Arguments are evaluated once each, in the order a, d, c.
#define mulscale(a,d,c) \
	({ int __ms_lo=(a), __ms_hi=(d), __ms_sh=(c); \
	   __asm__ __volatile__ ( \
		"imull %1; shrdl %%cl, %1, %0" \
		: "+a" (__ms_lo), "+d" (__ms_hi) \
		: "c" (__ms_sh) \
		: "cc"); \
	   __ms_lo; })
// Fixed-shift multiply family.
//
// mulscaleN(a,d), N = 1..31, yields the low 32 bits of the signed 64-bit
// product a*d shifted right by N.  Both arguments are evaluated exactly once.
//
// Every variant shares the single body below; the only thing that differs is
// the immediate shift count, which is pasted into the instruction text by
// stringification, so each wrapper still assembles to
// "imull %edx; shrdl $N, %edx, %eax".  The temporaries use double-underscore
// names so they cannot capture identifiers in the caller's expressions.
#define PRAGMAS_MULSCALE_IMM(a,d,sh) \
	({ int __a=(a), __d=(d); \
	   __asm__ __volatile__ ( \
		"imull %%edx; shrdl $" #sh ", %%edx, %%eax" \
		: "+a" (__a), "+d" (__d) \
		: /* both operands are read and written in place */ \
		: "cc"); \
	   __a; })

#define mulscale1(a,d)  PRAGMAS_MULSCALE_IMM(a,d,1)
#define mulscale2(a,d)  PRAGMAS_MULSCALE_IMM(a,d,2)
#define mulscale3(a,d)  PRAGMAS_MULSCALE_IMM(a,d,3)
#define mulscale4(a,d)  PRAGMAS_MULSCALE_IMM(a,d,4)
#define mulscale5(a,d)  PRAGMAS_MULSCALE_IMM(a,d,5)
#define mulscale6(a,d)  PRAGMAS_MULSCALE_IMM(a,d,6)
#define mulscale7(a,d)  PRAGMAS_MULSCALE_IMM(a,d,7)
#define mulscale8(a,d)  PRAGMAS_MULSCALE_IMM(a,d,8)
#define mulscale9(a,d)  PRAGMAS_MULSCALE_IMM(a,d,9)
#define mulscale10(a,d) PRAGMAS_MULSCALE_IMM(a,d,10)
#define mulscale11(a,d) PRAGMAS_MULSCALE_IMM(a,d,11)
#define mulscale12(a,d) PRAGMAS_MULSCALE_IMM(a,d,12)
#define mulscale13(a,d) PRAGMAS_MULSCALE_IMM(a,d,13)
#define mulscale14(a,d) PRAGMAS_MULSCALE_IMM(a,d,14)
#define mulscale15(a,d) PRAGMAS_MULSCALE_IMM(a,d,15)
#define mulscale16(a,d) PRAGMAS_MULSCALE_IMM(a,d,16)
#define mulscale17(a,d) PRAGMAS_MULSCALE_IMM(a,d,17)
#define mulscale18(a,d) PRAGMAS_MULSCALE_IMM(a,d,18)
#define mulscale19(a,d) PRAGMAS_MULSCALE_IMM(a,d,19)
#define mulscale20(a,d) PRAGMAS_MULSCALE_IMM(a,d,20)
#define mulscale21(a,d) PRAGMAS_MULSCALE_IMM(a,d,21)
#define mulscale22(a,d) PRAGMAS_MULSCALE_IMM(a,d,22)
#define mulscale23(a,d) PRAGMAS_MULSCALE_IMM(a,d,23)
#define mulscale24(a,d) PRAGMAS_MULSCALE_IMM(a,d,24)
#define mulscale25(a,d) PRAGMAS_MULSCALE_IMM(a,d,25)
#define mulscale26(a,d) PRAGMAS_MULSCALE_IMM(a,d,26)
#define mulscale27(a,d) PRAGMAS_MULSCALE_IMM(a,d,27)
#define mulscale28(a,d) PRAGMAS_MULSCALE_IMM(a,d,28)
#define mulscale29(a,d) PRAGMAS_MULSCALE_IMM(a,d,29)
#define mulscale30(a,d) PRAGMAS_MULSCALE_IMM(a,d,30)
#define mulscale31(a,d) PRAGMAS_MULSCALE_IMM(a,d,31)
// mulscale32(a,d): the high 32 bits of the signed 64-bit product a*d, i.e.
// the product shifted right by 32.  No shift instruction is needed: the
// one-operand IMUL already leaves that half in EDX, which is what is
// returned.  Both arguments are evaluated exactly once.
#define mulscale32(a,d) \
	({ int __m32_lo=(a), __m32_hi=(d); \
	   __asm__ __volatile__ ( \
		"imull %1" \
		: "+a" (__m32_lo), "+d" (__m32_hi) \
		: /* both operands are read and written in place */ \
		: "cc"); \
	   __m32_hi; })
242
// dmulscale(a,d,S,D,c): low 32 bits of (a*d + S*D) shifted right by c, with
// both products and their sum held as 64-bit values.  The shift count is
// taken from CL.  Arguments are evaluated once each, in the order
// a, d, S, D, c.
//
// Register use: EAX/EDX carry the running product, ESI is overwritten with
// the high half of the first product, and EBX is scratch for its low half
// (declared as a clobber).
#define dmulscale(a,d,S,D,c) \
	({ int __dm_a=(a), __dm_d=(d), __dm_S=(S), __dm_D=(D), __dm_c=(c); \
	   __asm__ __volatile__ ( \
		"imull %%edx; " \
		"movl %%eax, %%ebx; " \
		"movl %%esi, %%eax; " \
		"movl %%edx, %%esi; " \
		"imull %%edi; " \
		"addl %%ebx, %%eax; " \
		"adcl %%esi, %%edx; " \
		"shrdl %%cl, %%edx, %%eax" \
		: "+a" (__dm_a), "+d" (__dm_d), "+S" (__dm_S) \
		: "D" (__dm_D), "c" (__dm_c) \
		: "ebx", "cc"); \
	   __dm_a; })
// Fixed-shift dual multiply family.
//
// dmulscaleN(a,d,S,D), N = 1..31, yields the low 32 bits of (a*d + S*D)
// shifted right by N.  Both products and their sum are held as 64-bit values.
// Arguments are evaluated once each, in the order a, d, S, D.
//
// Every variant shares the single body below; the immediate shift count is
// pasted into the instruction text by stringification, so each wrapper emits
// the same instruction sequence as a hand-written copy would.
//
// Register use: EAX/EDX carry the running product, ESI is overwritten with
// the high half of the first product, and EBX is scratch for its low half
// (declared as a clobber).  The temporaries use double-underscore names so
// they cannot capture identifiers in the caller's expressions.
#define PRAGMAS_DMULSCALE_IMM(a,d,S,D,sh) \
	({ int __a=(a), __d=(d), __S=(S), __D=(D); \
	   __asm__ __volatile__ ( \
		"imull %%edx; movl %%eax, %%ebx; movl %%esi, %%eax; movl %%edx, %%esi; " \
		"imull %%edi; addl %%ebx, %%eax; adcl %%esi, %%edx; " \
		"shrdl $" #sh ", %%edx, %%eax" \
		: "+a" (__a), "+d" (__d), "+S" (__S) \
		: "D" (__D) \
		: "ebx", "cc"); \
	   __a; })

#define dmulscale1(a,d,S,D)  PRAGMAS_DMULSCALE_IMM(a,d,S,D,1)
#define dmulscale2(a,d,S,D)  PRAGMAS_DMULSCALE_IMM(a,d,S,D,2)
#define dmulscale3(a,d,S,D)  PRAGMAS_DMULSCALE_IMM(a,d,S,D,3)
#define dmulscale4(a,d,S,D)  PRAGMAS_DMULSCALE_IMM(a,d,S,D,4)
#define dmulscale5(a,d,S,D)  PRAGMAS_DMULSCALE_IMM(a,d,S,D,5)
#define dmulscale6(a,d,S,D)  PRAGMAS_DMULSCALE_IMM(a,d,S,D,6)
#define dmulscale7(a,d,S,D)  PRAGMAS_DMULSCALE_IMM(a,d,S,D,7)
#define dmulscale8(a,d,S,D)  PRAGMAS_DMULSCALE_IMM(a,d,S,D,8)
#define dmulscale9(a,d,S,D)  PRAGMAS_DMULSCALE_IMM(a,d,S,D,9)
#define dmulscale10(a,d,S,D) PRAGMAS_DMULSCALE_IMM(a,d,S,D,10)
#define dmulscale11(a,d,S,D) PRAGMAS_DMULSCALE_IMM(a,d,S,D,11)
#define dmulscale12(a,d,S,D) PRAGMAS_DMULSCALE_IMM(a,d,S,D,12)
#define dmulscale13(a,d,S,D) PRAGMAS_DMULSCALE_IMM(a,d,S,D,13)
#define dmulscale14(a,d,S,D) PRAGMAS_DMULSCALE_IMM(a,d,S,D,14)
#define dmulscale15(a,d,S,D) PRAGMAS_DMULSCALE_IMM(a,d,S,D,15)
#define dmulscale16(a,d,S,D) PRAGMAS_DMULSCALE_IMM(a,d,S,D,16)
#define dmulscale17(a,d,S,D) PRAGMAS_DMULSCALE_IMM(a,d,S,D,17)
#define dmulscale18(a,d,S,D) PRAGMAS_DMULSCALE_IMM(a,d,S,D,18)
#define dmulscale19(a,d,S,D) PRAGMAS_DMULSCALE_IMM(a,d,S,D,19)
#define dmulscale20(a,d,S,D) PRAGMAS_DMULSCALE_IMM(a,d,S,D,20)
#define dmulscale21(a,d,S,D) PRAGMAS_DMULSCALE_IMM(a,d,S,D,21)
#define dmulscale22(a,d,S,D) PRAGMAS_DMULSCALE_IMM(a,d,S,D,22)
#define dmulscale23(a,d,S,D) PRAGMAS_DMULSCALE_IMM(a,d,S,D,23)
#define dmulscale24(a,d,S,D) PRAGMAS_DMULSCALE_IMM(a,d,S,D,24)
#define dmulscale25(a,d,S,D) PRAGMAS_DMULSCALE_IMM(a,d,S,D,25)
#define dmulscale26(a,d,S,D) PRAGMAS_DMULSCALE_IMM(a,d,S,D,26)
#define dmulscale27(a,d,S,D) PRAGMAS_DMULSCALE_IMM(a,d,S,D,27)
#define dmulscale28(a,d,S,D) PRAGMAS_DMULSCALE_IMM(a,d,S,D,28)
#define dmulscale29(a,d,S,D) PRAGMAS_DMULSCALE_IMM(a,d,S,D,29)
#define dmulscale30(a,d,S,D) PRAGMAS_DMULSCALE_IMM(a,d,S,D,30)
#define dmulscale31(a,d,S,D) PRAGMAS_DMULSCALE_IMM(a,d,S,D,31)
// dmulscale32(a,d,S,D): the high 32 bits of the 64-bit sum a*d + S*D, i.e.
// the sum shifted right by 32.  No shift instruction is needed: after the
// add-with-carry the wanted half is already in EDX, which is what is
// returned.  Arguments are evaluated once each, in the order a, d, S, D.
//
// Register use: ESI is overwritten with the high half of the first product
// and EBX is scratch for its low half (declared as a clobber).
#define dmulscale32(a,d,S,D) \
	({ int __d32_a=(a), __d32_d=(d), __d32_S=(S), __d32_D=(D); \
	   __asm__ __volatile__ ( \
		"imull %%edx; " \
		"movl %%eax, %%ebx; " \
		"movl %%esi, %%eax; " \
		"movl %%edx, %%esi; " \
		"imull %%edi; " \
		"addl %%ebx, %%eax; " \
		"adcl %%esi, %%edx" \
		: "+a" (__d32_a), "+d" (__d32_d), "+S" (__d32_S) \
		: "D" (__d32_D) \
		: "ebx", "cc"); \
	   __d32_d; })
474
// Fixed-shift triple multiply family.
//
// tmulscaleN(a,d,b,c,S,D) yields the low 32 bits of (a*d + b*c + S*D)
// shifted right by N.  The three products and their sum are held as 64-bit
// values.  Arguments are evaluated once each, in the order a, d, b, c, S, D.
//
// The variants defined through the helper below share one body; the
// immediate shift count is pasted into the instruction text by
// stringification, so each wrapper emits the same instruction sequence as a
// hand-written copy would.
//
// Register use: the first product is parked in ECX:EBX via the two XCHGs
// (which at the same time bring b and c into EAX/EDX), the second product is
// accumulated there, and the third product is added back in EDX:EAX.  EBX and
// ECX are therefore read-write operands; ESI and EDI are only read.  The
// temporaries use double-underscore names so they cannot capture identifiers
// in the caller's expressions.
#define PRAGMAS_TMULSCALE_IMM(a,d,b,c,S,D,sh) \
	({ int __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
	   __asm__ __volatile__ ( \
		"imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
		"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
		"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; " \
		"shrdl $" #sh ", %%edx, %%eax" \
		: "+a" (__a), "+d" (__d), "+b" (__b), "+c" (__c) \
		: "S" (__S), "D" (__D) \
		: "cc"); \
	   __a; })

#define tmulscale1(a,d,b,c,S,D)  PRAGMAS_TMULSCALE_IMM(a,d,b,c,S,D,1)
#define tmulscale2(a,d,b,c,S,D)  PRAGMAS_TMULSCALE_IMM(a,d,b,c,S,D,2)
#define tmulscale3(a,d,b,c,S,D)  PRAGMAS_TMULSCALE_IMM(a,d,b,c,S,D,3)
#define tmulscale4(a,d,b,c,S,D)  PRAGMAS_TMULSCALE_IMM(a,d,b,c,S,D,4)
#define tmulscale5(a,d,b,c,S,D)  PRAGMAS_TMULSCALE_IMM(a,d,b,c,S,D,5)
#define tmulscale6(a,d,b,c,S,D)  PRAGMAS_TMULSCALE_IMM(a,d,b,c,S,D,6)
#define tmulscale7(a,d,b,c,S,D)  PRAGMAS_TMULSCALE_IMM(a,d,b,c,S,D,7)
#define tmulscale8(a,d,b,c,S,D)  PRAGMAS_TMULSCALE_IMM(a,d,b,c,S,D,8)
#define tmulscale9(a,d,b,c,S,D)  PRAGMAS_TMULSCALE_IMM(a,d,b,c,S,D,9)
#define tmulscale10(a,d,b,c,S,D) PRAGMAS_TMULSCALE_IMM(a,d,b,c,S,D,10)
#define tmulscale11(a,d,b,c,S,D) PRAGMAS_TMULSCALE_IMM(a,d,b,c,S,D,11)
#define tmulscale12(a,d,b,c,S,D) PRAGMAS_TMULSCALE_IMM(a,d,b,c,S,D,12)
#define tmulscale13(a,d,b,c,S,D) PRAGMAS_TMULSCALE_IMM(a,d,b,c,S,D,13)
#define tmulscale14(a,d,b,c,S,D) PRAGMAS_TMULSCALE_IMM(a,d,b,c,S,D,14)
#define tmulscale15(a,d,b,c,S,D) PRAGMAS_TMULSCALE_IMM(a,d,b,c,S,D,15)
#define tmulscale16(a,d,b,c,S,D) PRAGMAS_TMULSCALE_IMM(a,d,b,c,S,D,16)
#define tmulscale17(a,d,b,c,S,D) PRAGMAS_TMULSCALE_IMM(a,d,b,c,S,D,17)
#define tmulscale18(a,d,b,c,S,D) PRAGMAS_TMULSCALE_IMM(a,d,b,c,S,D,18)
#define tmulscale19(a,d,b,c,S,D) PRAGMAS_TMULSCALE_IMM(a,d,b,c,S,D,19)
#define tmulscale20(a,d,b,c,S,D) PRAGMAS_TMULSCALE_IMM(a,d,b,c,S,D,20)
#define tmulscale21(a,d,b,c,S,D) PRAGMAS_TMULSCALE_IMM(a,d,b,c,S,D,21)
#define tmulscale22(a,d,b,c,S,D) PRAGMAS_TMULSCALE_IMM(a,d,b,c,S,D,22)
#define tmulscale23(a,d,b,c,S,D) PRAGMAS_TMULSCALE_IMM(a,d,b,c,S,D,23)
#define tmulscale24(a,d,b,c,S,D) PRAGMAS_TMULSCALE_IMM(a,d,b,c,S,D,24)
#define tmulscale25(a,d,b,c,S,D) PRAGMAS_TMULSCALE_IMM(a,d,b,c,S,D,25)
#define tmulscale26(a,d,b,c,S,D) PRAGMAS_TMULSCALE_IMM(a,d,b,c,S,D,26)
683 #define tmulscale27(a,d,b,c,S,D) \
684 ({ int __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
685 __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
686 "imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
687 "imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $27, %%edx, %%eax" \
688 : "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
689 : "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
690 __a; })
691 #define tmulscale28(a,d,b,c,S,D) \
692 ({ int __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
693 __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
694 "imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
695 "imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $28, %%edx, %%eax" \
696 : "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
697 : "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
698 __a; })
699 #define tmulscale29(a,d,b,c,S,D) \
700 ({ int __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
701 __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
702 "imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
703 "imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $29, %%edx, %%eax" \
704 : "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
705 : "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
706 __a; })
707 #define tmulscale30(a,d,b,c,S,D) \
708 ({ int __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
709 __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
710 "imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
711 "imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $30, %%edx, %%eax" \
712 : "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
713 : "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
714 __a; })
715 #define tmulscale31(a,d,b,c,S,D) \
716 ({ int __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
717 __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
718 "imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
719 "imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $31, %%edx, %%eax" \
720 : "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
721 : "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
722 __a; })
723 #define tmulscale32(a,d,b,c,S,D) \
724 ({ int __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
725 __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
726 "imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
727 "imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx" \
728 : "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
729 : "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
730 __d; })
731
// divscale(a,b,c): fixed-point divide, (a << c) / b using a 64-bit dividend.
// The dividend is assembled in edx:eax: eax = a << c, and edx = a shifted
// right arithmetically by (32 - c) -- negb turns cl into -c and sarl only
// uses the low five bits of the count.  idivl then divides by b (ebx) and
// leaves the quotient in eax, which is the value of the expression.
// ecx is a read-write operand ("+c") because negb overwrites cl; declaring
// it input-only would let the compiler assume it still holds c afterwards.
// NOTE(review): with c == 0 the sarl count is also 0, so edx ends up as a
// copy of a rather than its sign extension; callers presumably pass 1..31
// -- confirm.
#define divscale(a,b,c) \
	({ int __a=(a), __b=(b), __c=(c); \
	__asm__ __volatile__ ("movl %%eax, %%edx; shll %%cl, %%eax; negb %%cl; sarl %%cl, %%edx; idivl %%ebx" \
		: "+a" (__a), "+c" (__c) : "b" (__b) : "edx", "cc"); \
	__a; })
// divscale1 .. divscale31: fixed-point divide with a constant shift,
// (a << N) / b using a 64-bit dividend in edx:eax and a signed idivl by
// b (ebx).  The quotient left in eax is the value of the expression; edx is
// declared clobbered.
//
// divscale1 doubles eax with addl; the carry out is the old sign bit, and
// sbbl turns it into 0 or -1 in edx, i.e. the sign extension.
#define divscale1(a,b) \
	({ int __a=(a), __b=(b); \
	__asm__ __volatile__ ("addl %%eax, %%eax; sbbl %%edx, %%edx; idivl %%ebx" \
		: "=a" (__a) : "a" (__a), "b" (__b) : "edx", "cc"); \
	__a; })
// divscale2 and divscale3 form the low dword with a scaled-index leal
// (eax*4, eax*8); the high dword is a shifted right arithmetically by 32-N.
#define divscale2(a,b) \
	({ int __a=(a), __b=(b); \
	__asm__ __volatile__ ("movl %%eax, %%edx; sarl $30, %%edx; leal (,%%eax,4), %%eax; idivl %%ebx" \
		: "=a" (__a) : "a" (__a), "b" (__b) : "edx", "cc"); \
	__a; })
#define divscale3(a,b) \
	({ int __a=(a), __b=(b); \
	__asm__ __volatile__ ("movl %%eax, %%edx; sarl $29, %%edx; leal (,%%eax,8), %%eax; idivl %%ebx" \
		: "=a" (__a) : "a" (__a), "b" (__b) : "edx", "cc"); \
	__a; })
// divscale4 .. divscale31 all use the same shape: edx = a sar (32-N),
// eax = a shl N, then idivl.
#define divscale4(a,b) \
	({ int __a=(a), __b=(b); \
	__asm__ __volatile__ ("movl %%eax, %%edx; sarl $28, %%edx; shll $4, %%eax; idivl %%ebx" \
		: "=a" (__a) : "a" (__a), "b" (__b) : "edx", "cc"); \
	__a; })
#define divscale5(a,b) \
	({ int __a=(a), __b=(b); \
	__asm__ __volatile__ ("movl %%eax, %%edx; sarl $27, %%edx; shll $5, %%eax; idivl %%ebx" \
		: "=a" (__a) : "a" (__a), "b" (__b) : "edx", "cc"); \
	__a; })
#define divscale6(a,b) \
	({ int __a=(a), __b=(b); \
	__asm__ __volatile__ ("movl %%eax, %%edx; sarl $26, %%edx; shll $6, %%eax; idivl %%ebx" \
		: "=a" (__a) : "a" (__a), "b" (__b) : "edx", "cc"); \
	__a; })
#define divscale7(a,b) \
	({ int __a=(a), __b=(b); \
	__asm__ __volatile__ ("movl %%eax, %%edx; sarl $25, %%edx; shll $7, %%eax; idivl %%ebx" \
		: "=a" (__a) : "a" (__a), "b" (__b) : "edx", "cc"); \
	__a; })
#define divscale8(a,b) \
	({ int __a=(a), __b=(b); \
	__asm__ __volatile__ ("movl %%eax, %%edx; sarl $24, %%edx; shll $8, %%eax; idivl %%ebx" \
		: "=a" (__a) : "a" (__a), "b" (__b) : "edx", "cc"); \
	__a; })
#define divscale9(a,b) \
	({ int __a=(a), __b=(b); \
	__asm__ __volatile__ ("movl %%eax, %%edx; sarl $23, %%edx; shll $9, %%eax; idivl %%ebx" \
		: "=a" (__a) : "a" (__a), "b" (__b) : "edx", "cc"); \
	__a; })
#define divscale10(a,b) \
	({ int __a=(a), __b=(b); \
	__asm__ __volatile__ ("movl %%eax, %%edx; sarl $22, %%edx; shll $10, %%eax; idivl %%ebx" \
		: "=a" (__a) : "a" (__a), "b" (__b) : "edx", "cc"); \
	__a; })
#define divscale11(a,b) \
	({ int __a=(a), __b=(b); \
	__asm__ __volatile__ ("movl %%eax, %%edx; sarl $21, %%edx; shll $11, %%eax; idivl %%ebx" \
		: "=a" (__a) : "a" (__a), "b" (__b) : "edx", "cc"); \
	__a; })
#define divscale12(a,b) \
	({ int __a=(a), __b=(b); \
	__asm__ __volatile__ ("movl %%eax, %%edx; sarl $20, %%edx; shll $12, %%eax; idivl %%ebx" \
		: "=a" (__a) : "a" (__a), "b" (__b) : "edx", "cc"); \
	__a; })
#define divscale13(a,b) \
	({ int __a=(a), __b=(b); \
	__asm__ __volatile__ ("movl %%eax, %%edx; sarl $19, %%edx; shll $13, %%eax; idivl %%ebx" \
		: "=a" (__a) : "a" (__a), "b" (__b) : "edx", "cc"); \
	__a; })
#define divscale14(a,b) \
	({ int __a=(a), __b=(b); \
	__asm__ __volatile__ ("movl %%eax, %%edx; sarl $18, %%edx; shll $14, %%eax; idivl %%ebx" \
		: "=a" (__a) : "a" (__a), "b" (__b) : "edx", "cc"); \
	__a; })
#define divscale15(a,b) \
	({ int __a=(a), __b=(b); \
	__asm__ __volatile__ ("movl %%eax, %%edx; sarl $17, %%edx; shll $15, %%eax; idivl %%ebx" \
		: "=a" (__a) : "a" (__a), "b" (__b) : "edx", "cc"); \
	__a; })
#define divscale16(a,b) \
	({ int __a=(a), __b=(b); \
	__asm__ __volatile__ ("movl %%eax, %%edx; sarl $16, %%edx; shll $16, %%eax; idivl %%ebx" \
		: "=a" (__a) : "a" (__a), "b" (__b) : "edx", "cc"); \
	__a; })
#define divscale17(a,b) \
	({ int __a=(a), __b=(b); \
	__asm__ __volatile__ ("movl %%eax, %%edx; sarl $15, %%edx; shll $17, %%eax; idivl %%ebx" \
		: "=a" (__a) : "a" (__a), "b" (__b) : "edx", "cc"); \
	__a; })
#define divscale18(a,b) \
	({ int __a=(a), __b=(b); \
	__asm__ __volatile__ ("movl %%eax, %%edx; sarl $14, %%edx; shll $18, %%eax; idivl %%ebx" \
		: "=a" (__a) : "a" (__a), "b" (__b) : "edx", "cc"); \
	__a; })
#define divscale19(a,b) \
	({ int __a=(a), __b=(b); \
	__asm__ __volatile__ ("movl %%eax, %%edx; sarl $13, %%edx; shll $19, %%eax; idivl %%ebx" \
		: "=a" (__a) : "a" (__a), "b" (__b) : "edx", "cc"); \
	__a; })
#define divscale20(a,b) \
	({ int __a=(a), __b=(b); \
	__asm__ __volatile__ ("movl %%eax, %%edx; sarl $12, %%edx; shll $20, %%eax; idivl %%ebx" \
		: "=a" (__a) : "a" (__a), "b" (__b) : "edx", "cc"); \
	__a; })
#define divscale21(a,b) \
	({ int __a=(a), __b=(b); \
	__asm__ __volatile__ ("movl %%eax, %%edx; sarl $11, %%edx; shll $21, %%eax; idivl %%ebx" \
		: "=a" (__a) : "a" (__a), "b" (__b) : "edx", "cc"); \
	__a; })
#define divscale22(a,b) \
	({ int __a=(a), __b=(b); \
	__asm__ __volatile__ ("movl %%eax, %%edx; sarl $10, %%edx; shll $22, %%eax; idivl %%ebx" \
		: "=a" (__a) : "a" (__a), "b" (__b) : "edx", "cc"); \
	__a; })
#define divscale23(a,b) \
	({ int __a=(a), __b=(b); \
	__asm__ __volatile__ ("movl %%eax, %%edx; sarl $9, %%edx; shll $23, %%eax; idivl %%ebx" \
		: "=a" (__a) : "a" (__a), "b" (__b) : "edx", "cc"); \
	__a; })
#define divscale24(a,b) \
	({ int __a=(a), __b=(b); \
	__asm__ __volatile__ ("movl %%eax, %%edx; sarl $8, %%edx; shll $24, %%eax; idivl %%ebx" \
		: "=a" (__a) : "a" (__a), "b" (__b) : "edx", "cc"); \
	__a; })
#define divscale25(a,b) \
	({ int __a=(a), __b=(b); \
	__asm__ __volatile__ ("movl %%eax, %%edx; sarl $7, %%edx; shll $25, %%eax; idivl %%ebx" \
		: "=a" (__a) : "a" (__a), "b" (__b) : "edx", "cc"); \
	__a; })
#define divscale26(a,b) \
	({ int __a=(a), __b=(b); \
	__asm__ __volatile__ ("movl %%eax, %%edx; sarl $6, %%edx; shll $26, %%eax; idivl %%ebx" \
		: "=a" (__a) : "a" (__a), "b" (__b) : "edx", "cc"); \
	__a; })
#define divscale27(a,b) \
	({ int __a=(a), __b=(b); \
	__asm__ __volatile__ ("movl %%eax, %%edx; sarl $5, %%edx; shll $27, %%eax; idivl %%ebx" \
		: "=a" (__a) : "a" (__a), "b" (__b) : "edx", "cc"); \
	__a; })
#define divscale28(a,b) \
	({ int __a=(a), __b=(b); \
	__asm__ __volatile__ ("movl %%eax, %%edx; sarl $4, %%edx; shll $28, %%eax; idivl %%ebx" \
		: "=a" (__a) : "a" (__a), "b" (__b) : "edx", "cc"); \
	__a; })
#define divscale29(a,b) \
	({ int __a=(a), __b=(b); \
	__asm__ __volatile__ ("movl %%eax, %%edx; sarl $3, %%edx; shll $29, %%eax; idivl %%ebx" \
		: "=a" (__a) : "a" (__a), "b" (__b) : "edx", "cc"); \
	__a; })
#define divscale30(a,b) \
	({ int __a=(a), __b=(b); \
	__asm__ __volatile__ ("movl %%eax, %%edx; sarl $2, %%edx; shll $30, %%eax; idivl %%ebx" \
		: "=a" (__a) : "a" (__a), "b" (__b) : "edx", "cc"); \
	__a; })
#define divscale31(a,b) \
	({ int __a=(a), __b=(b); \
	__asm__ __volatile__ ("movl %%eax, %%edx; sarl $1, %%edx; shll $31, %%eax; idivl %%ebx" \
		: "=a" (__a) : "a" (__a), "b" (__b) : "edx", "cc"); \
	__a; })
// divscale32(d,b): divides the 64-bit value whose high dword is d and whose
// low dword is zero by b, and evaluates to the 32-bit quotient.
// eax is cleared inside the asm, so only edx (d) and ebx (b) are inputs.
#define divscale32(d,b) \
	({ int __hi=(d), __den=(b), __quot; \
	__asm__ __volatile__ ( \
		"xorl %%eax, %%eax; " \
		"idivl %%ebx" \
		: "=a" (__quot), "=d" (__hi) \
		: "1" (__hi), "b" (__den) \
		: "cc"); \
	__quot; })
897
// readpixel(D): loads the byte stored at address D and evaluates to it as
// an int in the range 0..255.
// movzbl zero-extends the byte so every bit of the result is defined (a
// bare byte move into al leaves the upper 24 bits of eax unspecified).
// The "memory" clobber tells the compiler the asm reads memory it cannot
// see, so pending stores to the pixel are completed before the load.
#define readpixel(D) \
	({ void *__D=(D); int __a; \
	__asm__ __volatile__ ("movzbl (%1), %0" \
		: "=r" (__a) : "r" (__D) : "memory"); \
	__a; })
// drawpixel(D,a): stores the low byte of a (al) at address D (edi).
// The expression evaluates to 0; "memory" is clobbered because the store is
// not described by an operand.
#define drawpixel(D,a) \
	({ void *__D=(D); int __a=(a); \
	__asm__ __volatile__ ("movb %%al, (%%edi)" \
		: : "D" (__D), "a" (__a) : "memory", "cc"); \
	0; })
// drawpixels(D,a): stores the low 16 bits of a (ax) at address D (edi).
// The expression evaluates to 0.
#define drawpixels(D,a) \
	({ void *__D=(D); int __a=(a); \
	__asm__ __volatile__ ("movw %%ax, (%%edi)" \
		: : "D" (__D), "a" (__a) : "memory", "cc"); \
	0; })
// drawpixelses(D,a): stores all 32 bits of a (eax) at address D (edi).
// The expression evaluates to 0.
#define drawpixelses(D,a) \
	({ void *__D=(D); int __a=(a); \
	__asm__ __volatile__ ("movl %%eax, (%%edi)" \
		: : "D" (__D), "a" (__a) : "memory", "cc"); \
	0; })
// clearbuf(D,c,a): fills c consecutive 32-bit words starting at D with the
// value a using "rep; stosl".  The destination pointer and the count are
// read-write operands because the string instruction advances/decrements
// them.  The expression evaluates to 0.
#define clearbuf(D,c,a) \
	({ void *__dst=(D); int __count=(c), __fill=(a); \
	__asm__ __volatile__ ( \
		"rep; stosl" \
		: "+D" (__dst), "+c" (__count) \
		: "a" (__fill) \
		: "memory", "cc"); \
	0; })
// copybuf(S,D,c): copies c consecutive 32-bit words from S to D using
// "rep; movsl".  Source, destination and count are read-write operands
// because the string instruction updates all three.  Evaluates to 0.
#define copybuf(S,D,c) \
	({ void *__src=(S), *__dst=(D); int __count=(c); \
	__asm__ __volatile__ ( \
		"rep; movsl" \
		: "+S" (__src), "+D" (__dst), "+c" (__count) \
		: \
		: "memory", "cc"); \
	0; })
928
// mul3(a): a*3, computed with a single lea (base + index*2).
// The input is tied to the output register, so %1 and %0 are the same.
#define mul3(a) \
	({ int __x=(a), __prod; \
	__asm__ __volatile__ ( \
		"lea (%1,%1,2), %0" \
		: "=r" (__prod) \
		: "0" (__x) \
		: "cc"); \
	__prod; })
// mul5(a): a*5, computed with a single lea (base + index*4).
// The input is tied to the output register, so %1 and %0 are the same.
#define mul5(a) \
	({ int __x=(a), __prod; \
	__asm__ __volatile__ ( \
		"lea (%1,%1,4), %0" \
		: "=r" (__prod) \
		: "0" (__x) \
		: "cc"); \
	__prod; })
// mul9(a): a*9, computed with a single lea (base + index*8).
// The input is tied to the output register, so %1 and %0 are the same.
#define mul9(a) \
	({ int __x=(a), __prod; \
	__asm__ __volatile__ ( \
		"lea (%1,%1,8), %0" \
		: "=r" (__prod) \
		: "0" (__x) \
		: "cc"); \
	__prod; })
944
// divmod(a,b): unsigned divide.  Returns eax/ebx (a / b) and stores the
// remainder, eax%ebx (a % b), in the global dmval.
// edx is zeroed first so divl divides the 64-bit value 0:a by b.
// dmval is an output ("=m") operand because the asm writes it; the
// "memory" clobber is kept so the macro still acts as a compiler barrier
// exactly as before.
#define divmod(a,b) \
	({ int __a=(a), __b=(b); \
	__asm__ __volatile__ ("xorl %%edx, %%edx; divl %%ebx; movl %%edx, %[dmval]" \
		: "+a" (__a), [dmval] "=m" (dmval) : "b" (__b) : "edx", "memory", "cc"); \
	__a; })
// moddiv(a,b): unsigned divide.  Returns eax%ebx (a % b) and stores the
// quotient, eax/ebx (a / b), in the global dmval.
// eax is a read-write operand: it carries a in and is overwritten by divl
// (a register may not be both an input and a clobber).  edx is an
// early-clobber output because it is zeroed before the other operands are
// consumed, and dmval is an output ("=m") operand because the asm writes it.
#define moddiv(a,b) \
	({ int __a=(a), __b=(b), __d; \
	__asm__ __volatile__ ("xorl %%edx, %%edx; divl %%ebx; movl %%eax, %[dmval]" \
		: "+a" (__a), "=&d" (__d), [dmval] "=m" (dmval) : "b" (__b) : "memory", "cc"); \
	__d; })
957
// klabs(a): absolute value of a.  The value is tested in eax and negated
// only when the sign flag shows it is negative.
#define klabs(a) \
	({ int __val=(a); \
	__asm__ __volatile__ ( \
		"testl %%eax, %%eax; " \
		"jns 0f; " \
		"negl %%eax; " \
		"0:" \
		: "=a" (__val) \
		: "0" (__val) \
		: "cc"); \
	__val; })
// ksgn(b): sign of b as -1, 0 or 1.
//   addl ebx,ebx   carry = sign bit of b, ebx = 2*b
//   sbbl eax,eax   eax = -1 if b was negative, else 0
//   cmpl ebx,eax   carry set when eax < ebx unsigned (only for b > 0)
//   adcb $0,al     adds that carry, turning 0 into 1 for positive b
// ebx is a read-write operand ("+b") because the first instruction
// overwrites it; declaring it input-only would let the compiler assume it
// still holds b afterwards.
#define ksgn(b) \
	({ int __b=(b), __r; \
	__asm__ __volatile__ ("addl %%ebx, %%ebx; sbbl %%eax, %%eax; cmpl %%ebx, %%eax; adcb $0, %%al" \
		: "=a" (__r), "+b" (__b) : : "cc"); \
	__r; })
968
// umin(a,b): branch-free minimum using an unsigned comparison.
// After a-b, sbbl builds an all-ones mask in ecx when the subtraction
// borrowed (a < b unsigned); the mask keeps (a-b) in that case and zeroes
// it otherwise, and adding b back yields a or b respectively.
#define umin(a,b) \
	({ int __lhs=(a), __rhs=(b); \
	__asm__ __volatile__ ( \
		"subl %%ebx, %%eax; " \
		"sbbl %%ecx, %%ecx; " \
		"andl %%ecx, %%eax; " \
		"addl %%ebx, %%eax" \
		: "=a" (__lhs) \
		: "0" (__lhs), "b" (__rhs) \
		: "ecx", "cc"); \
	__lhs; })
// umax(a,b): branch-free maximum using an unsigned comparison.
// Same shape as the borrow-mask minimum, but the mask is inverted with
// xorl so (a-b) is kept when the subtraction did NOT borrow (a >= b
// unsigned); adding b back then yields a, otherwise b.
#define umax(a,b) \
	({ int __lhs=(a), __rhs=(b); \
	__asm__ __volatile__ ( \
		"subl %%ebx, %%eax; " \
		"sbbl %%ecx, %%ecx; " \
		"xorl $0xffffffff, %%ecx; " \
		"andl %%ecx, %%eax; " \
		"addl %%ebx, %%eax" \
		: "=a" (__lhs) \
		: "0" (__lhs), "b" (__rhs) \
		: "ecx", "cc"); \
	__lhs; })
979
// kmin(a,b): signed minimum.  eax (a) is kept when it compares less than
// ebx (b); otherwise b is copied into eax.
#define kmin(a,b) \
	({ int __lhs=(a), __rhs=(b); \
	__asm__ __volatile__ ( \
		"cmpl %%ebx, %%eax; " \
		"jl 0f; " \
		"movl %%ebx, %%eax; " \
		"0:" \
		: "=a" (__lhs) \
		: "0" (__lhs), "b" (__rhs) \
		: "cc"); \
	__lhs; })
// kmax(a,b): signed maximum.  eax (a) is kept when it compares greater
// than ebx (b); otherwise b is copied into eax.
#define kmax(a,b) \
	({ int __lhs=(a), __rhs=(b); \
	__asm__ __volatile__ ( \
		"cmpl %%ebx, %%eax; " \
		"jg 0f; " \
		"movl %%ebx, %%eax; " \
		"0:" \
		: "=a" (__lhs) \
		: "0" (__lhs), "b" (__rhs) \
		: "cc"); \
	__lhs; })
990
// swapchar(a,b): exchanges the single bytes at addresses a (eax) and
// b (ebx).  Both bytes are loaded (into cl and ch) before either is stored.
// ecx is clobbered; the expression evaluates to 0.
#define swapchar(a,b) \
	({ void *__a=(a), *__b=(b); \
	__asm__ __volatile__ ("movb (%%eax), %%cl; movb (%%ebx), %%ch; movb %%cl, (%%ebx); movb %%ch, (%%eax)" \
		: : "a" (__a), "b" (__b) : "ecx", "memory", "cc"); \
	0; })
// swapshort(a,b): exchanges the 16-bit values at addresses a (eax) and
// b (ebx), using cx and dx as temporaries.  Evaluates to 0.
#define swapshort(a,b) \
	({ void *__a=(a), *__b=(b); \
	__asm__ __volatile__ ("movw (%%eax), %%cx; movw (%%ebx), %%dx; movw %%cx, (%%ebx); movw %%dx, (%%eax)" \
		: : "a" (__a), "b" (__b) : "ecx", "edx", "memory", "cc"); \
	0; })
// swaplong(a,b): exchanges the 32-bit values at addresses a (eax) and
// b (ebx), using ecx and edx as temporaries.  Evaluates to 0.
#define swaplong(a,b) \
	({ void *__a=(a), *__b=(b); \
	__asm__ __volatile__ ("movl (%%eax), %%ecx; movl (%%ebx), %%edx; movl %%ecx, (%%ebx); movl %%edx, (%%eax)" \
		: : "a" (__a), "b" (__b) : "ecx", "edx", "memory", "cc"); \
	0; })
// swapbuf4(a,b,c): exchanges c consecutive 32-bit words between the
// buffers at a and b.  Evaluates to 0.
// Both pointers and the counter are read-write operands because the loop
// advances the pointers and decrements the counter; declaring them as
// plain inputs would let the compiler assume the registers were unchanged.
// The two scratch registers are early-clobber outputs so they can never
// share a register with a pointer or the counter.
// A count of zero or less does nothing (the loop itself is a do-while and
// would otherwise wrap the counter).
#define swapbuf4(a,b,c) \
	({ void *__a=(a), *__b=(b); int __c=(c), __t0, __t1; \
	if (__c > 0) \
		__asm__ __volatile__ ("0: movl (%0), %3; movl (%1), %4; movl %3, (%1); " \
				"movl %4, (%0); add $4, %0; add $4, %1; decl %2; jnz 0b" \
			: "+r" (__a), "+r" (__b), "+r" (__c), "=&r" (__t0), "=&r" (__t1) \
			: : "memory", "cc"); \
	0; })
// swap64bit(a,b): exchanges the 8 bytes at address a (eax) with the 8 bytes
// at address b (ebx), as two 32-bit halves at offsets 0 and 4.  The loads
// and stores for the two halves are interleaved, with ecx and edx as
// temporaries.  Evaluates to 0.
#define swap64bit(a,b) \
	({ void *__a=(a), *__b=(b); \
	__asm__ __volatile__ ("movl (%%eax), %%ecx; movl (%%ebx), %%edx; movl %%ecx, (%%ebx); " \
				"movl 4(%%eax), %%ecx; movl %%edx, (%%eax); movl 4(%%ebx), %%edx; " \
				"movl %%ecx, 4(%%ebx); movl %%edx, 4(%%eax)" \
		: : "a" (__a), "b" (__b) : "ecx", "edx", "memory", "cc"); \
	0; })
1019
// swapchar2(ptr1,ptr2,xsiz); is the same as:
//   swapchar(ptr1,ptr2); swapchar(ptr1+1,ptr2+xsiz);
// esi is first turned into the address ptr2+xsiz (hence it is also an
// output).  The two bytes at ptr1 are read as one 16-bit load into cx and
// written back as one 16-bit store from dx; the bytes at ptr2 and
// ptr2+xsiz are moved individually.  Evaluates to 0.
#define swapchar2(a,b,S) \
	({ void *__a=(a), *__b=(b); int __S=(S); \
	__asm__ __volatile__ ("addl %%ebx, %%esi; movw (%%eax), %%cx; movb (%%ebx), %%dl; " \
				"movb %%cl, (%%ebx); movb (%%esi), %%dh; movb %%ch, (%%esi); " \
				"movw %%dx, (%%eax)" \
		: "=S" (__S) : "a" (__a), "b" (__b), "S" (__S) : "ecx", "edx", "memory", "cc"); \
	0; })
1029
1030
// qinterpolatedown16(a,c,d,S): writes a run of 32-bit values to the buffer
// at a (eax).  Starting from d (edx) and adding S (esi) after each element,
// every stored value is the running accumulator shifted right
// arithmetically by 16.  c (ecx) is the element count.
// The main loop (label 0) emits two elements per iteration for c/2
// iterations; a copy of c in ebx is tested afterwards and one more element
// is stored at label 1 when c is odd.
// Note that "shrl $1, %ecx; jz 1f" also jumps to label 1 when c is 0, so a
// count of 0 still stores one element.
// Evaluates to 0; ebx and edi are clobbered.
#define qinterpolatedown16(a,c,d,S) \
	({ void *__a=(void*)(a); int __c=(c), __d=(d), __S=(S); \
	__asm__ __volatile__ ("movl %%ecx, %%ebx; shrl $1, %%ecx; jz 1f; " \
				"0: leal (%%edx,%%esi,), %%edi; sarl $16, %%edx; movl %%edx, (%%eax); " \
				"leal (%%edi,%%esi,), %%edx; sarl $16, %%edi; movl %%edi, 4(%%eax); " \
				"addl $8, %%eax; decl %%ecx; jnz 0b; testl $1, %%ebx; jz 2f; " \
				"1: sarl $16, %%edx; movl %%edx, (%%eax); 2:" \
		: "=a" (__a), "=c" (__c), "=d" (__d) : "a" (__a), "c" (__c), "d" (__d), "S" (__S) \
		: "ebx", "edi", "memory", "cc"); \
	0; })
1041
// qinterpolatedown16short(a,c,d,S): 16-bit counterpart of the 32-bit
// interpolation fill.  It writes c (ecx) 16-bit values to the buffer at
// a (eax), each derived from the accumulator d (edx) shifted right
// arithmetically by 16, with S (esi) added to the accumulator per element.
// Flow:
//   - c == 0 exits immediately (label 3).
//   - If bit 1 of the address is set, one 16-bit element is stored first so
//     that the following 32-bit stores start at an address with that bit
//     clear.
//   - Label 1 handles two elements per iteration with a single 32-bit
//     store of (edx sar 16) + ((edx + esi) & 0xffff0000).
//   - Label 2 stores a final single 16-bit element when one is left over.
// Evaluates to 0; ebx and edi are clobbered.
#define qinterpolatedown16short(a,c,d,S) \
	({ void *__a=(void*)(a); int __c=(c), __d=(d), __S=(S); \
	__asm__ __volatile__ ("testl %%ecx, %%ecx; jz 3f; testb $2, %%al; jz 0f; movl %%edx, %%ebx; " \
				"sarl $16, %%ebx; movw %%bx, (%%eax); addl %%esi, %%edx; addl $2, %%eax; " \
				"decl %%ecx; jz 3f; " \
				"0: subl $2, %%ecx; jc 2f; " \
				"1: movl %%edx, %%ebx; addl %%esi, %%edx; sarl $16, %%ebx; movl %%edx, %%edi; " \
				"andl $0xffff0000, %%edi; addl %%esi, %%edx; addl %%edi, %%ebx; " \
				"movl %%ebx, (%%eax); addl $4, %%eax; subl $2, %%ecx; jnc 1b; testb $1, %%cl; " \
				"jz 3f; " \
				"2: movl %%edx, %%ebx; sarl $16, %%ebx; movw %%bx, (%%eax); 3:" \
		: "=a" (__a), "=c" (__c), "=d" (__d) : "a" (__a), "c" (__c), "d" (__d), "S" (__S) \
		: "ebx", "edi", "memory", "cc"); \
	0; })
1056
1057
1058 //}}}
1059
1060 #elif defined(__WATCOMC__) && USE_ASM // __GNUC__ && __i386__
1061
1062 //
1063 // Watcom C inline assembler
1064 //
1065
1066 //{{{
// sqr(x): x*x.  The argument arrives in eax, is multiplied by itself with a
// two-operand imul, and the result is returned in eax (value [eax]).
int sqr(int);
#pragma aux sqr =\
	"imul eax, eax",\
	parm nomemory [eax]\
	modify exact [eax]\
	value [eax]
1073
// scale(a,b,c): (a*b)/c with a 64-bit intermediate.  Arguments arrive in
// eax, edx and ecx; "imul edx" forms the signed product in edx:eax and
// "idiv ecx" divides it, leaving the quotient in eax.
// No value clause is given, so the result is presumably taken from the
// compiler's default int return register (eax) -- confirm against the
// Watcom documentation.
int scale(int,int,int);
#pragma aux scale =\
	"imul edx",\
	"idiv ecx",\
	parm nomemory [eax][edx][ecx]\
	modify exact [eax edx]
1080
// mulscale(a,b,c): (a*b) >> c with a 64-bit intermediate.  Arguments arrive
// in eax, edx and ecx; "imul edx" forms the signed product in edx:eax and
// "shrd eax, edx, cl" shifts the low dword right by cl, filling from edx.
// No value clause is given, so the result is presumably taken from the
// default int return register (eax) -- confirm.
int mulscale(int,int,int);
#pragma aux mulscale =\
	"imul edx",\
	"shrd eax, edx, cl",\
	parm nomemory [eax][edx][ecx]\
	modify exact [eax edx]
1087
// mulscale1 .. mulscale31: (a*b) >> N with a 64-bit intermediate and a
// constant shift count.  Arguments arrive in eax and edx; "imul edx" forms
// the signed product in edx:eax and "shrd eax, edx, N" shifts the low dword
// right by N, filling from edx.  Both eax and edx are listed as modified.
// No value clause is given, so the result is presumably taken from the
// default int return register (eax) -- confirm.
int mulscale1(int,int);
#pragma aux mulscale1 =\
	"imul edx",\
	"shrd eax, edx, 1",\
	parm nomemory [eax][edx]\
	modify exact [eax edx]

int mulscale2(int,int);
#pragma aux mulscale2 =\
	"imul edx",\
	"shrd eax, edx, 2",\
	parm nomemory [eax][edx]\
	modify exact [eax edx]

int mulscale3(int,int);
#pragma aux mulscale3 =\
	"imul edx",\
	"shrd eax, edx, 3",\
	parm nomemory [eax][edx]\
	modify exact [eax edx]

int mulscale4(int,int);
#pragma aux mulscale4 =\
	"imul edx",\
	"shrd eax, edx, 4",\
	parm nomemory [eax][edx]\
	modify exact [eax edx]

int mulscale5(int,int);
#pragma aux mulscale5 =\
	"imul edx",\
	"shrd eax, edx, 5",\
	parm nomemory [eax][edx]\
	modify exact [eax edx]

int mulscale6(int,int);
#pragma aux mulscale6 =\
	"imul edx",\
	"shrd eax, edx, 6",\
	parm nomemory [eax][edx]\
	modify exact [eax edx]

int mulscale7(int,int);
#pragma aux mulscale7 =\
	"imul edx",\
	"shrd eax, edx, 7",\
	parm nomemory [eax][edx]\
	modify exact [eax edx]

int mulscale8(int,int);
#pragma aux mulscale8 =\
	"imul edx",\
	"shrd eax, edx, 8",\
	parm nomemory [eax][edx]\
	modify exact [eax edx]

int mulscale9(int,int);
#pragma aux mulscale9 =\
	"imul edx",\
	"shrd eax, edx, 9",\
	parm nomemory [eax][edx]\
	modify exact [eax edx]

int mulscale10(int,int);
#pragma aux mulscale10 =\
	"imul edx",\
	"shrd eax, edx, 10",\
	parm nomemory [eax][edx]\
	modify exact [eax edx]

int mulscale11(int,int);
#pragma aux mulscale11 =\
	"imul edx",\
	"shrd eax, edx, 11",\
	parm nomemory [eax][edx]\
	modify exact [eax edx]

int mulscale12(int,int);
#pragma aux mulscale12 =\
	"imul edx",\
	"shrd eax, edx, 12",\
	parm nomemory [eax][edx]\
	modify exact [eax edx]

int mulscale13(int,int);
#pragma aux mulscale13 =\
	"imul edx",\
	"shrd eax, edx, 13",\
	parm nomemory [eax][edx]\
	modify exact [eax edx]

int mulscale14(int,int);
#pragma aux mulscale14 =\
	"imul edx",\
	"shrd eax, edx, 14",\
	parm nomemory [eax][edx]\
	modify exact [eax edx]

int mulscale15(int,int);
#pragma aux mulscale15 =\
	"imul edx",\
	"shrd eax, edx, 15",\
	parm nomemory [eax][edx]\
	modify exact [eax edx]

int mulscale16(int,int);
#pragma aux mulscale16 =\
	"imul edx",\
	"shrd eax, edx, 16",\
	parm nomemory [eax][edx]\
	modify exact [eax edx]

int mulscale17(int,int);
#pragma aux mulscale17 =\
	"imul edx",\
	"shrd eax, edx, 17",\
	parm nomemory [eax][edx]\
	modify exact [eax edx]

int mulscale18(int,int);
#pragma aux mulscale18 =\
	"imul edx",\
	"shrd eax, edx, 18",\
	parm nomemory [eax][edx]\
	modify exact [eax edx]

int mulscale19(int,int);
#pragma aux mulscale19 =\
	"imul edx",\
	"shrd eax, edx, 19",\
	parm nomemory [eax][edx]\
	modify exact [eax edx]

int mulscale20(int,int);
#pragma aux mulscale20 =\
	"imul edx",\
	"shrd eax, edx, 20",\
	parm nomemory [eax][edx]\
	modify exact [eax edx]

int mulscale21(int,int);
#pragma aux mulscale21 =\
	"imul edx",\
	"shrd eax, edx, 21",\
	parm nomemory [eax][edx]\
	modify exact [eax edx]

int mulscale22(int,int);
#pragma aux mulscale22 =\
	"imul edx",\
	"shrd eax, edx, 22",\
	parm nomemory [eax][edx]\
	modify exact [eax edx]

int mulscale23(int,int);
#pragma aux mulscale23 =\
	"imul edx",\
	"shrd eax, edx, 23",\
	parm nomemory [eax][edx]\
	modify exact [eax edx]

int mulscale24(int,int);
#pragma aux mulscale24 =\
	"imul edx",\
	"shrd eax, edx, 24",\
	parm nomemory [eax][edx]\
	modify exact [eax edx]

int mulscale25(int,int);
#pragma aux mulscale25 =\
	"imul edx",\
	"shrd eax, edx, 25",\
	parm nomemory [eax][edx]\
	modify exact [eax edx]

int mulscale26(int,int);
#pragma aux mulscale26 =\
	"imul edx",\
	"shrd eax, edx, 26",\
	parm nomemory [eax][edx]\
	modify exact [eax edx]

int mulscale27(int,int);
#pragma aux mulscale27 =\
	"imul edx",\
	"shrd eax, edx, 27",\
	parm nomemory [eax][edx]\
	modify exact [eax edx]

int mulscale28(int,int);
#pragma aux mulscale28 =\
	"imul edx",\
	"shrd eax, edx, 28",\
	parm nomemory [eax][edx]\
	modify exact [eax edx]

int mulscale29(int,int);
#pragma aux mulscale29 =\
	"imul edx",\
	"shrd eax, edx, 29",\
	parm nomemory [eax][edx]\
	modify exact [eax edx]

int mulscale30(int,int);
#pragma aux mulscale30 =\
	"imul edx",\
	"shrd eax, edx, 30",\
	parm nomemory [eax][edx]\
	modify exact [eax edx]

int mulscale31(int,int);
#pragma aux mulscale31 =\
	"imul edx",\
	"shrd eax, edx, 31",\
	parm nomemory [eax][edx]\
	modify exact [eax edx]
1304
// mulscale32(a,b): high dword of the signed 64-bit product a*b.
// Arguments arrive in eax and edx; after "imul edx" the product is in
// edx:eax and the result is returned from edx (value [edx]), so no shift
// instruction is needed.
int mulscale32(int,int);
#pragma aux mulscale32 =\
	"imul edx",\
	parm nomemory [eax][edx]\
	modify exact [eax edx]\
	value [edx]
1311
// dmulscale(a,b,c,d,s): (a*b + c*d) >> s with a 64-bit intermediate.
// Arguments arrive in eax, edx, esi, edi and ecx (in that order).
//   imul edx                   edx:eax = a*b
//   mov ebx,eax / mov esi,edx  park the first product in esi:ebx
//   mov eax,esi (in between)   load c into eax before esi is overwritten
//   imul edi                   edx:eax = c*d
//   add / adc                  edx:eax = a*b + c*d
//   shrd eax,edx,cl            low dword of the sum shifted right by cl
// No value clause is given, so the result is presumably taken from the
// default int return register (eax) -- confirm.
int dmulscale(int,int,int,int,int);
#pragma aux dmulscale =\
	"imul edx",\
	"mov ebx, eax",\
	"mov eax, esi",\
	"mov esi, edx",\
	"imul edi",\
	"add eax, ebx",\
	"adc edx, esi",\
	"shrd eax, edx, cl",\
	parm nomemory [eax][edx][esi][edi]\
	modify exact [eax ebx edx esi]
1324
1325 int dmulscale1(int,int,int,int);
1326 #pragma aux dmulscale1 =\
1327 "imul edx",\
1328 "mov ebx, eax",\
1329 "mov eax, esi",\
1330 "mov esi, edx",\
1331 "imul edi",\
1332 "add eax, ebx",\
1333 "adc edx, esi",\
1334 "shrd eax, edx, 1",\
1335 parm nomemory [eax][edx][esi][edi]\
1336 modify exact [eax ebx edx esi]
1337
1338 int dmulscale2(int,int,int,int);
1339 #pragma aux dmulscale2 =\
1340 "imul edx",\
1341 "mov ebx, eax",\
1342 "mov eax, esi",\
1343 "mov esi, edx",\
1344 "imul edi",\
1345 "add eax, ebx",\
1346 "adc edx, esi",\
1347 "shrd eax, edx, 2",\
1348 parm nomemory [eax][edx][esi][edi]\
1349 modify exact [eax ebx edx esi]
1350
1351 int dmulscale3(int,int,int,int);
1352 #pragma aux dmulscale3 =\
1353 "imul edx",\
1354 "mov ebx, eax",\
1355 "mov eax, esi",\
1356 "mov esi, edx",\
1357 "imul edi",\
1358 "add eax, ebx",\
1359 "adc edx, esi",\
1360 "shrd eax, edx, 3",\
1361 parm nomemory [eax][edx][esi][edi]\
1362 modify exact [eax ebx edx esi]
1363
1364 int dmulscale4(int,int,int,int);
1365 #pragma aux dmulscale4 =\
1366 "imul edx",\
1367 "mov ebx, eax",\
1368 "mov eax, esi",\
1369 "mov esi, edx",\
1370 "imul edi",\
1371 "add eax, ebx",\
1372 "adc edx, esi",\
1373 "shrd eax, edx, 4",\
1374 parm nomemory [eax][edx][esi][edi]\
1375 modify exact [eax ebx edx esi]
1376
1377 int dmulscale5(int,int,int,int);
1378 #pragma aux dmulscale5 =\
1379 "imul edx",\
1380 "mov ebx, eax",\
1381 "mov eax, esi",\
1382 "mov esi, edx",\
1383 "imul edi",\
1384 "add eax, ebx",\
1385 "adc edx, esi",\
1386 "shrd eax, edx, 5",\
1387 parm nomemory [eax][edx][esi][edi]\
1388 modify exact [eax ebx edx esi]
1389
1390 int dmulscale6(int,int,int,int);
1391 #pragma aux dmulscale6 =\
1392 "imul edx",\
1393 "mov ebx, eax",\
1394 "mov eax, esi",\
1395 "mov esi, edx",\
1396 "imul edi",\
1397 "add eax, ebx",\
1398 "adc edx, esi",\
1399 "shrd eax, edx, 6",\
1400 parm nomemory [eax][edx][esi][edi]\
1401 modify exact [eax ebx edx esi]
1402
1403 int dmulscale7(int,int,int,int);
1404 #pragma aux dmulscale7 =\
1405 "imul edx",\
1406 "mov ebx, eax",\
1407 "mov eax, esi",\
1408 "mov esi, edx",\
1409 "imul edi",\
1410 "add eax, ebx",\
1411 "adc edx, esi",\
1412 "shrd eax, edx, 7",\
1413 parm nomemory [eax][edx][esi][edi]\
1414 modify exact [eax ebx edx esi]
1415
1416 int dmulscale8(int,int,int,int);
1417 #pragma aux dmulscale8 =\
1418 "imul edx",\
1419 "mov ebx, eax",\
1420 "mov eax, esi",\
1421 "mov esi, edx",\
1422 "imul edi",\
1423 "add eax, ebx",\
1424 "adc edx, esi",\
1425 "shrd eax, edx, 8",\
1426 parm nomemory [eax][edx][esi][edi]\
1427 modify exact [eax ebx edx esi]
1428
1429 int dmulscale9(int,int,int,int);
1430 #pragma aux dmulscale9 =\
1431 "imul edx",\
1432 "mov ebx, eax",\
1433 "mov eax, esi",\
1434 "mov esi, edx",\
1435 "imul edi",\
1436 "add eax, ebx",\
1437 "adc edx, esi",\
1438 "shrd eax, edx, 9",\
1439 parm nomemory [eax][edx][esi][edi]\
1440 modify exact [eax ebx edx esi]
1441
1442 int dmulscale10(int,int,int,int);
1443 #pragma aux dmulscale10 =\
1444 "imul edx",\
1445 "mov ebx, eax",\
1446 "mov eax, esi",\
1447 "mov esi, edx",\
1448 "imul edi",\
1449 "add eax, ebx",\
1450 "adc edx, esi",\
1451 "shrd eax, edx, 10",\
1452 parm nomemory [eax][edx][esi][edi]\
1453 modify exact [eax ebx edx esi]
1454
1455 int dmulscale11(int,int,int,int);
1456 #pragma aux dmulscale11 =\
1457 "imul edx",\
1458 "mov ebx, eax",\
1459 "mov eax, esi",\
1460 "mov esi, edx",\
1461 "imul edi",\
1462 "add eax, ebx",\
1463 "adc edx, esi",\
1464 "shrd eax, edx, 11",\
1465 parm nomemory [eax][edx][esi][edi]\
1466 modify exact [eax ebx edx esi]
1467
1468 int dmulscale12(int,int,int,int);
1469 #pragma aux dmulscale12 =\
1470 "imul edx",\
1471 "mov ebx, eax",\
1472 "mov eax, esi",\
1473 "mov esi, edx",\
1474 "imul edi",\
1475 "add eax, ebx",\
1476 "adc edx, esi",\
1477 "shrd eax, edx, 12",\
1478 parm nomemory [eax][edx][esi][edi]\
1479 modify exact [eax ebx edx esi]
1480
1481 int dmulscale13(int,int,int,int);
1482 #pragma aux dmulscale13 =\
1483 "imul edx",\
1484 "mov ebx, eax",\
1485 "mov eax, esi",\
1486 "mov esi, edx",\
1487 "imul edi",\
1488 "add eax, ebx",\
1489 "adc edx, esi",\
1490 "shrd eax, edx, 13",\
1491 parm nomemory [eax][edx][esi][edi]\
1492 modify exact [eax ebx edx esi]
1493
1494 int dmulscale14(int,int,int,int);
1495 #pragma aux dmulscale14 =\
1496 "imul edx",\
1497 "mov ebx, eax",\
1498 "mov eax, esi",\
1499 "mov esi, edx",\
1500 "imul edi",\
1501 "add eax, ebx",\
1502 "adc edx, esi",\
1503 "shrd eax, edx, 14",\
1504 parm nomemory [eax][edx][esi][edi]\
1505 modify exact [eax ebx edx esi]
1506
1507 int dmulscale15(int,int,int,int);
1508 #pragma aux dmulscale15 =\
1509 "imul edx",\
1510 "mov ebx, eax",\
1511 "mov eax, esi",\
1512 "mov esi, edx",\
1513 "imul edi",\
1514 "add eax, ebx",\
1515 "adc edx, esi",\
1516 "shrd eax, edx, 15",\
1517 parm nomemory [eax][edx][esi][edi]\
1518 modify exact [eax ebx edx esi]
1519
1520 int dmulscale16(int,int,int,int);
1521 #pragma aux dmulscale16 =\
1522 "imul edx",\
1523 "mov ebx, eax",\
1524 "mov eax, esi",\
1525 "mov esi, edx",\
1526 "imul edi",\
1527 "add eax, ebx",\
1528 "adc edx, esi",\
1529 "shrd eax, edx, 16",\
1530 parm nomemory [eax][edx][esi][edi]\
1531 modify exact [eax ebx edx esi]
1532
1533 int dmulscale17(int,int,int,int);
1534 #pragma aux dmulscale17 =\
1535 "imul edx",\
1536 "mov ebx, eax",\
1537 "mov eax, esi",\
1538 "mov esi, edx",\
1539 "imul edi",\
1540 "add eax, ebx",\
1541 "adc edx, esi",\
1542 "shrd eax, edx, 17",\
1543 parm nomemory [eax][edx][esi][edi]\
1544 modify exact [eax ebx edx esi]
1545
1546 int dmulscale18(int,int,int,int);
1547 #pragma aux dmulscale18 =\
1548 "imul edx",\
1549 "mov ebx, eax",\
1550 "mov eax, esi",\
1551 "mov esi, edx",\
1552 "imul edi",\
1553 "add eax, ebx",\
1554 "adc edx, esi",\
1555 "shrd eax, edx, 18",\
1556 parm nomemory [eax][edx][esi][edi]\
1557 modify exact [eax ebx edx esi]
1558
1559 int dmulscale19(int,int,int,int);
1560 #pragma aux dmulscale19 =\
1561 "imul edx",\
1562 "mov ebx, eax",\
1563 "mov eax, esi",\
1564 "mov esi, edx",\
1565 "imul edi",\
1566 "add eax, ebx",\
1567 "adc edx, esi",\
1568 "shrd eax, edx, 19",\
1569 parm nomemory [eax][edx][esi][edi]\
1570 modify exact [eax ebx edx esi]
1571
1572 int dmulscale20(int,int,int,int);
1573 #pragma aux dmulscale20 =\
1574 "imul edx",\
1575 "mov ebx, eax",\
1576 "mov eax, esi",\
1577 "mov esi, edx",\
1578 "imul edi",\
1579 "add eax, ebx",\
1580 "adc edx, esi",\
1581 "shrd eax, edx, 20",\
1582 parm nomemory [eax][edx][esi][edi]\
1583 modify exact [eax ebx edx esi]
1584
1585 int dmulscale21(int,int,int,int);
1586 #pragma aux dmulscale21 =\
1587 "imul edx",\
1588 "mov ebx, eax",\
1589 "mov eax, esi",\
1590 "mov esi, edx",\
1591 "imul edi",\
1592 "add eax, ebx",\
1593 "adc edx, esi",\
1594 "shrd eax, edx, 21",\
1595 parm nomemory [eax][edx][esi][edi]\
1596 modify exact [eax ebx edx esi]
1597
// dmulscale22(a,b,c,d) [eax,edx,esi,edi]: low 32 bits of the 64-bit sum
// a*b + c*d shifted right by 22. The first imul product is parked in esi:ebx.
1598 int dmulscale22(int,int,int,int);
1599 #pragma aux dmulscale22 =\
1600 "imul edx",\
1601 "mov ebx, eax",\
1602 "mov eax, esi",\
1603 "mov esi, edx",\
1604 "imul edi",\
1605 "add eax, ebx",\
1606 "adc edx, esi",\
1607 "shrd eax, edx, 22",\
1608 parm nomemory [eax][edx][esi][edi]\
1609 modify exact [eax ebx edx esi]
1610
// dmulscale23(a,b,c,d) [eax,edx,esi,edi]: low 32 bits of the 64-bit sum
// a*b + c*d shifted right by 23. The first imul product is parked in esi:ebx.
1611 int dmulscale23(int,int,int,int);
1612 #pragma aux dmulscale23 =\
1613 "imul edx",\
1614 "mov ebx, eax",\
1615 "mov eax, esi",\
1616 "mov esi, edx",\
1617 "imul edi",\
1618 "add eax, ebx",\
1619 "adc edx, esi",\
1620 "shrd eax, edx, 23",\
1621 parm nomemory [eax][edx][esi][edi]\
1622 modify exact [eax ebx edx esi]
1623
// dmulscale24(a,b,c,d) [eax,edx,esi,edi]: low 32 bits of the 64-bit sum
// a*b + c*d shifted right by 24. The first imul product is parked in esi:ebx.
1624 int dmulscale24(int,int,int,int);
1625 #pragma aux dmulscale24 =\
1626 "imul edx",\
1627 "mov ebx, eax",\
1628 "mov eax, esi",\
1629 "mov esi, edx",\
1630 "imul edi",\
1631 "add eax, ebx",\
1632 "adc edx, esi",\
1633 "shrd eax, edx, 24",\
1634 parm nomemory [eax][edx][esi][edi]\
1635 modify exact [eax ebx edx esi]
1636
// dmulscale25(a,b,c,d) [eax,edx,esi,edi]: low 32 bits of the 64-bit sum
// a*b + c*d shifted right by 25. The first imul product is parked in esi:ebx.
1637 int dmulscale25(int,int,int,int);
1638 #pragma aux dmulscale25 =\
1639 "imul edx",\
1640 "mov ebx, eax",\
1641 "mov eax, esi",\
1642 "mov esi, edx",\
1643 "imul edi",\
1644 "add eax, ebx",\
1645 "adc edx, esi",\
1646 "shrd eax, edx, 25",\
1647 parm nomemory [eax][edx][esi][edi]\
1648 modify exact [eax ebx edx esi]
1649
// dmulscale26(a,b,c,d) [eax,edx,esi,edi]: low 32 bits of the 64-bit sum
// a*b + c*d shifted right by 26. The first imul product is parked in esi:ebx.
1650 int dmulscale26(int,int,int,int);
1651 #pragma aux dmulscale26 =\
1652 "imul edx",\
1653 "mov ebx, eax",\
1654 "mov eax, esi",\
1655 "mov esi, edx",\
1656 "imul edi",\
1657 "add eax, ebx",\
1658 "adc edx, esi",\
1659 "shrd eax, edx, 26",\
1660 parm nomemory [eax][edx][esi][edi]\
1661 modify exact [eax ebx edx esi]
1662
// dmulscale27(a,b,c,d) [eax,edx,esi,edi]: low 32 bits of the 64-bit sum
// a*b + c*d shifted right by 27. The first imul product is parked in esi:ebx.
1663 int dmulscale27(int,int,int,int);
1664 #pragma aux dmulscale27 =\
1665 "imul edx",\
1666 "mov ebx, eax",\
1667 "mov eax, esi",\
1668 "mov esi, edx",\
1669 "imul edi",\
1670 "add eax, ebx",\
1671 "adc edx, esi",\
1672 "shrd eax, edx, 27",\
1673 parm nomemory [eax][edx][esi][edi]\
1674 modify exact [eax ebx edx esi]
1675
// dmulscale28(a,b,c,d) [eax,edx,esi,edi]: low 32 bits of the 64-bit sum
// a*b + c*d shifted right by 28. The first imul product is parked in esi:ebx.
1676 int dmulscale28(int,int,int,int);
1677 #pragma aux dmulscale28 =\
1678 "imul edx",\
1679 "mov ebx, eax",\
1680 "mov eax, esi",\
1681 "mov esi, edx",\
1682 "imul edi",\
1683 "add eax, ebx",\
1684 "adc edx, esi",\
1685 "shrd eax, edx, 28",\
1686 parm nomemory [eax][edx][esi][edi]\
1687 modify exact [eax ebx edx esi]
1688
// dmulscale29(a,b,c,d) [eax,edx,esi,edi]: low 32 bits of the 64-bit sum
// a*b + c*d shifted right by 29. The first imul product is parked in esi:ebx.
1689 int dmulscale29(int,int,int,int);
1690 #pragma aux dmulscale29 =\
1691 "imul edx",\
1692 "mov ebx, eax",\
1693 "mov eax, esi",\
1694 "mov esi, edx",\
1695 "imul edi",\
1696 "add eax, ebx",\
1697 "adc edx, esi",\
1698 "shrd eax, edx, 29",\
1699 parm nomemory [eax][edx][esi][edi]\
1700 modify exact [eax ebx edx esi]
1701
// dmulscale30(a,b,c,d) [eax,edx,esi,edi]: low 32 bits of the 64-bit sum
// a*b + c*d shifted right by 30. The first imul product is parked in esi:ebx.
1702 int dmulscale30(int,int,int,int);
1703 #pragma aux dmulscale30 =\
1704 "imul edx",\
1705 "mov ebx, eax",\
1706 "mov eax, esi",\
1707 "mov esi, edx",\
1708 "imul edi",\
1709 "add eax, ebx",\
1710 "adc edx, esi",\
1711 "shrd eax, edx, 30",\
1712 parm nomemory [eax][edx][esi][edi]\
1713 modify exact [eax ebx edx esi]
1714
// dmulscale31(a,b,c,d) [eax,edx,esi,edi]: low 32 bits of the 64-bit sum
// a*b + c*d shifted right by 31. The first imul product is parked in esi:ebx.
1715 int dmulscale31(int,int,int,int);
1716 #pragma aux dmulscale31 =\
1717 "imul edx",\
1718 "mov ebx, eax",\
1719 "mov eax, esi",\
1720 "mov esi, edx",\
1721 "imul edi",\
1722 "add eax, ebx",\
1723 "adc edx, esi",\
1724 "shrd eax, edx, 31",\
1725 parm nomemory [eax][edx][esi][edi]\
1726 modify exact [eax ebx edx esi]
1727
// dmulscale32(a,b,c,d) [eax,edx,esi,edi]: forms the 64-bit sum a*b + c*d and
// returns its high 32 bits; no shift is needed because the result is taken
// straight from edx (value [edx]).
1728 int dmulscale32(int,int,int,int);
1729 #pragma aux dmulscale32 =\
1730 "imul edx",\
1731 "mov ebx, eax",\
1732 "mov eax, esi",\
1733 "mov esi, edx",\
1734 "imul edi",\
1735 "add eax, ebx",\
1736 "adc edx, esi",\
1737 parm nomemory [eax][edx][esi][edi]\
1738 modify exact [eax ebx edx esi]\
1739 value [edx]
1740
// tmulscale1(a,b,c,d,e,f) [eax,edx,ebx,ecx,esi,edi]: low 32 bits of the
// 64-bit sum a*b + c*d + e*f shifted right by 1. The two xchg instructions
// park a*b in ecx:ebx while loading c and d; ecx:ebx then holds the running sum.
1741 int tmulscale1(int,int,int,int,int,int);
1742 #pragma aux tmulscale1 =\
1743 "imul edx",\
1744 "xchg eax, ebx",\
1745 "xchg edx, ecx",\
1746 "imul edx",\
1747 "add ebx, eax",\
1748 "adc ecx, edx",\
1749 "mov eax, esi",\
1750 "imul edi",\
1751 "add eax, ebx",\
1752 "adc edx, ecx",\
1753 "shrd eax, edx, 1",\
1754 parm nomemory [eax][edx][ebx][ecx][esi][edi]\
1755 modify exact [eax ebx ecx edx]
1756
// tmulscale2(a,b,c,d,e,f) [eax,edx,ebx,ecx,esi,edi]: low 32 bits of the
// 64-bit sum a*b + c*d + e*f shifted right by 2 (running sum in ecx:ebx).
1757 int tmulscale2(int,int,int,int,int,int);
1758 #pragma aux tmulscale2 =\
1759 "imul edx",\
1760 "xchg eax, ebx",\
1761 "xchg edx, ecx",\
1762 "imul edx",\
1763 "add ebx, eax",\
1764 "adc ecx, edx",\
1765 "mov eax, esi",\
1766 "imul edi",\
1767 "add eax, ebx",\
1768 "adc edx, ecx",\
1769 "shrd eax, edx, 2",\
1770 parm nomemory [eax][edx][ebx][ecx][esi][edi]\
1771 modify exact [eax ebx ecx edx]
1772
// tmulscale3(a,b,c,d,e,f) [eax,edx,ebx,ecx,esi,edi]: low 32 bits of the
// 64-bit sum a*b + c*d + e*f shifted right by 3 (running sum in ecx:ebx).
1773 int tmulscale3(int,int,int,int,int,int);
1774 #pragma aux tmulscale3 =\
1775 "imul edx",\
1776 "xchg eax, ebx",\
1777 "xchg edx, ecx",\
1778 "imul edx",\
1779 "add ebx, eax",\
1780 "adc ecx, edx",\
1781 "mov eax, esi",\
1782 "imul edi",\
1783 "add eax, ebx",\
1784 "adc edx, ecx",\
1785 "shrd eax, edx, 3",\
1786 parm nomemory [eax][edx][ebx][ecx][esi][edi]\
1787 modify exact [eax ebx ecx edx]
1788
// tmulscale4(a,b,c,d,e,f) [eax,edx,ebx,ecx,esi,edi]: low 32 bits of the
// 64-bit sum a*b + c*d + e*f shifted right by 4 (running sum in ecx:ebx).
1789 int tmulscale4(int,int,int,int,int,int);
1790 #pragma aux tmulscale4 =\
1791 "imul edx",\
1792 "xchg eax, ebx",\
1793 "xchg edx, ecx",\
1794 "imul edx",\
1795 "add ebx, eax",\
1796 "adc ecx, edx",\
1797 "mov eax, esi",\
1798 "imul edi",\
1799 "add eax, ebx",\
1800 "adc edx, ecx",\
1801 "shrd eax, edx, 4",\
1802 parm nomemory [eax][edx][ebx][ecx][esi][edi]\
1803 modify exact [eax ebx ecx edx]
1804
// tmulscale5(a,b,c,d,e,f) [eax,edx,ebx,ecx,esi,edi]: low 32 bits of the
// 64-bit sum a*b + c*d + e*f shifted right by 5 (running sum in ecx:ebx).
1805 int tmulscale5(int,int,int,int,int,int);
1806 #pragma aux tmulscale5 =\
1807 "imul edx",\
1808 "xchg eax, ebx",\
1809 "xchg edx, ecx",\
1810 "imul edx",\
1811 "add ebx, eax",\
1812 "adc ecx, edx",\
1813 "mov eax, esi",\
1814 "imul edi",\
1815 "add eax, ebx",\
1816 "adc edx, ecx",\
1817 "shrd eax, edx, 5",\
1818 parm nomemory [eax][edx][ebx][ecx][esi][edi]\
1819 modify exact [eax ebx ecx edx]
1820
// tmulscale6(a,b,c,d,e,f) [eax,edx,ebx,ecx,esi,edi]: low 32 bits of the
// 64-bit sum a*b + c*d + e*f shifted right by 6 (running sum in ecx:ebx).
1821 int tmulscale6(int,int,int,int,int,int);
1822 #pragma aux tmulscale6 =\
1823 "imul edx",\
1824 "xchg eax, ebx",\
1825 "xchg edx, ecx",\
1826 "imul edx",\
1827 "add ebx, eax",\
1828 "adc ecx, edx",\
1829 "mov eax, esi",\
1830 "imul edi",\
1831 "add eax, ebx",\
1832 "adc edx, ecx",\
1833 "shrd eax, edx, 6",\
1834 parm nomemory [eax][edx][ebx][ecx][esi][edi]\
1835 modify exact [eax ebx ecx edx]
1836
// tmulscale7(a,b,c,d,e,f) [eax,edx,ebx,ecx,esi,edi]: low 32 bits of the
// 64-bit sum a*b + c*d + e*f shifted right by 7 (running sum in ecx:ebx).
1837 int tmulscale7(int,int,int,int,int,int);
1838 #pragma aux tmulscale7 =\
1839 "imul edx",\
1840 "xchg eax, ebx",\
1841 "xchg edx, ecx",\
1842 "imul edx",\
1843 "add ebx, eax",\
1844 "adc ecx, edx",\
1845 "mov eax, esi",\
1846 "imul edi",\
1847 "add eax, ebx",\
1848 "adc edx, ecx",\
1849 "shrd eax, edx, 7",\
1850 parm nomemory [eax][edx][ebx][ecx][esi][edi]\
1851 modify exact [eax ebx ecx edx]
1852
// tmulscale8(a,b,c,d,e,f) [eax,edx,ebx,ecx,esi,edi]: low 32 bits of the
// 64-bit sum a*b + c*d + e*f shifted right by 8 (running sum in ecx:ebx).
1853 int tmulscale8(int,int,int,int,int,int);
1854 #pragma aux tmulscale8 =\
1855 "imul edx",\
1856 "xchg eax, ebx",\
1857 "xchg edx, ecx",\
1858 "imul edx",\
1859 "add ebx, eax",\
1860 "adc ecx, edx",\
1861 "mov eax, esi",\
1862 "imul edi",\
1863 "add eax, ebx",\
1864 "adc edx, ecx",\
1865 "shrd eax, edx, 8",\
1866 parm nomemory [eax][edx][ebx][ecx][esi][edi]\
1867 modify exact [eax ebx ecx edx]
1868
// tmulscale9(a,b,c,d,e,f) [eax,edx,ebx,ecx,esi,edi]: low 32 bits of the
// 64-bit sum a*b + c*d + e*f shifted right by 9 (running sum in ecx:ebx).
1869 int tmulscale9(int,int,int,int,int,int);
1870 #pragma aux tmulscale9 =\
1871 "imul edx",\
1872 "xchg eax, ebx",\
1873 "xchg edx, ecx",\
1874 "imul edx",\
1875 "add ebx, eax",\
1876 "adc ecx, edx",\
1877 "mov eax, esi",\
1878 "imul edi",\
1879 "add eax, ebx",\
1880 "adc edx, ecx",\
1881 "shrd eax, edx, 9",\
1882 parm nomemory [eax][edx][ebx][ecx][esi][edi]\
1883 modify exact [eax ebx ecx edx]
1884
// tmulscale10(a,b,c,d,e,f) [eax,edx,ebx,ecx,esi,edi]: low 32 bits of the
// 64-bit sum a*b + c*d + e*f shifted right by 10 (running sum in ecx:ebx).
1885 int tmulscale10(int,int,int,int,int,int);
1886 #pragma aux tmulscale10 =\
1887 "imul edx",\
1888 "xchg eax, ebx",\
1889 "xchg edx, ecx",\
1890 "imul edx",\
1891 "add ebx, eax",\
1892 "adc ecx, edx",\
1893 "mov eax, esi",\
1894 "imul edi",\
1895 "add eax, ebx",\
1896 "adc edx, ecx",\
1897 "shrd eax, edx, 10",\
1898 parm nomemory [eax][edx][ebx][ecx][esi][edi]\
1899 modify exact [eax ebx ecx edx]
1900
// tmulscale11(a,b,c,d,e,f) [eax,edx,ebx,ecx,esi,edi]: low 32 bits of the
// 64-bit sum a*b + c*d + e*f shifted right by 11 (running sum in ecx:ebx).
1901 int tmulscale11(int,int,int,int,int,int);
1902 #pragma aux tmulscale11 =\
1903 "imul edx",\
1904 "xchg eax, ebx",\
1905 "xchg edx, ecx",\
1906 "imul edx",\
1907 "add ebx, eax",\
1908 "adc ecx, edx",\
1909 "mov eax, esi",\
1910 "imul edi",\
1911 "add eax, ebx",\
1912 "adc edx, ecx",\
1913 "shrd eax, edx, 11",\
1914 parm nomemory [eax][edx][ebx][ecx][esi][edi]\
1915 modify exact [eax ebx ecx edx]
1916
// tmulscale12(a,b,c,d,e,f) [eax,edx,ebx,ecx,esi,edi]: low 32 bits of the
// 64-bit sum a*b + c*d + e*f shifted right by 12 (running sum in ecx:ebx).
1917 int tmulscale12(int,int,int,int,int,int);
1918 #pragma aux tmulscale12 =\
1919 "imul edx",\
1920 "xchg eax, ebx",\
1921 "xchg edx, ecx",\
1922 "imul edx",\
1923 "add ebx, eax",\
1924 "adc ecx, edx",\
1925 "mov eax, esi",\
1926 "imul edi",\
1927 "add eax, ebx",\
1928 "adc edx, ecx",\
1929 "shrd eax, edx, 12",\
1930 parm nomemory [eax][edx][ebx][ecx][esi][edi]\
1931 modify exact [eax ebx ecx edx]
1932
// tmulscale13(a,b,c,d,e,f) [eax,edx,ebx,ecx,esi,edi]: low 32 bits of the
// 64-bit sum a*b + c*d + e*f shifted right by 13 (running sum in ecx:ebx).
1933 int tmulscale13(int,int,int,int,int,int);
1934 #pragma aux tmulscale13 =\
1935 "imul edx",\
1936 "xchg eax, ebx",\
1937 "xchg edx, ecx",\
1938 "imul edx",\
1939 "add ebx, eax",\
1940 "adc ecx, edx",\
1941 "mov eax, esi",\
1942 "imul edi",\
1943 "add eax, ebx",\
1944 "adc edx, ecx",\
1945 "shrd eax, edx, 13",\
1946 parm nomemory [eax][edx][ebx][ecx][esi][edi]\
1947 modify exact [eax ebx ecx edx]
1948
// tmulscale14(a,b,c,d,e,f) [eax,edx,ebx,ecx,esi,edi]: low 32 bits of the
// 64-bit sum a*b + c*d + e*f shifted right by 14 (running sum in ecx:ebx).
1949 int tmulscale14(int,int,int,int,int,int);
1950 #pragma aux tmulscale14 =\
1951 "imul edx",\
1952 "xchg eax, ebx",\
1953 "xchg edx, ecx",\
1954 "imul edx",\
1955 "add ebx, eax",\
1956 "adc ecx, edx",\
1957 "mov eax, esi",\
1958 "imul edi",\
1959 "add eax, ebx",\
1960 "adc edx, ecx",\
1961 "shrd eax, edx, 14",\
1962 parm nomemory [eax][edx][ebx][ecx][esi][edi]\
1963 modify exact [eax ebx ecx edx]
1964
// tmulscale15(a,b,c,d,e,f) [eax,edx,ebx,ecx,esi,edi]: low 32 bits of the
// 64-bit sum a*b + c*d + e*f shifted right by 15 (running sum in ecx:ebx).
1965 int tmulscale15(int,int,int,int,int,int);
1966 #pragma aux tmulscale15 =\
1967 "imul edx",\
1968 "xchg eax, ebx",\
1969 "xchg edx, ecx",\
1970 "imul edx",\
1971 "add ebx, eax",\
1972 "adc ecx, edx",\
1973 "mov eax, esi",\
1974 "imul edi",\
1975 "add eax, ebx",\
1976 "adc edx, ecx",\
1977 "shrd eax, edx, 15",\
1978 parm nomemory [eax][edx][ebx][ecx][esi][edi]\
1979 modify exact [eax ebx ecx edx]
1980
// tmulscale16(a,b,c,d,e,f) [eax,edx,ebx,ecx,esi,edi]: low 32 bits of the
// 64-bit sum a*b + c*d + e*f shifted right by 16 (running sum in ecx:ebx).
1981 int tmulscale16(int,int,int,int,int,int);
1982 #pragma aux tmulscale16 =\
1983 "imul edx",\
1984 "xchg eax, ebx",\
1985 "xchg edx, ecx",\
1986 "imul edx",\
1987 "add ebx, eax",\
1988 "adc ecx, edx",\
1989 "mov eax, esi",\
1990 "imul edi",\
1991 "add eax, ebx",\
1992 "adc edx, ecx",\
1993 "shrd eax, edx, 16",\
1994 parm nomemory [eax][edx][ebx][ecx][esi][edi]\
1995 modify exact [eax ebx ecx edx]
1996
// tmulscale17(a,b,c,d,e,f) [eax,edx,ebx,ecx,esi,edi]: low 32 bits of the
// 64-bit sum a*b + c*d + e*f shifted right by 17 (running sum in ecx:ebx).
1997 int tmulscale17(int,int,int,int,int,int);
1998 #pragma aux tmulscale17 =\
1999 "imul edx",\
2000 "xchg eax, ebx",\
2001 "xchg edx, ecx",\
2002 "imul edx",\
2003 "add ebx, eax",\
2004 "adc ecx, edx",\
2005 "mov eax, esi",\
2006 "imul edi",\
2007 "add eax, ebx",\
2008 "adc edx, ecx",\
2009 "shrd eax, edx, 17",\
2010 parm nomemory [eax][edx][ebx][ecx][esi][edi]\
2011 modify exact [eax ebx ecx edx]
2012
// tmulscale18(a,b,c,d,e,f) [eax,edx,ebx,ecx,esi,edi]: low 32 bits of the
// 64-bit sum a*b + c*d + e*f shifted right by 18 (running sum in ecx:ebx).
2013 int tmulscale18(int,int,int,int,int,int);
2014 #pragma aux tmulscale18 =\
2015 "imul edx",\
2016 "xchg eax, ebx",\
2017 "xchg edx, ecx",\
2018 "imul edx",\
2019 "add ebx, eax",\
2020 "adc ecx, edx",\
2021 "mov eax, esi",\
2022 "imul edi",\
2023 "add eax, ebx",\
2024 "adc edx, ecx",\
2025 "shrd eax, edx, 18",\
2026 parm nomemory [eax][edx][ebx][ecx][esi][edi]\
2027 modify exact [eax ebx ecx edx]
2028
// tmulscale19(a,b,c,d,e,f) [eax,edx,ebx,ecx,esi,edi]: low 32 bits of the
// 64-bit sum a*b + c*d + e*f shifted right by 19 (running sum in ecx:ebx).
2029 int tmulscale19(int,int,int,int,int,int);
2030 #pragma aux tmulscale19 =\
2031 "imul edx",\
2032 "xchg eax, ebx",\
2033 "xchg edx, ecx",\
2034 "imul edx",\
2035 "add ebx, eax",\
2036 "adc ecx, edx",\
2037 "mov eax, esi",\
2038 "imul edi",\
2039 "add eax, ebx",\
2040 "adc edx, ecx",\
2041 "shrd eax, edx, 19",\
2042 parm nomemory [eax][edx][ebx][ecx][esi][edi]\
2043 modify exact [eax ebx ecx edx]
2044
// tmulscale20(a,b,c,d,e,f) [eax,edx,ebx,ecx,esi,edi]: low 32 bits of the
// 64-bit sum a*b + c*d + e*f shifted right by 20 (running sum in ecx:ebx).
2045 int tmulscale20(int,int,int,int,int,int);
2046 #pragma aux tmulscale20 =\
2047 "imul edx",\
2048 "xchg eax, ebx",\
2049 "xchg edx, ecx",\
2050 "imul edx",\
2051 "add ebx, eax",\
2052 "adc ecx, edx",\
2053 "mov eax, esi",\
2054 "imul edi",\
2055 "add eax, ebx",\
2056 "adc edx, ecx",\
2057 "shrd eax, edx, 20",\
2058 parm nomemory [eax][edx][ebx][ecx][esi][edi]\
2059 modify exact [eax ebx ecx edx]
2060
// tmulscale21(a,b,c,d,e,f) [eax,edx,ebx,ecx,esi,edi]: low 32 bits of the
// 64-bit sum a*b + c*d + e*f shifted right by 21 (running sum in ecx:ebx).
2061 int tmulscale21(int,int,int,int,int,int);
2062 #pragma aux tmulscale21 =\
2063 "imul edx",\
2064 "xchg eax, ebx",\
2065 "xchg edx, ecx",\
2066 "imul edx",\
2067 "add ebx, eax",\
2068 "adc ecx, edx",\
2069 "mov eax, esi",\
2070 "imul edi",\
2071 "add eax, ebx",\
2072 "adc edx, ecx",\
2073 "shrd eax, edx, 21",\
2074 parm nomemory [eax][edx][ebx][ecx][esi][edi]\
2075 modify exact [eax ebx ecx edx]
2076
// tmulscale22(a,b,c,d,e,f) [eax,edx,ebx,ecx,esi,edi]: low 32 bits of the
// 64-bit sum a*b + c*d + e*f shifted right by 22 (running sum in ecx:ebx).
2077 int tmulscale22(int,int,int,int,int,int);
2078 #pragma aux tmulscale22 =\
2079 "imul edx",\
2080 "xchg eax, ebx",\
2081 "xchg edx, ecx",\
2082 "imul edx",\
2083 "add ebx, eax",\
2084 "adc ecx, edx",\
2085 "mov eax, esi",\
2086 "imul edi",\
2087 "add eax, ebx",\
2088 "adc edx, ecx",\
2089 "shrd eax, edx, 22",\
2090 parm nomemory [eax][edx][ebx][ecx][esi][edi]\
2091 modify exact [eax ebx ecx edx]
2092
// tmulscale23(a,b,c,d,e,f) [eax,edx,ebx,ecx,esi,edi]: low 32 bits of the
// 64-bit sum a*b + c*d + e*f shifted right by 23 (running sum in ecx:ebx).
2093 int tmulscale23(int,int,int,int,int,int);
2094 #pragma aux tmulscale23 =\
2095 "imul edx",\
2096 "xchg eax, ebx",\
2097 "xchg edx, ecx",\
2098 "imul edx",\
2099 "add ebx, eax",\
2100 "adc ecx, edx",\
2101 "mov eax, esi",\
2102 "imul edi",\
2103 "add eax, ebx",\
2104 "adc edx, ecx",\
2105 "shrd eax, edx, 23",\
2106 parm nomemory [eax][edx][ebx][ecx][esi][edi]\
2107 modify exact [eax ebx ecx edx]
2108
// tmulscale24(a,b,c,d,e,f) [eax,edx,ebx,ecx,esi,edi]: low 32 bits of the
// 64-bit sum a*b + c*d + e*f shifted right by 24 (running sum in ecx:ebx).
2109 int tmulscale24(int,int,int,int,int,int);
2110 #pragma aux tmulscale24 =\
2111 "imul edx",\
2112 "xchg eax, ebx",\
2113 "xchg edx, ecx",\
2114 "imul edx",\
2115 "add ebx, eax",\
2116 "adc ecx, edx",\
2117 "mov eax, esi",\
2118 "imul edi",\
2119 "add eax, ebx",\
2120 "adc edx, ecx",\
2121 "shrd eax, edx, 24",\
2122 parm nomemory [eax][edx][ebx][ecx][esi][edi]\
2123 modify exact [eax ebx ecx edx]
2124
// tmulscale25(a,b,c,d,e,f) [eax,edx,ebx,ecx,esi,edi]: low 32 bits of the
// 64-bit sum a*b + c*d + e*f shifted right by 25 (running sum in ecx:ebx).
2125 int tmulscale25(int,int,int,int,int,int);
2126 #pragma aux tmulscale25 =\
2127 "imul edx",\
2128 "xchg eax, ebx",\
2129 "xchg edx, ecx",\
2130 "imul edx",\
2131 "add ebx, eax",\
2132 "adc ecx, edx",\
2133 "mov eax, esi",\
2134 "imul edi",\
2135 "add eax, ebx",\
2136 "adc edx, ecx",\
2137 "shrd eax, edx, 25",\
2138 parm nomemory [eax][edx][ebx][ecx][esi][edi]\
2139 modify exact [eax ebx ecx edx]
2140
// tmulscale26(a,b,c,d,e,f) [eax,edx,ebx,ecx,esi,edi]: low 32 bits of the
// 64-bit sum a*b + c*d + e*f shifted right by 26 (running sum in ecx:ebx).
2141 int tmulscale26(int,int,int,int,int,int);
2142 #pragma aux tmulscale26 =\
2143 "imul edx",\
2144 "xchg eax, ebx",\
2145 "xchg edx, ecx",\
2146 "imul edx",\
2147 "add ebx, eax",\
2148 "adc ecx, edx",\
2149 "mov eax, esi",\
2150 "imul edi",\
2151 "add eax, ebx",\
2152 "adc edx, ecx",\
2153 "shrd eax, edx, 26",\
2154 parm nomemory [eax][edx][ebx][ecx][esi][edi]\
2155 modify exact [eax ebx ecx edx]
2156
// tmulscale27(a,b,c,d,e,f) [eax,edx,ebx,ecx,esi,edi]: low 32 bits of the
// 64-bit sum a*b + c*d + e*f shifted right by 27 (running sum in ecx:ebx).
2157 int tmulscale27(int,int,int,int,int,int);
2158 #pragma aux tmulscale27 =\
2159 "imul edx",\
2160 "xchg eax, ebx",\
2161 "xchg edx, ecx",\
2162 "imul edx",\
2163 "add ebx, eax",\
2164 "adc ecx, edx",\
2165 "mov eax, esi",\
2166 "imul edi",\
2167 "add eax, ebx",\
2168 "adc edx, ecx",\
2169 "shrd eax, edx, 27",\
2170 parm nomemory [eax][edx][ebx][ecx][esi][edi]\
2171 modify exact [eax ebx ecx edx]
2172
// tmulscale28(a,b,c,d,e,f) [eax,edx,ebx,ecx,esi,edi]: low 32 bits of the
// 64-bit sum a*b + c*d + e*f shifted right by 28 (running sum in ecx:ebx).
2173 int tmulscale28(int,int,int,int,int,int);
2174 #pragma aux tmulscale28 =\
2175 "imul edx",\
2176 "xchg eax, ebx",\
2177 "xchg edx, ecx",\
2178 "imul edx",\
2179 "add ebx, eax",\
2180 "adc ecx, edx",\
2181 "mov eax, esi",\
2182 "imul edi",\
2183 "add eax, ebx",\
2184 "adc edx, ecx",\
2185 "shrd eax, edx, 28",\
2186 parm nomemory [eax][edx][ebx][ecx][esi][edi]\
2187 modify exact [eax ebx ecx edx]
2188
// tmulscale29(a,b,c,d,e,f) [eax,edx,ebx,ecx,esi,edi]: low 32 bits of the
// 64-bit sum a*b + c*d + e*f shifted right by 29 (running sum in ecx:ebx).
2189 int tmulscale29(int,int,int,int,int,int);
2190 #pragma aux tmulscale29 =\
2191 "imul edx",\
2192 "xchg eax, ebx",\
2193 "xchg edx, ecx",\
2194 "imul edx",\
2195 "add ebx, eax",\
2196 "adc ecx, edx",\
2197 "mov eax, esi",\
2198 "imul edi",\
2199 "add eax, ebx",\
2200 "adc edx, ecx",\
2201 "shrd eax, edx, 29",\
2202 parm nomemory [eax][edx][ebx][ecx][esi][edi]\
2203 modify exact [eax ebx ecx edx]
2204
// tmulscale30(a,b,c,d,e,f) [eax,edx,ebx,ecx,esi,edi]: low 32 bits of the
// 64-bit sum a*b + c*d + e*f shifted right by 30 (running sum in ecx:ebx).
2205 int tmulscale30(int,int,int,int,int,int);
2206 #pragma aux tmulscale30 =\
2207 "imul edx",\
2208 "xchg eax, ebx",\
2209 "xchg edx, ecx",\
2210 "imul edx",\
2211 "add ebx, eax",\
2212 "adc ecx, edx",\
2213 "mov eax, esi",\
2214 "imul edi",\
2215 "add eax, ebx",\
2216 "adc edx, ecx",\
2217 "shrd eax, edx, 30",\
2218 parm nomemory [eax][edx][ebx][ecx][esi][edi]\
2219 modify exact [eax ebx ecx edx]
2220
// tmulscale31(a,b,c,d,e,f) [eax,edx,ebx,ecx,esi,edi]: low 32 bits of the
// 64-bit sum a*b + c*d + e*f shifted right by 31 (running sum in ecx:ebx).
2221 int tmulscale31(int,int,int,int,int,int);
2222 #pragma aux tmulscale31 =\
2223 "imul edx",\
2224 "xchg eax, ebx",\
2225 "xchg edx, ecx",\
2226 "imul edx",\
2227 "add ebx, eax",\
2228 "adc ecx, edx",\
2229 "mov eax, esi",\
2230 "imul edi",\
2231 "add eax, ebx",\
2232 "adc edx, ecx",\
2233 "shrd eax, edx, 31",\
2234 parm nomemory [eax][edx][ebx][ecx][esi][edi]\
2235 modify exact [eax ebx ecx edx]
2236
// tmulscale32(a,b,c,d,e,f) [eax,edx,ebx,ecx,esi,edi]: forms the 64-bit sum
// a*b + c*d + e*f and returns its high 32 bits directly from edx
// (value [edx]), so no shrd is required.
2237 int tmulscale32(int,int,int,int,int,int);
2238 #pragma aux tmulscale32 =\
2239 "imul edx",\
2240 "xchg eax, ebx",\
2241 "xchg edx, ecx",\
2242 "imul edx",\
2243 "add ebx, eax",\
2244 "adc ecx, edx",\
2245 "mov eax, esi",\
2246 "imul edi",\
2247 "add eax, ebx",\
2248 "adc edx, ecx",\
2249 parm nomemory [eax][edx][ebx][ecx][esi][edi]\
2250 modify exact [eax ebx ecx edx]\
2251 value [edx]
2252
// boundmulscale(a,b,c) [eax,ebx,ecx]: computes the 64-bit product a*b shifted
// right by c and saturates it to the signed 32-bit range.
// ebx keeps the product's original high word so its sign picks the clamp.
// After the shifts, the result fits only when edx is the sign extension of
// eax: the first xor/js catches a sign mismatch, the second xor restores edx,
// which must then be 0 or 0xffffffff. On overflow, eax becomes 0x7fffffff for
// a non-negative product and 0x80000000 for a negative one.
2253 int boundmulscale(int,int,int);
2254 #pragma aux boundmulscale =\
2255 "imul ebx",\
2256 "mov ebx, edx",\
2257 "shrd eax, edx, cl",\
2258 "sar edx, cl",\
2259 "xor edx, eax",\
2260 "js checkit",\
2261 "xor edx, eax",\
2262 "jz skipboundit",\
2263 "cmp edx, 0xffffffff",\
2264 "je skipboundit",\
2265 "checkit:",\
2266 "mov eax, ebx",\
2267 "sar eax, 31",\
2268 "xor eax, 0x7fffffff",\
2269 "skipboundit:",\
2270 parm nomemory [eax][ebx][ecx]\
2271 modify exact [eax ebx edx]
2272
// divscale(a,b,c) [eax,ebx,ecx]: builds the 64-bit dividend "a shifted left
// by c" in edx:eax and divides it by b with idiv, leaving the quotient in eax.
// The high word is produced by negating cl and using sar, which relies on the
// CPU reducing the shift count modulo 32 (so the count becomes 32-c).
// NOTE(review): for c == 0 the negated count is still 0, so edx is left equal
// to a rather than the sign extension of a - confirm callers never pass 0.
2273 int divscale(int,int,int);
2274 #pragma aux divscale =\
2275 "mov edx, eax",\
2276 "shl eax, cl",\
2277 "neg cl",\
2278 "sar edx, cl",\
2279 "idiv ebx",\
2280 parm nomemory [eax][ebx][ecx]\
2281 modify exact [eax ecx edx]
2282
// divscale1(a,b) [eax,ebx]: doubles a with add (the carry captures the old
// sign bit), turns that carry into the high word with sbb, then idiv by b
// leaves the quotient of (a shifted left by 1) / b in eax.
2283 int divscale1(int,int);
2284 #pragma aux divscale1 =\
2285 "add eax, eax",\
2286 "sbb edx, edx",\
2287 "idiv ebx",\
2288 parm nomemory [eax][ebx]\
2289 modify exact [eax edx]
2290
// divscale2(a,b) [eax,ebx]: edx:eax = a shifted left by 2 (edx is a shifted
// right arithmetically by 30, eax is a*4 via lea); idiv by b leaves the
// quotient in eax.
2291 int divscale2(int,int);
2292 #pragma aux divscale2 =\
2293 "mov edx, eax",\
2294 "sar edx, 30",\
2295 "lea eax, [eax*4]",\
2296 "idiv ebx",\
2297 parm nomemory [eax][ebx]\
2298 modify exact [eax edx]
2299
// divscale3(a,b) [eax,ebx]: edx:eax = a shifted left by 3 (edx is a shifted
// right arithmetically by 29, eax is a*8 via lea); idiv by b leaves the
// quotient in eax.
2300 int divscale3(int,int);
2301 #pragma aux divscale3 =\
2302 "mov edx, eax",\
2303 "sar edx, 29",\
2304 "lea eax, [eax*8]",\
2305 "idiv ebx",\
2306 parm nomemory [eax][ebx]\
2307 modify exact [eax edx]
2308
// divscale4(a,b) [eax,ebx]: edx:eax = a shifted left by 4 (high word via
// sar by 28, low word via shl by 4); idiv by b leaves the quotient in eax.
2309 int divscale4(int,int);
2310 #pragma aux divscale4 =\
2311 "mov edx, eax",\
2312 "sar edx, 28",\
2313 "shl eax, 4",\
2314 "idiv ebx",\
2315 parm nomemory [eax][ebx]\
2316 modify exact [eax edx]
2317
// divscale5(a,b) [eax,ebx]: edx:eax = a shifted left by 5 (high word via
// sar by 27, low word via shl by 5); idiv by b leaves the quotient in eax.
2318 int divscale5(int,int);
2319 #pragma aux divscale5 =\
2320 "mov edx, eax",\
2321 "sar edx, 27",\
2322 "shl eax, 5",\
2323 "idiv ebx",\
2324 parm nomemory [eax][ebx]\
2325 modify exact [eax edx]
2326
// divscale6(a,b) [eax,ebx]: edx:eax = a shifted left by 6 (high word via
// sar by 26, low word via shl by 6); idiv by b leaves the quotient in eax.
2327 int divscale6(int,int);
2328 #pragma aux divscale6 =\
2329 "mov edx, eax",\
2330 "sar edx, 26",\
2331 "shl eax, 6",\
2332 "idiv ebx",\
2333 parm nomemory [eax][ebx]\
2334 modify exact [eax edx]
2335
// divscale7(a,b) [eax,ebx]: edx:eax = a shifted left by 7 (high word via
// sar by 25, low word via shl by 7); idiv by b leaves the quotient in eax.
2336 int divscale7(int,int);
2337 #pragma aux divscale7 =\
2338 "mov edx, eax",\
2339 "sar edx, 25",\
2340 "shl eax, 7",\
2341 "idiv ebx",\
2342 parm nomemory [eax][ebx]\
2343 modify exact [eax edx]
2344
// divscale8(a,b) [eax,ebx]: edx:eax = a shifted left by 8 (high word via
// sar by 24, low word via shl by 8); idiv by b leaves the quotient in eax.
2345 int divscale8(int,int);
2346 #pragma aux divscale8 =\
2347 "mov edx, eax",\
2348 "sar edx, 24",\
2349 "shl eax, 8",\
2350 "idiv ebx",\
2351 parm nomemory [eax][ebx]\
2352 modify exact [eax edx]
2353
// divscale9(a,b) [eax,ebx]: edx:eax = a shifted left by 9 (high word via
// sar by 23, low word via shl by 9); idiv by b leaves the quotient in eax.
2354 int divscale9(int,int);
2355 #pragma aux divscale9 =\
2356 "mov edx, eax",\
2357 "sar edx, 23",\
2358 "shl eax, 9",\
2359 "idiv ebx",\
2360 parm nomemory [eax][ebx]\
2361 modify exact [eax edx]
2362
// divscale10(a,b) [eax,ebx]: edx:eax = a shifted left by 10 (high word via
// sar by 22, low word via shl by 10); idiv by b leaves the quotient in eax.
2363 int divscale10(int,int);
2364 #pragma aux divscale10 =\
2365 "mov edx, eax",\
2366 "sar edx, 22",\
2367 "shl eax, 10",\
2368 "idiv ebx",\
2369 parm nomemory [eax][ebx]\
2370 modify exact [eax edx]
2371
// divscale11(a,b) [eax,ebx]: edx:eax = a shifted left by 11 (high word via
// sar by 21, low word via shl by 11); idiv by b leaves the quotient in eax.
2372 int divscale11(int,int);
2373 #pragma aux divscale11 =\
2374 "mov edx, eax",\
2375 "sar edx, 21",\
2376 "shl eax, 11",\
2377 "idiv ebx",\
2378 parm nomemory [eax][ebx]\
2379 modify exact [eax edx]
2380
// divscale12(a,b) [eax,ebx]: edx:eax = a shifted left by 12 (high word via
// sar by 20, low word via shl by 12); idiv by b leaves the quotient in eax.
2381 int divscale12(int,int);
2382 #pragma aux divscale12 =\
2383 "mov edx, eax",\
2384 "sar edx, 20",\
2385 "shl eax, 12",\
2386 "idiv ebx",\
2387 parm nomemory [eax][ebx]\
2388 modify exact [eax edx]
2389
// divscale13(a,b) [eax,ebx]: edx:eax = a shifted left by 13 (high word via
// sar by 19, low word via shl by 13); idiv by b leaves the quotient in eax.
2390 int divscale13(int,int);
2391 #pragma aux divscale13 =\
2392 "mov edx, eax",\
2393 "sar edx, 19",\
2394 "shl eax, 13",\
2395 "idiv ebx",\
2396 parm nomemory [eax][ebx]\
2397 modify exact [eax edx]
2398
// divscale14(a,b) [eax,ebx]: edx:eax = a shifted left by 14 (high word via
// sar by 18, low word via shl by 14); idiv by b leaves the quotient in eax.
2399 int divscale14(int,int);
2400 #pragma aux divscale14 =\
2401 "mov edx, eax",\
2402 "sar edx, 18",\
2403 "shl eax, 14",\
2404 "idiv ebx",\
2405 parm nomemory [eax][ebx]\
2406 modify exact [eax edx]
2407
// divscale15(a,b) [eax,ebx]: edx:eax = a shifted left by 15 (high word via
// sar by 17, low word via shl by 15); idiv by b leaves the quotient in eax.
2408 int divscale15(int,int);
2409 #pragma aux divscale15 =\
2410 "mov edx, eax",\
2411 "sar edx, 17",\
2412 "shl eax, 15",\
2413 "idiv ebx",\
2414 parm nomemory [eax][ebx]\
2415 modify exact [eax edx]
2416
// divscale16(a,b) [eax,ebx]: edx:eax = a shifted left by 16 (high word via
// sar by 16, low word via shl by 16); idiv by b leaves the quotient in eax.
2417 int divscale16(int,int);
2418 #pragma aux divscale16 =\
2419 "mov edx, eax",\
2420 "sar edx, 16",\
2421 "shl eax, 16",\
2422 "idiv ebx",\
2423 parm nomemory [eax][ebx]\
2424 modify exact [eax edx]
2425
// divscale17(a,b) [eax,ebx]: edx:eax = a shifted left by 17 (high word via
// sar by 15, low word via shl by 17); idiv by b leaves the quotient in eax.
2426 int divscale17(int,int);
2427 #pragma aux divscale17 =\
2428 "mov edx, eax",\
2429 "sar edx, 15",\
2430 "shl eax, 17",\
2431 "idiv ebx",\
2432 parm nomemory [eax][ebx]\
2433 modify exact [eax edx]
2434
// divscale18(a,b) [eax,ebx]: edx:eax = a shifted left by 18 (high word via
// sar by 14, low word via shl by 18); idiv by b leaves the quotient in eax.
2435 int divscale18(int,int);
2436 #pragma aux divscale18 =\
2437 "mov edx, eax",\
2438 "sar edx, 14",\
2439 "shl eax, 18",\
2440 "idiv ebx",\
2441 parm nomemory [eax][ebx]\
2442 modify exact [eax edx]
2443
// divscale19(a,b) [eax,ebx]: edx:eax = a shifted left by 19 (high word via
// sar by 13, low word via shl by 19); idiv by b leaves the quotient in eax.
2444 int divscale19(int,int);
2445 #pragma aux divscale19 =\
2446 "mov edx, eax",\
2447 "sar edx, 13",\
2448 "shl eax, 19",\
2449 "idiv ebx",\
2450 parm nomemory [eax][ebx]\
2451 modify exact [eax edx]
2452
// divscale20(a,b) [eax,ebx]: edx:eax = a shifted left by 20 (high word via
// sar by 12, low word via shl by 20); idiv by b leaves the quotient in eax.
2453 int divscale20(int,int);
2454 #pragma aux divscale20 =\
2455 "mov edx, eax",\
2456 "sar edx, 12",\
2457 "shl eax, 20",\
2458 "idiv ebx",\
2459 parm nomemory [eax][ebx]\
2460 modify exact [eax edx]
2461
// divscale21(a,b) [eax,ebx]: edx:eax = a shifted left by 21 (high word via
// sar by 11, low word via shl by 21); idiv by b leaves the quotient in eax.
2462 int divscale21(int,int);
2463 #pragma aux divscale21 =\
2464 "mov edx, eax",\
2465 "sar edx, 11",\
2466 "shl eax, 21",\
2467 "idiv ebx",\
2468 parm nomemory [eax][ebx]\
2469 modify exact [eax edx]
2470
// divscale22(a,b) [eax,ebx]: edx:eax = a shifted left by 22 (high word via
// sar by 10, low word via shl by 22); idiv by b leaves the quotient in eax.
2471 int divscale22(int,int);
2472 #pragma aux divscale22 =\
2473 "mov edx, eax",\
2474 "sar edx, 10",\
2475 "shl eax, 22",\
2476 "idiv ebx",\
2477 parm nomemory [eax][ebx]\
2478 modify exact [eax edx]
2479
// divscale23(a,b) [eax,ebx]: edx:eax = a shifted left by 23 (high word via
// sar by 9, low word via shl by 23); idiv by b leaves the quotient in eax.
2480 int divscale23(int,int);
2481 #pragma aux divscale23 =\
2482 "mov edx, eax",\
2483 "sar edx, 9",\
2484 "shl eax, 23",\
2485 "idiv ebx",\
2486 parm nomemory [eax][ebx]\
2487 modify exact [eax edx]
2488
// divscale24(a,b) [eax,ebx]: edx:eax = a shifted left by 24 (high word via
// sar by 8, low word via shl by 24); idiv by b leaves the quotient in eax.
2489 int divscale24(int,int);
2490 #pragma aux divscale24 =\
2491 "mov edx, eax",\
2492 "sar edx, 8",\
2493 "shl eax, 24",\
2494 "idiv ebx",\
2495 parm nomemory [eax][ebx]\
2496 modify exact [eax edx]
2497
// divscale25(a,b) [eax,ebx]: edx:eax = a shifted left by 25 (high word via
// sar by 7, low word via shl by 25); idiv by b leaves the quotient in eax.
2498 int divscale25(int,int);
2499 #pragma aux divscale25 =\
2500 "mov edx, eax",\
2501 "sar edx, 7",\
2502 "shl eax, 25",\
2503 "idiv ebx",\
2504 parm nomemory [eax][ebx]\
2505 modify exact [eax edx]
2506
// divscale26(a,b) [eax,ebx]: edx:eax = a shifted left by 26 (high word via
// sar by 6, low word via shl by 26); idiv by b leaves the quotient in eax.
2507 int divscale26(int,int);
2508 #pragma aux divscale26 =\
2509 "mov edx, eax",\
2510 "sar edx, 6",\
2511 "shl eax, 26",\
2512 "idiv ebx",\
2513 parm nomemory [eax][ebx]\
2514 modify exact [eax edx]
2515
// divscale27(a,b) [eax,ebx]: edx:eax = a shifted left by 27 (high word via
// sar by 5, low word via shl by 27); idiv by b leaves the quotient in eax.
2516 int divscale27(int,int);
2517 #pragma aux divscale27 =\
2518 "mov edx, eax",\
2519 "sar edx, 5",\
2520 "shl eax, 27",\
2521 "idiv ebx",\
2522 parm nomemory [eax][ebx]\
2523 modify exact [eax edx]
2524
// divscale28(a,b) [eax,ebx]: edx:eax = a shifted left by 28 (high word via
// sar by 4, low word via shl by 28); idiv by b leaves the quotient in eax.
2525 int divscale28(int,int);
2526 #pragma aux divscale28 =\
2527 "mov edx, eax",\
2528 "sar edx, 4",\
2529 "shl eax, 28",\
2530 "idiv ebx",\
2531 parm nomemory [eax][ebx]\
2532 modify exact [eax edx]
2533
// divscale29(a,b) [eax,ebx]: edx:eax = a shifted left by 29 (high word via
// sar by 3, low word via shl by 29); idiv by b leaves the quotient in eax.
2534 int divscale29(int,int);
2535 #pragma aux divscale29 =\
2536 "mov edx, eax",\
2537 "sar edx, 3",\
2538 "shl eax, 29",\
2539 "idiv ebx",\
2540 parm nomemory [eax][ebx]\
2541 modify exact [eax edx]
2542
// divscale30(a,b) [eax,ebx]: edx:eax = a shifted left by 30 (high word via
// sar by 2, low word via shl by 30); idiv by b leaves the quotient in eax.
2543 int divscale30(int,int);
2544 #pragma aux divscale30 =\
2545 "mov edx, eax",\
2546 "sar edx, 2",\
2547 "shl eax, 30",\
2548 "idiv ebx",\
2549 parm nomemory [eax][ebx]\
2550 modify exact [eax edx]
2551
// divscale31(a,b) [eax,ebx]: edx:eax = a shifted left by 31 (high word via
// sar by 1, low word via shl by 31); idiv by b leaves the quotient in eax.
2552 int divscale31(int,int);
2553 #pragma aux divscale31 =\
2554 "mov edx, eax",\
2555 "sar edx, 1",\
2556 "shl eax, 31",\
2557 "idiv ebx",\
2558 parm nomemory [eax][ebx]\
2559 modify exact [eax edx]
2560
// divscale32(a,b) [edx,ebx]: a is passed directly in edx and eax is cleared,
// so edx:eax already equals a shifted left by 32; idiv by b leaves the
// quotient in eax. Note that the first parameter register differs from the
// other divscale variants (edx instead of eax).
2561 int divscale32(int,int);
2562 #pragma aux divscale32 =\
2563 "xor eax, eax",\
2564 "idiv ebx",\
2565 parm nomemory [edx][ebx]\
2566 modify exact [eax edx]
2567
// readpixel(ptr) [edi]: loads the byte stored at ptr into al.
// Only al is written, so the upper 24 bits of the returned int keep whatever
// eax held before; callers should use just the low byte.
// The parm clause deliberately omits "nomemory": this code reads memory
// through its pointer argument, so the compiler must not assume that pending
// stores to that location can be deferred across the call. This matches the
// other memory-touching pragmas in this file (drawpixel, clearbuf, swapchar).
int readpixel(void*);
#pragma aux readpixel =\
"mov al, byte ptr [edi]",\
parm [edi]\
modify exact [eax]
2573
// drawpixel(ptr, val) [edi, eax]: stores the low byte of val (al) at ptr.
// No register is modified; the inline code sets no return value itself.
2574 int drawpixel(void*,int);
2575 #pragma aux drawpixel =\
2576 "mov byte ptr [edi], al",\
2577 parm [edi][eax]\
2578 modify exact
2579
// drawpixels(ptr, val) [edi, eax]: stores the low 16 bits of val (ax) at ptr.
// No register is modified; the inline code sets no return value itself.
2580 int drawpixels(void*,int);
2581 #pragma aux drawpixels =\
2582 "mov word ptr [edi], ax",\
2583 parm [edi][eax]\
2584 modify exact
2585
// drawpixelses(ptr, val) [edi, eax]: stores all 32 bits of val (eax) at ptr.
// No register is modified; the inline code sets no return value itself.
2586 int drawpixelses(void*,int);
2587 #pragma aux drawpixelses =\
2588 "mov dword ptr [edi], eax",\
2589 parm [edi][eax]\
2590 modify exact
2591
// clearbuf(ptr, count, val) [edi, ecx, eax]: rep stosd writes the 32-bit val
// to count consecutive dwords starting at ptr.
// Assumes the direction flag is clear so edi advances upward - TODO confirm.
2592 int clearbuf(void*,int,int);
2593 #pragma aux clearbuf =\
2594 "rep stosd",\
2595 parm [edi][ecx][eax]\
2596 modify exact [edi ecx]
2597
// clearbufbyte(ptr, count, val) [edi, ecx, eax]: fills count bytes at ptr.
// Counts below 4 are handled with an optional stosb plus rep stosw. Larger
// counts first store a byte and/or word until edi is 4-byte aligned, then
// use rep stosd for the bulk and finish the 0-3 leftover bytes (tracked in bl).
// The stores use al, ax and eax, so presumably the caller passes the fill
// byte replicated through all four bytes of val - TODO confirm.
// Assumes the direction flag is clear - TODO confirm.
2598 int clearbufbyte(void*,int,int);
2599 #pragma aux clearbufbyte =\
2600 "cmp ecx, 4",\
2601 "jae longcopy",\
2602 "test cl, 1",\
2603 "jz preskip",\
2604 "stosb",\
2605 "preskip: shr ecx, 1",\
2606 "rep stosw",\
2607 "jmp endit",\
2608 "longcopy: test edi, 1",\
2609 "jz skip1",\
2610 "stosb",\
2611 "dec ecx",\
2612 "skip1: test edi, 2",\
2613 "jz skip2",\
2614 "stosw",\
2615 "sub ecx, 2",\
2616 "skip2: mov ebx, ecx",\
2617 "shr ecx, 2",\
2618 "rep stosd",\
2619 "test bl, 2",\
2620 "jz skip3",\
2621 "stosw",\
2622 "skip3: test bl, 1",\
2623 "jz endit",\
2624 "stosb",\
2625 "endit:",\
2626 parm [edi][ecx][eax]\
2627 modify [ebx]
2628
// copybuf(src, dst, count) [esi, edi, ecx]: rep movsd copies count dwords
// from src to dst.
// Assumes the direction flag is clear so both pointers advance - TODO confirm.
2629 int copybuf(void*,void*,int);
2630 #pragma aux copybuf =\
2631 "rep movsd",\
2632 parm [esi][edi][ecx]\
2633 modify exact [ecx esi edi]
2634
// copybufbyte(src, dst, count) [esi, edi, ecx]: copies count bytes from src
// to dst. Counts below 4 use an optional movsb plus rep movsw. Larger counts
// first copy a byte and/or word until the destination (edi) is 4-byte
// aligned, then use rep movsd for the bulk and finish the 0-3 leftover bytes
// (tracked in bl). Only the destination alignment is checked.
// Assumes the direction flag is clear - TODO confirm.
2635 int copybufbyte(void*,void*,int);
2636 #pragma aux copybufbyte =\
2637 "cmp ecx, 4",\
2638 "jae longcopy",\
2639 "test cl, 1",\
2640 "jz preskip",\
2641 "movsb",\
2642 "preskip: shr ecx, 1",\
2643 "rep movsw",\
2644 "jmp endit",\
2645 "longcopy: test edi, 1",\
2646 "jz skip1",\
2647 "movsb",\
2648 "dec ecx",\
2649 "skip1: test edi, 2",\
2650 "jz skip2",\
2651 "movsw",\
2652 "sub ecx, 2",\
2653 "skip2: mov ebx, ecx",\
2654 "shr ecx, 2",\
2655 "rep movsd",\
2656 "test bl, 2",\
2657 "jz skip3",\
2658 "movsw",\
2659 "skip3: test bl, 1",\
2660 "jz endit",\
2661 "movsb",\
2662 "endit:",\
2663 parm [esi][edi][ecx]\
2664 modify [ebx]
2665
// copybufreverse(src, dst, count) [esi, edi, ecx]: copies count bytes while
// reversing their order. src points at the first byte to read and is walked
// downward; dst is walked upward.
// Bit 0 of count selects a single-byte copy, bit 1 a two-byte copy (byte
// order flipped with ror 8), and the remaining count/4 iterations move four
// bytes at a time (byte order flipped with bswap).
// No modify clause is given, so the compiler's default clobber assumptions
// apply.
2666 int copybufreverse(void*,void*,int);
2667 #pragma aux copybufreverse =\
2668 "shr ecx, 1",\
2669 "jnc skipit1",\
2670 "mov al, byte ptr [esi]",\
2671 "dec esi",\
2672 "mov byte ptr [edi], al",\
2673 "inc edi",\
2674 "skipit1: shr ecx, 1",\
2675 "jnc skipit2",\
2676 "mov ax, word ptr [esi-1]",\
2677 "sub esi, 2",\
2678 "ror ax, 8",\
2679 "mov word ptr [edi], ax",\
2680 "add edi, 2",\
2681 "skipit2: test ecx, ecx",\
2682 "jz endloop",\
2683 "begloop: mov eax, dword ptr [esi-3]",\
2684 "sub esi, 4",\
2685 "bswap eax",\
2686 "mov dword ptr [edi], eax",\
2687 "add edi, 4",\
2688 "dec ecx",\
2689 "jnz begloop",\
2690 "endloop:",\
2691 parm [esi][edi][ecx]
2692
// qinterpolatedown16(bufptr, num, val, add) [eax, ecx, edx, esi]:
// writes num consecutive 32-bit values starting at bufptr. Each value is the
// running accumulator val shifted right arithmetically by 16, and add is
// added to the accumulator after every store.
// The main loop emits two values per iteration (num/2 iterations); ebx keeps
// the original num so that one trailing value is stored when num is odd.
// The odd-count test sits on the skipbegcalc label so that it is also
// performed when the pair loop is skipped: num == 0 therefore stores nothing
// and num == 1 stores exactly one value. Previously the skip path jumped
// straight to the trailing store and wrote one element even for num == 0.
int qinterpolatedown16(int,int,int,int);
#pragma aux qinterpolatedown16 =\
"mov ebx, ecx",\
"shr ecx, 1",\
"jz skipbegcalc",\
"begqcalc: lea edi, [edx+esi]",\
"sar edx, 16",\
"mov dword ptr [eax], edx",\
"lea edx, [edi+esi]",\
"sar edi, 16",\
"mov dword ptr [eax+4], edi",\
"add eax, 8",\
"dec ecx",\
"jnz begqcalc",\
"skipbegcalc: test ebx, 1",\
"jz skipbegqcalc2",\
"sar edx, 16",\
"mov dword ptr [eax], edx",\
"skipbegqcalc2:",\
parm [eax][ecx][edx][esi]\
modify exact [eax ebx ecx edx edi]
2714
// qinterpolatedown16short(bufptr, num, val, add) [eax, ecx, edx, esi]:
// writes num consecutive 16-bit values starting at bufptr, each taken from
// the running accumulator val shifted right by 16, adding add to the
// accumulator after every value. Nothing is written when num is 0.
// If bufptr is not 4-byte aligned (bit 1 set), one value is stored first.
// The main loop then packs two values into a single 32-bit store, and a
// final 16-bit store handles an odd leftover.
// NOTE(review): in the paired store, ebx holds the sign-extended first value
// and is added to the masked second value. When the first value is negative,
// its 0xffff upper half appears to be added into the second value's slot -
// confirm whether callers only use non-negative accumulators.
2715 int qinterpolatedown16short(int,int,int,int);
2716 #pragma aux qinterpolatedown16short =\
2717 "test ecx, ecx",\
2718 "jz endit",\
2719 "test al, 2",\
2720 "jz skipalignit",\
2721 "mov ebx, edx",\
2722 "sar ebx, 16",\
2723 "mov word ptr [eax], bx",\
2724 "add edx, esi",\
2725 "add eax, 2",\
2726 "dec ecx",\
2727 "jz endit",\
2728 "skipalignit: sub ecx, 2",\
2729 "jc finishit",\
2730 "begqcalc: mov ebx, edx",\
2731 "add edx, esi",\
2732 "sar ebx, 16",\
2733 "mov edi, edx",\
2734 "and edi, 0ffff0000h",\
2735 "add edx, esi",\
2736 "add ebx, edi",\
2737 "mov dword ptr [eax], ebx",\
2738 "add eax, 4",\
2739 "sub ecx, 2",\
2740 "jnc begqcalc",\
2741 "test cl, 1",\
2742 "jz endit",\
2743 "finishit: mov ebx, edx",\
2744 "sar ebx, 16",\
2745 "mov word ptr [eax], bx",\
2746 "endit:",\
2747 parm [eax][ecx][edx][esi]\
2748 modify exact [eax ebx ecx edx edi]
2749
// mul3(a) [eax]: returns a*3 using a single lea (a + a*2).
2750 int mul3(int);
2751 #pragma aux mul3 =\
2752 "lea eax, [eax+eax*2]",\
2753 parm nomemory [eax]
2754
// mul5(a) [eax]: returns a*5 using a single lea (a + a*4).
2755 int mul5(int);
2756 #pragma aux mul5 =\
2757 "lea eax, [eax+eax*4]",\
2758 parm nomemory [eax]
2759
// mul9(a) [eax]: returns a*9 using a single lea (a + a*8).
2760 int mul9(int);
2761 #pragma aux mul9 =\
2762 "lea eax, [eax+eax*8]",\
2763 parm nomemory [eax]
2764
2765 //returns eax/ebx, dmval = eax%ebx;
// divmod(a,b) [eax,ebx]: unsigned division (edx is zeroed, then div).
// Returns the quotient in eax and stores the remainder in the global dmval.
2766 int divmod(int,int);
2767 #pragma aux divmod =\
2768 "xor edx, edx",\
2769 "div ebx",\
2770 "mov dmval, edx",\
2771 parm [eax][ebx]\
2772 modify exact [eax edx]\
2773 value [eax]
2774
2775 //returns eax%ebx, dmval = eax/ebx;
// moddiv(a,b) [eax,ebx]: unsigned division (edx is zeroed, then div).
// Returns the remainder in edx and stores the quotient in the global dmval.
2776 int moddiv(int,int);
2777 #pragma aux moddiv =\
2778 "xor edx, edx",\
2779 "div ebx",\
2780 "mov dmval, eax",\
2781 parm [eax][ebx]\
2782 modify exact [eax edx]\
2783 value [edx]
2784
// klabs(a) [eax]: absolute value; a is negated only when its sign bit is set.
2785 int klabs(int);
2786 #pragma aux klabs =\
2787 "test eax, eax",\
2788 "jns skipnegate",\
2789 "neg eax",\
2790 "skipnegate:",\
2791 parm nomemory [eax]
2792
// ksgn(a) [ebx]: branch-free sign function yielding -1, 0 or 1 in eax.
// add ebx,ebx moves the sign bit into the carry; sbb turns it into eax = -1
// (negative) or 0. The unsigned compare of eax against the doubled value then
// sets the carry only for positive a, and adc bumps eax from 0 to 1.
2793 int ksgn(int);
2794 #pragma aux ksgn =\
2795 "add ebx, ebx",\
2796 "sbb eax, eax",\
2797 "cmp eax, ebx",\
2798 "adc al, 0",\
2799 parm nomemory [ebx]\
2800 modify exact [eax ebx]
2801
2802 //eax = (unsigned min)umin(eax,ebx)
// umin(a,b) [eax,ebx]: branch-free unsigned minimum.
// sub leaves a-b and a borrow when a is below b; sbb expands the borrow into
// an all-ones or all-zero mask in ecx, so the result is b + (a-b) when a is
// smaller and b otherwise.
2803 int umin(int,int);
2804 #pragma aux umin =\
2805 "sub eax, ebx",\
2806 "sbb ecx, ecx",\
2807 "and eax, ecx",\
2808 "add eax, ebx",\
2809 parm nomemory [eax][ebx]\
2810 modify exact [eax ecx]
2811
2812 //eax = (unsigned max)umax(eax,ebx)
// umax(a,b) [eax,ebx]: branch-free unsigned maximum.
// Same borrow-mask trick as umin, but the mask is inverted (xor with
// 0xffffffff), so the result is b + (a-b) when a is not below b and b
// otherwise.
2813 int umax(int,int);
2814 #pragma aux umax =\
2815 "sub eax, ebx",\
2816 "sbb ecx, ecx",\
2817 "xor ecx, 0xffffffff",\
2818 "and eax, ecx",\
2819 "add eax, ebx",\
2820 parm nomemory [eax][ebx]\
2821 modify exact [eax ecx]
2822
// kmin(a,b) [eax,ebx]: signed minimum; eax is replaced by b unless a is
// already less than b (jl is a signed comparison).
2823 int kmin(int,int);
2824 #pragma aux kmin =\
2825 "cmp eax, ebx",\
2826 "jl skipit",\
2827 "mov eax, ebx",\
2828 "skipit:",\
2829 parm nomemory [eax][ebx]\
2830 modify exact [eax]
2831
// kmax(a,b) [eax,ebx]: signed maximum; eax is replaced by b unless a is
// already greater than b (jg is a signed comparison).
2832 int kmax(int,int);
2833 #pragma aux kmax =\
2834 "cmp eax, ebx",\
2835 "jg skipit",\
2836 "mov eax, ebx",\
2837 "skipit:",\
2838 parm nomemory [eax][ebx]\
2839 modify exact [eax]
2840
// swapchar(p,q) [eax,ebx]: exchanges the single bytes stored at p and q,
// using cl and ch as temporaries.
2841 int swapchar(void*,void*);
2842 #pragma aux swapchar =\
2843 "mov cl, [eax]",\
2844 "mov ch, [ebx]",\
2845 "mov [ebx], cl",\
2846 "mov [eax], ch",\
2847 parm [eax][ebx]\
2848 modify exact [ecx]
2849
// swapshort(p,q) [eax,ebx]: exchanges the 16-bit values stored at p and q,
// using cx and dx as temporaries.
2850 int swapshort(void*,void*);
2851 #pragma aux swapshort =\
2852 "mov cx, [eax]",\
2853 "mov dx, [ebx]",\
2854 "mov [ebx], cx",\
2855 "mov [eax], dx",\
2856 parm [eax][ebx]\
2857 modify exact [ecx edx]
2858
// swaplong(p,q) [eax,ebx]: exchanges the 32-bit values stored at p and q,
// using ecx and edx as temporaries.
2859 int swaplong(void*,void*);
2860 #pragma aux swaplong =\
2861 "mov ecx, [eax]",\
2862 "mov edx, [ebx]",\
2863 "mov [ebx], ecx",\
2864 "mov [eax], edx",\
2865 parm [eax][ebx]\
2866 modify exact [ecx edx]
2867
// swapbuf4(p,q,count) [eax,ebx,ecx]: exchanges count consecutive 32-bit
// values between the buffers at p and q, advancing both pointers by 4 per
// iteration.
// The count is tested only after the loop body has run, so the first swap
// always happens. NOTE(review): a count of 0 would not terminate as
// expected - confirm callers always pass a positive count.
2868 int swapbuf4(void*,void*,int);
2869 #pragma aux swapbuf4 =\
2870 "begswap:",\
2871 "mov esi, [eax]",\
2872 "mov edi, [ebx]",\
2873 "mov [ebx], esi",\
2874 "mov [eax], edi",\
2875 "add eax, 4",\
2876 "add ebx, 4",\
2877 "dec ecx",\
2878 "jnz short begswap",\
2879 parm [eax][ebx][ecx]\
2880 modify exact [eax ebx ecx esi edi]
2881
// swap64bit(p,q) [eax,ebx]: exchanges the 8 bytes stored at p and q as two
// interleaved 32-bit swaps (offsets 0 and 4), using ecx and edx as
// temporaries.
2882 int swap64bit(void*,void*);
2883 #pragma aux swap64bit =\
2884 "mov ecx, [eax]",\
2885 "mov edx, [ebx]",\
2886 "mov [ebx], ecx",\
2887 "mov ecx, [eax+4]",\
2888 "mov [eax], edx",\
2889 "mov edx, [ebx+4]",\
2890 "mov [ebx+4], ecx",\
2891 "mov [eax+4], edx",\
2892 parm [eax][ebx]\
2893 modify exact [ecx edx]
2894
2895 //swapchar2(ptr1,ptr2,xsiz); is the same as:
2896 //swapchar(ptr1,ptr2); swapchar(ptr1+1,ptr2+xsiz);
// swapchar2(ptr1, ptr2, xsiz) [eax, ebx, esi]: esi is first turned into the
// address ptr2+xsiz. The two bytes at ptr1 are read as one word (cx) and the
// bytes at ptr2 and ptr2+xsiz are gathered into dl and dh. cl and ch are then
// written to ptr2 and ptr2+xsiz, and dx is written back to ptr1 as one word.
2897 int swapchar2(void*,void*,int);
2898 #pragma aux swapchar2 =\
2899 "add esi, ebx",\
2900 "mov cx, [eax]",\
2901 "mov dl, [ebx]",\
2902 "mov [ebx], cl",\
2903 "mov dh, [esi]",\
2904 "mov [esi], ch",\
2905 "mov [eax], dx",\
2906 parm [eax][ebx][esi]\
2907 modify exact [ecx edx esi]
2908 //}}}
2909
2910 #elif defined(_MSC_VER) && defined(_M_IX86) && USE_ASM // __WATCOMC__
2911
2912 //
2913 // Microsoft C inline assembler
2914 //
2915
2916 //{{{
// sqr(a): low 32 bits of a*a. There is no return statement; the value the
// _asm block leaves in eax serves as the function result.
sqr(int a)2917 static __inline int sqr(int a)
2918 {
2919 _asm {
2920 mov eax, a
2921 imul eax, eax
2922 }
2923 }
2924
// Returns (a*d)/c. The product is kept as a 64-bit value in edx:eax and then
// divided by c with a signed divide; the quotient is left in eax.
static __inline int scale(int a, int d, int c)
{
	_asm {
		mov eax, a
		imul d		// edx:eax = a*d
		idiv c		// eax = edx:eax / c
	}
}
2933
// Returns the low 32 bits of (a*d) >> c, using the 64-bit product in edx:eax.
// The shift count is taken from cl.
static __inline int mulscale(int a, int d, int c)
{
	_asm {
		mov ecx, c
		mov eax, a
		imul d			// edx:eax = a*d
		shrd eax, edx, cl	// shift bits from edx down into eax
	}
}
2943
// MULSCALE(x) defines mulscale<x>(a,d): the low 32 bits of (a*d) >> x, with
// the shift count x baked in as an immediate. Instantiated for x = 1..31;
// the helper macro is removed again afterwards.
#define MULSCALE(x) \
static __inline int mulscale##x (int a, int d) \
{ \
	_asm mov eax, a \
	_asm imul d \
	_asm shrd eax, edx, x \
}

MULSCALE(1)	MULSCALE(2)	MULSCALE(3)	MULSCALE(4)
MULSCALE(5)	MULSCALE(6)	MULSCALE(7)	MULSCALE(8)
MULSCALE(9)	MULSCALE(10)	MULSCALE(11)	MULSCALE(12)
MULSCALE(13)	MULSCALE(14)	MULSCALE(15)	MULSCALE(16)
MULSCALE(17)	MULSCALE(18)	MULSCALE(19)	MULSCALE(20)
MULSCALE(21)	MULSCALE(22)	MULSCALE(23)	MULSCALE(24)
MULSCALE(25)	MULSCALE(26)	MULSCALE(27)	MULSCALE(28)
MULSCALE(29)	MULSCALE(30)	MULSCALE(31)
#undef MULSCALE
// Returns the high 32 bits of the 64-bit product a*d, i.e. (a*d) >> 32.
static __inline int mulscale32(int a, int d)
{
	_asm {
		mov eax, a
		imul d		// edx:eax = a*d
		mov eax, edx	// result = high dword
	}
}
2969
// Returns the low 32 bits of (a*d + S*D) >> c.
// The first product is parked in esi:ebx while the second is computed in
// edx:eax; the two are then added as 64-bit values (add/adc) and shifted.
static __inline int dmulscale(int a, int d, int S, int D, int c)
{
	_asm {
		mov ecx, c
		mov eax, a
		imul d
		mov ebx, eax
		mov eax, S
		mov esi, edx
		imul D
		add eax, ebx
		adc edx, esi
		shrd eax, edx, cl
	}
}
2985
// DMULSCALE(x) defines dmulscale<x>(a,d,S,D): the low 32 bits of
// (a*d + S*D) >> x, with x as an immediate shift count. The 64-bit sum is
// built in edx:eax from the first product saved in esi:ebx.
// Instantiated for x = 1..31; the helper macro is removed again afterwards.
#define DMULSCALE(x) \
static __inline int dmulscale##x (int a, int d, int S, int D) \
{ \
	_asm mov eax, a \
	_asm imul d \
	_asm mov ebx, eax \
	_asm mov eax, S \
	_asm mov esi, edx \
	_asm imul D \
	_asm add eax, ebx \
	_asm adc edx, esi \
	_asm shrd eax, edx, x \
}

DMULSCALE(1)	DMULSCALE(2)	DMULSCALE(3)	DMULSCALE(4)
DMULSCALE(5)	DMULSCALE(6)	DMULSCALE(7)	DMULSCALE(8)
DMULSCALE(9)	DMULSCALE(10)	DMULSCALE(11)	DMULSCALE(12)
DMULSCALE(13)	DMULSCALE(14)	DMULSCALE(15)	DMULSCALE(16)
DMULSCALE(17)	DMULSCALE(18)	DMULSCALE(19)	DMULSCALE(20)
DMULSCALE(21)	DMULSCALE(22)	DMULSCALE(23)	DMULSCALE(24)
DMULSCALE(25)	DMULSCALE(26)	DMULSCALE(27)	DMULSCALE(28)
DMULSCALE(29)	DMULSCALE(30)	DMULSCALE(31)
#undef DMULSCALE
// Returns the high 32 bits of the 64-bit sum a*d + S*D, i.e. the sum >> 32.
static __inline int dmulscale32(int a, int d, int S, int D)
{
	_asm {
		mov eax, a
		imul d
		mov ebx, eax
		mov eax, S
		mov esi, edx
		imul D
		add eax, ebx
		adc edx, esi
		mov eax, edx	// result = high dword of the sum
	}
}
3023
// TMULSCALE(x) defines tmulscale<x>(a,d,b,c,S,D): the low 32 bits of
// (a*d + b*c + S*D) >> x, with x as an immediate shift count.
// The two xchg instructions move the first product out of edx:eax into
// ecx:ebx while loading b and c for the second multiply; ecx:ebx then
// accumulates the running 64-bit sum.
// Instantiated for x = 1..31; the helper macro is removed again afterwards.
#define TMULSCALE(x) \
static __inline int tmulscale##x (int a, int d, int b, int c, int S, int D) \
{ \
	_asm mov eax, a \
	_asm mov ebx, b \
	_asm imul d \
	_asm xchg eax, ebx \
	_asm mov ecx, c \
	_asm xchg edx, ecx \
	_asm imul edx \
	_asm add ebx, eax \
	_asm adc ecx, edx \
	_asm mov eax, S \
	_asm imul D \
	_asm add eax, ebx \
	_asm adc edx, ecx \
	_asm shrd eax, edx, x \
}

TMULSCALE(1)	TMULSCALE(2)	TMULSCALE(3)	TMULSCALE(4)
TMULSCALE(5)	TMULSCALE(6)	TMULSCALE(7)	TMULSCALE(8)
TMULSCALE(9)	TMULSCALE(10)	TMULSCALE(11)	TMULSCALE(12)
TMULSCALE(13)	TMULSCALE(14)	TMULSCALE(15)	TMULSCALE(16)
TMULSCALE(17)	TMULSCALE(18)	TMULSCALE(19)	TMULSCALE(20)
TMULSCALE(21)	TMULSCALE(22)	TMULSCALE(23)	TMULSCALE(24)
TMULSCALE(25)	TMULSCALE(26)	TMULSCALE(27)	TMULSCALE(28)
TMULSCALE(29)	TMULSCALE(30)	TMULSCALE(31)
#undef TMULSCALE
// Returns the high 32 bits of the 64-bit sum a*d + b*c + S*D.
// ecx:ebx accumulates the first two products; the third is added in edx:eax.
static __inline int tmulscale32(int a, int d, int b, int c, int S, int D)
{
	_asm {
		mov eax, a
		mov ebx, b
		imul d
		xchg eax, ebx
		mov ecx, c
		xchg edx, ecx
		imul edx
		add ebx, eax
		adc ecx, edx
		mov eax, S
		imul D
		add eax, ebx
		adc edx, ecx
		mov eax, edx	// result = high dword of the sum
	}
}
3071
// Returns (a*b) >> c, saturated to the 32-bit signed range.
// After the shift, eax holds the candidate result and edx the bits above it.
// The result is accepted only when edx is a pure sign extension of eax
// (0 or 0xffffffff with a matching sign); otherwise it is replaced by
// 0x7fffffff or 0x80000000 according to the sign of the full product
// (its high dword is saved in ebx before the shift).
static __inline int boundmulscale(int a, int b, int c)
{
	_asm {
		mov eax, a
		mov ecx, c
		imul b
		mov ebx, edx		// keep the product's sign for saturation
		shrd eax, edx, cl
		sar edx, cl
		xor edx, eax
		js checkit		// sign of edx and eax differ: overflow
		xor edx, eax		// undo the xor, edx is restored
		jz skipboundit
		cmp edx, 0xffffffff
		je skipboundit
	checkit:
		mov eax, ebx
		sar eax, 31		// 0 or -1
		xor eax, 0x7fffffff	// 0x7fffffff or 0x80000000
	skipboundit:
	}
}
3094
// Returns (a << c) / b, with the shifted dividend held as 64 bits in edx:eax.
// eax receives the low part (a << c); edx receives the high part by
// arithmetically shifting a right by the negated count.
// NOTE(review): shift counts are taken from cl, which the CPU masks to
// 5 bits, so this presumably expects c in 1..31 - confirm against callers.
static __inline int divscale(int a, int b, int c)
{
	_asm {
		mov eax, a
		mov ecx, c
		mov edx, eax
		shl eax, cl
		neg cl
		sar edx, cl
		idiv b
	}
}
3107
// Returns (a << 1) / b using a 64-bit dividend in edx:eax.
// add doubles a and leaves its old top bit in the carry flag; sbb then
// turns that carry into 0 or -1 in edx, i.e. the sign extension.
static __inline int divscale1(int a, int b)
{
	_asm {
		mov eax, a
		add eax, eax
		sbb edx, edx
		idiv b
	}
}
3117
// Returns (a << 2) / b using a 64-bit dividend in edx:eax:
// edx = a >> 30 (arithmetic) is the high part, eax = a*4 the low part.
static __inline int divscale2(int a, int b)
{
	_asm {
		mov eax, a
		mov edx, eax
		sar edx, 30
		lea eax, [eax*4]
		idiv b
	}
}
3128
// Returns (a << 3) / b using a 64-bit dividend in edx:eax:
// edx = a >> 29 (arithmetic) is the high part, eax = a*8 the low part.
static __inline int divscale3(int a, int b)
{
	_asm {
		mov eax, a
		mov edx, eax
		sar edx, 29
		lea eax, [eax*8]
		idiv b
	}
}
3139
// DIVSCALE(x,y) defines divscale<y>(a,b): (a << y) / b with a 64-bit
// dividend in edx:eax. x is the matching right-shift count for the high
// dword and is always passed as 32-y in the instantiations below (y = 4..31).
// NOTE(review): unlike MULSCALE/DMULSCALE/TMULSCALE, this helper macro is not
// #undef'd after use, so DIVSCALE stays visible to code including this header.
#define DIVSCALE(x,y) \
static __inline int divscale##y(int a, int b) \
{ \
	_asm mov eax, a \
	_asm mov edx, eax \
	_asm sar edx, x \
	_asm shl eax, y \
	_asm idiv b \
}

DIVSCALE(28,4)	DIVSCALE(27,5)	DIVSCALE(26,6)	DIVSCALE(25,7)
DIVSCALE(24,8)	DIVSCALE(23,9)	DIVSCALE(22,10)	DIVSCALE(21,11)
DIVSCALE(20,12)	DIVSCALE(19,13)	DIVSCALE(18,14)	DIVSCALE(17,15)
DIVSCALE(16,16)	DIVSCALE(15,17)	DIVSCALE(14,18)	DIVSCALE(13,19)
DIVSCALE(12,20)	DIVSCALE(11,21)	DIVSCALE(10,22)	DIVSCALE(9,23)
DIVSCALE(8,24)	DIVSCALE(7,25)	DIVSCALE(6,26)	DIVSCALE(5,27)
DIVSCALE(4,28)	DIVSCALE(3,29)	DIVSCALE(2,30)	DIVSCALE(1,31)
3157
// Returns (d << 32) / b: d is loaded as the high dword of the 64-bit
// dividend and the low dword is zeroed.
static __inline int divscale32(int d, int b)
{
	_asm {
		mov edx, d
		xor eax, eax
		idiv b
	}
}
3166
// Returns the byte stored at address d (left in al).
static __inline char readpixel(void *d)
{
	_asm {
		mov edx, d
		mov al, byte ptr [edx]
	}
}
3174
// Stores the byte a at address d.
static __inline void drawpixel(void *d, char a)
{
	_asm {
		mov edx, d
		mov al, a
		mov byte ptr [edx], al
	}
}
3183
// Stores the 16-bit value a at address d (one word-sized write).
static __inline void drawpixels(void *d, short a)
{
	_asm {
		mov edx, d
		mov ax, a
		mov word ptr [edx], ax
	}
}
3192
// Stores the 32-bit value a at address d (one dword-sized write).
static __inline void drawpixelses(void *d, int a)
{
	_asm {
		mov edx, d
		mov eax, a
		mov dword ptr [edx], eax
	}
}
3201
// Fills the buffer at d with c copies of the 32-bit value a.
// Note that c counts dwords, not bytes (rep stosd).
static __inline void clearbuf(void *d, int c, int a)
{
	_asm {
		mov edi, d
		mov ecx, c
		mov eax, a
		rep stosd
	}
}
3211
// Fills c bytes at d from the value a.
// Counts below 4 are handled with an optional byte store plus word stores.
// Larger counts first store a byte and/or a word to bring edi to a 4-byte
// boundary, then fill dwords with rep stosd, then finish the remaining
// 0-3 bytes using the low bits of the leftover count saved in ebx.
// NOTE(review): the byte, word and dword stores write al, ax and eax
// respectively, so a uniform fill presumably requires a to carry the same
// byte in all four positions - confirm against callers.
static __inline void clearbufbyte(void *d, int c, int a)
{
	_asm {
		mov edi, d
		mov ecx, c
		mov eax, a
		cmp ecx, 4
		jae longcopy
		test cl, 1
		jz preskip
		stosb
	preskip:
		shr ecx, 1
		rep stosw
		jmp endit
	longcopy:
		test edi, 1
		jz skip1
		stosb
		dec ecx
	skip1:
		test edi, 2
		jz skip2
		stosw
		sub ecx, 2
	skip2:
		mov ebx, ecx
		shr ecx, 2
		rep stosd
		test bl, 2
		jz skip3
		stosw
	skip3:
		test bl, 1
		jz endit
		stosb
	endit:
	}
}
3251
// Copies c dwords from s to d. Note that c counts dwords, not bytes
// (rep movsd).
static __inline void copybuf(void *s, void *d, int c)
{
	_asm {
		mov esi, s
		mov edi, d
		mov ecx, c
		rep movsd
	}
}
3261
// Copies c bytes from s to d.
// Counts below 4 are handled with an optional byte move plus word moves.
// Larger counts first move a byte and/or a word to bring the destination
// (edi) to a 4-byte boundary, then copy dwords with rep movsd, then finish
// the remaining 0-3 bytes using the low bits of the leftover count in ebx.
static __inline void copybufbyte(void *s, void *d, int c)
{
	_asm {
		mov esi, s
		mov edi, d
		mov ecx, c
		cmp ecx, 4
		jae longcopy
		test cl, 1
		jz preskip
		movsb
	preskip:
		shr ecx, 1
		rep movsw
		jmp endit
	longcopy:
		test edi, 1
		jz skip1
		movsb
		dec ecx
	skip1:
		test edi, 2
		jz skip2
		movsw
		sub ecx, 2
	skip2:
		mov ebx, ecx
		shr ecx, 2
		rep movsd
		test bl, 2
		jz skip3
		movsw
	skip3:
		test bl, 1
		jz endit
		movsb
	endit:
	}
}
3301
// Copies c bytes in reversed order: the source is read downwards starting
// at address s, the destination is written upwards starting at d.
// The count is split by its low two bits: an odd byte first, then a
// byte-swapped word (ror 8), then the rest as byte-swapped dwords (bswap).
// Multi-byte reads use [esi-1] / [esi-3] so that the read ends at esi.
static __inline void copybufreverse(void *s, void *d, int c)
{
	_asm {
		mov esi, s
		mov edi, d
		mov ecx, c
		shr ecx, 1
		jnc skipit1
		mov al, byte ptr [esi]
		dec esi
		mov byte ptr [edi], al
		inc edi
	skipit1:
		shr ecx, 1
		jnc skipit2
		mov ax, word ptr [esi-1]
		sub esi, 2
		ror ax, 8
		mov word ptr [edi], ax
		add edi, 2
	skipit2:
		test ecx, ecx
		jz endloop
	begloop:
		mov eax, dword ptr [esi-3]
		sub esi, 4
		bswap eax
		mov dword ptr [edi], eax
		add edi, 4
		dec ecx
		jnz begloop
	endloop:
	}
}
3336
// Writes a run of 32-bit values to a: element i receives (d + i*s) >> 16
// (arithmetic shift), for c elements. The loop is unrolled to produce two
// elements per iteration; an odd trailing element is stored afterwards.
// NOTE(review): when c >> 1 is zero the code jumps straight to the
// single-element store, so c == 0 still writes one value - confirm callers
// never pass 0.
static __inline void qinterpolatedown16(void *a, int c, int d, int s)
{
	_asm {
		mov eax, a
		mov ecx, c
		mov edx, d
		mov esi, s
		mov ebx, ecx		// keep the full count for the odd test
		shr ecx, 1
		jz skipbegcalc
	begqcalc:
		lea edi, [edx+esi]
		sar edx, 16
		mov dword ptr [eax], edx
		lea edx, [edi+esi]
		sar edi, 16
		mov dword ptr [eax+4], edi
		add eax, 8
		dec ecx
		jnz begqcalc
		test ebx, 1
		jz skipbegqcalc2
	skipbegcalc:
		sar edx, 16
		mov dword ptr [eax], edx
	skipbegqcalc2:
	}
}
3365
// Writes a run of 16-bit values to a: element i receives the value
// (d + i*s) >> 16, for c elements. Does nothing when c is 0.
// If a is not 4-byte aligned (bit 1 set) one element is stored first; the
// main loop then packs two elements into each dword store, and a final
// single element is stored when one is left over.
// NOTE(review): in the packed loop ebx is sign-extended by sar before the
// high half (edi) is added, so a negative low element would borrow from the
// high element - presumably inputs are expected to be non-negative; confirm.
static __inline void qinterpolatedown16short(void *a, int c, int d, int s)
{
	_asm {
		mov eax, a
		mov ecx, c
		mov edx, d
		mov esi, s
		test ecx, ecx
		jz endit
		test al, 2
		jz skipalignit
		mov ebx, edx
		sar ebx, 16
		mov word ptr [eax], bx
		add edx, esi
		add eax, 2
		dec ecx
		jz endit
	skipalignit:
		sub ecx, 2
		jc finishit		// only one element left
	begqcalc:
		mov ebx, edx
		add edx, esi
		sar ebx, 16
		mov edi, edx
		and edi, 0ffff0000h
		add edx, esi
		add ebx, edi
		mov dword ptr [eax], ebx
		add eax, 4
		sub ecx, 2
		jnc begqcalc
		test cl, 1
		jz endit
	finishit:
		mov ebx, edx
		sar ebx, 16
		mov word ptr [eax], bx
	endit:
	}
}
3408
// Returns a*3, computed with a single lea (a + a*2).
static __inline int mul3(int a)
{
	_asm {
		mov eax, a
		lea eax, [eax+eax*2]
	}
}
3416
// Returns a*5, computed with a single lea (a + a*4).
static __inline int mul5(int a)
{
	_asm {
		mov eax, a
		lea eax, [eax+eax*4]
	}
}
3424
// Returns a*9, computed with a single lea (a + a*8).
static __inline int mul9(int a)
{
	_asm {
		mov eax, a
		lea eax, [eax+eax*8]
	}
}
3432
// Returns a/b and stores a%b in the global dmval.
// The division is unsigned (div with edx zeroed), so both operands are
// treated as 32-bit unsigned values.
static __inline int divmod(int a, int b)
{
	_asm {
		mov eax, a
		xor edx, edx
		div b
		mov dmval, edx
	}
}
3443
// Returns a%b and stores a/b in the global dmval.
// The division is unsigned (div with edx zeroed), so both operands are
// treated as 32-bit unsigned values.
static __inline int moddiv(int a, int b)
{
	_asm {
		mov eax, a
		xor edx, edx
		div b
		mov dmval, eax
		mov eax, edx
	}
}
3455
// Returns the absolute value of a: negates eax only when its sign bit is set.
static __inline int klabs(int a)
{
	_asm {
		mov eax, a
		test eax, eax
		jns skipnegate
		neg eax
	skipnegate:
	}
}
3466
// Returns the sign of b as -1, 0 or 1, without branching.
// add ebx,ebx moves b's sign bit into the carry flag; sbb makes eax 0 or -1
// from it; cmp/adc then adds 1 when eax is 0 and b was non-zero.
static __inline int ksgn(int b)
{
	_asm {
		mov ebx, b
		add ebx, ebx
		sbb eax, eax
		cmp eax, ebx
		adc al, 0
	}
}
3477
// Returns the smaller of a and b, compared as unsigned values, without
// branching: the borrow from a-b becomes an all-ones or all-zero mask in
// ecx that keeps the difference only when a is below b.
static __inline int umin(int a, int b)
{
	_asm {
		mov eax, a
		sub eax, b
		sbb ecx, ecx
		and eax, ecx
		add eax, b
	}
}
3489
// Returns the larger of a and b, compared as unsigned values, without
// branching: same scheme as umin but with the borrow mask inverted, so the
// difference is kept only when a is not below b.
static __inline int umax(int a, int b)
{
	_asm {
		mov eax, a
		sub eax, b
		sbb ecx, ecx
		xor ecx, 0xffffffff
		and eax, ecx
		add eax, b
	}
}
3502
// Returns the smaller of a and b using a signed comparison (jl).
static __inline int kmin(int a, int b)
{
	_asm {
		mov eax, a
		mov ebx, b
		cmp eax, ebx
		jl skipit
		mov eax, ebx
	skipit:
	}
}
3514
// Returns the larger of a and b using a signed comparison (jg).
static __inline int kmax(int a, int b)
{
	_asm {
		mov eax, a
		mov ebx, b
		cmp eax, ebx
		jg skipit
		mov eax, ebx
	skipit:
	}
}
3526
// Exchanges the single bytes at a and b; cl and ch carry the two bytes.
static __inline void swapchar(void *a, void *b)
{
	_asm {
		mov eax, a
		mov ebx, b
		mov cl, [eax]
		mov ch, [ebx]
		mov [ebx], cl
		mov [eax], ch
	}
}
3538
// Exchanges the 16-bit words at a and b; cx and dx carry the two words.
static __inline void swapshort(void *a, void *b)
{
	_asm {
		mov eax, a
		mov ebx, b
		mov cx, [eax]
		mov dx, [ebx]
		mov [ebx], cx
		mov [eax], dx
	}
}
3550
// Exchanges the 32-bit dwords at a and b; ecx and edx carry the two dwords.
static __inline void swaplong(void *a, void *b)
{
	_asm {
		mov eax, a
		mov ebx, b
		mov ecx, [eax]
		mov edx, [ebx]
		mov [ebx], ecx
		mov [eax], edx
	}
}
3562
// Exchanges c consecutive dwords between the buffers at a and b.
// NOTE(review): the count is decremented and tested only after the first
// swap, so c == 0 does not produce an empty loop - confirm callers never
// pass 0.
static __inline void swapbuf4(void *a, void *b, int c)
{
	_asm {
		mov eax, a
		mov ebx, b
		mov ecx, c
	begswap:
		mov esi, [eax]
		mov edi, [ebx]
		mov [ebx], esi
		mov [eax], edi
		add eax, 4
		add ebx, 4
		dec ecx
		jnz short begswap
	}
}
3580
// Exchanges the 8 bytes at a and b as two dword swaps, interleaving the
// low-half and high-half loads and stores through ecx and edx.
static __inline void swap64bit(void *a, void *b)
{
	_asm {
		mov eax, a
		mov ebx, b
		mov ecx, [eax]
		mov edx, [ebx]
		mov [ebx], ecx
		mov ecx, [eax+4]
		mov [eax], edx
		mov edx, [ebx+4]
		mov [ebx+4], ecx
		mov [eax+4], edx
	}
}
3596
// swapchar2(ptr1,ptr2,xsiz); is the same as:
//   swapchar(ptr1,ptr2); swapchar(ptr1+1,ptr2+xsiz);
// esi is turned into the address b+s. The two bytes at a are read and
// written back as a single 16-bit access.
static __inline void swapchar2(void *a, void *b, int s)
{
	_asm {
		mov eax, a
		mov ebx, b
		mov esi, s
		add esi, ebx
		mov cx, [eax]
		mov dl, [ebx]
		mov [ebx], cl
		mov dh, [esi]
		mov [esi], ch
		mov [eax], dx
	}
}
3614 //}}}
3615
3616 #else // _MSC_VER
3617
3618 //
3619 // Generic C
3620 //
3621
// Fixed-width cast helpers for the generic C implementations in this
// section; they are #undef'd again further down.
#define qw(x)	((int64_t)(x))		// quadword cast
#define dw(x)	((int32_t)(x))		// doubleword cast
#define wo(x)	((int16_t)(x))		// word cast
#define by(x)	((int8_t)(x))		// byte cast
3626
// _scaler(N) generates the fixed-shift helpers for the constant shift N:
//   mulscaleN(eax,edx)                     = (eax*edx) >> N
//   divscaleN(eax,ebx)                     = (eax * 2^N) / ebx
//   dmulscaleN(eax,edx,esi,edi)            = (eax*edx + esi*edi) >> N
//   tmulscaleN(eax,edx,ebx,ecx,esi,edi)    = (eax*edx + ebx*ecx + esi*edi) >> N
// Every intermediate is 64 bits wide and the result is truncated to 32 bits.
// divscaleN scales the dividend by multiplying with 2^N instead of using
// `<<`, because left-shifting a negative value is undefined behaviour in C.
// Instantiated for N = 1..32.
#define _scaler(a) \
static inline int mulscale##a(int eax, int edx) \
{ \
	return (int32_t)(((int64_t)eax * (int64_t)edx) >> a); \
} \
\
static inline int divscale##a(int eax, int ebx) \
{ \
	return (int32_t)(((int64_t)eax * ((int64_t)1 << a)) / (int64_t)ebx); \
} \
\
static inline int dmulscale##a(int eax, int edx, int esi, int edi) \
{ \
	return (int32_t)((((int64_t)eax * (int64_t)edx) + ((int64_t)esi * (int64_t)edi)) >> a); \
} \
\
static inline int tmulscale##a(int eax, int edx, int ebx, int ecx, int esi, int edi) \
{ \
	return (int32_t)((((int64_t)eax * (int64_t)edx) + ((int64_t)ebx * (int64_t)ecx) + ((int64_t)esi * (int64_t)edi)) >> a); \
}

_scaler(1)	_scaler(2)	_scaler(3)	_scaler(4)
_scaler(5)	_scaler(6)	_scaler(7)	_scaler(8)
_scaler(9)	_scaler(10)	_scaler(11)	_scaler(12)
_scaler(13)	_scaler(14)	_scaler(15)	_scaler(16)
_scaler(17)	_scaler(18)	_scaler(19)	_scaler(20)
_scaler(21)	_scaler(22)	_scaler(23)	_scaler(24)
_scaler(25)	_scaler(26)	_scaler(27)	_scaler(28)
_scaler(29)	_scaler(30)	_scaler(31)	_scaler(32)
3656
// Exchanges the single bytes addressed by a and b.
static inline void swapchar(void *a, void *b)
{
	int8_t *lhs = (int8_t *)a;
	int8_t *rhs = (int8_t *)b;
	const int8_t saved = *rhs;

	*rhs = *lhs;
	*lhs = saved;
}
// swapchar2(a,b,s) swaps the byte at a with the byte at b, and the byte at
// a+1 with the byte at b+s.
static inline void swapchar2(void *a, void *b, int s)
{
	int8_t *first = (int8_t *)a;
	int8_t *second = (int8_t *)b;

	swapchar(first, second);
	swapchar(first + 1, second + s);
}
// Exchanges the 16-bit values addressed by a and b.
static inline void swapshort(void *a, void *b)
{
	int16_t *lhs = (int16_t *)a;
	int16_t *rhs = (int16_t *)b;
	const int16_t saved = *rhs;

	*rhs = *lhs;
	*lhs = saved;
}
// Exchanges the 32-bit values addressed by a and b.
static inline void swaplong(void *a, void *b)
{
	int32_t *lhs = (int32_t *)a;
	int32_t *rhs = (int32_t *)b;
	const int32_t saved = *rhs;

	*rhs = *lhs;
	*lhs = saved;
}
// Exchanges the 64-bit values addressed by a and b.
static inline void swap64bit(void *a, void *b)
{
	int64_t *lhs = (int64_t *)a;
	int64_t *rhs = (int64_t *)b;
	const int64_t saved = *rhs;

	*rhs = *lhs;
	*lhs = saved;
}
3662
// Returns the signed byte stored at address s.
static inline int8_t readpixel(void *s)
{
	const int8_t *pixel = (int8_t *)s;
	return *pixel;
}
// Stores the byte a at address s.
static inline void drawpixel(void *s, int8_t a)
{
	int8_t *pixel = (int8_t *)s;
	*pixel = a;
}
// Stores the 16-bit value a at address s.
static inline void drawpixels(void *s, int16_t a)
{
	int16_t *pixels = (int16_t *)s;
	*pixels = a;
}
// Stores the 32-bit value a at address s.
static inline void drawpixelses(void *s, int32_t a)
{
	int32_t *pixels = (int32_t *)s;
	*pixels = a;
}
3667
// Returns a*3.
// The multiply is done in unsigned arithmetic so that negative inputs and
// overflow wrap around (as the lea-based assembler versions do) instead of
// hitting undefined behaviour; the previous (a<<1)+a left-shifted negative
// values. Converting the wrapped result back to int relies on the usual
// two's-complement conversion.
static inline int mul3(int a)
{
	return (int)((unsigned int)a * 3u);
}
// Returns a*5.
// The multiply is done in unsigned arithmetic so that negative inputs and
// overflow wrap around (as the lea-based assembler versions do) instead of
// hitting undefined behaviour; the previous (a<<2)+a left-shifted negative
// values. Converting the wrapped result back to int relies on the usual
// two's-complement conversion.
static inline int mul5(int a)
{
	return (int)((unsigned int)a * 5u);
}
// Returns a*9.
// The multiply is done in unsigned arithmetic so that negative inputs and
// overflow wrap around (as the lea-based assembler versions do) instead of
// hitting undefined behaviour; the previous (a<<3)+a left-shifted negative
// values. Converting the wrapped result back to int relies on the usual
// two's-complement conversion.
static inline int mul9(int a)
{
	return (int)((unsigned int)a * 9u);
}
3671
divmod(int a,int b)3672 static inline int divmod(int a, int b) { unsigned int _a=(unsigned int)a, _b=(unsigned int)b; dmval = _a%_b; return _a/_b; }
moddiv(int a,int b)3673 static inline int moddiv(int a, int b) { unsigned int _a=(unsigned int)a, _b=(unsigned int)b; dmval = _a/_b; return _a%_b; }
3674
// Returns the absolute value of a.
// The negation is done in unsigned arithmetic so that the most negative int
// wraps back to itself (as the assembler `neg` does) instead of triggering
// signed-overflow undefined behaviour. Converting the result back to int
// relies on the usual two's-complement conversion.
static inline int klabs(int a)
{
	if (a < 0) {
		return (int)(0u - (unsigned int)a);
	}
	return a;
}
// Returns the sign of a: 1 for positive, -1 for negative, 0 for zero.
static inline int ksgn(int a)
{
	return (a > 0) - (a < 0);
}
3677
// Returns whichever of a and b is smaller when both are compared as
// unsigned values.
static inline int umin(int a, int b)
{
	const unsigned int ua = (unsigned int)a;
	const unsigned int ub = (unsigned int)b;

	return (ua < ub) ? a : b;
}
// Returns whichever of a and b is larger when both are compared as
// unsigned values.
static inline int umax(int a, int b)
{
	const unsigned int ua = (unsigned int)a;
	const unsigned int ub = (unsigned int)b;

	return (ua < ub) ? b : a;
}
// Returns the smaller of a and b (signed comparison).
static inline int kmin(int a, int b)
{
	return (a < b) ? a : b;
}
// Returns the larger of a and b (signed comparison).
static inline int kmax(int a, int b)
{
	return (a < b) ? b : a;
}
3682
// Returns eax*eax, truncated to 32 bits.
// The multiply is done in unsigned arithmetic so that large inputs wrap
// around (as the assembler imul versions do) instead of triggering
// signed-overflow undefined behaviour. Converting the result back to int
// relies on the usual two's-complement conversion.
static inline int sqr(int eax)
{
	const unsigned int value = (unsigned int)eax;
	return (int)(value * value);
}
// Returns (eax*edx)/ecx, using a 64-bit product and truncating the
// quotient to 32 bits.
static inline int scale(int eax, int edx, int ecx)
{
	const int64_t product = qw(eax) * qw(edx);
	const int64_t quotient = product / qw(ecx);

	return dw(quotient);
}
// Returns (eax*edx) >> ecx, using a 64-bit product and truncating the
// result to 32 bits. The shift count is ecx narrowed to a signed byte.
static inline int mulscale(int eax, int edx, int ecx)
{
	const int64_t product = qw(eax) * qw(edx);

	return dw(product >> by(ecx));
}
// Returns (eax * 2^ecx) / ebx, using a 64-bit dividend and truncating the
// quotient to 32 bits. The shift count is ecx narrowed to a signed byte,
// as before.
// The dividend is scaled by multiplying with 2^ecx rather than by `<<`,
// because left-shifting a negative value is undefined behaviour in C.
static inline int divscale(int eax, int ebx, int ecx)
{
	const int64_t factor = (int64_t)1 << (int8_t)ecx;
	const int64_t dividend = (int64_t)eax * factor;

	return (int32_t)(dividend / (int64_t)ebx);
}
// Returns (eax*edx + esi*edi) >> ecx, using 64-bit products and truncating
// the result to 32 bits. The shift count is ecx narrowed to a signed byte.
static inline int dmulscale(int eax, int edx, int esi, int edi, int ecx)
{
	const int64_t first = qw(eax) * qw(edx);
	const int64_t second = qw(esi) * qw(edi);

	return dw((first + second) >> by(ecx));
}
3688
// Returns (a*d) >> c computed in 64 bits and clamped to [INT_MIN, INT_MAX].
// (courtesy of Ken)
static inline int boundmulscale(int a, int d, int c)
{
	const int64_t scaled = ((int64_t)a * (int64_t)d) >> c;

	if (scaled < INT_MIN) {
		return INT_MIN;
	}
	if (scaled >= INT_MAX) {
		return INT_MAX;
	}
	return (int)scaled;
}
3697
3698 #undef qw
3699 #undef dw
3700 #undef wo
3701 #undef by
3702 #undef _scaler
3703
// The remaining routines are only declared here for the generic C build;
// their definitions are not part of this header.

// Interpolation fills (32-bit and 16-bit element variants).
void qinterpolatedown16 (void *bufptr, int num, int val, int add);
void qinterpolatedown16short (void *bufptr, int num, int val, int add);

// Dword-granular buffer fill, copy and swap.
// NOTE(review): in the inline-assembler versions of these three, c counts
// dwords rather than bytes - presumably the C definitions match; confirm.
void clearbuf(void* d, int c, int a);
void copybuf(void* s, void* d, int c);
void swapbuf4(void* a, void* b, int c);

// Byte-granular buffer fill, copy and reversed copy.
void clearbufbyte(void *D, int c, int a);
void copybufbyte(void *S, void *D, int c);
void copybufreverse(void *S, void *D, int c);
3714
3715 #endif
3716
3717 #ifdef __cplusplus
3718 }
3719 #endif
3720
3721 #endif // __pragmas_h__
3722
3723