1 /*
2  * This file is part of the Scale2x project.
3  *
4  * Copyright (C) 2001, 2002, 2003, 2004 Andrea Mazzoleni
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19  */
20 
21 /*
 * This file contains a C implementation of the Scale3x effect.
23  *
 * You can find a high-level description of the effect at:
25  *
26  * http://scale2x.sourceforge.net/
27  *
 * As an alternative to the previous license terms, you are allowed to use
 * this code in your program under these conditions:
30  * - the program is not used in commercial activities.
31  * - the whole source code of the program is released with the binary.
32  * - derivative works of the program are allowed.
33  */
34 
35 #if HAVE_CONFIG_H
36 #include <config.h>
37 #endif
38 
39 #include "scale3x.h"
40 
41 #include <assert.h>
42 
43 /***************************************************************************/
44 /* Scale3x C implementation */
45 
46 /**
47  * Define the macro USE_SCALE_RANDOMWRITE to enable
48  * an optimized version which writes memory in random order.
49  * This version is a little faster if you write in system memory.
50  * But it's a lot slower if you write in video memory.
51  * So, enable it only if you are sure to never write directly in video memory.
52  */
53 /* #define USE_SCALE_RANDOMWRITE */
54 
/**
 * Expand one 8 bit source pixel into its 3x3 block of output pixels.
 * The nine value arguments are the 3x3 source neighbourhood of the pixel:
 * ul/u/ur come from the previous row, l/c/r from the current row and
 * dl/d/dr from the next row; c is the pixel being expanded.
 * \param out0 Destination of the three pixels of the first output row.
 * \param out1 Destination of the three pixels of the second output row.
 * \param out2 Destination of the three pixels of the third output row.
 */
static inline void scale3x_8_def_whole_pixel(
	scale3x_uint8* out0, scale3x_uint8* out1, scale3x_uint8* out2,
	scale3x_uint8 ul, scale3x_uint8 u, scale3x_uint8 ur,
	scale3x_uint8 l, scale3x_uint8 c, scale3x_uint8 r,
	scale3x_uint8 dl, scale3x_uint8 d, scale3x_uint8 dr)
{
	/* every output pixel starts as a plain copy of the source pixel */
	scale3x_uint8 p00 = c;
	scale3x_uint8 p01 = c;
	scale3x_uint8 p02 = c;
	scale3x_uint8 p10 = c;
	scale3x_uint8 p12 = c;
	scale3x_uint8 p20 = c;
	scale3x_uint8 p21 = c;
	scale3x_uint8 p22 = c;

	/* neighbours are only propagated when up/down and left/right both differ */
	if (u != d && l != r) {
		if (l == u) {
			p00 = l;
		}
		if ((l == u && c != ur) || (r == u && c != ul)) {
			p01 = u;
		}
		if (r == u) {
			p02 = r;
		}
		if ((l == u && c != dl) || (l == d && c != ul)) {
			p10 = l;
		}
		if ((r == u && c != dr) || (r == d && c != ur)) {
			p12 = r;
		}
		if (l == d) {
			p20 = l;
		}
		if ((l == d && c != dr) || (r == d && c != dl)) {
			p21 = d;
		}
		if (r == d) {
			p22 = r;
		}
	}

	out0[0] = p00;
	out0[1] = p01;
	out0[2] = p02;
	out1[0] = p10;
	out1[1] = c; /* the middle pixel of the block is always the source pixel */
	out1[2] = p12;
	out2[0] = p20;
	out2[1] = p21;
	out2[2] = p22;
}

/**
 * Scale by a factor of 3 a row of 8 bit pixels, producing the three
 * output rows in a single pass.
 * The columns missing beyond the left and right edges are replaced by the
 * edge column itself.
 * \param dst0 First destination row, 3 * count pixels.
 * \param dst1 Second destination row, 3 * count pixels.
 * \param dst2 Third destination row, 3 * count pixels.
 * \param src0 Pointer at the first pixel of the previous row.
 * \param src1 Pointer at the first pixel of the current row.
 * \param src2 Pointer at the first pixel of the next row.
 * \param count Length in pixels of the source rows. It must be at least 2.
 */
static inline void scale3x_8_def_whole(scale3x_uint8* restrict dst0, scale3x_uint8* restrict dst1, scale3x_uint8* restrict dst2, const scale3x_uint8* restrict src0, const scale3x_uint8* restrict src1, const scale3x_uint8* restrict src2, unsigned count)
{
	unsigned last;
	unsigned x;

	assert(count >= 2);
	last = count - 1;

	/* first pixel: the left column is the pixel's own column */
	scale3x_8_def_whole_pixel(dst0, dst1, dst2,
		src0[0], src0[0], src0[1],
		src1[0], src1[0], src1[1],
		src2[0], src2[0], src2[1]);

	/* central pixels */
	for (x = 1; x < last; ++x) {
		dst0 += 3;
		dst1 += 3;
		dst2 += 3;
		scale3x_8_def_whole_pixel(dst0, dst1, dst2,
			src0[x - 1], src0[x], src0[x + 1],
			src1[x - 1], src1[x], src1[x + 1],
			src2[x - 1], src2[x], src2[x + 1]);
	}

	/* last pixel: the right column is the pixel's own column */
	dst0 += 3;
	dst1 += 3;
	dst2 += 3;
	scale3x_8_def_whole_pixel(dst0, dst1, dst2,
		src0[last - 1], src0[last], src0[last],
		src1[last - 1], src1[last], src1[last],
		src2[last - 1], src2[last], src2[last]);
}
145 
/**
 * Compute the three pixels of an outer output row for one 8 bit source pixel.
 * \param out Destination of the three output pixels.
 * \param ul Pixel before u in the src0 row.
 * \param u Pixel of the src0 row in the same column as c.
 * \param ur Pixel after u in the src0 row.
 * \param l Pixel before c in the src1 row.
 * \param c Source pixel being expanded.
 * \param r Pixel after c in the src1 row.
 * \param d Pixel of the src2 row in the same column as c.
 */
static inline void scale3x_8_def_border_pixel(
	scale3x_uint8* out,
	scale3x_uint8 ul, scale3x_uint8 u, scale3x_uint8 ur,
	scale3x_uint8 l, scale3x_uint8 c, scale3x_uint8 r,
	scale3x_uint8 d)
{
	/* every output pixel starts as a plain copy of the source pixel */
	scale3x_uint8 left = c;
	scale3x_uint8 mid = c;
	scale3x_uint8 right = c;

	/* neighbours are only propagated when up/down and left/right both differ */
	if (u != d && l != r) {
		if (l == u) {
			left = l;
		}
		if ((l == u && c != ur) || (r == u && c != ul)) {
			mid = u;
		}
		if (r == u) {
			right = r;
		}
	}

	out[0] = left;
	out[1] = mid;
	out[2] = right;
}

/**
 * Scale by a factor of 3 a row of 8 bit pixels, producing only the output
 * row on the src0 side of the current row.
 * The columns missing beyond the left and right edges are replaced by the
 * edge column itself.
 * \param dst Destination row, 3 * count pixels.
 * \param src0 Pointer at the first pixel of the row adjacent to the output row.
 * \param src1 Pointer at the first pixel of the current row.
 * \param src2 Pointer at the first pixel of the row on the opposite side.
 * \param count Length in pixels of the source rows. It must be at least 2.
 */
static inline void scale3x_8_def_border(scale3x_uint8* restrict dst, const scale3x_uint8* restrict src0, const scale3x_uint8* restrict src1, const scale3x_uint8* restrict src2, unsigned count)
{
	unsigned last;
	unsigned x;

	assert(count >= 2);
	last = count - 1;

	/* first pixel: the left column is the pixel's own column */
	scale3x_8_def_border_pixel(dst,
		src0[0], src0[0], src0[1],
		src1[0], src1[0], src1[1],
		src2[0]);

	/* central pixels */
	for (x = 1; x < last; ++x) {
		dst += 3;
		scale3x_8_def_border_pixel(dst,
			src0[x - 1], src0[x], src0[x + 1],
			src1[x - 1], src1[x], src1[x + 1],
			src2[x]);
	}

	/* last pixel: the right column is the pixel's own column */
	dst += 3;
	scale3x_8_def_border_pixel(dst,
		src0[last - 1], src0[last], src0[last],
		src1[last - 1], src1[last], src1[last],
		src2[last]);
}
196 
/**
 * Compute the three pixels of the middle output row for one 8 bit source
 * pixel.
 * The nine value arguments are the 3x3 source neighbourhood of the pixel:
 * ul/u/ur come from the previous row, l/c/r from the current row and
 * dl/d/dr from the next row; c is the pixel being expanded.
 * \param out Destination of the three output pixels.
 */
static inline void scale3x_8_def_center_pixel(
	scale3x_uint8* out,
	scale3x_uint8 ul, scale3x_uint8 u, scale3x_uint8 ur,
	scale3x_uint8 l, scale3x_uint8 c, scale3x_uint8 r,
	scale3x_uint8 dl, scale3x_uint8 d, scale3x_uint8 dr)
{
	/* both side pixels start as a plain copy of the source pixel */
	scale3x_uint8 left = c;
	scale3x_uint8 right = c;

	/* neighbours are only propagated when up/down and left/right both differ */
	if (u != d && l != r) {
		if ((l == u && c != dl) || (l == d && c != ul)) {
			left = l;
		}
		if ((r == u && c != dr) || (r == d && c != ur)) {
			right = r;
		}
	}

	out[0] = left;
	out[1] = c; /* the middle pixel is always the source pixel */
	out[2] = right;
}

/**
 * Scale by a factor of 3 a row of 8 bit pixels, producing only the middle
 * output row.
 * The columns missing beyond the left and right edges are replaced by the
 * edge column itself.
 * \param dst Destination row, 3 * count pixels.
 * \param src0 Pointer at the first pixel of the previous row.
 * \param src1 Pointer at the first pixel of the current row.
 * \param src2 Pointer at the first pixel of the next row.
 * \param count Length in pixels of the source rows. It must be at least 2.
 */
static inline void scale3x_8_def_center(scale3x_uint8* restrict dst, const scale3x_uint8* restrict src0, const scale3x_uint8* restrict src1, const scale3x_uint8* restrict src2, unsigned count)
{
	unsigned last;
	unsigned x;

	assert(count >= 2);
	last = count - 1;

	/* first pixel: the left column is the pixel's own column */
	scale3x_8_def_center_pixel(dst,
		src0[0], src0[0], src0[1],
		src1[0], src1[0], src1[1],
		src2[0], src2[0], src2[1]);

	/* central pixels */
	for (x = 1; x < last; ++x) {
		dst += 3;
		scale3x_8_def_center_pixel(dst,
			src0[x - 1], src0[x], src0[x + 1],
			src1[x - 1], src1[x], src1[x + 1],
			src2[x - 1], src2[x], src2[x + 1]);
	}

	/* last pixel: the right column is the pixel's own column */
	dst += 3;
	scale3x_8_def_center_pixel(dst,
		src0[last - 1], src0[last], src0[last],
		src1[last - 1], src1[last], src1[last],
		src2[last - 1], src2[last], src2[last]);
}
247 
/**
 * Expand one 16 bit source pixel into its 3x3 block of output pixels.
 * The nine value arguments are the 3x3 source neighbourhood of the pixel:
 * ul/u/ur come from the previous row, l/c/r from the current row and
 * dl/d/dr from the next row; c is the pixel being expanded.
 * \param out0 Destination of the three pixels of the first output row.
 * \param out1 Destination of the three pixels of the second output row.
 * \param out2 Destination of the three pixels of the third output row.
 */
static inline void scale3x_16_def_whole_pixel(
	scale3x_uint16* out0, scale3x_uint16* out1, scale3x_uint16* out2,
	scale3x_uint16 ul, scale3x_uint16 u, scale3x_uint16 ur,
	scale3x_uint16 l, scale3x_uint16 c, scale3x_uint16 r,
	scale3x_uint16 dl, scale3x_uint16 d, scale3x_uint16 dr)
{
	/* every output pixel starts as a plain copy of the source pixel */
	scale3x_uint16 p00 = c;
	scale3x_uint16 p01 = c;
	scale3x_uint16 p02 = c;
	scale3x_uint16 p10 = c;
	scale3x_uint16 p12 = c;
	scale3x_uint16 p20 = c;
	scale3x_uint16 p21 = c;
	scale3x_uint16 p22 = c;

	/* neighbours are only propagated when up/down and left/right both differ */
	if (u != d && l != r) {
		if (l == u) {
			p00 = l;
		}
		if ((l == u && c != ur) || (r == u && c != ul)) {
			p01 = u;
		}
		if (r == u) {
			p02 = r;
		}
		if ((l == u && c != dl) || (l == d && c != ul)) {
			p10 = l;
		}
		if ((r == u && c != dr) || (r == d && c != ur)) {
			p12 = r;
		}
		if (l == d) {
			p20 = l;
		}
		if ((l == d && c != dr) || (r == d && c != dl)) {
			p21 = d;
		}
		if (r == d) {
			p22 = r;
		}
	}

	out0[0] = p00;
	out0[1] = p01;
	out0[2] = p02;
	out1[0] = p10;
	out1[1] = c; /* the middle pixel of the block is always the source pixel */
	out1[2] = p12;
	out2[0] = p20;
	out2[1] = p21;
	out2[2] = p22;
}

/**
 * Scale by a factor of 3 a row of 16 bit pixels, producing the three
 * output rows in a single pass.
 * The columns missing beyond the left and right edges are replaced by the
 * edge column itself.
 * \param dst0 First destination row, 3 * count pixels.
 * \param dst1 Second destination row, 3 * count pixels.
 * \param dst2 Third destination row, 3 * count pixels.
 * \param src0 Pointer at the first pixel of the previous row.
 * \param src1 Pointer at the first pixel of the current row.
 * \param src2 Pointer at the first pixel of the next row.
 * \param count Length in pixels of the source rows. It must be at least 2.
 */
static inline void scale3x_16_def_whole(scale3x_uint16* restrict dst0, scale3x_uint16* restrict dst1, scale3x_uint16* restrict dst2, const scale3x_uint16* restrict src0, const scale3x_uint16* restrict src1, const scale3x_uint16* restrict src2, unsigned count)
{
	unsigned last;
	unsigned x;

	assert(count >= 2);
	last = count - 1;

	/* first pixel: the left column is the pixel's own column */
	scale3x_16_def_whole_pixel(dst0, dst1, dst2,
		src0[0], src0[0], src0[1],
		src1[0], src1[0], src1[1],
		src2[0], src2[0], src2[1]);

	/* central pixels */
	for (x = 1; x < last; ++x) {
		dst0 += 3;
		dst1 += 3;
		dst2 += 3;
		scale3x_16_def_whole_pixel(dst0, dst1, dst2,
			src0[x - 1], src0[x], src0[x + 1],
			src1[x - 1], src1[x], src1[x + 1],
			src2[x - 1], src2[x], src2[x + 1]);
	}

	/* last pixel: the right column is the pixel's own column */
	dst0 += 3;
	dst1 += 3;
	dst2 += 3;
	scale3x_16_def_whole_pixel(dst0, dst1, dst2,
		src0[last - 1], src0[last], src0[last],
		src1[last - 1], src1[last], src1[last],
		src2[last - 1], src2[last], src2[last]);
}
338 
/**
 * Compute the three pixels of an outer output row for one 16 bit source pixel.
 * \param out Destination of the three output pixels.
 * \param ul Pixel before u in the src0 row.
 * \param u Pixel of the src0 row in the same column as c.
 * \param ur Pixel after u in the src0 row.
 * \param l Pixel before c in the src1 row.
 * \param c Source pixel being expanded.
 * \param r Pixel after c in the src1 row.
 * \param d Pixel of the src2 row in the same column as c.
 */
static inline void scale3x_16_def_border_pixel(
	scale3x_uint16* out,
	scale3x_uint16 ul, scale3x_uint16 u, scale3x_uint16 ur,
	scale3x_uint16 l, scale3x_uint16 c, scale3x_uint16 r,
	scale3x_uint16 d)
{
	/* every output pixel starts as a plain copy of the source pixel */
	scale3x_uint16 left = c;
	scale3x_uint16 mid = c;
	scale3x_uint16 right = c;

	/* neighbours are only propagated when up/down and left/right both differ */
	if (u != d && l != r) {
		if (l == u) {
			left = l;
		}
		if ((l == u && c != ur) || (r == u && c != ul)) {
			mid = u;
		}
		if (r == u) {
			right = r;
		}
	}

	out[0] = left;
	out[1] = mid;
	out[2] = right;
}

/**
 * Scale by a factor of 3 a row of 16 bit pixels, producing only the output
 * row on the src0 side of the current row.
 * The columns missing beyond the left and right edges are replaced by the
 * edge column itself.
 * \param dst Destination row, 3 * count pixels.
 * \param src0 Pointer at the first pixel of the row adjacent to the output row.
 * \param src1 Pointer at the first pixel of the current row.
 * \param src2 Pointer at the first pixel of the row on the opposite side.
 * \param count Length in pixels of the source rows. It must be at least 2.
 */
static inline void scale3x_16_def_border(scale3x_uint16* restrict dst, const scale3x_uint16* restrict src0, const scale3x_uint16* restrict src1, const scale3x_uint16* restrict src2, unsigned count)
{
	unsigned last;
	unsigned x;

	assert(count >= 2);
	last = count - 1;

	/* first pixel: the left column is the pixel's own column */
	scale3x_16_def_border_pixel(dst,
		src0[0], src0[0], src0[1],
		src1[0], src1[0], src1[1],
		src2[0]);

	/* central pixels */
	for (x = 1; x < last; ++x) {
		dst += 3;
		scale3x_16_def_border_pixel(dst,
			src0[x - 1], src0[x], src0[x + 1],
			src1[x - 1], src1[x], src1[x + 1],
			src2[x]);
	}

	/* last pixel: the right column is the pixel's own column */
	dst += 3;
	scale3x_16_def_border_pixel(dst,
		src0[last - 1], src0[last], src0[last],
		src1[last - 1], src1[last], src1[last],
		src2[last]);
}
389 
/**
 * Compute the three pixels of the middle output row for one 16 bit source
 * pixel.
 * The nine value arguments are the 3x3 source neighbourhood of the pixel:
 * ul/u/ur come from the previous row, l/c/r from the current row and
 * dl/d/dr from the next row; c is the pixel being expanded.
 * \param out Destination of the three output pixels.
 */
static inline void scale3x_16_def_center_pixel(
	scale3x_uint16* out,
	scale3x_uint16 ul, scale3x_uint16 u, scale3x_uint16 ur,
	scale3x_uint16 l, scale3x_uint16 c, scale3x_uint16 r,
	scale3x_uint16 dl, scale3x_uint16 d, scale3x_uint16 dr)
{
	/* both side pixels start as a plain copy of the source pixel */
	scale3x_uint16 left = c;
	scale3x_uint16 right = c;

	/* neighbours are only propagated when up/down and left/right both differ */
	if (u != d && l != r) {
		if ((l == u && c != dl) || (l == d && c != ul)) {
			left = l;
		}
		if ((r == u && c != dr) || (r == d && c != ur)) {
			right = r;
		}
	}

	out[0] = left;
	out[1] = c; /* the middle pixel is always the source pixel */
	out[2] = right;
}

/**
 * Scale by a factor of 3 a row of 16 bit pixels, producing only the middle
 * output row.
 * The columns missing beyond the left and right edges are replaced by the
 * edge column itself.
 * \param dst Destination row, 3 * count pixels.
 * \param src0 Pointer at the first pixel of the previous row.
 * \param src1 Pointer at the first pixel of the current row.
 * \param src2 Pointer at the first pixel of the next row.
 * \param count Length in pixels of the source rows. It must be at least 2.
 */
static inline void scale3x_16_def_center(scale3x_uint16* restrict dst, const scale3x_uint16* restrict src0, const scale3x_uint16* restrict src1, const scale3x_uint16* restrict src2, unsigned count)
{
	unsigned last;
	unsigned x;

	assert(count >= 2);
	last = count - 1;

	/* first pixel: the left column is the pixel's own column */
	scale3x_16_def_center_pixel(dst,
		src0[0], src0[0], src0[1],
		src1[0], src1[0], src1[1],
		src2[0], src2[0], src2[1]);

	/* central pixels */
	for (x = 1; x < last; ++x) {
		dst += 3;
		scale3x_16_def_center_pixel(dst,
			src0[x - 1], src0[x], src0[x + 1],
			src1[x - 1], src1[x], src1[x + 1],
			src2[x - 1], src2[x], src2[x + 1]);
	}

	/* last pixel: the right column is the pixel's own column */
	dst += 3;
	scale3x_16_def_center_pixel(dst,
		src0[last - 1], src0[last], src0[last],
		src1[last - 1], src1[last], src1[last],
		src2[last - 1], src2[last], src2[last]);
}
440 
/**
 * Expand one 32 bit source pixel into its 3x3 block of output pixels.
 * The nine value arguments are the 3x3 source neighbourhood of the pixel:
 * ul/u/ur come from the previous row, l/c/r from the current row and
 * dl/d/dr from the next row; c is the pixel being expanded.
 * \param out0 Destination of the three pixels of the first output row.
 * \param out1 Destination of the three pixels of the second output row.
 * \param out2 Destination of the three pixels of the third output row.
 */
static inline void scale3x_32_def_whole_pixel(
	scale3x_uint32* out0, scale3x_uint32* out1, scale3x_uint32* out2,
	scale3x_uint32 ul, scale3x_uint32 u, scale3x_uint32 ur,
	scale3x_uint32 l, scale3x_uint32 c, scale3x_uint32 r,
	scale3x_uint32 dl, scale3x_uint32 d, scale3x_uint32 dr)
{
	/* every output pixel starts as a plain copy of the source pixel */
	scale3x_uint32 p00 = c;
	scale3x_uint32 p01 = c;
	scale3x_uint32 p02 = c;
	scale3x_uint32 p10 = c;
	scale3x_uint32 p12 = c;
	scale3x_uint32 p20 = c;
	scale3x_uint32 p21 = c;
	scale3x_uint32 p22 = c;

	/* neighbours are only propagated when up/down and left/right both differ */
	if (u != d && l != r) {
		if (l == u) {
			p00 = l;
		}
		if ((l == u && c != ur) || (r == u && c != ul)) {
			p01 = u;
		}
		if (r == u) {
			p02 = r;
		}
		if ((l == u && c != dl) || (l == d && c != ul)) {
			p10 = l;
		}
		if ((r == u && c != dr) || (r == d && c != ur)) {
			p12 = r;
		}
		if (l == d) {
			p20 = l;
		}
		if ((l == d && c != dr) || (r == d && c != dl)) {
			p21 = d;
		}
		if (r == d) {
			p22 = r;
		}
	}

	out0[0] = p00;
	out0[1] = p01;
	out0[2] = p02;
	out1[0] = p10;
	out1[1] = c; /* the middle pixel of the block is always the source pixel */
	out1[2] = p12;
	out2[0] = p20;
	out2[1] = p21;
	out2[2] = p22;
}

/**
 * Scale by a factor of 3 a row of 32 bit pixels, producing the three
 * output rows in a single pass.
 * The columns missing beyond the left and right edges are replaced by the
 * edge column itself.
 * \param dst0 First destination row, 3 * count pixels.
 * \param dst1 Second destination row, 3 * count pixels.
 * \param dst2 Third destination row, 3 * count pixels.
 * \param src0 Pointer at the first pixel of the previous row.
 * \param src1 Pointer at the first pixel of the current row.
 * \param src2 Pointer at the first pixel of the next row.
 * \param count Length in pixels of the source rows. It must be at least 2.
 */
static inline void scale3x_32_def_whole(scale3x_uint32* restrict dst0, scale3x_uint32* restrict dst1, scale3x_uint32* restrict dst2, const scale3x_uint32* restrict src0, const scale3x_uint32* restrict src1, const scale3x_uint32* restrict src2, unsigned count)
{
	unsigned last;
	unsigned x;

	assert(count >= 2);
	last = count - 1;

	/* first pixel: the left column is the pixel's own column */
	scale3x_32_def_whole_pixel(dst0, dst1, dst2,
		src0[0], src0[0], src0[1],
		src1[0], src1[0], src1[1],
		src2[0], src2[0], src2[1]);

	/* central pixels */
	for (x = 1; x < last; ++x) {
		dst0 += 3;
		dst1 += 3;
		dst2 += 3;
		scale3x_32_def_whole_pixel(dst0, dst1, dst2,
			src0[x - 1], src0[x], src0[x + 1],
			src1[x - 1], src1[x], src1[x + 1],
			src2[x - 1], src2[x], src2[x + 1]);
	}

	/* last pixel: the right column is the pixel's own column */
	dst0 += 3;
	dst1 += 3;
	dst2 += 3;
	scale3x_32_def_whole_pixel(dst0, dst1, dst2,
		src0[last - 1], src0[last], src0[last],
		src1[last - 1], src1[last], src1[last],
		src2[last - 1], src2[last], src2[last]);
}
531 
/**
 * Compute the three pixels of an outer output row for one 32 bit source pixel.
 * \param out Destination of the three output pixels.
 * \param ul Pixel before u in the src0 row.
 * \param u Pixel of the src0 row in the same column as c.
 * \param ur Pixel after u in the src0 row.
 * \param l Pixel before c in the src1 row.
 * \param c Source pixel being expanded.
 * \param r Pixel after c in the src1 row.
 * \param d Pixel of the src2 row in the same column as c.
 */
static inline void scale3x_32_def_border_pixel(
	scale3x_uint32* out,
	scale3x_uint32 ul, scale3x_uint32 u, scale3x_uint32 ur,
	scale3x_uint32 l, scale3x_uint32 c, scale3x_uint32 r,
	scale3x_uint32 d)
{
	/* every output pixel starts as a plain copy of the source pixel */
	scale3x_uint32 left = c;
	scale3x_uint32 mid = c;
	scale3x_uint32 right = c;

	/* neighbours are only propagated when up/down and left/right both differ */
	if (u != d && l != r) {
		if (l == u) {
			left = l;
		}
		if ((l == u && c != ur) || (r == u && c != ul)) {
			mid = u;
		}
		if (r == u) {
			right = r;
		}
	}

	out[0] = left;
	out[1] = mid;
	out[2] = right;
}

/**
 * Scale by a factor of 3 a row of 32 bit pixels, producing only the output
 * row on the src0 side of the current row.
 * The columns missing beyond the left and right edges are replaced by the
 * edge column itself.
 * \param dst Destination row, 3 * count pixels.
 * \param src0 Pointer at the first pixel of the row adjacent to the output row.
 * \param src1 Pointer at the first pixel of the current row.
 * \param src2 Pointer at the first pixel of the row on the opposite side.
 * \param count Length in pixels of the source rows. It must be at least 2.
 */
static inline void scale3x_32_def_border(scale3x_uint32* restrict dst, const scale3x_uint32* restrict src0, const scale3x_uint32* restrict src1, const scale3x_uint32* restrict src2, unsigned count)
{
	unsigned last;
	unsigned x;

	assert(count >= 2);
	last = count - 1;

	/* first pixel: the left column is the pixel's own column */
	scale3x_32_def_border_pixel(dst,
		src0[0], src0[0], src0[1],
		src1[0], src1[0], src1[1],
		src2[0]);

	/* central pixels */
	for (x = 1; x < last; ++x) {
		dst += 3;
		scale3x_32_def_border_pixel(dst,
			src0[x - 1], src0[x], src0[x + 1],
			src1[x - 1], src1[x], src1[x + 1],
			src2[x]);
	}

	/* last pixel: the right column is the pixel's own column */
	dst += 3;
	scale3x_32_def_border_pixel(dst,
		src0[last - 1], src0[last], src0[last],
		src1[last - 1], src1[last], src1[last],
		src2[last]);
}
582 
/**
 * Compute the three pixels of the middle output row for one 32 bit source
 * pixel.
 * The nine value arguments are the 3x3 source neighbourhood of the pixel:
 * ul/u/ur come from the previous row, l/c/r from the current row and
 * dl/d/dr from the next row; c is the pixel being expanded.
 * \param out Destination of the three output pixels.
 */
static inline void scale3x_32_def_center_pixel(
	scale3x_uint32* out,
	scale3x_uint32 ul, scale3x_uint32 u, scale3x_uint32 ur,
	scale3x_uint32 l, scale3x_uint32 c, scale3x_uint32 r,
	scale3x_uint32 dl, scale3x_uint32 d, scale3x_uint32 dr)
{
	/* both side pixels start as a plain copy of the source pixel */
	scale3x_uint32 left = c;
	scale3x_uint32 right = c;

	/* neighbours are only propagated when up/down and left/right both differ */
	if (u != d && l != r) {
		if ((l == u && c != dl) || (l == d && c != ul)) {
			left = l;
		}
		if ((r == u && c != dr) || (r == d && c != ur)) {
			right = r;
		}
	}

	out[0] = left;
	out[1] = c; /* the middle pixel is always the source pixel */
	out[2] = right;
}

/**
 * Scale by a factor of 3 a row of 32 bit pixels, producing only the middle
 * output row.
 * The columns missing beyond the left and right edges are replaced by the
 * edge column itself.
 * \param dst Destination row, 3 * count pixels.
 * \param src0 Pointer at the first pixel of the previous row.
 * \param src1 Pointer at the first pixel of the current row.
 * \param src2 Pointer at the first pixel of the next row.
 * \param count Length in pixels of the source rows. It must be at least 2.
 */
static inline void scale3x_32_def_center(scale3x_uint32* restrict dst, const scale3x_uint32* restrict src0, const scale3x_uint32* restrict src1, const scale3x_uint32* restrict src2, unsigned count)
{
	unsigned last;
	unsigned x;

	assert(count >= 2);
	last = count - 1;

	/* first pixel: the left column is the pixel's own column */
	scale3x_32_def_center_pixel(dst,
		src0[0], src0[0], src0[1],
		src1[0], src1[0], src1[1],
		src2[0], src2[0], src2[1]);

	/* central pixels */
	for (x = 1; x < last; ++x) {
		dst += 3;
		scale3x_32_def_center_pixel(dst,
			src0[x - 1], src0[x], src0[x + 1],
			src1[x - 1], src1[x], src1[x + 1],
			src2[x - 1], src2[x], src2[x + 1]);
	}

	/* last pixel: the right column is the pixel's own column */
	dst += 3;
	scale3x_32_def_center_pixel(dst,
		src0[last - 1], src0[last], src0[last],
		src1[last - 1], src1[last], src1[last],
		src2[last - 1], src2[last], src2[last]);
}
633 
634 /**
635  * Scale by a factor of 3 a row of pixels of 8 bits.
636  * The function is implemented in C.
637  * The pixels over the left and right borders are assumed of the same color of
638  * the pixels on the border.
639  * \param src0 Pointer at the first pixel of the previous row.
640  * \param src1 Pointer at the first pixel of the current row.
641  * \param src2 Pointer at the first pixel of the next row.
642  * \param count Length in pixels of the src0, src1 and src2 rows.
643  * It must be at least 2.
644  * \param dst0 First destination row, triple length in pixels.
645  * \param dst1 Second destination row, triple length in pixels.
646  * \param dst2 Third destination row, triple length in pixels.
647  */
scale3x_8_def(scale3x_uint8 * dst0,scale3x_uint8 * dst1,scale3x_uint8 * dst2,const scale3x_uint8 * src0,const scale3x_uint8 * src1,const scale3x_uint8 * src2,unsigned count)648 void scale3x_8_def(scale3x_uint8* dst0, scale3x_uint8* dst1, scale3x_uint8* dst2, const scale3x_uint8* src0, const scale3x_uint8* src1, const scale3x_uint8* src2, unsigned count)
649 {
650 #ifdef USE_SCALE_RANDOMWRITE
651 	scale3x_8_def_whole(dst0, dst1, dst2, src0, src1, src2, count);
652 #else
653 	scale3x_8_def_border(dst0, src0, src1, src2, count);
654 	scale3x_8_def_center(dst1, src0, src1, src2, count);
655 	scale3x_8_def_border(dst2, src2, src1, src0, count);
656 #endif
657 }
658 
659 /**
660  * Scale by a factor of 3 a row of pixels of 16 bits.
661  * This function operates like scale3x_8_def() but for 16 bits pixels.
662  * \param src0 Pointer at the first pixel of the previous row.
663  * \param src1 Pointer at the first pixel of the current row.
664  * \param src2 Pointer at the first pixel of the next row.
665  * \param count Length in pixels of the src0, src1 and src2 rows.
666  * It must be at least 2.
667  * \param dst0 First destination row, triple length in pixels.
668  * \param dst1 Second destination row, triple length in pixels.
669  * \param dst2 Third destination row, triple length in pixels.
670  */
scale3x_16_def(scale3x_uint16 * dst0,scale3x_uint16 * dst1,scale3x_uint16 * dst2,const scale3x_uint16 * src0,const scale3x_uint16 * src1,const scale3x_uint16 * src2,unsigned count)671 void scale3x_16_def(scale3x_uint16* dst0, scale3x_uint16* dst1, scale3x_uint16* dst2, const scale3x_uint16* src0, const scale3x_uint16* src1, const scale3x_uint16* src2, unsigned count)
672 {
673 #ifdef USE_SCALE_RANDOMWRITE
674 	scale3x_16_def_whole(dst0, dst1, dst2, src0, src1, src2, count);
675 #else
676 	scale3x_16_def_border(dst0, src0, src1, src2, count);
677 	scale3x_16_def_center(dst1, src0, src1, src2, count);
678 	scale3x_16_def_border(dst2, src2, src1, src0, count);
679 #endif
680 }
681 
682 /**
683  * Scale by a factor of 3 a row of pixels of 32 bits.
684  * This function operates like scale3x_8_def() but for 32 bits pixels.
685  * \param src0 Pointer at the first pixel of the previous row.
686  * \param src1 Pointer at the first pixel of the current row.
687  * \param src2 Pointer at the first pixel of the next row.
688  * \param count Length in pixels of the src0, src1 and src2 rows.
689  * It must be at least 2.
690  * \param dst0 First destination row, triple length in pixels.
691  * \param dst1 Second destination row, triple length in pixels.
692  * \param dst2 Third destination row, triple length in pixels.
693  */
scale3x_32_def(scale3x_uint32 * dst0,scale3x_uint32 * dst1,scale3x_uint32 * dst2,const scale3x_uint32 * src0,const scale3x_uint32 * src1,const scale3x_uint32 * src2,unsigned count)694 void scale3x_32_def(scale3x_uint32* dst0, scale3x_uint32* dst1, scale3x_uint32* dst2, const scale3x_uint32* src0, const scale3x_uint32* src1, const scale3x_uint32* src2, unsigned count)
695 {
696 #ifdef USE_SCALE_RANDOMWRITE
697 	scale3x_32_def_whole(dst0, dst1, dst2, src0, src1, src2, count);
698 #else
699 	scale3x_32_def_border(dst0, src0, src1, src2, count);
700 	scale3x_32_def_center(dst1, src0, src1, src2, count);
701 	scale3x_32_def_border(dst2, src2, src1, src0, count);
702 #endif
703 }
704 
705