1 /*
2  * jsimd_i386.c
3  *
4  * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
5  * Copyright (C) 2009-2011, 2013-2014, 2016, D. R. Commander.
6  * Copyright (C) 2015, Matthieu Darbois.
7  *
8  * Based on the x86 SIMD extension for IJG JPEG library,
9  * Copyright (C) 1999-2006, MIYASAKA Masaru.
10  * For conditions of distribution and use, see copyright notice in jsimdext.inc
11  *
12  * This file contains the interface between the "normal" portions
13  * of the library and the SIMD implementations when running on a
14  * 32-bit x86 architecture.
15  */
16 
17 #define JPEG_INTERNALS
18 #include "../jinclude.h"
19 #include "../jpeglib.h"
20 #include "../jsimd.h"
21 #include "../jdct.h"
22 #include "../jsimddct.h"
23 #include "jsimd.h"
24 
/*
 * In the PIC cases, we have no guarantee that constants will keep
 * their alignment. These macros allow us to verify it at runtime.
 *
 * IS_ALIGNED(ptr, order) is true when the address `ptr` is a multiple of
 * 2^order bytes.  Both arguments are fully parenthesised so that arbitrary
 * expressions may be passed, and the address is converted to size_t rather
 * than unsigned int so that no address bits are discarded on targets where
 * pointers are wider than unsigned int.
 */
#define IS_ALIGNED(ptr, order) \
  (((size_t)(ptr) & (((size_t)1 << (order)) - 1)) == 0)

#define IS_ALIGNED_SSE(ptr) (IS_ALIGNED((ptr), 4)) /* 16 byte alignment */

/*
 * Bitmask of the SIMD extensions that may be used.  The all-ones value
 * means "not initialised yet"; init_simd() replaces it on first use.
 */
static unsigned int simd_support = ~0;

/* Cleared by init_simd() when the JSIMD_NOHUFFENC variable is set to "1". */
static unsigned int simd_huffman = 1;
35 
36 /*
37  * Check what SIMD accelerations are supported.
38  *
39  * FIXME: This code is racy under a multi-threaded environment.
40  */
41 LOCAL(void)
init_simd(void)42 init_simd (void)
43 {
44   char *env = NULL;
45 
46   if (simd_support != ~0U)
47     return;
48 
49   simd_support = jpeg_simd_cpu_support();
50 
51   /* Force different settings through environment variables */
52   env = getenv("JSIMD_FORCEMMX");
53   if ((env != NULL) && (strcmp(env, "1") == 0))
54     simd_support &= JSIMD_MMX;
55   env = getenv("JSIMD_FORCE3DNOW");
56   if ((env != NULL) && (strcmp(env, "1") == 0))
57     simd_support &= JSIMD_3DNOW|JSIMD_MMX;
58   env = getenv("JSIMD_FORCESSE");
59   if ((env != NULL) && (strcmp(env, "1") == 0))
60     simd_support &= JSIMD_SSE|JSIMD_MMX;
61   env = getenv("JSIMD_FORCESSE2");
62   if ((env != NULL) && (strcmp(env, "1") == 0))
63     simd_support &= JSIMD_SSE2;
64   env = getenv("JSIMD_FORCENONE");
65   if ((env != NULL) && (strcmp(env, "1") == 0))
66     simd_support = 0;
67   env = getenv("JSIMD_NOHUFFENC");
68   if ((env != NULL) && (strcmp(env, "1") == 0))
69     simd_huffman = 0;
70 }
71 
72 GLOBAL(int)
jsimd_can_rgb_ycc(void)73 jsimd_can_rgb_ycc (void)
74 {
75   init_simd();
76 
77   /* The code is optimised for these values only */
78   if (BITS_IN_JSAMPLE != 8)
79     return 0;
80   if (sizeof(JDIMENSION) != 4)
81     return 0;
82   if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
83     return 0;
84 
85   if ((simd_support & JSIMD_SSE2) &&
86       IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2))
87     return 1;
88   if (simd_support & JSIMD_MMX)
89     return 1;
90 
91   return 0;
92 }
93 
94 GLOBAL(int)
jsimd_can_rgb_gray(void)95 jsimd_can_rgb_gray (void)
96 {
97   init_simd();
98 
99   /* The code is optimised for these values only */
100   if (BITS_IN_JSAMPLE != 8)
101     return 0;
102   if (sizeof(JDIMENSION) != 4)
103     return 0;
104   if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
105     return 0;
106 
107   if ((simd_support & JSIMD_SSE2) &&
108       IS_ALIGNED_SSE(jconst_rgb_gray_convert_sse2))
109     return 1;
110   if (simd_support & JSIMD_MMX)
111     return 1;
112 
113   return 0;
114 }
115 
116 GLOBAL(int)
jsimd_can_ycc_rgb(void)117 jsimd_can_ycc_rgb (void)
118 {
119   init_simd();
120 
121   /* The code is optimised for these values only */
122   if (BITS_IN_JSAMPLE != 8)
123     return 0;
124   if (sizeof(JDIMENSION) != 4)
125     return 0;
126   if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
127     return 0;
128 
129   if ((simd_support & JSIMD_SSE2) &&
130       IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2))
131     return 1;
132   if (simd_support & JSIMD_MMX)
133     return 1;
134 
135   return 0;
136 }
137 
138 GLOBAL(int)
jsimd_can_ycc_rgb565(void)139 jsimd_can_ycc_rgb565 (void)
140 {
141   return 0;
142 }
143 
144 GLOBAL(void)
jsimd_rgb_ycc_convert(j_compress_ptr cinfo,JSAMPARRAY input_buf,JSAMPIMAGE output_buf,JDIMENSION output_row,int num_rows)145 jsimd_rgb_ycc_convert (j_compress_ptr cinfo,
146                        JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
147                        JDIMENSION output_row, int num_rows)
148 {
149   void (*sse2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
150   void (*mmxfct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
151 
152   switch(cinfo->in_color_space) {
153     case JCS_EXT_RGB:
154       sse2fct=jsimd_extrgb_ycc_convert_sse2;
155       mmxfct=jsimd_extrgb_ycc_convert_mmx;
156       break;
157     case JCS_EXT_RGBX:
158     case JCS_EXT_RGBA:
159       sse2fct=jsimd_extrgbx_ycc_convert_sse2;
160       mmxfct=jsimd_extrgbx_ycc_convert_mmx;
161       break;
162     case JCS_EXT_BGR:
163       sse2fct=jsimd_extbgr_ycc_convert_sse2;
164       mmxfct=jsimd_extbgr_ycc_convert_mmx;
165       break;
166     case JCS_EXT_BGRX:
167     case JCS_EXT_BGRA:
168       sse2fct=jsimd_extbgrx_ycc_convert_sse2;
169       mmxfct=jsimd_extbgrx_ycc_convert_mmx;
170       break;
171     case JCS_EXT_XBGR:
172     case JCS_EXT_ABGR:
173       sse2fct=jsimd_extxbgr_ycc_convert_sse2;
174       mmxfct=jsimd_extxbgr_ycc_convert_mmx;
175       break;
176     case JCS_EXT_XRGB:
177     case JCS_EXT_ARGB:
178       sse2fct=jsimd_extxrgb_ycc_convert_sse2;
179       mmxfct=jsimd_extxrgb_ycc_convert_mmx;
180       break;
181     default:
182       sse2fct=jsimd_rgb_ycc_convert_sse2;
183       mmxfct=jsimd_rgb_ycc_convert_mmx;
184       break;
185   }
186 
187   if ((simd_support & JSIMD_SSE2) &&
188       IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2))
189     sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
190   else if (simd_support & JSIMD_MMX)
191     mmxfct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
192 }
193 
194 GLOBAL(void)
jsimd_rgb_gray_convert(j_compress_ptr cinfo,JSAMPARRAY input_buf,JSAMPIMAGE output_buf,JDIMENSION output_row,int num_rows)195 jsimd_rgb_gray_convert (j_compress_ptr cinfo,
196                         JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
197                         JDIMENSION output_row, int num_rows)
198 {
199   void (*sse2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
200   void (*mmxfct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
201 
202   switch(cinfo->in_color_space) {
203     case JCS_EXT_RGB:
204       sse2fct=jsimd_extrgb_gray_convert_sse2;
205       mmxfct=jsimd_extrgb_gray_convert_mmx;
206       break;
207     case JCS_EXT_RGBX:
208     case JCS_EXT_RGBA:
209       sse2fct=jsimd_extrgbx_gray_convert_sse2;
210       mmxfct=jsimd_extrgbx_gray_convert_mmx;
211       break;
212     case JCS_EXT_BGR:
213       sse2fct=jsimd_extbgr_gray_convert_sse2;
214       mmxfct=jsimd_extbgr_gray_convert_mmx;
215       break;
216     case JCS_EXT_BGRX:
217     case JCS_EXT_BGRA:
218       sse2fct=jsimd_extbgrx_gray_convert_sse2;
219       mmxfct=jsimd_extbgrx_gray_convert_mmx;
220       break;
221     case JCS_EXT_XBGR:
222     case JCS_EXT_ABGR:
223       sse2fct=jsimd_extxbgr_gray_convert_sse2;
224       mmxfct=jsimd_extxbgr_gray_convert_mmx;
225       break;
226     case JCS_EXT_XRGB:
227     case JCS_EXT_ARGB:
228       sse2fct=jsimd_extxrgb_gray_convert_sse2;
229       mmxfct=jsimd_extxrgb_gray_convert_mmx;
230       break;
231     default:
232       sse2fct=jsimd_rgb_gray_convert_sse2;
233       mmxfct=jsimd_rgb_gray_convert_mmx;
234       break;
235   }
236 
237   if ((simd_support & JSIMD_SSE2) &&
238       IS_ALIGNED_SSE(jconst_rgb_gray_convert_sse2))
239     sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
240   else if (simd_support & JSIMD_MMX)
241     mmxfct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
242 }
243 
244 GLOBAL(void)
jsimd_ycc_rgb_convert(j_decompress_ptr cinfo,JSAMPIMAGE input_buf,JDIMENSION input_row,JSAMPARRAY output_buf,int num_rows)245 jsimd_ycc_rgb_convert (j_decompress_ptr cinfo,
246                        JSAMPIMAGE input_buf, JDIMENSION input_row,
247                        JSAMPARRAY output_buf, int num_rows)
248 {
249   void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
250   void (*mmxfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
251 
252   switch(cinfo->out_color_space) {
253     case JCS_EXT_RGB:
254       sse2fct=jsimd_ycc_extrgb_convert_sse2;
255       mmxfct=jsimd_ycc_extrgb_convert_mmx;
256       break;
257     case JCS_EXT_RGBX:
258     case JCS_EXT_RGBA:
259       sse2fct=jsimd_ycc_extrgbx_convert_sse2;
260       mmxfct=jsimd_ycc_extrgbx_convert_mmx;
261       break;
262     case JCS_EXT_BGR:
263       sse2fct=jsimd_ycc_extbgr_convert_sse2;
264       mmxfct=jsimd_ycc_extbgr_convert_mmx;
265       break;
266     case JCS_EXT_BGRX:
267     case JCS_EXT_BGRA:
268       sse2fct=jsimd_ycc_extbgrx_convert_sse2;
269       mmxfct=jsimd_ycc_extbgrx_convert_mmx;
270       break;
271     case JCS_EXT_XBGR:
272     case JCS_EXT_ABGR:
273       sse2fct=jsimd_ycc_extxbgr_convert_sse2;
274       mmxfct=jsimd_ycc_extxbgr_convert_mmx;
275       break;
276     case JCS_EXT_XRGB:
277     case JCS_EXT_ARGB:
278       sse2fct=jsimd_ycc_extxrgb_convert_sse2;
279       mmxfct=jsimd_ycc_extxrgb_convert_mmx;
280       break;
281     default:
282       sse2fct=jsimd_ycc_rgb_convert_sse2;
283       mmxfct=jsimd_ycc_rgb_convert_mmx;
284       break;
285   }
286 
287   if ((simd_support & JSIMD_SSE2) &&
288       IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2))
289     sse2fct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
290   else if (simd_support & JSIMD_MMX)
291     mmxfct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
292 }
293 
294 GLOBAL(void)
jsimd_ycc_rgb565_convert(j_decompress_ptr cinfo,JSAMPIMAGE input_buf,JDIMENSION input_row,JSAMPARRAY output_buf,int num_rows)295 jsimd_ycc_rgb565_convert (j_decompress_ptr cinfo,
296                           JSAMPIMAGE input_buf, JDIMENSION input_row,
297                           JSAMPARRAY output_buf, int num_rows)
298 {
299 }
300 
301 GLOBAL(int)
jsimd_can_h2v2_downsample(void)302 jsimd_can_h2v2_downsample (void)
303 {
304   init_simd();
305 
306   /* The code is optimised for these values only */
307   if (BITS_IN_JSAMPLE != 8)
308     return 0;
309   if (sizeof(JDIMENSION) != 4)
310     return 0;
311 
312   if (simd_support & JSIMD_SSE2)
313     return 1;
314   if (simd_support & JSIMD_MMX)
315     return 1;
316 
317   return 0;
318 }
319 
320 GLOBAL(int)
jsimd_can_h2v1_downsample(void)321 jsimd_can_h2v1_downsample (void)
322 {
323   init_simd();
324 
325   /* The code is optimised for these values only */
326   if (BITS_IN_JSAMPLE != 8)
327     return 0;
328   if (sizeof(JDIMENSION) != 4)
329     return 0;
330 
331   if (simd_support & JSIMD_SSE2)
332     return 1;
333   if (simd_support & JSIMD_MMX)
334     return 1;
335 
336   return 0;
337 }
338 
339 GLOBAL(void)
jsimd_h2v2_downsample(j_compress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY output_data)340 jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr,
341                        JSAMPARRAY input_data, JSAMPARRAY output_data)
342 {
343   if (simd_support & JSIMD_SSE2)
344     jsimd_h2v2_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor,
345                                compptr->v_samp_factor,
346                                compptr->width_in_blocks, input_data,
347                                output_data);
348   else if (simd_support & JSIMD_MMX)
349     jsimd_h2v2_downsample_mmx(cinfo->image_width, cinfo->max_v_samp_factor,
350                               compptr->v_samp_factor, compptr->width_in_blocks,
351                               input_data, output_data);
352 }
353 
354 GLOBAL(void)
jsimd_h2v1_downsample(j_compress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY output_data)355 jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr,
356                        JSAMPARRAY input_data, JSAMPARRAY output_data)
357 {
358   if (simd_support & JSIMD_SSE2)
359     jsimd_h2v1_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor,
360                                compptr->v_samp_factor,
361                                compptr->width_in_blocks, input_data,
362                                output_data);
363   else if (simd_support & JSIMD_MMX)
364     jsimd_h2v1_downsample_mmx(cinfo->image_width, cinfo->max_v_samp_factor,
365                               compptr->v_samp_factor, compptr->width_in_blocks,
366                               input_data, output_data);
367 }
368 
369 GLOBAL(int)
jsimd_can_h2v2_upsample(void)370 jsimd_can_h2v2_upsample (void)
371 {
372   init_simd();
373 
374   /* The code is optimised for these values only */
375   if (BITS_IN_JSAMPLE != 8)
376     return 0;
377   if (sizeof(JDIMENSION) != 4)
378     return 0;
379 
380   if (simd_support & JSIMD_SSE2)
381     return 1;
382   if (simd_support & JSIMD_MMX)
383     return 1;
384 
385   return 0;
386 }
387 
388 GLOBAL(int)
jsimd_can_h2v1_upsample(void)389 jsimd_can_h2v1_upsample (void)
390 {
391   init_simd();
392 
393   /* The code is optimised for these values only */
394   if (BITS_IN_JSAMPLE != 8)
395     return 0;
396   if (sizeof(JDIMENSION) != 4)
397     return 0;
398 
399   if (simd_support & JSIMD_SSE2)
400     return 1;
401   if (simd_support & JSIMD_MMX)
402     return 1;
403 
404   return 0;
405 }
406 
407 GLOBAL(void)
jsimd_h2v2_upsample(j_decompress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY * output_data_ptr)408 jsimd_h2v2_upsample (j_decompress_ptr cinfo,
409                      jpeg_component_info *compptr,
410                      JSAMPARRAY input_data,
411                      JSAMPARRAY *output_data_ptr)
412 {
413   if (simd_support & JSIMD_SSE2)
414     jsimd_h2v2_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width,
415                              input_data, output_data_ptr);
416   else if (simd_support & JSIMD_MMX)
417     jsimd_h2v2_upsample_mmx(cinfo->max_v_samp_factor, cinfo->output_width,
418                             input_data, output_data_ptr);
419 }
420 
421 GLOBAL(void)
jsimd_h2v1_upsample(j_decompress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY * output_data_ptr)422 jsimd_h2v1_upsample (j_decompress_ptr cinfo,
423                      jpeg_component_info *compptr,
424                      JSAMPARRAY input_data,
425                      JSAMPARRAY *output_data_ptr)
426 {
427   if (simd_support & JSIMD_SSE2)
428     jsimd_h2v1_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width,
429                              input_data, output_data_ptr);
430   else if (simd_support & JSIMD_MMX)
431     jsimd_h2v1_upsample_mmx(cinfo->max_v_samp_factor, cinfo->output_width,
432                             input_data, output_data_ptr);
433 }
434 
435 GLOBAL(int)
jsimd_can_h2v2_fancy_upsample(void)436 jsimd_can_h2v2_fancy_upsample (void)
437 {
438   init_simd();
439 
440   /* The code is optimised for these values only */
441   if (BITS_IN_JSAMPLE != 8)
442     return 0;
443   if (sizeof(JDIMENSION) != 4)
444     return 0;
445 
446   if ((simd_support & JSIMD_SSE2) &&
447       IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
448     return 1;
449   if (simd_support & JSIMD_MMX)
450     return 1;
451 
452   return 0;
453 }
454 
455 GLOBAL(int)
jsimd_can_h2v1_fancy_upsample(void)456 jsimd_can_h2v1_fancy_upsample (void)
457 {
458   init_simd();
459 
460   /* The code is optimised for these values only */
461   if (BITS_IN_JSAMPLE != 8)
462     return 0;
463   if (sizeof(JDIMENSION) != 4)
464     return 0;
465 
466   if ((simd_support & JSIMD_SSE2) &&
467       IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
468     return 1;
469   if (simd_support & JSIMD_MMX)
470     return 1;
471 
472   return 0;
473 }
474 
475 GLOBAL(void)
jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY * output_data_ptr)476 jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo,
477                            jpeg_component_info *compptr,
478                            JSAMPARRAY input_data,
479                            JSAMPARRAY *output_data_ptr)
480 {
481   if ((simd_support & JSIMD_SSE2) &&
482       IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
483     jsimd_h2v2_fancy_upsample_sse2(cinfo->max_v_samp_factor,
484                                    compptr->downsampled_width, input_data,
485                                    output_data_ptr);
486   else if (simd_support & JSIMD_MMX)
487     jsimd_h2v2_fancy_upsample_mmx(cinfo->max_v_samp_factor,
488                                   compptr->downsampled_width, input_data,
489                                   output_data_ptr);
490 }
491 
492 GLOBAL(void)
jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY * output_data_ptr)493 jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo,
494                            jpeg_component_info *compptr,
495                            JSAMPARRAY input_data,
496                            JSAMPARRAY *output_data_ptr)
497 {
498   if ((simd_support & JSIMD_SSE2) &&
499       IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
500     jsimd_h2v1_fancy_upsample_sse2(cinfo->max_v_samp_factor,
501                                    compptr->downsampled_width, input_data,
502                                    output_data_ptr);
503   else if (simd_support & JSIMD_MMX)
504     jsimd_h2v1_fancy_upsample_mmx(cinfo->max_v_samp_factor,
505                                   compptr->downsampled_width, input_data,
506                                   output_data_ptr);
507 }
508 
509 GLOBAL(int)
jsimd_can_h2v2_merged_upsample(void)510 jsimd_can_h2v2_merged_upsample (void)
511 {
512   init_simd();
513 
514   /* The code is optimised for these values only */
515   if (BITS_IN_JSAMPLE != 8)
516     return 0;
517   if (sizeof(JDIMENSION) != 4)
518     return 0;
519 
520   if ((simd_support & JSIMD_SSE2) &&
521       IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
522     return 1;
523   if (simd_support & JSIMD_MMX)
524     return 1;
525 
526   return 0;
527 }
528 
529 GLOBAL(int)
jsimd_can_h2v1_merged_upsample(void)530 jsimd_can_h2v1_merged_upsample (void)
531 {
532   init_simd();
533 
534   /* The code is optimised for these values only */
535   if (BITS_IN_JSAMPLE != 8)
536     return 0;
537   if (sizeof(JDIMENSION) != 4)
538     return 0;
539 
540   if ((simd_support & JSIMD_SSE2) &&
541       IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
542     return 1;
543   if (simd_support & JSIMD_MMX)
544     return 1;
545 
546   return 0;
547 }
548 
549 GLOBAL(void)
jsimd_h2v2_merged_upsample(j_decompress_ptr cinfo,JSAMPIMAGE input_buf,JDIMENSION in_row_group_ctr,JSAMPARRAY output_buf)550 jsimd_h2v2_merged_upsample (j_decompress_ptr cinfo,
551                             JSAMPIMAGE input_buf,
552                             JDIMENSION in_row_group_ctr,
553                             JSAMPARRAY output_buf)
554 {
555   void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
556   void (*mmxfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
557 
558   switch(cinfo->out_color_space) {
559     case JCS_EXT_RGB:
560       sse2fct=jsimd_h2v2_extrgb_merged_upsample_sse2;
561       mmxfct=jsimd_h2v2_extrgb_merged_upsample_mmx;
562       break;
563     case JCS_EXT_RGBX:
564     case JCS_EXT_RGBA:
565       sse2fct=jsimd_h2v2_extrgbx_merged_upsample_sse2;
566       mmxfct=jsimd_h2v2_extrgbx_merged_upsample_mmx;
567       break;
568     case JCS_EXT_BGR:
569       sse2fct=jsimd_h2v2_extbgr_merged_upsample_sse2;
570       mmxfct=jsimd_h2v2_extbgr_merged_upsample_mmx;
571       break;
572     case JCS_EXT_BGRX:
573     case JCS_EXT_BGRA:
574       sse2fct=jsimd_h2v2_extbgrx_merged_upsample_sse2;
575       mmxfct=jsimd_h2v2_extbgrx_merged_upsample_mmx;
576       break;
577     case JCS_EXT_XBGR:
578     case JCS_EXT_ABGR:
579       sse2fct=jsimd_h2v2_extxbgr_merged_upsample_sse2;
580       mmxfct=jsimd_h2v2_extxbgr_merged_upsample_mmx;
581       break;
582     case JCS_EXT_XRGB:
583     case JCS_EXT_ARGB:
584       sse2fct=jsimd_h2v2_extxrgb_merged_upsample_sse2;
585       mmxfct=jsimd_h2v2_extxrgb_merged_upsample_mmx;
586       break;
587     default:
588       sse2fct=jsimd_h2v2_merged_upsample_sse2;
589       mmxfct=jsimd_h2v2_merged_upsample_mmx;
590       break;
591   }
592 
593   if ((simd_support & JSIMD_SSE2) &&
594       IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
595     sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
596   else if (simd_support & JSIMD_MMX)
597     mmxfct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
598 }
599 
600 GLOBAL(void)
jsimd_h2v1_merged_upsample(j_decompress_ptr cinfo,JSAMPIMAGE input_buf,JDIMENSION in_row_group_ctr,JSAMPARRAY output_buf)601 jsimd_h2v1_merged_upsample (j_decompress_ptr cinfo,
602                             JSAMPIMAGE input_buf,
603                             JDIMENSION in_row_group_ctr,
604                             JSAMPARRAY output_buf)
605 {
606   void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
607   void (*mmxfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
608 
609   switch(cinfo->out_color_space) {
610     case JCS_EXT_RGB:
611       sse2fct=jsimd_h2v1_extrgb_merged_upsample_sse2;
612       mmxfct=jsimd_h2v1_extrgb_merged_upsample_mmx;
613       break;
614     case JCS_EXT_RGBX:
615     case JCS_EXT_RGBA:
616       sse2fct=jsimd_h2v1_extrgbx_merged_upsample_sse2;
617       mmxfct=jsimd_h2v1_extrgbx_merged_upsample_mmx;
618       break;
619     case JCS_EXT_BGR:
620       sse2fct=jsimd_h2v1_extbgr_merged_upsample_sse2;
621       mmxfct=jsimd_h2v1_extbgr_merged_upsample_mmx;
622       break;
623     case JCS_EXT_BGRX:
624     case JCS_EXT_BGRA:
625       sse2fct=jsimd_h2v1_extbgrx_merged_upsample_sse2;
626       mmxfct=jsimd_h2v1_extbgrx_merged_upsample_mmx;
627       break;
628     case JCS_EXT_XBGR:
629     case JCS_EXT_ABGR:
630       sse2fct=jsimd_h2v1_extxbgr_merged_upsample_sse2;
631       mmxfct=jsimd_h2v1_extxbgr_merged_upsample_mmx;
632       break;
633     case JCS_EXT_XRGB:
634     case JCS_EXT_ARGB:
635       sse2fct=jsimd_h2v1_extxrgb_merged_upsample_sse2;
636       mmxfct=jsimd_h2v1_extxrgb_merged_upsample_mmx;
637       break;
638     default:
639       sse2fct=jsimd_h2v1_merged_upsample_sse2;
640       mmxfct=jsimd_h2v1_merged_upsample_mmx;
641       break;
642   }
643 
644   if ((simd_support & JSIMD_SSE2) &&
645       IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
646     sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
647   else if (simd_support & JSIMD_MMX)
648     mmxfct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
649 }
650 
651 GLOBAL(int)
jsimd_can_convsamp(void)652 jsimd_can_convsamp (void)
653 {
654   init_simd();
655 
656   /* The code is optimised for these values only */
657   if (DCTSIZE != 8)
658     return 0;
659   if (BITS_IN_JSAMPLE != 8)
660     return 0;
661   if (sizeof(JDIMENSION) != 4)
662     return 0;
663   if (sizeof(DCTELEM) != 2)
664     return 0;
665 
666   if (simd_support & JSIMD_SSE2)
667     return 1;
668   if (simd_support & JSIMD_MMX)
669     return 1;
670 
671   return 0;
672 }
673 
674 GLOBAL(int)
jsimd_can_convsamp_float(void)675 jsimd_can_convsamp_float (void)
676 {
677   init_simd();
678 
679   /* The code is optimised for these values only */
680   if (DCTSIZE != 8)
681     return 0;
682   if (BITS_IN_JSAMPLE != 8)
683     return 0;
684   if (sizeof(JDIMENSION) != 4)
685     return 0;
686   if (sizeof(FAST_FLOAT) != 4)
687     return 0;
688 
689   if (simd_support & JSIMD_SSE2)
690     return 1;
691   if (simd_support & JSIMD_SSE)
692     return 1;
693   if (simd_support & JSIMD_3DNOW)
694     return 1;
695 
696   return 0;
697 }
698 
699 GLOBAL(void)
jsimd_convsamp(JSAMPARRAY sample_data,JDIMENSION start_col,DCTELEM * workspace)700 jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col,
701                 DCTELEM *workspace)
702 {
703   if (simd_support & JSIMD_SSE2)
704     jsimd_convsamp_sse2(sample_data, start_col, workspace);
705   else if (simd_support & JSIMD_MMX)
706     jsimd_convsamp_mmx(sample_data, start_col, workspace);
707 }
708 
709 GLOBAL(void)
jsimd_convsamp_float(JSAMPARRAY sample_data,JDIMENSION start_col,FAST_FLOAT * workspace)710 jsimd_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col,
711                       FAST_FLOAT *workspace)
712 {
713   if (simd_support & JSIMD_SSE2)
714     jsimd_convsamp_float_sse2(sample_data, start_col, workspace);
715   else if (simd_support & JSIMD_SSE)
716     jsimd_convsamp_float_sse(sample_data, start_col, workspace);
717   else if (simd_support & JSIMD_3DNOW)
718     jsimd_convsamp_float_3dnow(sample_data, start_col, workspace);
719 }
720 
721 GLOBAL(int)
jsimd_can_fdct_islow(void)722 jsimd_can_fdct_islow (void)
723 {
724   init_simd();
725 
726   /* The code is optimised for these values only */
727   if (DCTSIZE != 8)
728     return 0;
729   if (sizeof(DCTELEM) != 2)
730     return 0;
731 
732   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2))
733     return 1;
734   if (simd_support & JSIMD_MMX)
735     return 1;
736 
737   return 0;
738 }
739 
740 GLOBAL(int)
jsimd_can_fdct_ifast(void)741 jsimd_can_fdct_ifast (void)
742 {
743   init_simd();
744 
745   /* The code is optimised for these values only */
746   if (DCTSIZE != 8)
747     return 0;
748   if (sizeof(DCTELEM) != 2)
749     return 0;
750 
751   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_ifast_sse2))
752     return 1;
753   if (simd_support & JSIMD_MMX)
754     return 1;
755 
756   return 0;
757 }
758 
759 GLOBAL(int)
jsimd_can_fdct_float(void)760 jsimd_can_fdct_float (void)
761 {
762   init_simd();
763 
764   /* The code is optimised for these values only */
765   if (DCTSIZE != 8)
766     return 0;
767   if (sizeof(FAST_FLOAT) != 4)
768     return 0;
769 
770   if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse))
771     return 1;
772   if (simd_support & JSIMD_3DNOW)
773     return 1;
774 
775   return 0;
776 }
777 
778 GLOBAL(void)
jsimd_fdct_islow(DCTELEM * data)779 jsimd_fdct_islow (DCTELEM *data)
780 {
781   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2))
782     jsimd_fdct_islow_sse2(data);
783   else if (simd_support & JSIMD_MMX)
784     jsimd_fdct_islow_mmx(data);
785 }
786 
787 GLOBAL(void)
jsimd_fdct_ifast(DCTELEM * data)788 jsimd_fdct_ifast (DCTELEM *data)
789 {
790   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2))
791     jsimd_fdct_ifast_sse2(data);
792   else if (simd_support & JSIMD_MMX)
793     jsimd_fdct_ifast_mmx(data);
794 }
795 
796 GLOBAL(void)
jsimd_fdct_float(FAST_FLOAT * data)797 jsimd_fdct_float (FAST_FLOAT *data)
798 {
799   if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse))
800     jsimd_fdct_float_sse(data);
801   else if (simd_support & JSIMD_3DNOW)
802     jsimd_fdct_float_3dnow(data);
803 }
804 
805 GLOBAL(int)
jsimd_can_quantize(void)806 jsimd_can_quantize (void)
807 {
808   init_simd();
809 
810   /* The code is optimised for these values only */
811   if (DCTSIZE != 8)
812     return 0;
813   if (sizeof(JCOEF) != 2)
814     return 0;
815   if (sizeof(DCTELEM) != 2)
816     return 0;
817 
818   if (simd_support & JSIMD_SSE2)
819     return 1;
820   if (simd_support & JSIMD_MMX)
821     return 1;
822 
823   return 0;
824 }
825 
826 GLOBAL(int)
jsimd_can_quantize_float(void)827 jsimd_can_quantize_float (void)
828 {
829   init_simd();
830 
831   /* The code is optimised for these values only */
832   if (DCTSIZE != 8)
833     return 0;
834   if (sizeof(JCOEF) != 2)
835     return 0;
836   if (sizeof(FAST_FLOAT) != 4)
837     return 0;
838 
839   if (simd_support & JSIMD_SSE2)
840     return 1;
841   if (simd_support & JSIMD_SSE)
842     return 1;
843   if (simd_support & JSIMD_3DNOW)
844     return 1;
845 
846   return 0;
847 }
848 
849 GLOBAL(void)
jsimd_quantize(JCOEFPTR coef_block,DCTELEM * divisors,DCTELEM * workspace)850 jsimd_quantize (JCOEFPTR coef_block, DCTELEM *divisors,
851                 DCTELEM *workspace)
852 {
853   if (simd_support & JSIMD_SSE2)
854     jsimd_quantize_sse2(coef_block, divisors, workspace);
855   else if (simd_support & JSIMD_MMX)
856     jsimd_quantize_mmx(coef_block, divisors, workspace);
857 }
858 
859 GLOBAL(void)
jsimd_quantize_float(JCOEFPTR coef_block,FAST_FLOAT * divisors,FAST_FLOAT * workspace)860 jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT *divisors,
861                       FAST_FLOAT *workspace)
862 {
863   if (simd_support & JSIMD_SSE2)
864     jsimd_quantize_float_sse2(coef_block, divisors, workspace);
865   else if (simd_support & JSIMD_SSE)
866     jsimd_quantize_float_sse(coef_block, divisors, workspace);
867   else if (simd_support & JSIMD_3DNOW)
868     jsimd_quantize_float_3dnow(coef_block, divisors, workspace);
869 }
870 
871 GLOBAL(int)
jsimd_can_idct_2x2(void)872 jsimd_can_idct_2x2 (void)
873 {
874   init_simd();
875 
876   /* The code is optimised for these values only */
877   if (DCTSIZE != 8)
878     return 0;
879   if (sizeof(JCOEF) != 2)
880     return 0;
881   if (BITS_IN_JSAMPLE != 8)
882     return 0;
883   if (sizeof(JDIMENSION) != 4)
884     return 0;
885   if (sizeof(ISLOW_MULT_TYPE) != 2)
886     return 0;
887 
888   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
889     return 1;
890   if (simd_support & JSIMD_MMX)
891     return 1;
892 
893   return 0;
894 }
895 
896 GLOBAL(int)
jsimd_can_idct_4x4(void)897 jsimd_can_idct_4x4 (void)
898 {
899   init_simd();
900 
901   /* The code is optimised for these values only */
902   if (DCTSIZE != 8)
903     return 0;
904   if (sizeof(JCOEF) != 2)
905     return 0;
906   if (BITS_IN_JSAMPLE != 8)
907     return 0;
908   if (sizeof(JDIMENSION) != 4)
909     return 0;
910   if (sizeof(ISLOW_MULT_TYPE) != 2)
911     return 0;
912 
913   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
914     return 1;
915   if (simd_support & JSIMD_MMX)
916     return 1;
917 
918   return 0;
919 }
920 
921 GLOBAL(void)
jsimd_idct_2x2(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)922 jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info *compptr,
923                 JCOEFPTR coef_block, JSAMPARRAY output_buf,
924                 JDIMENSION output_col)
925 {
926   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
927     jsimd_idct_2x2_sse2(compptr->dct_table, coef_block, output_buf,
928                         output_col);
929   else if (simd_support & JSIMD_MMX)
930     jsimd_idct_2x2_mmx(compptr->dct_table, coef_block, output_buf, output_col);
931 }
932 
933 GLOBAL(void)
jsimd_idct_4x4(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)934 jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info *compptr,
935                 JCOEFPTR coef_block, JSAMPARRAY output_buf,
936                 JDIMENSION output_col)
937 {
938   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
939     jsimd_idct_4x4_sse2(compptr->dct_table, coef_block, output_buf,
940                         output_col);
941   else if (simd_support & JSIMD_MMX)
942     jsimd_idct_4x4_mmx(compptr->dct_table, coef_block, output_buf, output_col);
943 }
944 
945 GLOBAL(int)
jsimd_can_idct_islow(void)946 jsimd_can_idct_islow (void)
947 {
948   init_simd();
949 
950   /* The code is optimised for these values only */
951   if (DCTSIZE != 8)
952     return 0;
953   if (sizeof(JCOEF) != 2)
954     return 0;
955   if (BITS_IN_JSAMPLE != 8)
956     return 0;
957   if (sizeof(JDIMENSION) != 4)
958     return 0;
959   if (sizeof(ISLOW_MULT_TYPE) != 2)
960     return 0;
961 
962   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_islow_sse2))
963     return 1;
964   if (simd_support & JSIMD_MMX)
965     return 1;
966 
967   return 0;
968 }
969 
970 GLOBAL(int)
jsimd_can_idct_ifast(void)971 jsimd_can_idct_ifast (void)
972 {
973   init_simd();
974 
975   /* The code is optimised for these values only */
976   if (DCTSIZE != 8)
977     return 0;
978   if (sizeof(JCOEF) != 2)
979     return 0;
980   if (BITS_IN_JSAMPLE != 8)
981     return 0;
982   if (sizeof(JDIMENSION) != 4)
983     return 0;
984   if (sizeof(IFAST_MULT_TYPE) != 2)
985     return 0;
986   if (IFAST_SCALE_BITS != 2)
987     return 0;
988 
989   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2))
990     return 1;
991   if (simd_support & JSIMD_MMX)
992     return 1;
993 
994   return 0;
995 }
996 
997 GLOBAL(int)
jsimd_can_idct_float(void)998 jsimd_can_idct_float (void)
999 {
1000   init_simd();
1001 
1002   if (DCTSIZE != 8)
1003     return 0;
1004   if (sizeof(JCOEF) != 2)
1005     return 0;
1006   if (BITS_IN_JSAMPLE != 8)
1007     return 0;
1008   if (sizeof(JDIMENSION) != 4)
1009     return 0;
1010   if (sizeof(FAST_FLOAT) != 4)
1011     return 0;
1012   if (sizeof(FLOAT_MULT_TYPE) != 4)
1013     return 0;
1014 
1015   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2))
1016     return 1;
1017   if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_idct_float_sse))
1018     return 1;
1019   if (simd_support & JSIMD_3DNOW)
1020     return 1;
1021 
1022   return 0;
1023 }
1024 
1025 GLOBAL(void)
jsimd_idct_islow(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)1026 jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info *compptr,
1027                   JCOEFPTR coef_block, JSAMPARRAY output_buf,
1028                   JDIMENSION output_col)
1029 {
1030   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_islow_sse2))
1031     jsimd_idct_islow_sse2(compptr->dct_table, coef_block, output_buf,
1032                           output_col);
1033   else if (simd_support & JSIMD_MMX)
1034     jsimd_idct_islow_mmx(compptr->dct_table, coef_block, output_buf,
1035                          output_col);
1036 }
1037 
1038 GLOBAL(void)
jsimd_idct_ifast(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)1039 jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info *compptr,
1040                   JCOEFPTR coef_block, JSAMPARRAY output_buf,
1041                   JDIMENSION output_col)
1042 {
1043   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2))
1044     jsimd_idct_ifast_sse2(compptr->dct_table, coef_block, output_buf,
1045                           output_col);
1046   else if (simd_support & JSIMD_MMX)
1047     jsimd_idct_ifast_mmx(compptr->dct_table, coef_block, output_buf,
1048                          output_col);
1049 }
1050 
1051 GLOBAL(void)
jsimd_idct_float(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)1052 jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info *compptr,
1053                   JCOEFPTR coef_block, JSAMPARRAY output_buf,
1054                   JDIMENSION output_col)
1055 {
1056   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2))
1057     jsimd_idct_float_sse2(compptr->dct_table, coef_block, output_buf,
1058                           output_col);
1059   else if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_idct_float_sse))
1060     jsimd_idct_float_sse(compptr->dct_table, coef_block, output_buf,
1061                          output_col);
1062   else if (simd_support & JSIMD_3DNOW)
1063     jsimd_idct_float_3dnow(compptr->dct_table, coef_block, output_buf,
1064                            output_col);
1065 }
1066 
1067 GLOBAL(int)
jsimd_can_huff_encode_one_block(void)1068 jsimd_can_huff_encode_one_block (void)
1069 {
1070   init_simd();
1071 
1072   if (DCTSIZE != 8)
1073     return 0;
1074   if (sizeof(JCOEF) != 2)
1075     return 0;
1076 
1077   if ((simd_support & JSIMD_SSE2) && simd_huffman &&
1078       IS_ALIGNED_SSE(jconst_huff_encode_one_block))
1079     return 1;
1080 
1081   return 0;
1082 }
1083 
1084 GLOBAL(JOCTET*)
jsimd_huff_encode_one_block(void * state,JOCTET * buffer,JCOEFPTR block,int last_dc_val,c_derived_tbl * dctbl,c_derived_tbl * actbl)1085 jsimd_huff_encode_one_block (void *state, JOCTET *buffer, JCOEFPTR block,
1086                              int last_dc_val, c_derived_tbl *dctbl,
1087                              c_derived_tbl *actbl)
1088 {
1089   return jsimd_huff_encode_one_block_sse2(state, buffer, block, last_dc_val,
1090                                           dctbl, actbl);
1091 }
1092