/*
 * jsimd_x86_64.c
 *
 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
 * Copyright (C) 2009-2011, 2014, 2016, D. R. Commander.
 * Copyright (C) 2015, Matthieu Darbois.
 *
 * Based on the x86 SIMD extension for IJG JPEG library,
 * Copyright (C) 1999-2006, MIYASAKA Masaru.
 * For conditions of distribution and use, see copyright notice in jsimdext.inc
 *
 * This file contains the interface between the "normal" portions
 * of the library and the SIMD implementations when running on a
 * 64-bit x86 architecture.
 */
16 
17 #define JPEG_INTERNALS
18 #include "../jinclude.h"
19 #include "../jpeglib.h"
20 #include "../jsimd.h"
21 #include "../jdct.h"
22 #include "../jsimddct.h"
23 #include "jsimd.h"
24 
/*
 * In the PIC cases, we have no guarantee that constants will keep
 * their alignment. This macro allows us to verify it at runtime.
 *
 * IS_ALIGNED(ptr, order) is nonzero when the address/value given as ptr
 * is a multiple of 2^order.  Both arguments are parenthesized inside the
 * expansion so that expression arguments (e.g. "base + n" or "a | b") are
 * evaluated as a whole before the cast / shift is applied, and the mask is
 * built in size_t so the shift is not limited to the width of int.
 */
#define IS_ALIGNED(ptr, order) \
  ((((size_t)(ptr)) & ((((size_t)1) << (order)) - 1)) == 0)

#define IS_ALIGNED_SSE(ptr) (IS_ALIGNED((ptr), 4)) /* 16 byte alignment */
32 
33 static unsigned int simd_support = ~0;
34 static unsigned int simd_huffman = 1;
35 
36 /*
37  * Check what SIMD accelerations are supported.
38  *
39  * FIXME: This code is racy under a multi-threaded environment.
40  */
41 LOCAL(void)
init_simd(void)42 init_simd (void)
43 {
44   char *env = NULL;
45 
46   if (simd_support != ~0U)
47     return;
48 
49   simd_support = JSIMD_SSE2 | JSIMD_SSE;
50 
51   /* Force different settings through environment variables */
52   env = getenv("JSIMD_FORCENONE");
53   if ((env != NULL) && (strcmp(env, "1") == 0))
54     simd_support = 0;
55   env = getenv("JSIMD_NOHUFFENC");
56   if ((env != NULL) && (strcmp(env, "1") == 0))
57     simd_huffman = 0;
58 }
59 
60 GLOBAL(int)
jsimd_can_rgb_ycc(void)61 jsimd_can_rgb_ycc (void)
62 {
63   init_simd();
64 
65   /* The code is optimised for these values only */
66   if (BITS_IN_JSAMPLE != 8)
67     return 0;
68   if (sizeof(JDIMENSION) != 4)
69     return 0;
70   if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
71     return 0;
72 
73   if ((simd_support & JSIMD_SSE2) &&
74       IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2))
75     return 1;
76 
77   return 0;
78 }
79 
80 GLOBAL(int)
jsimd_can_rgb_gray(void)81 jsimd_can_rgb_gray (void)
82 {
83   init_simd();
84 
85   /* The code is optimised for these values only */
86   if (BITS_IN_JSAMPLE != 8)
87     return 0;
88   if (sizeof(JDIMENSION) != 4)
89     return 0;
90   if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
91     return 0;
92 
93   if ((simd_support & JSIMD_SSE2) &&
94       IS_ALIGNED_SSE(jconst_rgb_gray_convert_sse2))
95     return 1;
96 
97   return 0;
98 }
99 
100 GLOBAL(int)
jsimd_can_ycc_rgb(void)101 jsimd_can_ycc_rgb (void)
102 {
103   init_simd();
104 
105   /* The code is optimised for these values only */
106   if (BITS_IN_JSAMPLE != 8)
107     return 0;
108   if (sizeof(JDIMENSION) != 4)
109     return 0;
110   if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
111     return 0;
112 
113   if ((simd_support & JSIMD_SSE2) &&
114       IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2))
115     return 1;
116 
117   return 0;
118 }
119 
120 GLOBAL(int)
jsimd_can_ycc_rgb565(void)121 jsimd_can_ycc_rgb565 (void)
122 {
123   return 0;
124 }
125 
126 GLOBAL(void)
jsimd_rgb_ycc_convert(j_compress_ptr cinfo,JSAMPARRAY input_buf,JSAMPIMAGE output_buf,JDIMENSION output_row,int num_rows)127 jsimd_rgb_ycc_convert (j_compress_ptr cinfo,
128                        JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
129                        JDIMENSION output_row, int num_rows)
130 {
131   void (*sse2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
132 
133   switch(cinfo->in_color_space) {
134     case JCS_EXT_RGB:
135       sse2fct=jsimd_extrgb_ycc_convert_sse2;
136       break;
137     case JCS_EXT_RGBX:
138     case JCS_EXT_RGBA:
139       sse2fct=jsimd_extrgbx_ycc_convert_sse2;
140       break;
141     case JCS_EXT_BGR:
142       sse2fct=jsimd_extbgr_ycc_convert_sse2;
143       break;
144     case JCS_EXT_BGRX:
145     case JCS_EXT_BGRA:
146       sse2fct=jsimd_extbgrx_ycc_convert_sse2;
147       break;
148     case JCS_EXT_XBGR:
149     case JCS_EXT_ABGR:
150       sse2fct=jsimd_extxbgr_ycc_convert_sse2;
151       break;
152     case JCS_EXT_XRGB:
153     case JCS_EXT_ARGB:
154       sse2fct=jsimd_extxrgb_ycc_convert_sse2;
155       break;
156     default:
157       sse2fct=jsimd_rgb_ycc_convert_sse2;
158       break;
159   }
160 
161   sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
162 }
163 
164 GLOBAL(void)
jsimd_rgb_gray_convert(j_compress_ptr cinfo,JSAMPARRAY input_buf,JSAMPIMAGE output_buf,JDIMENSION output_row,int num_rows)165 jsimd_rgb_gray_convert (j_compress_ptr cinfo,
166                         JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
167                         JDIMENSION output_row, int num_rows)
168 {
169   void (*sse2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
170 
171   switch(cinfo->in_color_space) {
172     case JCS_EXT_RGB:
173       sse2fct=jsimd_extrgb_gray_convert_sse2;
174       break;
175     case JCS_EXT_RGBX:
176     case JCS_EXT_RGBA:
177       sse2fct=jsimd_extrgbx_gray_convert_sse2;
178       break;
179     case JCS_EXT_BGR:
180       sse2fct=jsimd_extbgr_gray_convert_sse2;
181       break;
182     case JCS_EXT_BGRX:
183     case JCS_EXT_BGRA:
184       sse2fct=jsimd_extbgrx_gray_convert_sse2;
185       break;
186     case JCS_EXT_XBGR:
187     case JCS_EXT_ABGR:
188       sse2fct=jsimd_extxbgr_gray_convert_sse2;
189       break;
190     case JCS_EXT_XRGB:
191     case JCS_EXT_ARGB:
192       sse2fct=jsimd_extxrgb_gray_convert_sse2;
193       break;
194     default:
195       sse2fct=jsimd_rgb_gray_convert_sse2;
196       break;
197   }
198 
199   sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
200 }
201 
202 GLOBAL(void)
jsimd_ycc_rgb_convert(j_decompress_ptr cinfo,JSAMPIMAGE input_buf,JDIMENSION input_row,JSAMPARRAY output_buf,int num_rows)203 jsimd_ycc_rgb_convert (j_decompress_ptr cinfo,
204                        JSAMPIMAGE input_buf, JDIMENSION input_row,
205                        JSAMPARRAY output_buf, int num_rows)
206 {
207   void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
208 
209   switch(cinfo->out_color_space) {
210     case JCS_EXT_RGB:
211       sse2fct=jsimd_ycc_extrgb_convert_sse2;
212       break;
213     case JCS_EXT_RGBX:
214     case JCS_EXT_RGBA:
215       sse2fct=jsimd_ycc_extrgbx_convert_sse2;
216       break;
217     case JCS_EXT_BGR:
218       sse2fct=jsimd_ycc_extbgr_convert_sse2;
219       break;
220     case JCS_EXT_BGRX:
221     case JCS_EXT_BGRA:
222       sse2fct=jsimd_ycc_extbgrx_convert_sse2;
223       break;
224     case JCS_EXT_XBGR:
225     case JCS_EXT_ABGR:
226       sse2fct=jsimd_ycc_extxbgr_convert_sse2;
227       break;
228     case JCS_EXT_XRGB:
229     case JCS_EXT_ARGB:
230       sse2fct=jsimd_ycc_extxrgb_convert_sse2;
231       break;
232     default:
233       sse2fct=jsimd_ycc_rgb_convert_sse2;
234       break;
235   }
236 
237   sse2fct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
238 }
239 
240 GLOBAL(void)
jsimd_ycc_rgb565_convert(j_decompress_ptr cinfo,JSAMPIMAGE input_buf,JDIMENSION input_row,JSAMPARRAY output_buf,int num_rows)241 jsimd_ycc_rgb565_convert (j_decompress_ptr cinfo,
242                           JSAMPIMAGE input_buf, JDIMENSION input_row,
243                           JSAMPARRAY output_buf, int num_rows)
244 {
245 }
246 
247 GLOBAL(int)
jsimd_can_h2v2_downsample(void)248 jsimd_can_h2v2_downsample (void)
249 {
250   init_simd();
251 
252   /* The code is optimised for these values only */
253   if (BITS_IN_JSAMPLE != 8)
254     return 0;
255   if (sizeof(JDIMENSION) != 4)
256     return 0;
257 
258   if (simd_support & JSIMD_SSE2)
259     return 1;
260 
261   return 0;
262 }
263 
264 GLOBAL(int)
jsimd_can_h2v1_downsample(void)265 jsimd_can_h2v1_downsample (void)
266 {
267   init_simd();
268 
269   /* The code is optimised for these values only */
270   if (BITS_IN_JSAMPLE != 8)
271     return 0;
272   if (sizeof(JDIMENSION) != 4)
273     return 0;
274 
275   if (simd_support & JSIMD_SSE2)
276     return 1;
277 
278   return 0;
279 }
280 
281 GLOBAL(void)
jsimd_h2v2_downsample(j_compress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY output_data)282 jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr,
283                        JSAMPARRAY input_data, JSAMPARRAY output_data)
284 {
285   jsimd_h2v2_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor,
286                              compptr->v_samp_factor, compptr->width_in_blocks,
287                              input_data, output_data);
288 }
289 
290 GLOBAL(void)
jsimd_h2v1_downsample(j_compress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY output_data)291 jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr,
292                        JSAMPARRAY input_data, JSAMPARRAY output_data)
293 {
294   jsimd_h2v1_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor,
295                              compptr->v_samp_factor, compptr->width_in_blocks,
296                              input_data, output_data);
297 }
298 
299 GLOBAL(int)
jsimd_can_h2v2_upsample(void)300 jsimd_can_h2v2_upsample (void)
301 {
302   init_simd();
303 
304   /* The code is optimised for these values only */
305   if (BITS_IN_JSAMPLE != 8)
306     return 0;
307   if (sizeof(JDIMENSION) != 4)
308     return 0;
309 
310   if (simd_support & JSIMD_SSE2)
311     return 1;
312 
313   return 0;
314 }
315 
316 GLOBAL(int)
jsimd_can_h2v1_upsample(void)317 jsimd_can_h2v1_upsample (void)
318 {
319   init_simd();
320 
321   /* The code is optimised for these values only */
322   if (BITS_IN_JSAMPLE != 8)
323     return 0;
324   if (sizeof(JDIMENSION) != 4)
325     return 0;
326 
327   if (simd_support & JSIMD_SSE2)
328     return 1;
329 
330   return 0;
331 }
332 
333 GLOBAL(void)
jsimd_h2v2_upsample(j_decompress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY * output_data_ptr)334 jsimd_h2v2_upsample (j_decompress_ptr cinfo,
335                      jpeg_component_info *compptr,
336                      JSAMPARRAY input_data,
337                      JSAMPARRAY *output_data_ptr)
338 {
339   jsimd_h2v2_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width,
340                            input_data, output_data_ptr);
341 }
342 
343 GLOBAL(void)
jsimd_h2v1_upsample(j_decompress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY * output_data_ptr)344 jsimd_h2v1_upsample (j_decompress_ptr cinfo,
345                      jpeg_component_info *compptr,
346                      JSAMPARRAY input_data,
347                      JSAMPARRAY *output_data_ptr)
348 {
349   jsimd_h2v1_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width,
350                            input_data, output_data_ptr);
351 }
352 
353 GLOBAL(int)
jsimd_can_h2v2_fancy_upsample(void)354 jsimd_can_h2v2_fancy_upsample (void)
355 {
356   init_simd();
357 
358   /* The code is optimised for these values only */
359   if (BITS_IN_JSAMPLE != 8)
360     return 0;
361   if (sizeof(JDIMENSION) != 4)
362     return 0;
363 
364   if ((simd_support & JSIMD_SSE2) &&
365       IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
366     return 1;
367 
368   return 0;
369 }
370 
371 GLOBAL(int)
jsimd_can_h2v1_fancy_upsample(void)372 jsimd_can_h2v1_fancy_upsample (void)
373 {
374   init_simd();
375 
376   /* The code is optimised for these values only */
377   if (BITS_IN_JSAMPLE != 8)
378     return 0;
379   if (sizeof(JDIMENSION) != 4)
380     return 0;
381 
382   if ((simd_support & JSIMD_SSE2) &&
383       IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
384     return 1;
385 
386   return 0;
387 }
388 
389 GLOBAL(void)
jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY * output_data_ptr)390 jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo,
391                            jpeg_component_info *compptr,
392                            JSAMPARRAY input_data,
393                            JSAMPARRAY *output_data_ptr)
394 {
395   jsimd_h2v2_fancy_upsample_sse2(cinfo->max_v_samp_factor,
396                                  compptr->downsampled_width, input_data,
397                                  output_data_ptr);
398 }
399 
400 GLOBAL(void)
jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY * output_data_ptr)401 jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo,
402                            jpeg_component_info *compptr,
403                            JSAMPARRAY input_data,
404                            JSAMPARRAY *output_data_ptr)
405 {
406   jsimd_h2v1_fancy_upsample_sse2(cinfo->max_v_samp_factor,
407                                  compptr->downsampled_width, input_data,
408                                  output_data_ptr);
409 }
410 
411 GLOBAL(int)
jsimd_can_h2v2_merged_upsample(void)412 jsimd_can_h2v2_merged_upsample (void)
413 {
414   init_simd();
415 
416   /* The code is optimised for these values only */
417   if (BITS_IN_JSAMPLE != 8)
418     return 0;
419   if (sizeof(JDIMENSION) != 4)
420     return 0;
421 
422   if ((simd_support & JSIMD_SSE2) &&
423       IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
424     return 1;
425 
426   return 0;
427 }
428 
429 GLOBAL(int)
jsimd_can_h2v1_merged_upsample(void)430 jsimd_can_h2v1_merged_upsample (void)
431 {
432   init_simd();
433 
434   /* The code is optimised for these values only */
435   if (BITS_IN_JSAMPLE != 8)
436     return 0;
437   if (sizeof(JDIMENSION) != 4)
438     return 0;
439 
440   if ((simd_support & JSIMD_SSE2) &&
441       IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
442     return 1;
443 
444   return 0;
445 }
446 
447 GLOBAL(void)
jsimd_h2v2_merged_upsample(j_decompress_ptr cinfo,JSAMPIMAGE input_buf,JDIMENSION in_row_group_ctr,JSAMPARRAY output_buf)448 jsimd_h2v2_merged_upsample (j_decompress_ptr cinfo,
449                             JSAMPIMAGE input_buf,
450                             JDIMENSION in_row_group_ctr,
451                             JSAMPARRAY output_buf)
452 {
453   void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
454 
455   switch(cinfo->out_color_space) {
456     case JCS_EXT_RGB:
457       sse2fct=jsimd_h2v2_extrgb_merged_upsample_sse2;
458       break;
459     case JCS_EXT_RGBX:
460     case JCS_EXT_RGBA:
461       sse2fct=jsimd_h2v2_extrgbx_merged_upsample_sse2;
462       break;
463     case JCS_EXT_BGR:
464       sse2fct=jsimd_h2v2_extbgr_merged_upsample_sse2;
465       break;
466     case JCS_EXT_BGRX:
467     case JCS_EXT_BGRA:
468       sse2fct=jsimd_h2v2_extbgrx_merged_upsample_sse2;
469       break;
470     case JCS_EXT_XBGR:
471     case JCS_EXT_ABGR:
472       sse2fct=jsimd_h2v2_extxbgr_merged_upsample_sse2;
473       break;
474     case JCS_EXT_XRGB:
475     case JCS_EXT_ARGB:
476       sse2fct=jsimd_h2v2_extxrgb_merged_upsample_sse2;
477       break;
478     default:
479       sse2fct=jsimd_h2v2_merged_upsample_sse2;
480       break;
481   }
482 
483   sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
484 }
485 
486 GLOBAL(void)
jsimd_h2v1_merged_upsample(j_decompress_ptr cinfo,JSAMPIMAGE input_buf,JDIMENSION in_row_group_ctr,JSAMPARRAY output_buf)487 jsimd_h2v1_merged_upsample (j_decompress_ptr cinfo,
488                             JSAMPIMAGE input_buf,
489                             JDIMENSION in_row_group_ctr,
490                             JSAMPARRAY output_buf)
491 {
492   void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
493 
494   switch(cinfo->out_color_space) {
495     case JCS_EXT_RGB:
496       sse2fct=jsimd_h2v1_extrgb_merged_upsample_sse2;
497       break;
498     case JCS_EXT_RGBX:
499     case JCS_EXT_RGBA:
500       sse2fct=jsimd_h2v1_extrgbx_merged_upsample_sse2;
501       break;
502     case JCS_EXT_BGR:
503       sse2fct=jsimd_h2v1_extbgr_merged_upsample_sse2;
504       break;
505     case JCS_EXT_BGRX:
506     case JCS_EXT_BGRA:
507       sse2fct=jsimd_h2v1_extbgrx_merged_upsample_sse2;
508       break;
509     case JCS_EXT_XBGR:
510     case JCS_EXT_ABGR:
511       sse2fct=jsimd_h2v1_extxbgr_merged_upsample_sse2;
512       break;
513     case JCS_EXT_XRGB:
514     case JCS_EXT_ARGB:
515       sse2fct=jsimd_h2v1_extxrgb_merged_upsample_sse2;
516       break;
517     default:
518       sse2fct=jsimd_h2v1_merged_upsample_sse2;
519       break;
520   }
521 
522   sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
523 }
524 
525 GLOBAL(int)
jsimd_can_convsamp(void)526 jsimd_can_convsamp (void)
527 {
528   init_simd();
529 
530   /* The code is optimised for these values only */
531   if (DCTSIZE != 8)
532     return 0;
533   if (BITS_IN_JSAMPLE != 8)
534     return 0;
535   if (sizeof(JDIMENSION) != 4)
536     return 0;
537   if (sizeof(DCTELEM) != 2)
538     return 0;
539 
540   if (simd_support & JSIMD_SSE2)
541     return 1;
542 
543   return 0;
544 }
545 
546 GLOBAL(int)
jsimd_can_convsamp_float(void)547 jsimd_can_convsamp_float (void)
548 {
549   init_simd();
550 
551   /* The code is optimised for these values only */
552   if (DCTSIZE != 8)
553     return 0;
554   if (BITS_IN_JSAMPLE != 8)
555     return 0;
556   if (sizeof(JDIMENSION) != 4)
557     return 0;
558   if (sizeof(FAST_FLOAT) != 4)
559     return 0;
560 
561   if (simd_support & JSIMD_SSE2)
562     return 1;
563 
564   return 0;
565 }
566 
567 GLOBAL(void)
jsimd_convsamp(JSAMPARRAY sample_data,JDIMENSION start_col,DCTELEM * workspace)568 jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col,
569                 DCTELEM *workspace)
570 {
571   jsimd_convsamp_sse2(sample_data, start_col, workspace);
572 }
573 
574 GLOBAL(void)
jsimd_convsamp_float(JSAMPARRAY sample_data,JDIMENSION start_col,FAST_FLOAT * workspace)575 jsimd_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col,
576                       FAST_FLOAT *workspace)
577 {
578   jsimd_convsamp_float_sse2(sample_data, start_col, workspace);
579 }
580 
581 GLOBAL(int)
jsimd_can_fdct_islow(void)582 jsimd_can_fdct_islow (void)
583 {
584   init_simd();
585 
586   /* The code is optimised for these values only */
587   if (DCTSIZE != 8)
588     return 0;
589   if (sizeof(DCTELEM) != 2)
590     return 0;
591 
592   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2))
593     return 1;
594 
595   return 0;
596 }
597 
598 GLOBAL(int)
jsimd_can_fdct_ifast(void)599 jsimd_can_fdct_ifast (void)
600 {
601   init_simd();
602 
603   /* The code is optimised for these values only */
604   if (DCTSIZE != 8)
605     return 0;
606   if (sizeof(DCTELEM) != 2)
607     return 0;
608 
609   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_ifast_sse2))
610     return 1;
611 
612   return 0;
613 }
614 
615 GLOBAL(int)
jsimd_can_fdct_float(void)616 jsimd_can_fdct_float (void)
617 {
618   init_simd();
619 
620   /* The code is optimised for these values only */
621   if (DCTSIZE != 8)
622     return 0;
623   if (sizeof(FAST_FLOAT) != 4)
624     return 0;
625 
626   if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse))
627     return 1;
628 
629   return 0;
630 }
631 
632 GLOBAL(void)
jsimd_fdct_islow(DCTELEM * data)633 jsimd_fdct_islow (DCTELEM *data)
634 {
635   jsimd_fdct_islow_sse2(data);
636 }
637 
638 GLOBAL(void)
jsimd_fdct_ifast(DCTELEM * data)639 jsimd_fdct_ifast (DCTELEM *data)
640 {
641   jsimd_fdct_ifast_sse2(data);
642 }
643 
644 GLOBAL(void)
jsimd_fdct_float(FAST_FLOAT * data)645 jsimd_fdct_float (FAST_FLOAT *data)
646 {
647   jsimd_fdct_float_sse(data);
648 }
649 
650 GLOBAL(int)
jsimd_can_quantize(void)651 jsimd_can_quantize (void)
652 {
653   init_simd();
654 
655   /* The code is optimised for these values only */
656   if (DCTSIZE != 8)
657     return 0;
658   if (sizeof(JCOEF) != 2)
659     return 0;
660   if (sizeof(DCTELEM) != 2)
661     return 0;
662 
663   if (simd_support & JSIMD_SSE2)
664     return 1;
665 
666   return 0;
667 }
668 
669 GLOBAL(int)
jsimd_can_quantize_float(void)670 jsimd_can_quantize_float (void)
671 {
672   init_simd();
673 
674   /* The code is optimised for these values only */
675   if (DCTSIZE != 8)
676     return 0;
677   if (sizeof(JCOEF) != 2)
678     return 0;
679   if (sizeof(FAST_FLOAT) != 4)
680     return 0;
681 
682   if (simd_support & JSIMD_SSE2)
683     return 1;
684 
685   return 0;
686 }
687 
688 GLOBAL(void)
jsimd_quantize(JCOEFPTR coef_block,DCTELEM * divisors,DCTELEM * workspace)689 jsimd_quantize (JCOEFPTR coef_block, DCTELEM *divisors,
690                 DCTELEM *workspace)
691 {
692   jsimd_quantize_sse2(coef_block, divisors, workspace);
693 }
694 
695 GLOBAL(void)
jsimd_quantize_float(JCOEFPTR coef_block,FAST_FLOAT * divisors,FAST_FLOAT * workspace)696 jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT *divisors,
697                       FAST_FLOAT *workspace)
698 {
699   jsimd_quantize_float_sse2(coef_block, divisors, workspace);
700 }
701 
702 GLOBAL(int)
jsimd_can_idct_2x2(void)703 jsimd_can_idct_2x2 (void)
704 {
705   init_simd();
706 
707   /* The code is optimised for these values only */
708   if (DCTSIZE != 8)
709     return 0;
710   if (sizeof(JCOEF) != 2)
711     return 0;
712   if (BITS_IN_JSAMPLE != 8)
713     return 0;
714   if (sizeof(JDIMENSION) != 4)
715     return 0;
716   if (sizeof(ISLOW_MULT_TYPE) != 2)
717     return 0;
718 
719   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
720     return 1;
721 
722   return 0;
723 }
724 
725 GLOBAL(int)
jsimd_can_idct_4x4(void)726 jsimd_can_idct_4x4 (void)
727 {
728   init_simd();
729 
730   /* The code is optimised for these values only */
731   if (DCTSIZE != 8)
732     return 0;
733   if (sizeof(JCOEF) != 2)
734     return 0;
735   if (BITS_IN_JSAMPLE != 8)
736     return 0;
737   if (sizeof(JDIMENSION) != 4)
738     return 0;
739   if (sizeof(ISLOW_MULT_TYPE) != 2)
740     return 0;
741 
742   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
743     return 1;
744 
745   return 0;
746 }
747 
748 GLOBAL(void)
jsimd_idct_2x2(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)749 jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info *compptr,
750                 JCOEFPTR coef_block, JSAMPARRAY output_buf,
751                 JDIMENSION output_col)
752 {
753   jsimd_idct_2x2_sse2(compptr->dct_table, coef_block, output_buf, output_col);
754 }
755 
756 GLOBAL(void)
jsimd_idct_4x4(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)757 jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info *compptr,
758                 JCOEFPTR coef_block, JSAMPARRAY output_buf,
759                 JDIMENSION output_col)
760 {
761   jsimd_idct_4x4_sse2(compptr->dct_table, coef_block, output_buf, output_col);
762 }
763 
764 GLOBAL(int)
jsimd_can_idct_islow(void)765 jsimd_can_idct_islow (void)
766 {
767   init_simd();
768 
769   /* The code is optimised for these values only */
770   if (DCTSIZE != 8)
771     return 0;
772   if (sizeof(JCOEF) != 2)
773     return 0;
774   if (BITS_IN_JSAMPLE != 8)
775     return 0;
776   if (sizeof(JDIMENSION) != 4)
777     return 0;
778   if (sizeof(ISLOW_MULT_TYPE) != 2)
779     return 0;
780 
781   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_islow_sse2))
782     return 1;
783 
784   return 0;
785 }
786 
787 GLOBAL(int)
jsimd_can_idct_ifast(void)788 jsimd_can_idct_ifast (void)
789 {
790   init_simd();
791 
792   /* The code is optimised for these values only */
793   if (DCTSIZE != 8)
794     return 0;
795   if (sizeof(JCOEF) != 2)
796     return 0;
797   if (BITS_IN_JSAMPLE != 8)
798     return 0;
799   if (sizeof(JDIMENSION) != 4)
800     return 0;
801   if (sizeof(IFAST_MULT_TYPE) != 2)
802     return 0;
803   if (IFAST_SCALE_BITS != 2)
804     return 0;
805 
806   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2))
807     return 1;
808 
809   return 0;
810 }
811 
812 GLOBAL(int)
jsimd_can_idct_float(void)813 jsimd_can_idct_float (void)
814 {
815   init_simd();
816 
817   if (DCTSIZE != 8)
818     return 0;
819   if (sizeof(JCOEF) != 2)
820     return 0;
821   if (BITS_IN_JSAMPLE != 8)
822     return 0;
823   if (sizeof(JDIMENSION) != 4)
824     return 0;
825   if (sizeof(FAST_FLOAT) != 4)
826     return 0;
827   if (sizeof(FLOAT_MULT_TYPE) != 4)
828     return 0;
829 
830   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2))
831     return 1;
832 
833   return 0;
834 }
835 
836 GLOBAL(void)
jsimd_idct_islow(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)837 jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info *compptr,
838                   JCOEFPTR coef_block, JSAMPARRAY output_buf,
839                   JDIMENSION output_col)
840 {
841   jsimd_idct_islow_sse2(compptr->dct_table, coef_block, output_buf,
842                         output_col);
843 }
844 
845 GLOBAL(void)
jsimd_idct_ifast(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)846 jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info *compptr,
847                   JCOEFPTR coef_block, JSAMPARRAY output_buf,
848                   JDIMENSION output_col)
849 {
850   jsimd_idct_ifast_sse2(compptr->dct_table, coef_block, output_buf,
851                         output_col);
852 }
853 
854 GLOBAL(void)
jsimd_idct_float(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)855 jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info *compptr,
856                   JCOEFPTR coef_block, JSAMPARRAY output_buf,
857                   JDIMENSION output_col)
858 {
859   jsimd_idct_float_sse2(compptr->dct_table, coef_block, output_buf,
860                         output_col);
861 }
862 
863 GLOBAL(int)
jsimd_can_huff_encode_one_block(void)864 jsimd_can_huff_encode_one_block (void)
865 {
866   init_simd();
867 
868   if (DCTSIZE != 8)
869     return 0;
870   if (sizeof(JCOEF) != 2)
871     return 0;
872 
873   if ((simd_support & JSIMD_SSE2) && simd_huffman &&
874       IS_ALIGNED_SSE(jconst_huff_encode_one_block))
875     return 1;
876 
877   return 0;
878 }
879 
880 GLOBAL(JOCTET*)
jsimd_huff_encode_one_block(void * state,JOCTET * buffer,JCOEFPTR block,int last_dc_val,c_derived_tbl * dctbl,c_derived_tbl * actbl)881 jsimd_huff_encode_one_block (void *state, JOCTET *buffer, JCOEFPTR block,
882                              int last_dc_val, c_derived_tbl *dctbl,
883                              c_derived_tbl *actbl)
884 {
885   return jsimd_huff_encode_one_block_sse2(state, buffer, block, last_dc_val,
886                                           dctbl, actbl);
887 }
888