1 /*
2  * jsimd_x86_64.c
3  *
4  * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
5  * Copyright (C) 2009-2011, 2014, 2016, 2018, D. R. Commander.
6  * Copyright (C) 2015-2016, 2018, Matthieu Darbois.
7  *
8  * Based on the x86 SIMD extension for IJG JPEG library,
9  * Copyright (C) 1999-2006, MIYASAKA Masaru.
10  * For conditions of distribution and use, see copyright notice in jsimdext.inc
11  *
12  * This file contains the interface between the "normal" portions
13  * of the library and the SIMD implementations when running on a
14  * 64-bit x86 architecture.
15  */
16 
17 #define JPEG_INTERNALS
18 #include "../../jinclude.h"
19 #include "../../jpeglib.h"
20 #include "../../jsimd.h"
21 #include "../../jdct.h"
22 #include "../../jsimddct.h"
23 #include "../jsimd.h"
24 #include "jconfigint.h"
25 
/*
 * In the PIC cases, we have no guarantee that constants will keep
 * their alignment.  These macros allow us to verify it at runtime.
 *
 * IS_ALIGNED(ptr, order) is nonzero when the address ptr is a multiple of
 * 2^order bytes.  Both arguments are parenthesized in the expansion so that
 * an expression argument (for example, base + offset) is evaluated as a whole
 * before the cast and the shift are applied, and the mask is built in size_t
 * so that it has the same width as the address it is applied to.
 */
#define IS_ALIGNED(ptr, order) \
  (((size_t)(ptr) & (((size_t)1 << (order)) - 1)) == 0)

#define IS_ALIGNED_SSE(ptr)  (IS_ALIGNED(ptr, 4)) /* 16 byte alignment */
#define IS_ALIGNED_AVX(ptr)  (IS_ALIGNED(ptr, 5)) /* 32 byte alignment */
34 
/*
 * Cached bitmask of usable SIMD instruction sets.  The all-ones value is the
 * "not yet initialized" sentinel that init_simd() tests for.
 */
static unsigned int simd_support = ~0U;
/* Nonzero while SIMD Huffman encoding is permitted; init_simd() may clear it. */
static unsigned int simd_huffman = 1U;
37 
38 /*
39  * Check what SIMD accelerations are supported.
40  *
41  * FIXME: This code is racy under a multi-threaded environment.
42  */
43 LOCAL(void)
init_simd(void)44 init_simd(void)
45 {
46 #ifndef NO_GETENV
47   char *env = NULL;
48 #endif
49 
50   if (simd_support != ~0U)
51     return;
52 
53   simd_support = jpeg_simd_cpu_support();
54 
55 #ifndef NO_GETENV
56   /* Force different settings through environment variables */
57   env = getenv("JSIMD_FORCESSE2");
58   if ((env != NULL) && (strcmp(env, "1") == 0))
59     simd_support &= JSIMD_SSE2;
60   env = getenv("JSIMD_FORCEAVX2");
61   if ((env != NULL) && (strcmp(env, "1") == 0))
62     simd_support &= JSIMD_AVX2;
63   env = getenv("JSIMD_FORCENONE");
64   if ((env != NULL) && (strcmp(env, "1") == 0))
65     simd_support = 0;
66   env = getenv("JSIMD_NOHUFFENC");
67   if ((env != NULL) && (strcmp(env, "1") == 0))
68     simd_huffman = 0;
69 #endif
70 }
71 
72 GLOBAL(int)
jsimd_can_rgb_ycc(void)73 jsimd_can_rgb_ycc(void)
74 {
75   init_simd();
76 
77   /* The code is optimised for these values only */
78   if (BITS_IN_JSAMPLE != 8)
79     return 0;
80   if (sizeof(JDIMENSION) != 4)
81     return 0;
82   if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
83     return 0;
84 
85   if ((simd_support & JSIMD_AVX2) &&
86       IS_ALIGNED_AVX(jconst_rgb_ycc_convert_avx2))
87     return 1;
88   if ((simd_support & JSIMD_SSE2) &&
89       IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2))
90     return 1;
91 
92   return 0;
93 }
94 
95 GLOBAL(int)
jsimd_can_rgb_gray(void)96 jsimd_can_rgb_gray(void)
97 {
98   init_simd();
99 
100   /* The code is optimised for these values only */
101   if (BITS_IN_JSAMPLE != 8)
102     return 0;
103   if (sizeof(JDIMENSION) != 4)
104     return 0;
105   if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
106     return 0;
107 
108   if ((simd_support & JSIMD_AVX2) &&
109       IS_ALIGNED_AVX(jconst_rgb_gray_convert_avx2))
110     return 1;
111   if ((simd_support & JSIMD_SSE2) &&
112       IS_ALIGNED_SSE(jconst_rgb_gray_convert_sse2))
113     return 1;
114 
115   return 0;
116 }
117 
118 GLOBAL(int)
jsimd_can_ycc_rgb(void)119 jsimd_can_ycc_rgb(void)
120 {
121   init_simd();
122 
123   /* The code is optimised for these values only */
124   if (BITS_IN_JSAMPLE != 8)
125     return 0;
126   if (sizeof(JDIMENSION) != 4)
127     return 0;
128   if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
129     return 0;
130 
131   if ((simd_support & JSIMD_AVX2) &&
132       IS_ALIGNED_AVX(jconst_ycc_rgb_convert_avx2))
133     return 1;
134   if ((simd_support & JSIMD_SSE2) &&
135       IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2))
136     return 1;
137 
138   return 0;
139 }
140 
141 GLOBAL(int)
jsimd_can_ycc_rgb565(void)142 jsimd_can_ycc_rgb565(void)
143 {
144   return 0;
145 }
146 
147 GLOBAL(void)
jsimd_rgb_ycc_convert(j_compress_ptr cinfo,JSAMPARRAY input_buf,JSAMPIMAGE output_buf,JDIMENSION output_row,int num_rows)148 jsimd_rgb_ycc_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
149                       JSAMPIMAGE output_buf, JDIMENSION output_row,
150                       int num_rows)
151 {
152   void (*avx2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
153   void (*sse2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
154 
155   switch (cinfo->in_color_space) {
156   case JCS_EXT_RGB:
157     avx2fct = jsimd_extrgb_ycc_convert_avx2;
158     sse2fct = jsimd_extrgb_ycc_convert_sse2;
159     break;
160   case JCS_EXT_RGBX:
161   case JCS_EXT_RGBA:
162     avx2fct = jsimd_extrgbx_ycc_convert_avx2;
163     sse2fct = jsimd_extrgbx_ycc_convert_sse2;
164     break;
165   case JCS_EXT_BGR:
166     avx2fct = jsimd_extbgr_ycc_convert_avx2;
167     sse2fct = jsimd_extbgr_ycc_convert_sse2;
168     break;
169   case JCS_EXT_BGRX:
170   case JCS_EXT_BGRA:
171     avx2fct = jsimd_extbgrx_ycc_convert_avx2;
172     sse2fct = jsimd_extbgrx_ycc_convert_sse2;
173     break;
174   case JCS_EXT_XBGR:
175   case JCS_EXT_ABGR:
176     avx2fct = jsimd_extxbgr_ycc_convert_avx2;
177     sse2fct = jsimd_extxbgr_ycc_convert_sse2;
178     break;
179   case JCS_EXT_XRGB:
180   case JCS_EXT_ARGB:
181     avx2fct = jsimd_extxrgb_ycc_convert_avx2;
182     sse2fct = jsimd_extxrgb_ycc_convert_sse2;
183     break;
184   default:
185     avx2fct = jsimd_rgb_ycc_convert_avx2;
186     sse2fct = jsimd_rgb_ycc_convert_sse2;
187     break;
188   }
189 
190   if (simd_support & JSIMD_AVX2)
191     avx2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
192   else
193     sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
194 }
195 
196 GLOBAL(void)
jsimd_rgb_gray_convert(j_compress_ptr cinfo,JSAMPARRAY input_buf,JSAMPIMAGE output_buf,JDIMENSION output_row,int num_rows)197 jsimd_rgb_gray_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
198                        JSAMPIMAGE output_buf, JDIMENSION output_row,
199                        int num_rows)
200 {
201   void (*avx2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
202   void (*sse2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
203 
204   switch (cinfo->in_color_space) {
205   case JCS_EXT_RGB:
206     avx2fct = jsimd_extrgb_gray_convert_avx2;
207     sse2fct = jsimd_extrgb_gray_convert_sse2;
208     break;
209   case JCS_EXT_RGBX:
210   case JCS_EXT_RGBA:
211     avx2fct = jsimd_extrgbx_gray_convert_avx2;
212     sse2fct = jsimd_extrgbx_gray_convert_sse2;
213     break;
214   case JCS_EXT_BGR:
215     avx2fct = jsimd_extbgr_gray_convert_avx2;
216     sse2fct = jsimd_extbgr_gray_convert_sse2;
217     break;
218   case JCS_EXT_BGRX:
219   case JCS_EXT_BGRA:
220     avx2fct = jsimd_extbgrx_gray_convert_avx2;
221     sse2fct = jsimd_extbgrx_gray_convert_sse2;
222     break;
223   case JCS_EXT_XBGR:
224   case JCS_EXT_ABGR:
225     avx2fct = jsimd_extxbgr_gray_convert_avx2;
226     sse2fct = jsimd_extxbgr_gray_convert_sse2;
227     break;
228   case JCS_EXT_XRGB:
229   case JCS_EXT_ARGB:
230     avx2fct = jsimd_extxrgb_gray_convert_avx2;
231     sse2fct = jsimd_extxrgb_gray_convert_sse2;
232     break;
233   default:
234     avx2fct = jsimd_rgb_gray_convert_avx2;
235     sse2fct = jsimd_rgb_gray_convert_sse2;
236     break;
237   }
238 
239   if (simd_support & JSIMD_AVX2)
240     avx2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
241   else
242     sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
243 }
244 
245 GLOBAL(void)
jsimd_ycc_rgb_convert(j_decompress_ptr cinfo,JSAMPIMAGE input_buf,JDIMENSION input_row,JSAMPARRAY output_buf,int num_rows)246 jsimd_ycc_rgb_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
247                       JDIMENSION input_row, JSAMPARRAY output_buf,
248                       int num_rows)
249 {
250   void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
251   void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
252 
253   switch (cinfo->out_color_space) {
254   case JCS_EXT_RGB:
255     avx2fct = jsimd_ycc_extrgb_convert_avx2;
256     sse2fct = jsimd_ycc_extrgb_convert_sse2;
257     break;
258   case JCS_EXT_RGBX:
259   case JCS_EXT_RGBA:
260     avx2fct = jsimd_ycc_extrgbx_convert_avx2;
261     sse2fct = jsimd_ycc_extrgbx_convert_sse2;
262     break;
263   case JCS_EXT_BGR:
264     avx2fct = jsimd_ycc_extbgr_convert_avx2;
265     sse2fct = jsimd_ycc_extbgr_convert_sse2;
266     break;
267   case JCS_EXT_BGRX:
268   case JCS_EXT_BGRA:
269     avx2fct = jsimd_ycc_extbgrx_convert_avx2;
270     sse2fct = jsimd_ycc_extbgrx_convert_sse2;
271     break;
272   case JCS_EXT_XBGR:
273   case JCS_EXT_ABGR:
274     avx2fct = jsimd_ycc_extxbgr_convert_avx2;
275     sse2fct = jsimd_ycc_extxbgr_convert_sse2;
276     break;
277   case JCS_EXT_XRGB:
278   case JCS_EXT_ARGB:
279     avx2fct = jsimd_ycc_extxrgb_convert_avx2;
280     sse2fct = jsimd_ycc_extxrgb_convert_sse2;
281     break;
282   default:
283     avx2fct = jsimd_ycc_rgb_convert_avx2;
284     sse2fct = jsimd_ycc_rgb_convert_sse2;
285     break;
286   }
287 
288   if (simd_support & JSIMD_AVX2)
289     avx2fct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
290   else
291     sse2fct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
292 }
293 
294 GLOBAL(void)
jsimd_ycc_rgb565_convert(j_decompress_ptr cinfo,JSAMPIMAGE input_buf,JDIMENSION input_row,JSAMPARRAY output_buf,int num_rows)295 jsimd_ycc_rgb565_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
296                          JDIMENSION input_row, JSAMPARRAY output_buf,
297                          int num_rows)
298 {
299 }
300 
301 GLOBAL(int)
jsimd_can_h2v2_downsample(void)302 jsimd_can_h2v2_downsample(void)
303 {
304   init_simd();
305 
306   /* The code is optimised for these values only */
307   if (BITS_IN_JSAMPLE != 8)
308     return 0;
309   if (sizeof(JDIMENSION) != 4)
310     return 0;
311 
312   if (simd_support & JSIMD_AVX2)
313     return 1;
314   if (simd_support & JSIMD_SSE2)
315     return 1;
316 
317   return 0;
318 }
319 
320 GLOBAL(int)
jsimd_can_h2v1_downsample(void)321 jsimd_can_h2v1_downsample(void)
322 {
323   init_simd();
324 
325   /* The code is optimised for these values only */
326   if (BITS_IN_JSAMPLE != 8)
327     return 0;
328   if (sizeof(JDIMENSION) != 4)
329     return 0;
330 
331   if (simd_support & JSIMD_AVX2)
332     return 1;
333   if (simd_support & JSIMD_SSE2)
334     return 1;
335 
336   return 0;
337 }
338 
339 GLOBAL(void)
jsimd_h2v2_downsample(j_compress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY output_data)340 jsimd_h2v2_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
341                       JSAMPARRAY input_data, JSAMPARRAY output_data)
342 {
343   if (simd_support & JSIMD_AVX2)
344     jsimd_h2v2_downsample_avx2(cinfo->image_width, cinfo->max_v_samp_factor,
345                                compptr->v_samp_factor,
346                                compptr->width_in_blocks, input_data,
347                                output_data);
348   else
349     jsimd_h2v2_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor,
350                                compptr->v_samp_factor,
351                                compptr->width_in_blocks, input_data,
352                                output_data);
353 }
354 
355 GLOBAL(void)
jsimd_h2v1_downsample(j_compress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY output_data)356 jsimd_h2v1_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
357                       JSAMPARRAY input_data, JSAMPARRAY output_data)
358 {
359   if (simd_support & JSIMD_AVX2)
360     jsimd_h2v1_downsample_avx2(cinfo->image_width, cinfo->max_v_samp_factor,
361                                compptr->v_samp_factor,
362                                compptr->width_in_blocks, input_data,
363                                output_data);
364   else
365     jsimd_h2v1_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor,
366                                compptr->v_samp_factor,
367                                compptr->width_in_blocks, input_data,
368                                output_data);
369 }
370 
371 GLOBAL(int)
jsimd_can_h2v2_upsample(void)372 jsimd_can_h2v2_upsample(void)
373 {
374   init_simd();
375 
376   /* The code is optimised for these values only */
377   if (BITS_IN_JSAMPLE != 8)
378     return 0;
379   if (sizeof(JDIMENSION) != 4)
380     return 0;
381 
382   if (simd_support & JSIMD_AVX2)
383     return 1;
384   if (simd_support & JSIMD_SSE2)
385     return 1;
386 
387   return 0;
388 }
389 
390 GLOBAL(int)
jsimd_can_h2v1_upsample(void)391 jsimd_can_h2v1_upsample(void)
392 {
393   init_simd();
394 
395   /* The code is optimised for these values only */
396   if (BITS_IN_JSAMPLE != 8)
397     return 0;
398   if (sizeof(JDIMENSION) != 4)
399     return 0;
400 
401   if (simd_support & JSIMD_AVX2)
402     return 1;
403   if (simd_support & JSIMD_SSE2)
404     return 1;
405 
406   return 0;
407 }
408 
409 GLOBAL(void)
jsimd_h2v2_upsample(j_decompress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY * output_data_ptr)410 jsimd_h2v2_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
411                     JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
412 {
413   if (simd_support & JSIMD_AVX2)
414     jsimd_h2v2_upsample_avx2(cinfo->max_v_samp_factor, cinfo->output_width,
415                              input_data, output_data_ptr);
416   else
417     jsimd_h2v2_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width,
418                              input_data, output_data_ptr);
419 }
420 
421 GLOBAL(void)
jsimd_h2v1_upsample(j_decompress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY * output_data_ptr)422 jsimd_h2v1_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
423                     JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
424 {
425   if (simd_support & JSIMD_AVX2)
426     jsimd_h2v1_upsample_avx2(cinfo->max_v_samp_factor, cinfo->output_width,
427                              input_data, output_data_ptr);
428   else
429     jsimd_h2v1_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width,
430                              input_data, output_data_ptr);
431 }
432 
433 GLOBAL(int)
jsimd_can_h2v2_fancy_upsample(void)434 jsimd_can_h2v2_fancy_upsample(void)
435 {
436   init_simd();
437 
438   /* The code is optimised for these values only */
439   if (BITS_IN_JSAMPLE != 8)
440     return 0;
441   if (sizeof(JDIMENSION) != 4)
442     return 0;
443 
444   if ((simd_support & JSIMD_AVX2) &&
445       IS_ALIGNED_AVX(jconst_fancy_upsample_avx2))
446     return 1;
447   if ((simd_support & JSIMD_SSE2) &&
448       IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
449     return 1;
450 
451   return 0;
452 }
453 
454 GLOBAL(int)
jsimd_can_h2v1_fancy_upsample(void)455 jsimd_can_h2v1_fancy_upsample(void)
456 {
457   init_simd();
458 
459   /* The code is optimised for these values only */
460   if (BITS_IN_JSAMPLE != 8)
461     return 0;
462   if (sizeof(JDIMENSION) != 4)
463     return 0;
464 
465   if ((simd_support & JSIMD_AVX2) &&
466       IS_ALIGNED_AVX(jconst_fancy_upsample_avx2))
467     return 1;
468   if ((simd_support & JSIMD_SSE2) &&
469       IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
470     return 1;
471 
472   return 0;
473 }
474 
475 GLOBAL(void)
jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY * output_data_ptr)476 jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
477                           JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
478 {
479   if (simd_support & JSIMD_AVX2)
480     jsimd_h2v2_fancy_upsample_avx2(cinfo->max_v_samp_factor,
481                                    compptr->downsampled_width, input_data,
482                                    output_data_ptr);
483   else
484     jsimd_h2v2_fancy_upsample_sse2(cinfo->max_v_samp_factor,
485                                    compptr->downsampled_width, input_data,
486                                    output_data_ptr);
487 }
488 
489 GLOBAL(void)
jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY * output_data_ptr)490 jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
491                           JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
492 {
493   if (simd_support & JSIMD_AVX2)
494     jsimd_h2v1_fancy_upsample_avx2(cinfo->max_v_samp_factor,
495                                    compptr->downsampled_width, input_data,
496                                    output_data_ptr);
497   else
498     jsimd_h2v1_fancy_upsample_sse2(cinfo->max_v_samp_factor,
499                                    compptr->downsampled_width, input_data,
500                                    output_data_ptr);
501 }
502 
503 GLOBAL(int)
jsimd_can_h2v2_merged_upsample(void)504 jsimd_can_h2v2_merged_upsample(void)
505 {
506   init_simd();
507 
508   /* The code is optimised for these values only */
509   if (BITS_IN_JSAMPLE != 8)
510     return 0;
511   if (sizeof(JDIMENSION) != 4)
512     return 0;
513 
514   if ((simd_support & JSIMD_AVX2) &&
515       IS_ALIGNED_AVX(jconst_merged_upsample_avx2))
516     return 1;
517   if ((simd_support & JSIMD_SSE2) &&
518       IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
519     return 1;
520 
521   return 0;
522 }
523 
524 GLOBAL(int)
jsimd_can_h2v1_merged_upsample(void)525 jsimd_can_h2v1_merged_upsample(void)
526 {
527   init_simd();
528 
529   /* The code is optimised for these values only */
530   if (BITS_IN_JSAMPLE != 8)
531     return 0;
532   if (sizeof(JDIMENSION) != 4)
533     return 0;
534 
535   if ((simd_support & JSIMD_AVX2) &&
536       IS_ALIGNED_AVX(jconst_merged_upsample_avx2))
537     return 1;
538   if ((simd_support & JSIMD_SSE2) &&
539       IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
540     return 1;
541 
542   return 0;
543 }
544 
545 GLOBAL(void)
jsimd_h2v2_merged_upsample(j_decompress_ptr cinfo,JSAMPIMAGE input_buf,JDIMENSION in_row_group_ctr,JSAMPARRAY output_buf)546 jsimd_h2v2_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
547                            JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
548 {
549   void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
550   void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
551 
552   switch (cinfo->out_color_space) {
553   case JCS_EXT_RGB:
554     avx2fct = jsimd_h2v2_extrgb_merged_upsample_avx2;
555     sse2fct = jsimd_h2v2_extrgb_merged_upsample_sse2;
556     break;
557   case JCS_EXT_RGBX:
558   case JCS_EXT_RGBA:
559     avx2fct = jsimd_h2v2_extrgbx_merged_upsample_avx2;
560     sse2fct = jsimd_h2v2_extrgbx_merged_upsample_sse2;
561     break;
562   case JCS_EXT_BGR:
563     avx2fct = jsimd_h2v2_extbgr_merged_upsample_avx2;
564     sse2fct = jsimd_h2v2_extbgr_merged_upsample_sse2;
565     break;
566   case JCS_EXT_BGRX:
567   case JCS_EXT_BGRA:
568     avx2fct = jsimd_h2v2_extbgrx_merged_upsample_avx2;
569     sse2fct = jsimd_h2v2_extbgrx_merged_upsample_sse2;
570     break;
571   case JCS_EXT_XBGR:
572   case JCS_EXT_ABGR:
573     avx2fct = jsimd_h2v2_extxbgr_merged_upsample_avx2;
574     sse2fct = jsimd_h2v2_extxbgr_merged_upsample_sse2;
575     break;
576   case JCS_EXT_XRGB:
577   case JCS_EXT_ARGB:
578     avx2fct = jsimd_h2v2_extxrgb_merged_upsample_avx2;
579     sse2fct = jsimd_h2v2_extxrgb_merged_upsample_sse2;
580     break;
581   default:
582     avx2fct = jsimd_h2v2_merged_upsample_avx2;
583     sse2fct = jsimd_h2v2_merged_upsample_sse2;
584     break;
585   }
586 
587   if (simd_support & JSIMD_AVX2)
588     avx2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
589   else
590     sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
591 }
592 
593 GLOBAL(void)
jsimd_h2v1_merged_upsample(j_decompress_ptr cinfo,JSAMPIMAGE input_buf,JDIMENSION in_row_group_ctr,JSAMPARRAY output_buf)594 jsimd_h2v1_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
595                            JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
596 {
597   void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
598   void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
599 
600   switch (cinfo->out_color_space) {
601   case JCS_EXT_RGB:
602     avx2fct = jsimd_h2v1_extrgb_merged_upsample_avx2;
603     sse2fct = jsimd_h2v1_extrgb_merged_upsample_sse2;
604     break;
605   case JCS_EXT_RGBX:
606   case JCS_EXT_RGBA:
607     avx2fct = jsimd_h2v1_extrgbx_merged_upsample_avx2;
608     sse2fct = jsimd_h2v1_extrgbx_merged_upsample_sse2;
609     break;
610   case JCS_EXT_BGR:
611     avx2fct = jsimd_h2v1_extbgr_merged_upsample_avx2;
612     sse2fct = jsimd_h2v1_extbgr_merged_upsample_sse2;
613     break;
614   case JCS_EXT_BGRX:
615   case JCS_EXT_BGRA:
616     avx2fct = jsimd_h2v1_extbgrx_merged_upsample_avx2;
617     sse2fct = jsimd_h2v1_extbgrx_merged_upsample_sse2;
618     break;
619   case JCS_EXT_XBGR:
620   case JCS_EXT_ABGR:
621     avx2fct = jsimd_h2v1_extxbgr_merged_upsample_avx2;
622     sse2fct = jsimd_h2v1_extxbgr_merged_upsample_sse2;
623     break;
624   case JCS_EXT_XRGB:
625   case JCS_EXT_ARGB:
626     avx2fct = jsimd_h2v1_extxrgb_merged_upsample_avx2;
627     sse2fct = jsimd_h2v1_extxrgb_merged_upsample_sse2;
628     break;
629   default:
630     avx2fct = jsimd_h2v1_merged_upsample_avx2;
631     sse2fct = jsimd_h2v1_merged_upsample_sse2;
632     break;
633   }
634 
635   if (simd_support & JSIMD_AVX2)
636     avx2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
637   else
638     sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
639 }
640 
641 GLOBAL(int)
jsimd_can_convsamp(void)642 jsimd_can_convsamp(void)
643 {
644   init_simd();
645 
646   /* The code is optimised for these values only */
647   if (DCTSIZE != 8)
648     return 0;
649   if (BITS_IN_JSAMPLE != 8)
650     return 0;
651   if (sizeof(JDIMENSION) != 4)
652     return 0;
653   if (sizeof(DCTELEM) != 2)
654     return 0;
655 
656   if (simd_support & JSIMD_AVX2)
657     return 1;
658   if (simd_support & JSIMD_SSE2)
659     return 1;
660 
661   return 0;
662 }
663 
664 GLOBAL(int)
jsimd_can_convsamp_float(void)665 jsimd_can_convsamp_float(void)
666 {
667   init_simd();
668 
669   /* The code is optimised for these values only */
670   if (DCTSIZE != 8)
671     return 0;
672   if (BITS_IN_JSAMPLE != 8)
673     return 0;
674   if (sizeof(JDIMENSION) != 4)
675     return 0;
676   if (sizeof(FAST_FLOAT) != 4)
677     return 0;
678 
679   if (simd_support & JSIMD_SSE2)
680     return 1;
681 
682   return 0;
683 }
684 
685 GLOBAL(void)
jsimd_convsamp(JSAMPARRAY sample_data,JDIMENSION start_col,DCTELEM * workspace)686 jsimd_convsamp(JSAMPARRAY sample_data, JDIMENSION start_col,
687                DCTELEM *workspace)
688 {
689   if (simd_support & JSIMD_AVX2)
690     jsimd_convsamp_avx2(sample_data, start_col, workspace);
691   else
692     jsimd_convsamp_sse2(sample_data, start_col, workspace);
693 }
694 
695 GLOBAL(void)
jsimd_convsamp_float(JSAMPARRAY sample_data,JDIMENSION start_col,FAST_FLOAT * workspace)696 jsimd_convsamp_float(JSAMPARRAY sample_data, JDIMENSION start_col,
697                      FAST_FLOAT *workspace)
698 {
699   jsimd_convsamp_float_sse2(sample_data, start_col, workspace);
700 }
701 
702 GLOBAL(int)
jsimd_can_fdct_islow(void)703 jsimd_can_fdct_islow(void)
704 {
705   init_simd();
706 
707   /* The code is optimised for these values only */
708   if (DCTSIZE != 8)
709     return 0;
710   if (sizeof(DCTELEM) != 2)
711     return 0;
712 
713   if ((simd_support & JSIMD_AVX2) && IS_ALIGNED_AVX(jconst_fdct_islow_avx2))
714     return 1;
715   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2))
716     return 1;
717 
718   return 0;
719 }
720 
721 GLOBAL(int)
jsimd_can_fdct_ifast(void)722 jsimd_can_fdct_ifast(void)
723 {
724   init_simd();
725 
726   /* The code is optimised for these values only */
727   if (DCTSIZE != 8)
728     return 0;
729   if (sizeof(DCTELEM) != 2)
730     return 0;
731 
732   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_ifast_sse2))
733     return 1;
734 
735   return 0;
736 }
737 
738 GLOBAL(int)
jsimd_can_fdct_float(void)739 jsimd_can_fdct_float(void)
740 {
741   init_simd();
742 
743   /* The code is optimised for these values only */
744   if (DCTSIZE != 8)
745     return 0;
746   if (sizeof(FAST_FLOAT) != 4)
747     return 0;
748 
749   if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse))
750     return 1;
751 
752   return 0;
753 }
754 
755 GLOBAL(void)
jsimd_fdct_islow(DCTELEM * data)756 jsimd_fdct_islow(DCTELEM *data)
757 {
758   if (simd_support & JSIMD_AVX2)
759     jsimd_fdct_islow_avx2(data);
760   else
761     jsimd_fdct_islow_sse2(data);
762 }
763 
764 GLOBAL(void)
jsimd_fdct_ifast(DCTELEM * data)765 jsimd_fdct_ifast(DCTELEM *data)
766 {
767   jsimd_fdct_ifast_sse2(data);
768 }
769 
770 GLOBAL(void)
jsimd_fdct_float(FAST_FLOAT * data)771 jsimd_fdct_float(FAST_FLOAT *data)
772 {
773   jsimd_fdct_float_sse(data);
774 }
775 
776 GLOBAL(int)
jsimd_can_quantize(void)777 jsimd_can_quantize(void)
778 {
779   init_simd();
780 
781   /* The code is optimised for these values only */
782   if (DCTSIZE != 8)
783     return 0;
784   if (sizeof(JCOEF) != 2)
785     return 0;
786   if (sizeof(DCTELEM) != 2)
787     return 0;
788 
789   if (simd_support & JSIMD_AVX2)
790     return 1;
791   if (simd_support & JSIMD_SSE2)
792     return 1;
793 
794   return 0;
795 }
796 
797 GLOBAL(int)
jsimd_can_quantize_float(void)798 jsimd_can_quantize_float(void)
799 {
800   init_simd();
801 
802   /* The code is optimised for these values only */
803   if (DCTSIZE != 8)
804     return 0;
805   if (sizeof(JCOEF) != 2)
806     return 0;
807   if (sizeof(FAST_FLOAT) != 4)
808     return 0;
809 
810   if (simd_support & JSIMD_SSE2)
811     return 1;
812 
813   return 0;
814 }
815 
816 GLOBAL(void)
jsimd_quantize(JCOEFPTR coef_block,DCTELEM * divisors,DCTELEM * workspace)817 jsimd_quantize(JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace)
818 {
819   if (simd_support & JSIMD_AVX2)
820     jsimd_quantize_avx2(coef_block, divisors, workspace);
821   else
822     jsimd_quantize_sse2(coef_block, divisors, workspace);
823 }
824 
825 GLOBAL(void)
jsimd_quantize_float(JCOEFPTR coef_block,FAST_FLOAT * divisors,FAST_FLOAT * workspace)826 jsimd_quantize_float(JCOEFPTR coef_block, FAST_FLOAT *divisors,
827                      FAST_FLOAT *workspace)
828 {
829   jsimd_quantize_float_sse2(coef_block, divisors, workspace);
830 }
831 
832 GLOBAL(int)
jsimd_can_idct_2x2(void)833 jsimd_can_idct_2x2(void)
834 {
835   init_simd();
836 
837   /* The code is optimised for these values only */
838   if (DCTSIZE != 8)
839     return 0;
840   if (sizeof(JCOEF) != 2)
841     return 0;
842   if (BITS_IN_JSAMPLE != 8)
843     return 0;
844   if (sizeof(JDIMENSION) != 4)
845     return 0;
846   if (sizeof(ISLOW_MULT_TYPE) != 2)
847     return 0;
848 
849   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
850     return 1;
851 
852   return 0;
853 }
854 
855 GLOBAL(int)
jsimd_can_idct_4x4(void)856 jsimd_can_idct_4x4(void)
857 {
858   init_simd();
859 
860   /* The code is optimised for these values only */
861   if (DCTSIZE != 8)
862     return 0;
863   if (sizeof(JCOEF) != 2)
864     return 0;
865   if (BITS_IN_JSAMPLE != 8)
866     return 0;
867   if (sizeof(JDIMENSION) != 4)
868     return 0;
869   if (sizeof(ISLOW_MULT_TYPE) != 2)
870     return 0;
871 
872   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
873     return 1;
874 
875   return 0;
876 }
877 
878 GLOBAL(void)
jsimd_idct_2x2(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)879 jsimd_idct_2x2(j_decompress_ptr cinfo, jpeg_component_info *compptr,
880                JCOEFPTR coef_block, JSAMPARRAY output_buf,
881                JDIMENSION output_col)
882 {
883   jsimd_idct_2x2_sse2(compptr->dct_table, coef_block, output_buf, output_col);
884 }
885 
886 GLOBAL(void)
jsimd_idct_4x4(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)887 jsimd_idct_4x4(j_decompress_ptr cinfo, jpeg_component_info *compptr,
888                JCOEFPTR coef_block, JSAMPARRAY output_buf,
889                JDIMENSION output_col)
890 {
891   jsimd_idct_4x4_sse2(compptr->dct_table, coef_block, output_buf, output_col);
892 }
893 
894 GLOBAL(int)
jsimd_can_idct_islow(void)895 jsimd_can_idct_islow(void)
896 {
897   init_simd();
898 
899   /* The code is optimised for these values only */
900   if (DCTSIZE != 8)
901     return 0;
902   if (sizeof(JCOEF) != 2)
903     return 0;
904   if (BITS_IN_JSAMPLE != 8)
905     return 0;
906   if (sizeof(JDIMENSION) != 4)
907     return 0;
908   if (sizeof(ISLOW_MULT_TYPE) != 2)
909     return 0;
910 
911   if ((simd_support & JSIMD_AVX2) && IS_ALIGNED_AVX(jconst_idct_islow_avx2))
912     return 1;
913   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_islow_sse2))
914     return 1;
915 
916   return 0;
917 }
918 
919 GLOBAL(int)
jsimd_can_idct_ifast(void)920 jsimd_can_idct_ifast(void)
921 {
922   init_simd();
923 
924   /* The code is optimised for these values only */
925   if (DCTSIZE != 8)
926     return 0;
927   if (sizeof(JCOEF) != 2)
928     return 0;
929   if (BITS_IN_JSAMPLE != 8)
930     return 0;
931   if (sizeof(JDIMENSION) != 4)
932     return 0;
933   if (sizeof(IFAST_MULT_TYPE) != 2)
934     return 0;
935   if (IFAST_SCALE_BITS != 2)
936     return 0;
937 
938   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2))
939     return 1;
940 
941   return 0;
942 }
943 
944 GLOBAL(int)
jsimd_can_idct_float(void)945 jsimd_can_idct_float(void)
946 {
947   init_simd();
948 
949   if (DCTSIZE != 8)
950     return 0;
951   if (sizeof(JCOEF) != 2)
952     return 0;
953   if (BITS_IN_JSAMPLE != 8)
954     return 0;
955   if (sizeof(JDIMENSION) != 4)
956     return 0;
957   if (sizeof(FAST_FLOAT) != 4)
958     return 0;
959   if (sizeof(FLOAT_MULT_TYPE) != 4)
960     return 0;
961 
962   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2))
963     return 1;
964 
965   return 0;
966 }
967 
968 GLOBAL(void)
jsimd_idct_islow(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)969 jsimd_idct_islow(j_decompress_ptr cinfo, jpeg_component_info *compptr,
970                  JCOEFPTR coef_block, JSAMPARRAY output_buf,
971                  JDIMENSION output_col)
972 {
973   if (simd_support & JSIMD_AVX2)
974     jsimd_idct_islow_avx2(compptr->dct_table, coef_block, output_buf,
975                           output_col);
976   else
977     jsimd_idct_islow_sse2(compptr->dct_table, coef_block, output_buf,
978                           output_col);
979 }
980 
981 GLOBAL(void)
jsimd_idct_ifast(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)982 jsimd_idct_ifast(j_decompress_ptr cinfo, jpeg_component_info *compptr,
983                  JCOEFPTR coef_block, JSAMPARRAY output_buf,
984                  JDIMENSION output_col)
985 {
986   jsimd_idct_ifast_sse2(compptr->dct_table, coef_block, output_buf,
987                         output_col);
988 }
989 
990 GLOBAL(void)
jsimd_idct_float(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)991 jsimd_idct_float(j_decompress_ptr cinfo, jpeg_component_info *compptr,
992                  JCOEFPTR coef_block, JSAMPARRAY output_buf,
993                  JDIMENSION output_col)
994 {
995   jsimd_idct_float_sse2(compptr->dct_table, coef_block, output_buf,
996                         output_col);
997 }
998 
999 GLOBAL(int)
jsimd_can_huff_encode_one_block(void)1000 jsimd_can_huff_encode_one_block(void)
1001 {
1002   init_simd();
1003 
1004   if (DCTSIZE != 8)
1005     return 0;
1006   if (sizeof(JCOEF) != 2)
1007     return 0;
1008 
1009   if ((simd_support & JSIMD_SSE2) && simd_huffman &&
1010       IS_ALIGNED_SSE(jconst_huff_encode_one_block))
1011     return 1;
1012 
1013   return 0;
1014 }
1015 
1016 GLOBAL(JOCTET *)
jsimd_huff_encode_one_block(void * state,JOCTET * buffer,JCOEFPTR block,int last_dc_val,c_derived_tbl * dctbl,c_derived_tbl * actbl)1017 jsimd_huff_encode_one_block(void *state, JOCTET *buffer, JCOEFPTR block,
1018                             int last_dc_val, c_derived_tbl *dctbl,
1019                             c_derived_tbl *actbl)
1020 {
1021   return jsimd_huff_encode_one_block_sse2(state, buffer, block, last_dc_val,
1022                                           dctbl, actbl);
1023 }
1024 
1025 GLOBAL(int)
jsimd_can_encode_mcu_AC_first_prepare(void)1026 jsimd_can_encode_mcu_AC_first_prepare(void)
1027 {
1028   init_simd();
1029 
1030   if (DCTSIZE != 8)
1031     return 0;
1032   if (sizeof(JCOEF) != 2)
1033     return 0;
1034   if (simd_support & JSIMD_SSE2)
1035     return 1;
1036 
1037   return 0;
1038 }
1039 
1040 GLOBAL(void)
jsimd_encode_mcu_AC_first_prepare(const JCOEF * block,const int * jpeg_natural_order_start,int Sl,int Al,JCOEF * values,size_t * zerobits)1041 jsimd_encode_mcu_AC_first_prepare(const JCOEF *block,
1042                                   const int *jpeg_natural_order_start, int Sl,
1043                                   int Al, JCOEF *values, size_t *zerobits)
1044 {
1045   jsimd_encode_mcu_AC_first_prepare_sse2(block, jpeg_natural_order_start,
1046                                          Sl, Al, values, zerobits);
1047 }
1048 
1049 GLOBAL(int)
jsimd_can_encode_mcu_AC_refine_prepare(void)1050 jsimd_can_encode_mcu_AC_refine_prepare(void)
1051 {
1052   init_simd();
1053 
1054   if (DCTSIZE != 8)
1055     return 0;
1056   if (sizeof(JCOEF) != 2)
1057     return 0;
1058   if (simd_support & JSIMD_SSE2)
1059     return 1;
1060 
1061   return 0;
1062 }
1063 
1064 GLOBAL(int)
jsimd_encode_mcu_AC_refine_prepare(const JCOEF * block,const int * jpeg_natural_order_start,int Sl,int Al,JCOEF * absvalues,size_t * bits)1065 jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block,
1066                                    const int *jpeg_natural_order_start, int Sl,
1067                                    int Al, JCOEF *absvalues, size_t *bits)
1068 {
1069   return jsimd_encode_mcu_AC_refine_prepare_sse2(block,
1070                                                  jpeg_natural_order_start,
1071                                                  Sl, Al, absvalues, bits);
1072 }
1073