/*
 * jsimd_x86_64.c
 *
 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
 * Copyright (C) 2009-2011, 2014, 2016, D. R. Commander.
 * Copyright (C) 2015, Matthieu Darbois.
 *
 * Based on the x86 SIMD extension for IJG JPEG library,
 * Copyright (C) 1999-2006, MIYASAKA Masaru.
 * For conditions of distribution and use, see copyright notice in jsimdext.inc
 *
 * This file contains the interface between the "normal" portions
 * of the library and the SIMD implementations when running on a
 * 64-bit x86 architecture.
 */
16
17 #define JPEG_INTERNALS
18 #include "../jinclude.h"
19 #include "../jpeglib.h"
20 #include "../jsimd.h"
21 #include "../jdct.h"
22 #include "../jsimddct.h"
23 #include "jsimd.h"
24
/*
 * In the PIC cases, we have no guarantee that constants will keep
 * their alignment. This macro allows us to verify it at runtime.
 *
 * IS_ALIGNED(ptr, order) evaluates to nonzero when the address held in
 * ptr is a multiple of 2^order.  Both arguments are parenthesized in the
 * expansion so that compound expressions (pointer arithmetic, ?:, ...)
 * can be passed without precedence surprises, and the mask is built in
 * size_t so it has the same width as the converted pointer.
 */
#define IS_ALIGNED(ptr, order) \
  ((((size_t)(ptr)) & (((size_t)1 << (order)) - 1)) == 0)

#define IS_ALIGNED_SSE(ptr) (IS_ALIGNED((ptr), 4)) /* 16 byte alignment */

/*
 * Bitmask of JSIMD_* capability flags.  The all-ones value is the
 * "not yet initialised" sentinel that init_simd() tests for.
 */
static unsigned int simd_support = ~0U;
/* Nonzero while the SIMD Huffman encoder may be used (see init_simd()). */
static unsigned int simd_huffman = 1;
35
36 /*
37 * Check what SIMD accelerations are supported.
38 *
39 * FIXME: This code is racy under a multi-threaded environment.
40 */
41 LOCAL(void)
init_simd(void)42 init_simd (void)
43 {
44 char *env = NULL;
45
46 if (simd_support != ~0U)
47 return;
48
49 simd_support = JSIMD_SSE2 | JSIMD_SSE;
50
51 /* Force different settings through environment variables */
52 env = getenv("JSIMD_FORCENONE");
53 if ((env != NULL) && (strcmp(env, "1") == 0))
54 simd_support = 0;
55 env = getenv("JSIMD_NOHUFFENC");
56 if ((env != NULL) && (strcmp(env, "1") == 0))
57 simd_huffman = 0;
58 }
59
60 GLOBAL(int)
jsimd_can_rgb_ycc(void)61 jsimd_can_rgb_ycc (void)
62 {
63 init_simd();
64
65 /* The code is optimised for these values only */
66 if (BITS_IN_JSAMPLE != 8)
67 return 0;
68 if (sizeof(JDIMENSION) != 4)
69 return 0;
70 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
71 return 0;
72
73 if ((simd_support & JSIMD_SSE2) &&
74 IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2))
75 return 1;
76
77 return 0;
78 }
79
80 GLOBAL(int)
jsimd_can_rgb_gray(void)81 jsimd_can_rgb_gray (void)
82 {
83 init_simd();
84
85 /* The code is optimised for these values only */
86 if (BITS_IN_JSAMPLE != 8)
87 return 0;
88 if (sizeof(JDIMENSION) != 4)
89 return 0;
90 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
91 return 0;
92
93 if ((simd_support & JSIMD_SSE2) &&
94 IS_ALIGNED_SSE(jconst_rgb_gray_convert_sse2))
95 return 1;
96
97 return 0;
98 }
99
100 GLOBAL(int)
jsimd_can_ycc_rgb(void)101 jsimd_can_ycc_rgb (void)
102 {
103 init_simd();
104
105 /* The code is optimised for these values only */
106 if (BITS_IN_JSAMPLE != 8)
107 return 0;
108 if (sizeof(JDIMENSION) != 4)
109 return 0;
110 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
111 return 0;
112
113 if ((simd_support & JSIMD_SSE2) &&
114 IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2))
115 return 1;
116
117 return 0;
118 }
119
120 GLOBAL(int)
jsimd_can_ycc_rgb565(void)121 jsimd_can_ycc_rgb565 (void)
122 {
123 return 0;
124 }
125
126 GLOBAL(void)
jsimd_rgb_ycc_convert(j_compress_ptr cinfo,JSAMPARRAY input_buf,JSAMPIMAGE output_buf,JDIMENSION output_row,int num_rows)127 jsimd_rgb_ycc_convert (j_compress_ptr cinfo,
128 JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
129 JDIMENSION output_row, int num_rows)
130 {
131 void (*sse2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
132
133 switch(cinfo->in_color_space) {
134 case JCS_EXT_RGB:
135 sse2fct=jsimd_extrgb_ycc_convert_sse2;
136 break;
137 case JCS_EXT_RGBX:
138 case JCS_EXT_RGBA:
139 sse2fct=jsimd_extrgbx_ycc_convert_sse2;
140 break;
141 case JCS_EXT_BGR:
142 sse2fct=jsimd_extbgr_ycc_convert_sse2;
143 break;
144 case JCS_EXT_BGRX:
145 case JCS_EXT_BGRA:
146 sse2fct=jsimd_extbgrx_ycc_convert_sse2;
147 break;
148 case JCS_EXT_XBGR:
149 case JCS_EXT_ABGR:
150 sse2fct=jsimd_extxbgr_ycc_convert_sse2;
151 break;
152 case JCS_EXT_XRGB:
153 case JCS_EXT_ARGB:
154 sse2fct=jsimd_extxrgb_ycc_convert_sse2;
155 break;
156 default:
157 sse2fct=jsimd_rgb_ycc_convert_sse2;
158 break;
159 }
160
161 sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
162 }
163
164 GLOBAL(void)
jsimd_rgb_gray_convert(j_compress_ptr cinfo,JSAMPARRAY input_buf,JSAMPIMAGE output_buf,JDIMENSION output_row,int num_rows)165 jsimd_rgb_gray_convert (j_compress_ptr cinfo,
166 JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
167 JDIMENSION output_row, int num_rows)
168 {
169 void (*sse2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
170
171 switch(cinfo->in_color_space) {
172 case JCS_EXT_RGB:
173 sse2fct=jsimd_extrgb_gray_convert_sse2;
174 break;
175 case JCS_EXT_RGBX:
176 case JCS_EXT_RGBA:
177 sse2fct=jsimd_extrgbx_gray_convert_sse2;
178 break;
179 case JCS_EXT_BGR:
180 sse2fct=jsimd_extbgr_gray_convert_sse2;
181 break;
182 case JCS_EXT_BGRX:
183 case JCS_EXT_BGRA:
184 sse2fct=jsimd_extbgrx_gray_convert_sse2;
185 break;
186 case JCS_EXT_XBGR:
187 case JCS_EXT_ABGR:
188 sse2fct=jsimd_extxbgr_gray_convert_sse2;
189 break;
190 case JCS_EXT_XRGB:
191 case JCS_EXT_ARGB:
192 sse2fct=jsimd_extxrgb_gray_convert_sse2;
193 break;
194 default:
195 sse2fct=jsimd_rgb_gray_convert_sse2;
196 break;
197 }
198
199 sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
200 }
201
202 GLOBAL(void)
jsimd_ycc_rgb_convert(j_decompress_ptr cinfo,JSAMPIMAGE input_buf,JDIMENSION input_row,JSAMPARRAY output_buf,int num_rows)203 jsimd_ycc_rgb_convert (j_decompress_ptr cinfo,
204 JSAMPIMAGE input_buf, JDIMENSION input_row,
205 JSAMPARRAY output_buf, int num_rows)
206 {
207 void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
208
209 switch(cinfo->out_color_space) {
210 case JCS_EXT_RGB:
211 sse2fct=jsimd_ycc_extrgb_convert_sse2;
212 break;
213 case JCS_EXT_RGBX:
214 case JCS_EXT_RGBA:
215 sse2fct=jsimd_ycc_extrgbx_convert_sse2;
216 break;
217 case JCS_EXT_BGR:
218 sse2fct=jsimd_ycc_extbgr_convert_sse2;
219 break;
220 case JCS_EXT_BGRX:
221 case JCS_EXT_BGRA:
222 sse2fct=jsimd_ycc_extbgrx_convert_sse2;
223 break;
224 case JCS_EXT_XBGR:
225 case JCS_EXT_ABGR:
226 sse2fct=jsimd_ycc_extxbgr_convert_sse2;
227 break;
228 case JCS_EXT_XRGB:
229 case JCS_EXT_ARGB:
230 sse2fct=jsimd_ycc_extxrgb_convert_sse2;
231 break;
232 default:
233 sse2fct=jsimd_ycc_rgb_convert_sse2;
234 break;
235 }
236
237 sse2fct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
238 }
239
240 GLOBAL(void)
jsimd_ycc_rgb565_convert(j_decompress_ptr cinfo,JSAMPIMAGE input_buf,JDIMENSION input_row,JSAMPARRAY output_buf,int num_rows)241 jsimd_ycc_rgb565_convert (j_decompress_ptr cinfo,
242 JSAMPIMAGE input_buf, JDIMENSION input_row,
243 JSAMPARRAY output_buf, int num_rows)
244 {
245 }
246
247 GLOBAL(int)
jsimd_can_h2v2_downsample(void)248 jsimd_can_h2v2_downsample (void)
249 {
250 init_simd();
251
252 /* The code is optimised for these values only */
253 if (BITS_IN_JSAMPLE != 8)
254 return 0;
255 if (sizeof(JDIMENSION) != 4)
256 return 0;
257
258 if (simd_support & JSIMD_SSE2)
259 return 1;
260
261 return 0;
262 }
263
264 GLOBAL(int)
jsimd_can_h2v1_downsample(void)265 jsimd_can_h2v1_downsample (void)
266 {
267 init_simd();
268
269 /* The code is optimised for these values only */
270 if (BITS_IN_JSAMPLE != 8)
271 return 0;
272 if (sizeof(JDIMENSION) != 4)
273 return 0;
274
275 if (simd_support & JSIMD_SSE2)
276 return 1;
277
278 return 0;
279 }
280
281 GLOBAL(void)
jsimd_h2v2_downsample(j_compress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY output_data)282 jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr,
283 JSAMPARRAY input_data, JSAMPARRAY output_data)
284 {
285 jsimd_h2v2_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor,
286 compptr->v_samp_factor, compptr->width_in_blocks,
287 input_data, output_data);
288 }
289
290 GLOBAL(void)
jsimd_h2v1_downsample(j_compress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY output_data)291 jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr,
292 JSAMPARRAY input_data, JSAMPARRAY output_data)
293 {
294 jsimd_h2v1_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor,
295 compptr->v_samp_factor, compptr->width_in_blocks,
296 input_data, output_data);
297 }
298
299 GLOBAL(int)
jsimd_can_h2v2_upsample(void)300 jsimd_can_h2v2_upsample (void)
301 {
302 init_simd();
303
304 /* The code is optimised for these values only */
305 if (BITS_IN_JSAMPLE != 8)
306 return 0;
307 if (sizeof(JDIMENSION) != 4)
308 return 0;
309
310 if (simd_support & JSIMD_SSE2)
311 return 1;
312
313 return 0;
314 }
315
316 GLOBAL(int)
jsimd_can_h2v1_upsample(void)317 jsimd_can_h2v1_upsample (void)
318 {
319 init_simd();
320
321 /* The code is optimised for these values only */
322 if (BITS_IN_JSAMPLE != 8)
323 return 0;
324 if (sizeof(JDIMENSION) != 4)
325 return 0;
326
327 if (simd_support & JSIMD_SSE2)
328 return 1;
329
330 return 0;
331 }
332
333 GLOBAL(void)
jsimd_h2v2_upsample(j_decompress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY * output_data_ptr)334 jsimd_h2v2_upsample (j_decompress_ptr cinfo,
335 jpeg_component_info *compptr,
336 JSAMPARRAY input_data,
337 JSAMPARRAY *output_data_ptr)
338 {
339 jsimd_h2v2_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width,
340 input_data, output_data_ptr);
341 }
342
343 GLOBAL(void)
jsimd_h2v1_upsample(j_decompress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY * output_data_ptr)344 jsimd_h2v1_upsample (j_decompress_ptr cinfo,
345 jpeg_component_info *compptr,
346 JSAMPARRAY input_data,
347 JSAMPARRAY *output_data_ptr)
348 {
349 jsimd_h2v1_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width,
350 input_data, output_data_ptr);
351 }
352
353 GLOBAL(int)
jsimd_can_h2v2_fancy_upsample(void)354 jsimd_can_h2v2_fancy_upsample (void)
355 {
356 init_simd();
357
358 /* The code is optimised for these values only */
359 if (BITS_IN_JSAMPLE != 8)
360 return 0;
361 if (sizeof(JDIMENSION) != 4)
362 return 0;
363
364 if ((simd_support & JSIMD_SSE2) &&
365 IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
366 return 1;
367
368 return 0;
369 }
370
371 GLOBAL(int)
jsimd_can_h2v1_fancy_upsample(void)372 jsimd_can_h2v1_fancy_upsample (void)
373 {
374 init_simd();
375
376 /* The code is optimised for these values only */
377 if (BITS_IN_JSAMPLE != 8)
378 return 0;
379 if (sizeof(JDIMENSION) != 4)
380 return 0;
381
382 if ((simd_support & JSIMD_SSE2) &&
383 IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
384 return 1;
385
386 return 0;
387 }
388
389 GLOBAL(void)
jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY * output_data_ptr)390 jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo,
391 jpeg_component_info *compptr,
392 JSAMPARRAY input_data,
393 JSAMPARRAY *output_data_ptr)
394 {
395 jsimd_h2v2_fancy_upsample_sse2(cinfo->max_v_samp_factor,
396 compptr->downsampled_width, input_data,
397 output_data_ptr);
398 }
399
400 GLOBAL(void)
jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY * output_data_ptr)401 jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo,
402 jpeg_component_info *compptr,
403 JSAMPARRAY input_data,
404 JSAMPARRAY *output_data_ptr)
405 {
406 jsimd_h2v1_fancy_upsample_sse2(cinfo->max_v_samp_factor,
407 compptr->downsampled_width, input_data,
408 output_data_ptr);
409 }
410
411 GLOBAL(int)
jsimd_can_h2v2_merged_upsample(void)412 jsimd_can_h2v2_merged_upsample (void)
413 {
414 init_simd();
415
416 /* The code is optimised for these values only */
417 if (BITS_IN_JSAMPLE != 8)
418 return 0;
419 if (sizeof(JDIMENSION) != 4)
420 return 0;
421
422 if ((simd_support & JSIMD_SSE2) &&
423 IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
424 return 1;
425
426 return 0;
427 }
428
429 GLOBAL(int)
jsimd_can_h2v1_merged_upsample(void)430 jsimd_can_h2v1_merged_upsample (void)
431 {
432 init_simd();
433
434 /* The code is optimised for these values only */
435 if (BITS_IN_JSAMPLE != 8)
436 return 0;
437 if (sizeof(JDIMENSION) != 4)
438 return 0;
439
440 if ((simd_support & JSIMD_SSE2) &&
441 IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
442 return 1;
443
444 return 0;
445 }
446
447 GLOBAL(void)
jsimd_h2v2_merged_upsample(j_decompress_ptr cinfo,JSAMPIMAGE input_buf,JDIMENSION in_row_group_ctr,JSAMPARRAY output_buf)448 jsimd_h2v2_merged_upsample (j_decompress_ptr cinfo,
449 JSAMPIMAGE input_buf,
450 JDIMENSION in_row_group_ctr,
451 JSAMPARRAY output_buf)
452 {
453 void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
454
455 switch(cinfo->out_color_space) {
456 case JCS_EXT_RGB:
457 sse2fct=jsimd_h2v2_extrgb_merged_upsample_sse2;
458 break;
459 case JCS_EXT_RGBX:
460 case JCS_EXT_RGBA:
461 sse2fct=jsimd_h2v2_extrgbx_merged_upsample_sse2;
462 break;
463 case JCS_EXT_BGR:
464 sse2fct=jsimd_h2v2_extbgr_merged_upsample_sse2;
465 break;
466 case JCS_EXT_BGRX:
467 case JCS_EXT_BGRA:
468 sse2fct=jsimd_h2v2_extbgrx_merged_upsample_sse2;
469 break;
470 case JCS_EXT_XBGR:
471 case JCS_EXT_ABGR:
472 sse2fct=jsimd_h2v2_extxbgr_merged_upsample_sse2;
473 break;
474 case JCS_EXT_XRGB:
475 case JCS_EXT_ARGB:
476 sse2fct=jsimd_h2v2_extxrgb_merged_upsample_sse2;
477 break;
478 default:
479 sse2fct=jsimd_h2v2_merged_upsample_sse2;
480 break;
481 }
482
483 sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
484 }
485
486 GLOBAL(void)
jsimd_h2v1_merged_upsample(j_decompress_ptr cinfo,JSAMPIMAGE input_buf,JDIMENSION in_row_group_ctr,JSAMPARRAY output_buf)487 jsimd_h2v1_merged_upsample (j_decompress_ptr cinfo,
488 JSAMPIMAGE input_buf,
489 JDIMENSION in_row_group_ctr,
490 JSAMPARRAY output_buf)
491 {
492 void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
493
494 switch(cinfo->out_color_space) {
495 case JCS_EXT_RGB:
496 sse2fct=jsimd_h2v1_extrgb_merged_upsample_sse2;
497 break;
498 case JCS_EXT_RGBX:
499 case JCS_EXT_RGBA:
500 sse2fct=jsimd_h2v1_extrgbx_merged_upsample_sse2;
501 break;
502 case JCS_EXT_BGR:
503 sse2fct=jsimd_h2v1_extbgr_merged_upsample_sse2;
504 break;
505 case JCS_EXT_BGRX:
506 case JCS_EXT_BGRA:
507 sse2fct=jsimd_h2v1_extbgrx_merged_upsample_sse2;
508 break;
509 case JCS_EXT_XBGR:
510 case JCS_EXT_ABGR:
511 sse2fct=jsimd_h2v1_extxbgr_merged_upsample_sse2;
512 break;
513 case JCS_EXT_XRGB:
514 case JCS_EXT_ARGB:
515 sse2fct=jsimd_h2v1_extxrgb_merged_upsample_sse2;
516 break;
517 default:
518 sse2fct=jsimd_h2v1_merged_upsample_sse2;
519 break;
520 }
521
522 sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
523 }
524
525 GLOBAL(int)
jsimd_can_convsamp(void)526 jsimd_can_convsamp (void)
527 {
528 init_simd();
529
530 /* The code is optimised for these values only */
531 if (DCTSIZE != 8)
532 return 0;
533 if (BITS_IN_JSAMPLE != 8)
534 return 0;
535 if (sizeof(JDIMENSION) != 4)
536 return 0;
537 if (sizeof(DCTELEM) != 2)
538 return 0;
539
540 if (simd_support & JSIMD_SSE2)
541 return 1;
542
543 return 0;
544 }
545
546 GLOBAL(int)
jsimd_can_convsamp_float(void)547 jsimd_can_convsamp_float (void)
548 {
549 init_simd();
550
551 /* The code is optimised for these values only */
552 if (DCTSIZE != 8)
553 return 0;
554 if (BITS_IN_JSAMPLE != 8)
555 return 0;
556 if (sizeof(JDIMENSION) != 4)
557 return 0;
558 if (sizeof(FAST_FLOAT) != 4)
559 return 0;
560
561 if (simd_support & JSIMD_SSE2)
562 return 1;
563
564 return 0;
565 }
566
567 GLOBAL(void)
jsimd_convsamp(JSAMPARRAY sample_data,JDIMENSION start_col,DCTELEM * workspace)568 jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col,
569 DCTELEM *workspace)
570 {
571 jsimd_convsamp_sse2(sample_data, start_col, workspace);
572 }
573
574 GLOBAL(void)
jsimd_convsamp_float(JSAMPARRAY sample_data,JDIMENSION start_col,FAST_FLOAT * workspace)575 jsimd_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col,
576 FAST_FLOAT *workspace)
577 {
578 jsimd_convsamp_float_sse2(sample_data, start_col, workspace);
579 }
580
581 GLOBAL(int)
jsimd_can_fdct_islow(void)582 jsimd_can_fdct_islow (void)
583 {
584 init_simd();
585
586 /* The code is optimised for these values only */
587 if (DCTSIZE != 8)
588 return 0;
589 if (sizeof(DCTELEM) != 2)
590 return 0;
591
592 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2))
593 return 1;
594
595 return 0;
596 }
597
598 GLOBAL(int)
jsimd_can_fdct_ifast(void)599 jsimd_can_fdct_ifast (void)
600 {
601 init_simd();
602
603 /* The code is optimised for these values only */
604 if (DCTSIZE != 8)
605 return 0;
606 if (sizeof(DCTELEM) != 2)
607 return 0;
608
609 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_ifast_sse2))
610 return 1;
611
612 return 0;
613 }
614
615 GLOBAL(int)
jsimd_can_fdct_float(void)616 jsimd_can_fdct_float (void)
617 {
618 init_simd();
619
620 /* The code is optimised for these values only */
621 if (DCTSIZE != 8)
622 return 0;
623 if (sizeof(FAST_FLOAT) != 4)
624 return 0;
625
626 if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse))
627 return 1;
628
629 return 0;
630 }
631
632 GLOBAL(void)
jsimd_fdct_islow(DCTELEM * data)633 jsimd_fdct_islow (DCTELEM *data)
634 {
635 jsimd_fdct_islow_sse2(data);
636 }
637
638 GLOBAL(void)
jsimd_fdct_ifast(DCTELEM * data)639 jsimd_fdct_ifast (DCTELEM *data)
640 {
641 jsimd_fdct_ifast_sse2(data);
642 }
643
644 GLOBAL(void)
jsimd_fdct_float(FAST_FLOAT * data)645 jsimd_fdct_float (FAST_FLOAT *data)
646 {
647 jsimd_fdct_float_sse(data);
648 }
649
650 GLOBAL(int)
jsimd_can_quantize(void)651 jsimd_can_quantize (void)
652 {
653 init_simd();
654
655 /* The code is optimised for these values only */
656 if (DCTSIZE != 8)
657 return 0;
658 if (sizeof(JCOEF) != 2)
659 return 0;
660 if (sizeof(DCTELEM) != 2)
661 return 0;
662
663 if (simd_support & JSIMD_SSE2)
664 return 1;
665
666 return 0;
667 }
668
669 GLOBAL(int)
jsimd_can_quantize_float(void)670 jsimd_can_quantize_float (void)
671 {
672 init_simd();
673
674 /* The code is optimised for these values only */
675 if (DCTSIZE != 8)
676 return 0;
677 if (sizeof(JCOEF) != 2)
678 return 0;
679 if (sizeof(FAST_FLOAT) != 4)
680 return 0;
681
682 if (simd_support & JSIMD_SSE2)
683 return 1;
684
685 return 0;
686 }
687
688 GLOBAL(void)
jsimd_quantize(JCOEFPTR coef_block,DCTELEM * divisors,DCTELEM * workspace)689 jsimd_quantize (JCOEFPTR coef_block, DCTELEM *divisors,
690 DCTELEM *workspace)
691 {
692 jsimd_quantize_sse2(coef_block, divisors, workspace);
693 }
694
695 GLOBAL(void)
jsimd_quantize_float(JCOEFPTR coef_block,FAST_FLOAT * divisors,FAST_FLOAT * workspace)696 jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT *divisors,
697 FAST_FLOAT *workspace)
698 {
699 jsimd_quantize_float_sse2(coef_block, divisors, workspace);
700 }
701
702 GLOBAL(int)
jsimd_can_idct_2x2(void)703 jsimd_can_idct_2x2 (void)
704 {
705 init_simd();
706
707 /* The code is optimised for these values only */
708 if (DCTSIZE != 8)
709 return 0;
710 if (sizeof(JCOEF) != 2)
711 return 0;
712 if (BITS_IN_JSAMPLE != 8)
713 return 0;
714 if (sizeof(JDIMENSION) != 4)
715 return 0;
716 if (sizeof(ISLOW_MULT_TYPE) != 2)
717 return 0;
718
719 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
720 return 1;
721
722 return 0;
723 }
724
725 GLOBAL(int)
jsimd_can_idct_4x4(void)726 jsimd_can_idct_4x4 (void)
727 {
728 init_simd();
729
730 /* The code is optimised for these values only */
731 if (DCTSIZE != 8)
732 return 0;
733 if (sizeof(JCOEF) != 2)
734 return 0;
735 if (BITS_IN_JSAMPLE != 8)
736 return 0;
737 if (sizeof(JDIMENSION) != 4)
738 return 0;
739 if (sizeof(ISLOW_MULT_TYPE) != 2)
740 return 0;
741
742 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
743 return 1;
744
745 return 0;
746 }
747
748 GLOBAL(void)
jsimd_idct_2x2(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)749 jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info *compptr,
750 JCOEFPTR coef_block, JSAMPARRAY output_buf,
751 JDIMENSION output_col)
752 {
753 jsimd_idct_2x2_sse2(compptr->dct_table, coef_block, output_buf, output_col);
754 }
755
756 GLOBAL(void)
jsimd_idct_4x4(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)757 jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info *compptr,
758 JCOEFPTR coef_block, JSAMPARRAY output_buf,
759 JDIMENSION output_col)
760 {
761 jsimd_idct_4x4_sse2(compptr->dct_table, coef_block, output_buf, output_col);
762 }
763
764 GLOBAL(int)
jsimd_can_idct_islow(void)765 jsimd_can_idct_islow (void)
766 {
767 init_simd();
768
769 /* The code is optimised for these values only */
770 if (DCTSIZE != 8)
771 return 0;
772 if (sizeof(JCOEF) != 2)
773 return 0;
774 if (BITS_IN_JSAMPLE != 8)
775 return 0;
776 if (sizeof(JDIMENSION) != 4)
777 return 0;
778 if (sizeof(ISLOW_MULT_TYPE) != 2)
779 return 0;
780
781 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_islow_sse2))
782 return 1;
783
784 return 0;
785 }
786
787 GLOBAL(int)
jsimd_can_idct_ifast(void)788 jsimd_can_idct_ifast (void)
789 {
790 init_simd();
791
792 /* The code is optimised for these values only */
793 if (DCTSIZE != 8)
794 return 0;
795 if (sizeof(JCOEF) != 2)
796 return 0;
797 if (BITS_IN_JSAMPLE != 8)
798 return 0;
799 if (sizeof(JDIMENSION) != 4)
800 return 0;
801 if (sizeof(IFAST_MULT_TYPE) != 2)
802 return 0;
803 if (IFAST_SCALE_BITS != 2)
804 return 0;
805
806 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2))
807 return 1;
808
809 return 0;
810 }
811
812 GLOBAL(int)
jsimd_can_idct_float(void)813 jsimd_can_idct_float (void)
814 {
815 init_simd();
816
817 if (DCTSIZE != 8)
818 return 0;
819 if (sizeof(JCOEF) != 2)
820 return 0;
821 if (BITS_IN_JSAMPLE != 8)
822 return 0;
823 if (sizeof(JDIMENSION) != 4)
824 return 0;
825 if (sizeof(FAST_FLOAT) != 4)
826 return 0;
827 if (sizeof(FLOAT_MULT_TYPE) != 4)
828 return 0;
829
830 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2))
831 return 1;
832
833 return 0;
834 }
835
836 GLOBAL(void)
jsimd_idct_islow(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)837 jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info *compptr,
838 JCOEFPTR coef_block, JSAMPARRAY output_buf,
839 JDIMENSION output_col)
840 {
841 jsimd_idct_islow_sse2(compptr->dct_table, coef_block, output_buf,
842 output_col);
843 }
844
845 GLOBAL(void)
jsimd_idct_ifast(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)846 jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info *compptr,
847 JCOEFPTR coef_block, JSAMPARRAY output_buf,
848 JDIMENSION output_col)
849 {
850 jsimd_idct_ifast_sse2(compptr->dct_table, coef_block, output_buf,
851 output_col);
852 }
853
854 GLOBAL(void)
jsimd_idct_float(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)855 jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info *compptr,
856 JCOEFPTR coef_block, JSAMPARRAY output_buf,
857 JDIMENSION output_col)
858 {
859 jsimd_idct_float_sse2(compptr->dct_table, coef_block, output_buf,
860 output_col);
861 }
862
863 GLOBAL(int)
jsimd_can_huff_encode_one_block(void)864 jsimd_can_huff_encode_one_block (void)
865 {
866 init_simd();
867
868 if (DCTSIZE != 8)
869 return 0;
870 if (sizeof(JCOEF) != 2)
871 return 0;
872
873 if ((simd_support & JSIMD_SSE2) && simd_huffman &&
874 IS_ALIGNED_SSE(jconst_huff_encode_one_block))
875 return 1;
876
877 return 0;
878 }
879
880 GLOBAL(JOCTET*)
jsimd_huff_encode_one_block(void * state,JOCTET * buffer,JCOEFPTR block,int last_dc_val,c_derived_tbl * dctbl,c_derived_tbl * actbl)881 jsimd_huff_encode_one_block (void *state, JOCTET *buffer, JCOEFPTR block,
882 int last_dc_val, c_derived_tbl *dctbl,
883 c_derived_tbl *actbl)
884 {
885 return jsimd_huff_encode_one_block_sse2(state, buffer, block, last_dc_val,
886 dctbl, actbl);
887 }
888