1 /*
2 ** SPDX-License-Identifier: BSD-3-Clause
3 ** Copyright Contributors to the OpenEXR Project.
4 */
5
6 #include "internal_coding.h"
7 #include "internal_xdr.h"
8
9 #include "openexr_attr.h"
10
11 #include <string.h>
12 #include <stdbool.h>
13
14 #if defined(__x86_64__) || defined(_M_X64)
15 # ifndef _WIN32
16 # include <cpuid.h>
17 # endif
18 #endif
19
20 /**************************************/
21
22 #ifndef __F16C__
23 static inline void
half_to_float4(float * out,const uint16_t * src)24 half_to_float4 (float* out, const uint16_t* src)
25 {
26 out[0] = half_to_float (src[0]);
27 out[1] = half_to_float (src[1]);
28 out[2] = half_to_float (src[2]);
29 out[3] = half_to_float (src[3]);
30 }
31
32 static inline void
half_to_float8(float * out,const uint16_t * src)33 half_to_float8 (float* out, const uint16_t* src)
34 {
35 half_to_float4 (out, src);
36 half_to_float4 (out + 4, src + 4);
37 }
38 #endif
39
40 #if (defined(__x86_64__) || defined(_M_X64)) && \
41 (defined(__F16C__) || defined(__GNUC__) || defined(__clang__))
42
# if defined(__F16C__)
/* Whole build is compiled with F16C enabled: use the hardware
 * half->float conversion directly under the generic name. */
static inline void
half_to_float_buffer (float* out, const uint16_t* in, int w)
# elif defined(__GNUC__) || defined(__clang__)
/* F16C not enabled globally: compile this variant with the f16c
 * target attribute and select it at runtime via cpuid dispatch
 * (see choose_half_to_float_impl below). */
__attribute__ ((target ("f16c"))) static void
half_to_float_buffer_f16c (float* out, const uint16_t* in, int w)
# endif
{
    /* main loop: convert 8 half values -> 8 floats per iteration */
    while (w >= 8)
    {
        _mm256_storeu_ps (
            out, _mm256_cvtph_ps (_mm_loadu_si128 ((const __m128i*) in)));
        out += 8;
        in += 8;
        w -= 8;
    }
    // gcc < 9 does not have loadu_si64
# if defined(__clang__) || (__GNUC__ >= 9)
    /* tail: 4-wide hardware conversion where possible, scalar for the rest */
    switch (w)
    {
        case 7:
            _mm_storeu_ps (out, _mm_cvtph_ps (_mm_loadu_si64 (in)));
            out[4] = half_to_float (in[4]);
            out[5] = half_to_float (in[5]);
            out[6] = half_to_float (in[6]);
            break;
        case 6:
            _mm_storeu_ps (out, _mm_cvtph_ps (_mm_loadu_si64 (in)));
            out[4] = half_to_float (in[4]);
            out[5] = half_to_float (in[5]);
            break;
        case 5:
            _mm_storeu_ps (out, _mm_cvtph_ps (_mm_loadu_si64 (in)));
            out[4] = half_to_float (in[4]);
            break;
        case 4: _mm_storeu_ps (out, _mm_cvtph_ps (_mm_loadu_si64 (in))); break;
        case 3:
            out[0] = half_to_float (in[0]);
            out[1] = half_to_float (in[1]);
            out[2] = half_to_float (in[2]);
            break;
        case 2:
            out[0] = half_to_float (in[0]);
            out[1] = half_to_float (in[1]);
            break;
        case 1: out[0] = half_to_float (in[0]); break;
    }
# else
    /* older gcc: plain scalar tail */
    while (w > 0)
    {
        *out++ = half_to_float (*in++);
        --w;
    }
# endif
}
98
99 # ifndef __F16C__
/* Scalar implementation used until (and unless) runtime dispatch swaps
 * in the F16C variant: convert w half values to float, eight at a time,
 * with an unrolled switch for the remaining 0-7 values. */
static void
half_to_float_buffer_impl (float* out, const uint16_t* in, int w)
{
    while (w >= 8)
    {
        half_to_float8 (out, in);
        out += 8;
        in += 8;
        w -= 8;
    }
    /* unrolled tail: handle the final 0-7 entries without a loop */
    switch (w)
    {
        case 7:
            half_to_float4 (out, in);
            out[4] = half_to_float (in[4]);
            out[5] = half_to_float (in[5]);
            out[6] = half_to_float (in[6]);
            break;
        case 6:
            half_to_float4 (out, in);
            out[4] = half_to_float (in[4]);
            out[5] = half_to_float (in[5]);
            break;
        case 5:
            half_to_float4 (out, in);
            out[4] = half_to_float (in[4]);
            break;
        case 4: half_to_float4 (out, in); break;
        case 3:
            out[0] = half_to_float (in[0]);
            out[1] = half_to_float (in[1]);
            out[2] = half_to_float (in[2]);
            break;
        case 2:
            out[0] = half_to_float (in[0]);
            out[1] = half_to_float (in[1]);
            break;
        case 1: out[0] = half_to_float (in[0]); break;
    }
}
140
141 static void (*half_to_float_buffer) (float*, const uint16_t*, int) =
142 &half_to_float_buffer_impl;
143
144 static void
choose_half_to_float_impl()145 choose_half_to_float_impl ()
146 {
147 // regs[2] in the extended block is ECX, where f16c indicator lives
148 # ifdef _WIN32
149 int regs[4];
150
151 __cpuid (regs, 0);
152 if (regs[0] >= 1) { __cpuidex (regs, 1, 0); }
153 else regs[2] = 0;
154 # else
155 unsigned int regs[4];
156 __get_cpuid (0, ®s[0], ®s[1], ®s[2], ®s[3]);
157 if (regs[0] >= 1)
158 {
159 __get_cpuid (1, ®s[0], ®s[1], ®s[2], ®s[3]);
160 }
161 else
162 regs[2] = 0;
163 # endif
164 /* F16C is indicated by bit 29 */
165 if (regs[2] & (1 << 29)) half_to_float_buffer = &half_to_float_buffer_f16c;
166 }
167 # else
/* when we explicitly compile against f16, force it in: the generic
 * half_to_float_buffer already uses the hardware path, so runtime
 * dispatch is a no-op.  (void) gives this a proper C prototype. */
static void
choose_half_to_float_impl (void)
{}
172
173 # endif /* F16C */
174
175 #else
176
/* Portable half->float buffer conversion for platforms without the x86
 * F16C path.  On big-endian hosts each value is byte-swapped with
 * one_to_native16 before conversion; on little-endian hosts the packed
 * data is already native and the unrolled 8/4-wide helpers are used. */
static inline void
half_to_float_buffer (float* out, const uint16_t* in, int w)
{
# if EXR_HOST_IS_NOT_LITTLE_ENDIAN
    for (int x = 0; x < w; ++x)
        out[x] = half_to_float (one_to_native16 (in[x]));
# else
    while (w >= 8)
    {
        half_to_float8 (out, in);
        out += 8;
        in += 8;
        w -= 8;
    }
    /* unrolled tail for the final 0-7 values */
    switch (w)
    {
        case 7:
            half_to_float4 (out, in);
            out[4] = half_to_float (in[4]);
            out[5] = half_to_float (in[5]);
            out[6] = half_to_float (in[6]);
            break;
        case 6:
            half_to_float4 (out, in);
            out[4] = half_to_float (in[4]);
            out[5] = half_to_float (in[5]);
            break;
        case 5:
            half_to_float4 (out, in);
            out[4] = half_to_float (in[4]);
            break;
        case 4: half_to_float4 (out, in); break;
        case 3:
            out[0] = half_to_float (in[0]);
            out[1] = half_to_float (in[1]);
            out[2] = half_to_float (in[2]);
            break;
        case 2:
            out[0] = half_to_float (in[0]);
            out[1] = half_to_float (in[1]);
            break;
        case 1: out[0] = half_to_float (in[0]); break;
    }
# endif
}
222
/* Non-x86 build: half_to_float_buffer is the portable routine above,
 * so there is nothing to choose at runtime.  (void) gives this a
 * proper C prototype. */
static void
choose_half_to_float_impl (void)
{}
226
227 #endif
228
229 /**************************************/
230
231 static exr_result_t
unpack_16bit_3chan_interleave(exr_decode_pipeline_t * decode)232 unpack_16bit_3chan_interleave (exr_decode_pipeline_t* decode)
233 {
234 /* we know we're unpacking all the channels and there is no subsampling */
235 const uint8_t* srcbuffer = decode->unpacked_buffer;
236 const uint16_t *in0, *in1, *in2;
237 uint8_t* out0;
238 int w, h;
239 int linc0;
240
241 w = decode->channels[0].width;
242 h = decode->chunk.height;
243 linc0 = decode->channels[0].user_line_stride;
244
245 out0 = decode->channels[0].decode_to_ptr;
246
247 /* interleaving case, we can do this! */
248 for (int y = 0; y < h; ++y)
249 {
250 uint16_t* out = (uint16_t*) out0;
251
252 in0 = (const uint16_t*) srcbuffer;
253 in1 = in0 + w;
254 in2 = in1 + w;
255
256 srcbuffer += w * 6; // 3 * sizeof(uint16_t), avoid type conversion
257 for (int x = 0; x < w; ++x)
258 {
259 out[0] = one_to_native16 (in0[x]);
260 out[1] = one_to_native16 (in1[x]);
261 out[2] = one_to_native16 (in2[x]);
262 out += 3;
263 }
264 out0 += linc0;
265 }
266 return EXR_ERR_SUCCESS;
267 }
268
269 /**************************************/
270
271 static exr_result_t
unpack_16bit_3chan_interleave_rev(exr_decode_pipeline_t * decode)272 unpack_16bit_3chan_interleave_rev (exr_decode_pipeline_t* decode)
273 {
274 /* we know we're unpacking all the channels and there is no subsampling */
275 const uint8_t* srcbuffer = decode->unpacked_buffer;
276 const uint16_t *in0, *in1, *in2;
277 uint8_t* out0;
278 int w, h;
279 int linc0;
280
281 w = decode->channels[0].width;
282 h = decode->chunk.height;
283 linc0 = decode->channels[0].user_line_stride;
284
285 out0 = decode->channels[2].decode_to_ptr;
286
287 /* interleaving case, we can do this! */
288 for (int y = 0; y < h; ++y)
289 {
290 uint16_t* out = (uint16_t*) out0;
291
292 in0 = (const uint16_t*) srcbuffer; // B
293 in1 = in0 + w; // G
294 in2 = in1 + w; // R
295
296 srcbuffer += w * 6; // 3 * sizeof(uint16_t), avoid type conversion
297 for (int x = 0; x < w; ++x)
298 {
299 out[0] = one_to_native16 (in2[x]);
300 out[1] = one_to_native16 (in1[x]);
301 out[2] = one_to_native16 (in0[x]);
302 out += 3;
303 }
304 out0 += linc0;
305 }
306 return EXR_ERR_SUCCESS;
307 }
308
309 /**************************************/
310
311 static exr_result_t
unpack_half_to_float_3chan_interleave(exr_decode_pipeline_t * decode)312 unpack_half_to_float_3chan_interleave (exr_decode_pipeline_t* decode)
313 {
314 /* we know we're unpacking all the channels and there is no subsampling */
315 const uint8_t* srcbuffer = decode->unpacked_buffer;
316 const uint16_t *in0, *in1, *in2;
317 uint8_t* out0;
318 int w, h;
319 int linc0;
320
321 w = decode->channels[0].width;
322 h = decode->chunk.height;
323 linc0 = decode->channels[0].user_line_stride;
324
325 out0 = decode->channels[0].decode_to_ptr;
326
327 /* interleaving case, we can do this! */
328 for (int y = 0; y < h; ++y)
329 {
330 float* out = (float*) out0;
331
332 in0 = (const uint16_t*) srcbuffer;
333 in1 = in0 + w;
334 in2 = in1 + w;
335
336 srcbuffer += w * 6; // 3 * sizeof(uint16_t), avoid type conversion
337 for (int x = 0; x < w; ++x)
338 {
339 out[0] = half_to_float (one_to_native16 (in0[x]));
340 out[1] = half_to_float (one_to_native16 (in1[x]));
341 out[2] = half_to_float (one_to_native16 (in2[x]));
342 out += 3;
343 }
344 out0 += linc0;
345 }
346 return EXR_ERR_SUCCESS;
347 }
348
349 /**************************************/
350
351 static exr_result_t
unpack_half_to_float_3chan_interleave_rev(exr_decode_pipeline_t * decode)352 unpack_half_to_float_3chan_interleave_rev (exr_decode_pipeline_t* decode)
353 {
354 /* we know we're unpacking all the channels and there is no subsampling */
355 const uint8_t* srcbuffer = decode->unpacked_buffer;
356 const uint16_t *in0, *in1, *in2;
357 uint8_t* out0;
358 int w, h;
359 int linc0;
360
361 w = decode->channels[0].width;
362 h = decode->chunk.height;
363 linc0 = decode->channels[0].user_line_stride;
364
365 out0 = decode->channels[2].decode_to_ptr;
366
367 /* interleaving case, we can do this! */
368 for (int y = 0; y < h; ++y)
369 {
370 float* out = (float*) out0;
371
372 in0 = (const uint16_t*) srcbuffer;
373 in1 = in0 + w;
374 in2 = in1 + w;
375
376 srcbuffer += w * 6; // 3 * sizeof(uint16_t), avoid type conversion
377 for (int x = 0; x < w; ++x)
378 {
379 out[0] = half_to_float (one_to_native16 (in2[x]));
380 out[1] = half_to_float (one_to_native16 (in1[x]));
381 out[2] = half_to_float (one_to_native16 (in0[x]));
382 out += 3;
383 }
384 out0 += linc0;
385 }
386 return EXR_ERR_SUCCESS;
387 }
388
389 /**************************************/
390
391 static exr_result_t
unpack_16bit_3chan_planar(exr_decode_pipeline_t * decode)392 unpack_16bit_3chan_planar (exr_decode_pipeline_t* decode)
393 {
394 /* we know we're unpacking all the channels and there is no subsampling */
395 const uint8_t* srcbuffer = decode->unpacked_buffer;
396 const uint16_t *in0, *in1, *in2;
397 uint8_t * out0, *out1, *out2;
398 int w, h;
399 int inc0, inc1, inc2;
400 int linc0, linc1, linc2;
401
402 w = decode->channels[0].width;
403 h = decode->chunk.height;
404 inc0 = decode->channels[0].user_pixel_stride;
405 inc1 = decode->channels[1].user_pixel_stride;
406 inc2 = decode->channels[2].user_pixel_stride;
407 linc0 = decode->channels[0].user_line_stride;
408 linc1 = decode->channels[1].user_line_stride;
409 linc2 = decode->channels[2].user_line_stride;
410
411 out0 = decode->channels[0].decode_to_ptr;
412 out1 = decode->channels[1].decode_to_ptr;
413 out2 = decode->channels[2].decode_to_ptr;
414
415 // planar output
416 for (int y = 0; y < h; ++y)
417 {
418 in0 = (const uint16_t*) srcbuffer;
419 in1 = in0 + w;
420 in2 = in1 + w;
421 srcbuffer += w * 6; // 3 * sizeof(uint16_t), avoid type conversion
422 /* specialise to memcpy if we can */
423 #if EXR_HOST_IS_NOT_LITTLE_ENDIAN
424 for (int x = 0; x < w; ++x)
425 *(((uint16_t*) out0) + x) = one_to_native16 (in0[x]);
426 for (int x = 0; x < w; ++x)
427 *(((uint16_t*) out1) + x) = one_to_native16 (in1[x]);
428 for (int x = 0; x < w; ++x)
429 *(((uint16_t*) out2) + x) = one_to_native16 (in2[x]);
430 #else
431 memcpy (out0, in0, (size_t) (w) * sizeof (uint16_t));
432 memcpy (out1, in1, (size_t) (w) * sizeof (uint16_t));
433 memcpy (out2, in2, (size_t) (w) * sizeof (uint16_t));
434 #endif
435 out0 += linc0;
436 out1 += linc1;
437 out2 += linc2;
438 }
439
440 return EXR_ERR_SUCCESS;
441 }
442
443 /**************************************/
444
445 static exr_result_t
unpack_half_to_float_3chan_planar(exr_decode_pipeline_t * decode)446 unpack_half_to_float_3chan_planar (exr_decode_pipeline_t* decode)
447 {
448 /* we know we're unpacking all the channels and there is no subsampling */
449 const uint8_t* srcbuffer = decode->unpacked_buffer;
450 const uint16_t *in0, *in1, *in2;
451 uint8_t * out0, *out1, *out2;
452 int w, h;
453 int inc0, inc1, inc2;
454 int linc0, linc1, linc2;
455
456 w = decode->channels[0].width;
457 h = decode->chunk.height;
458 inc0 = decode->channels[0].user_pixel_stride;
459 inc1 = decode->channels[1].user_pixel_stride;
460 inc2 = decode->channels[2].user_pixel_stride;
461 linc0 = decode->channels[0].user_line_stride;
462 linc1 = decode->channels[1].user_line_stride;
463 linc2 = decode->channels[2].user_line_stride;
464
465 out0 = decode->channels[0].decode_to_ptr;
466 out1 = decode->channels[1].decode_to_ptr;
467 out2 = decode->channels[2].decode_to_ptr;
468
469 // planar output
470 for (int y = 0; y < h; ++y)
471 {
472 in0 = (const uint16_t*) srcbuffer;
473 in1 = in0 + w;
474 in2 = in1 + w;
475 srcbuffer += w * 6; // 3 * sizeof(uint16_t), avoid type conversion
476 /* specialise to memcpy if we can */
477 half_to_float_buffer ((float*) out0, in0, w);
478 half_to_float_buffer ((float*) out1, in1, w);
479 half_to_float_buffer ((float*) out2, in2, w);
480
481 out0 += linc0;
482 out1 += linc1;
483 out2 += linc2;
484 }
485
486 return EXR_ERR_SUCCESS;
487 }
488
489 /**************************************/
490
491 static exr_result_t
unpack_16bit_3chan(exr_decode_pipeline_t * decode)492 unpack_16bit_3chan (exr_decode_pipeline_t* decode)
493 {
494 /* we know we're unpacking all the channels and there is no subsampling */
495 const uint8_t* srcbuffer = decode->unpacked_buffer;
496 const uint16_t *in0, *in1, *in2;
497 uint8_t * out0, *out1, *out2;
498 int w, h;
499 int inc0, inc1, inc2;
500 int linc0, linc1, linc2;
501
502 w = decode->channels[0].width;
503 h = decode->chunk.height;
504 inc0 = decode->channels[0].user_pixel_stride;
505 inc1 = decode->channels[1].user_pixel_stride;
506 inc2 = decode->channels[2].user_pixel_stride;
507 linc0 = decode->channels[0].user_line_stride;
508 linc1 = decode->channels[1].user_line_stride;
509 linc2 = decode->channels[2].user_line_stride;
510
511 out0 = decode->channels[0].decode_to_ptr;
512 out1 = decode->channels[1].decode_to_ptr;
513 out2 = decode->channels[2].decode_to_ptr;
514
515 for (int y = 0; y < h; ++y)
516 {
517 in0 = (const uint16_t*) srcbuffer;
518 in1 = in0 + w;
519 in2 = in1 + w;
520 srcbuffer += w * 6; // 3 * sizeof(uint16_t), avoid type conversion
521 for (int x = 0; x < w; ++x)
522 *((uint16_t*) (out0 + x * inc0)) = one_to_native16 (in0[x]);
523 for (int x = 0; x < w; ++x)
524 *((uint16_t*) (out1 + x * inc1)) = one_to_native16 (in1[x]);
525 for (int x = 0; x < w; ++x)
526 *((uint16_t*) (out2 + x * inc2)) = one_to_native16 (in2[x]);
527 out0 += linc0;
528 out1 += linc1;
529 out2 += linc2;
530 }
531
532 return EXR_ERR_SUCCESS;
533 }
534
535 /**************************************/
536
/* Unpack four planar 16-bit channels into one interleaved 4-channel
 * output, writing each output pixel as a single 64-bit store. */
static exr_result_t
unpack_16bit_4chan_interleave (exr_decode_pipeline_t* decode)
{
    /* we know we're unpacking all the channels and there is no subsampling */
    const uint8_t* srcbuffer = decode->unpacked_buffer;
    const uint16_t *in0, *in1, *in2, *in3;
    uint8_t* out0;
    int w, h;
    int linc0;
    /* TODO: can do this with sse and do 2 outpixels at once */
    /* The union lets the four 16-bit members be filled individually and
     * then flushed as one 64-bit value; member order a,b,g,r mirrors
     * the file's channel row order (presumably alphabetical A,B,G,R —
     * confirm against the routine that selects this unpacker). */
    union
    {
        struct
        {
            uint16_t a;
            uint16_t b;
            uint16_t g;
            uint16_t r;
        };
        uint64_t allc;
    } combined;

    w     = decode->channels[0].width;
    h     = decode->chunk.height;
    linc0 = decode->channels[0].user_line_stride;

    out0 = decode->channels[0].decode_to_ptr;

    /* interleaving case, we can do this! */
    for (int y = 0; y < h; ++y)
    {
        uint64_t* outall = (uint64_t*) out0;
        in0 = (const uint16_t*) srcbuffer;
        in1 = in0 + w;
        in2 = in1 + w;
        in3 = in2 + w;

        srcbuffer += w * 8; // 4 * sizeof(uint16_t), avoid type conversion
        for (int x = 0; x < w; ++x)
        {
            combined.a = one_to_native16 (in0[x]);
            combined.b = one_to_native16 (in1[x]);
            combined.g = one_to_native16 (in2[x]);
            combined.r = one_to_native16 (in3[x]);
            outall[x] = combined.allc;
        }
        out0 += linc0;
    }
    return EXR_ERR_SUCCESS;
}
587
588 /**************************************/
589
/* Unpack four planar 16-bit channels into one interleaved 4-channel
 * output with the channel order reversed, one 64-bit store per pixel. */
static exr_result_t
unpack_16bit_4chan_interleave_rev (exr_decode_pipeline_t* decode)
{
    /* we know we're unpacking all the channels and there is no subsampling */
    const uint8_t* srcbuffer = decode->unpacked_buffer;
    const uint16_t *in0, *in1, *in2, *in3;
    uint8_t* out0;
    int w, h;
    int linc0;
    /* TODO: can do this with sse and do 2 outpixels at once */
    /* Member order here is r,g,b,a (reverse of the non-rev variant), so
     * assigning a=in0 .. r=in3 lays out in3 first in memory — i.e. the
     * file's channel rows are emitted in reverse order. */
    union
    {
        struct
        {
            uint16_t r;
            uint16_t g;
            uint16_t b;
            uint16_t a;
        };
        uint64_t allc;
    } combined;

    w     = decode->channels[0].width;
    h     = decode->chunk.height;
    linc0 = decode->channels[0].user_line_stride;

    /* NOTE(review): base pointer is the LAST channel's destination —
     * presumably the lowest address when the user requested a reversed
     * interleave; confirm against the dispatcher. */
    out0 = decode->channels[3].decode_to_ptr;

    /* interleaving case, we can do this! */
    for (int y = 0; y < h; ++y)
    {
        uint64_t* outall = (uint64_t*) out0;
        in0 = (const uint16_t*) srcbuffer;
        in1 = in0 + w;
        in2 = in1 + w;
        in3 = in2 + w;

        srcbuffer += w * 8; // 4 * sizeof(uint16_t), avoid type conversion
        for (int x = 0; x < w; ++x)
        {
            combined.a = one_to_native16 (in0[x]);
            combined.b = one_to_native16 (in1[x]);
            combined.g = one_to_native16 (in2[x]);
            combined.r = one_to_native16 (in3[x]);
            outall[x] = combined.allc;
        }
        out0 += linc0;
    }
    return EXR_ERR_SUCCESS;
}
640
641 /**************************************/
642
643 static exr_result_t
unpack_half_to_float_4chan_interleave(exr_decode_pipeline_t * decode)644 unpack_half_to_float_4chan_interleave (exr_decode_pipeline_t* decode)
645 {
646 /* we know we're unpacking all the channels and there is no subsampling */
647 const uint8_t* srcbuffer = decode->unpacked_buffer;
648 const uint16_t *in0, *in1, *in2, *in3;
649 uint8_t* out0;
650 int w, h;
651 int linc0;
652
653 w = decode->channels[0].width;
654 h = decode->chunk.height;
655 linc0 = decode->channels[0].user_line_stride;
656
657 out0 = decode->channels[0].decode_to_ptr;
658
659 /* interleaving case, we can do this! */
660 for (int y = 0; y < h; ++y)
661 {
662 float* out = (float*) out0;
663 in0 = (const uint16_t*) srcbuffer;
664 in1 = in0 + w;
665 in2 = in1 + w;
666 in3 = in2 + w;
667
668 srcbuffer += w * 8; // 4 * sizeof(uint16_t), avoid type conversion
669 for (int x = 0; x < w; ++x)
670 {
671 out[0] = half_to_float (one_to_native16 (in3[x]));
672 out[1] = half_to_float (one_to_native16 (in2[x]));
673 out[2] = half_to_float (one_to_native16 (in1[x]));
674 out[3] = half_to_float (one_to_native16 (in0[x]));
675 out += 4;
676 }
677 out0 += linc0;
678 }
679 return EXR_ERR_SUCCESS;
680 }
681
682 /**************************************/
683
684 static exr_result_t
unpack_half_to_float_4chan_interleave_rev(exr_decode_pipeline_t * decode)685 unpack_half_to_float_4chan_interleave_rev (exr_decode_pipeline_t* decode)
686 {
687 /* we know we're unpacking all the channels and there is no subsampling */
688 const uint8_t* srcbuffer = decode->unpacked_buffer;
689 const uint16_t *in0, *in1, *in2, *in3;
690 uint8_t* out0;
691 int w, h;
692 int linc0;
693
694 w = decode->channels[0].width;
695 h = decode->chunk.height;
696 linc0 = decode->channels[0].user_line_stride;
697
698 out0 = decode->channels[3].decode_to_ptr;
699
700 /* interleaving case, we can do this! */
701 for (int y = 0; y < h; ++y)
702 {
703 float* out = (float*) out0;
704 in0 = (const uint16_t*) srcbuffer;
705 in1 = in0 + w;
706 in2 = in1 + w;
707 in3 = in2 + w;
708
709 srcbuffer += w * 8; // 4 * sizeof(uint16_t), avoid type conversion
710 for (int x = 0; x < w; ++x)
711 {
712 out[0] = half_to_float (one_to_native16 (in0[x]));
713 out[1] = half_to_float (one_to_native16 (in1[x]));
714 out[2] = half_to_float (one_to_native16 (in2[x]));
715 out[3] = half_to_float (one_to_native16 (in3[x]));
716 out += 4;
717 }
718 out0 += linc0;
719 }
720 return EXR_ERR_SUCCESS;
721 }
722
723 /**************************************/
724
/* Unpack four 16-bit channels into four separate planar output planes.
 * On little-endian hosts each plane row is a straight memcpy; on
 * big-endian hosts every value is byte-swapped via one_to_native16. */
static exr_result_t
unpack_16bit_4chan_planar (exr_decode_pipeline_t* decode)
{
    /* we know we're unpacking all the channels and there is no subsampling */
    const uint8_t* srcbuffer = decode->unpacked_buffer;
    const uint16_t *in0, *in1, *in2, *in3;
    uint8_t * out0, *out1, *out2, *out3;
    int w, h;
    int linc0, linc1, linc2, linc3;

    w     = decode->channels[0].width;
    h     = decode->chunk.height;
    linc0 = decode->channels[0].user_line_stride;
    linc1 = decode->channels[1].user_line_stride;
    linc2 = decode->channels[2].user_line_stride;
    linc3 = decode->channels[3].user_line_stride;

    out0 = decode->channels[0].decode_to_ptr;
    out1 = decode->channels[1].decode_to_ptr;
    out2 = decode->channels[2].decode_to_ptr;
    out3 = decode->channels[3].decode_to_ptr;

    // planar output
    for (int y = 0; y < h; ++y)
    {
        in0 = (const uint16_t*) srcbuffer;
        in1 = in0 + w;
        in2 = in1 + w;
        in3 = in2 + w;
        srcbuffer += w * 8; // 4 * sizeof(uint16_t), avoid type conversion
        /* specialize to memcpy if we can */
#if EXR_HOST_IS_NOT_LITTLE_ENDIAN
        for (int x = 0; x < w; ++x)
            *(((uint16_t*) out0) + x) = one_to_native16 (in0[x]);
        for (int x = 0; x < w; ++x)
            *(((uint16_t*) out1) + x) = one_to_native16 (in1[x]);
        for (int x = 0; x < w; ++x)
            *(((uint16_t*) out2) + x) = one_to_native16 (in2[x]);
        for (int x = 0; x < w; ++x)
            *(((uint16_t*) out3) + x) = one_to_native16 (in3[x]);
#else
        memcpy (out0, in0, (size_t) (w) * sizeof (uint16_t));
        memcpy (out1, in1, (size_t) (w) * sizeof (uint16_t));
        memcpy (out2, in2, (size_t) (w) * sizeof (uint16_t));
        memcpy (out3, in3, (size_t) (w) * sizeof (uint16_t));
#endif
        out0 += linc0;
        out1 += linc1;
        out2 += linc2;
        out3 += linc3;
    }
    return EXR_ERR_SUCCESS;
}
778
779 /**************************************/
780
781 static exr_result_t
unpack_half_to_float_4chan_planar(exr_decode_pipeline_t * decode)782 unpack_half_to_float_4chan_planar (exr_decode_pipeline_t* decode)
783 {
784 /* we know we're unpacking all the channels and there is no subsampling */
785 const uint8_t* srcbuffer = decode->unpacked_buffer;
786 const uint16_t *in0, *in1, *in2, *in3;
787 uint8_t * out0, *out1, *out2, *out3;
788 int w, h;
789 int linc0, linc1, linc2, linc3;
790
791 w = decode->channels[0].width;
792 h = decode->chunk.height;
793 linc0 = decode->channels[0].user_line_stride;
794 linc1 = decode->channels[1].user_line_stride;
795 linc2 = decode->channels[2].user_line_stride;
796 linc3 = decode->channels[3].user_line_stride;
797
798 out0 = decode->channels[0].decode_to_ptr;
799 out1 = decode->channels[1].decode_to_ptr;
800 out2 = decode->channels[2].decode_to_ptr;
801 out3 = decode->channels[3].decode_to_ptr;
802
803 // planar output
804 for (int y = 0; y < h; ++y)
805 {
806 in0 = (const uint16_t*) srcbuffer;
807 in1 = in0 + w;
808 in2 = in1 + w;
809 in3 = in2 + w;
810 srcbuffer += w * 8; // 4 * sizeof(uint16_t), avoid type conversion
811
812 half_to_float_buffer ((float*) out0, in0, w);
813 half_to_float_buffer ((float*) out1, in1, w);
814 half_to_float_buffer ((float*) out2, in2, w);
815 half_to_float_buffer ((float*) out3, in3, w);
816
817 out0 += linc0;
818 out1 += linc1;
819 out2 += linc2;
820 out3 += linc3;
821 }
822 return EXR_ERR_SUCCESS;
823 }
824
825 /**************************************/
826
827 static exr_result_t
unpack_16bit_4chan(exr_decode_pipeline_t * decode)828 unpack_16bit_4chan (exr_decode_pipeline_t* decode)
829 {
830 /* we know we're unpacking all the channels and there is no subsampling */
831 const uint8_t* srcbuffer = decode->unpacked_buffer;
832 const uint16_t *in0, *in1, *in2, *in3;
833 uint8_t * out0, *out1, *out2, *out3;
834 int w, h;
835 int inc0, inc1, inc2, inc3;
836 int linc0, linc1, linc2, linc3;
837
838 w = decode->channels[0].width;
839 h = decode->chunk.height;
840 inc0 = decode->channels[0].user_pixel_stride;
841 inc1 = decode->channels[1].user_pixel_stride;
842 inc2 = decode->channels[2].user_pixel_stride;
843 inc3 = decode->channels[3].user_pixel_stride;
844 linc0 = decode->channels[0].user_line_stride;
845 linc1 = decode->channels[1].user_line_stride;
846 linc2 = decode->channels[2].user_line_stride;
847 linc3 = decode->channels[3].user_line_stride;
848
849 out0 = decode->channels[0].decode_to_ptr;
850 out1 = decode->channels[1].decode_to_ptr;
851 out2 = decode->channels[2].decode_to_ptr;
852 out3 = decode->channels[3].decode_to_ptr;
853
854 for (int y = 0; y < h; ++y)
855 {
856 in0 = (const uint16_t*) srcbuffer;
857 in1 = in0 + w;
858 in2 = in1 + w;
859 in3 = in2 + w;
860 srcbuffer += w * 8; // 4 * sizeof(uint16_t), avoid type conversion
861 for (int x = 0; x < w; ++x)
862 *((uint16_t*) (out0 + x * inc0)) = one_to_native16 (in0[x]);
863 for (int x = 0; x < w; ++x)
864 *((uint16_t*) (out1 + x * inc1)) = one_to_native16 (in1[x]);
865 for (int x = 0; x < w; ++x)
866 *((uint16_t*) (out2 + x * inc2)) = one_to_native16 (in2[x]);
867 for (int x = 0; x < w; ++x)
868 *((uint16_t*) (out3 + x * inc3)) = one_to_native16 (in3[x]);
869 out0 += linc0;
870 out1 += linc1;
871 out2 += linc2;
872 out3 += linc3;
873 }
874 return EXR_ERR_SUCCESS;
875 }
876
877 /**************************************/
878
/* Generic 16-bit unpack for any channel count: all channels present,
 * no subsampling, arbitrary user pixel stride.  Uses memcpy per
 * scanline when the destination is packed 16-bit, otherwise a strided
 * scalar copy; big-endian hosts byte-swap via one_to_native16. */
static exr_result_t
unpack_16bit (exr_decode_pipeline_t* decode)
{
    /* we know we're unpacking all the channels and there is no subsampling */
    const uint8_t* srcbuffer = decode->unpacked_buffer;
    uint8_t* cdata;
    int w, h, pixincrement;

    h = decode->chunk.height;
    for (int y = 0; y < h; ++y)
    {
        for (int c = 0; c < decode->channel_count; ++c)
        {
            exr_coding_channel_info_t* decc = (decode->channels + c);

            cdata        = decc->decode_to_ptr;
            w            = decc->width;
            pixincrement = decc->user_pixel_stride;
            /* 64-bit math so large line strides don't overflow int */
            cdata += (uint64_t) y * (uint64_t) decc->user_line_stride;
            /* specialize to memcpy if we can */
#if EXR_HOST_IS_NOT_LITTLE_ENDIAN
            if (pixincrement == 2)
            {
                uint16_t* tmp = (uint16_t*) cdata;
                const uint16_t* src = (const uint16_t*) srcbuffer;
                uint16_t* end = tmp + w;

                while (tmp < end)
                    *tmp++ = one_to_native16 (*src++);
            }
            else
            {
                const uint16_t* src = (const uint16_t*) srcbuffer;
                for (int x = 0; x < w; ++x)
                {
                    *((uint16_t*) cdata) = one_to_native16 (*src++);
                    cdata += pixincrement;
                }
            }
#else
            if (pixincrement == 2)
            {
                memcpy (cdata, srcbuffer, (size_t) (w) *2);
            }
            else
            {
                const uint16_t* src = (const uint16_t*) srcbuffer;
                for (int x = 0; x < w; ++x)
                {
                    *((uint16_t*) cdata) = *src++;
                    cdata += pixincrement;
                }
            }
#endif
            srcbuffer += w * 2;
        }
    }
    return EXR_ERR_SUCCESS;
}
938
939 //static exr_result_t unpack_32bit_3chan (exr_decode_pipeline_t* decode);
940 //static exr_result_t unpack_32bit_4chan (exr_decode_pipeline_t* decode);
941
/* Generic 32-bit (float or uint) unpack for any channel count: all
 * channels present, no subsampling, arbitrary user pixel stride.
 * Little-endian destination is memcpy'd when packed; big-endian hosts
 * convert each value with le32toh. */
static exr_result_t
unpack_32bit (exr_decode_pipeline_t* decode)
{
    /* we know we're unpacking all the channels and there is no subsampling */
    const uint8_t* srcbuffer = decode->unpacked_buffer;
    uint8_t* cdata;
    int64_t w, h, pixincrement;
    int chans = decode->channel_count;

    h = (int64_t) decode->chunk.height;

    for (int64_t y = 0; y < h; ++y)
    {
        for (int c = 0; c < chans; ++c)
        {
            exr_coding_channel_info_t* decc = (decode->channels + c);

            cdata        = decc->decode_to_ptr;
            w            = decc->width;
            pixincrement = decc->user_pixel_stride;
            /* 64-bit math so large line strides don't overflow */
            cdata += y * (int64_t) decc->user_line_stride;
            /* specialize to memcpy if we can */
#if EXR_HOST_IS_NOT_LITTLE_ENDIAN
            if (pixincrement == 4)
            {
                uint32_t* tmp = (uint32_t*) cdata;
                const uint32_t* src = (const uint32_t*) srcbuffer;
                uint32_t* end = tmp + w;

                while (tmp < end)
                    *tmp++ = le32toh (*src++);
            }
            else
            {
                const uint32_t* src = (const uint32_t*) srcbuffer;
                for (int64_t x = 0; x < w; ++x)
                {
                    *((uint32_t*) cdata) = le32toh (*src++);
                    cdata += pixincrement;
                }
            }
#else
            if (pixincrement == 4)
            {
                memcpy (cdata, srcbuffer, (size_t) (w) *4);
            }
            else
            {
                const uint32_t* src = (const uint32_t*) srcbuffer;
                for (int64_t x = 0; x < w; ++x)
                {
                    *((uint32_t*) cdata) = *src++;
                    cdata += pixincrement;
                }
            }
#endif
            srcbuffer += w * 4;
        }
    }
    return EXR_ERR_SUCCESS;
}
1003
/* UNPACK_SAMPLES(samps)
 *
 * Expands to a switch over the stored channel type (decc->data_type)
 * crossed with the user's requested type (decc->user_data_type),
 * converting `samps` values from `srcbuffer` into `cdata` and
 * advancing cdata by `ubpc` bytes per sample.  Expects these names in
 * the expansion scope: decc, srcbuffer, cdata, ubpc.  Values are read
 * with unaligned_load16/32 because the packed buffer carries no
 * alignment guarantee.  Unsupported combinations return
 * EXR_ERR_INVALID_ARGUMENT from the enclosing function. */
#define UNPACK_SAMPLES(samps)                                                  \
    switch (decc->data_type)                                                   \
    {                                                                          \
        case EXR_PIXEL_HALF:                                                   \
            switch (decc->user_data_type)                                      \
            {                                                                  \
                case EXR_PIXEL_HALF: {                                         \
                    const uint16_t* src = (const uint16_t*) srcbuffer;         \
                    for (int s = 0; s < samps; ++s)                            \
                    {                                                          \
                        *((uint16_t*) cdata) = unaligned_load16 (src);         \
                        ++src;                                                 \
                        cdata += ubpc;                                         \
                    }                                                          \
                    break;                                                     \
                }                                                              \
                case EXR_PIXEL_FLOAT: {                                        \
                    const uint16_t* src = (const uint16_t*) srcbuffer;         \
                    for (int s = 0; s < samps; ++s)                            \
                    {                                                          \
                        uint16_t cval = unaligned_load16 (src);                \
                        ++src;                                                 \
                        *((float*) cdata) = half_to_float (cval);              \
                        cdata += ubpc;                                         \
                    }                                                          \
                    break;                                                     \
                }                                                              \
                case EXR_PIXEL_UINT: {                                         \
                    const uint16_t* src = (const uint16_t*) srcbuffer;         \
                    for (int s = 0; s < samps; ++s)                            \
                    {                                                          \
                        uint16_t cval = unaligned_load16 (src);                \
                        ++src;                                                 \
                        *((uint32_t*) cdata) = half_to_uint (cval);            \
                        cdata += ubpc;                                         \
                    }                                                          \
                    break;                                                     \
                }                                                              \
                default: return EXR_ERR_INVALID_ARGUMENT;                      \
            }                                                                  \
            break;                                                             \
        case EXR_PIXEL_FLOAT:                                                  \
            switch (decc->user_data_type)                                      \
            {                                                                  \
                case EXR_PIXEL_HALF: {                                         \
                    const uint32_t* src = (const uint32_t*) srcbuffer;         \
                    for (int s = 0; s < samps; ++s)                            \
                    {                                                          \
                        uint32_t fint = unaligned_load32 (src);                \
                        ++src;                                                 \
                        *((uint16_t*) cdata) = float_to_half_int (fint);       \
                        cdata += ubpc;                                         \
                    }                                                          \
                    break;                                                     \
                }                                                              \
                case EXR_PIXEL_FLOAT: {                                        \
                    const uint32_t* src = (const uint32_t*) srcbuffer;         \
                    for (int s = 0; s < samps; ++s)                            \
                    {                                                          \
                        *((uint32_t*) cdata) = unaligned_load32 (src);         \
                        ++src;                                                 \
                        cdata += ubpc;                                         \
                    }                                                          \
                    break;                                                     \
                }                                                              \
                case EXR_PIXEL_UINT: {                                         \
                    const uint32_t* src = (const uint32_t*) srcbuffer;         \
                    for (int s = 0; s < samps; ++s)                            \
                    {                                                          \
                        uint32_t fint = unaligned_load32 (src);                \
                        ++src;                                                 \
                        *((uint32_t*) cdata) = float_to_uint_int (fint);       \
                        cdata += ubpc;                                         \
                    }                                                          \
                    break;                                                     \
                }                                                              \
                default: return EXR_ERR_INVALID_ARGUMENT;                      \
            }                                                                  \
            break;                                                             \
        case EXR_PIXEL_UINT:                                                   \
            switch (decc->user_data_type)                                      \
            {                                                                  \
                case EXR_PIXEL_HALF: {                                         \
                    const uint32_t* src = (const uint32_t*) srcbuffer;         \
                    for (int s = 0; s < samps; ++s)                            \
                    {                                                          \
                        uint32_t fint = unaligned_load32 (src);                \
                        ++src;                                                 \
                        *((uint16_t*) cdata) = uint_to_half (fint);            \
                        cdata += ubpc;                                         \
                    }                                                          \
                    break;                                                     \
                }                                                              \
                case EXR_PIXEL_FLOAT: {                                        \
                    const uint32_t* src = (const uint32_t*) srcbuffer;         \
                    for (int s = 0; s < samps; ++s)                            \
                    {                                                          \
                        uint32_t fint = unaligned_load32 (src);                \
                        ++src;                                                 \
                        *((float*) cdata) = uint_to_float (fint);              \
                        cdata += ubpc;                                         \
                    }                                                          \
                    break;                                                     \
                }                                                              \
                case EXR_PIXEL_UINT: {                                         \
                    const uint32_t* src = (const uint32_t*) srcbuffer;         \
                    for (int s = 0; s < samps; ++s)                            \
                    {                                                          \
                        *((uint32_t*) cdata) = unaligned_load32 (src);         \
                        ++src;                                                 \
                        cdata += ubpc;                                         \
                    }                                                          \
                    break;                                                     \
                }                                                              \
                default: return EXR_ERR_INVALID_ARGUMENT;                      \
            }                                                                  \
            break;                                                             \
        default: return EXR_ERR_INVALID_ARGUMENT;                              \
    }
1123
/* Fallback scanline unpacker for non-deep images.
 *
 * Walks every scanline of the decoded chunk and every channel within it,
 * converting the packed data in decode->unpacked_buffer into the user's
 * per-channel destination buffers via the UNPACK_SAMPLES macro (which
 * reads `srcbuffer` and writes through `cdata`, advancing by the user
 * pixel stride `ubpc` and converting between decc->data_type and
 * decc->user_data_type).
 *
 * Channels whose decode_to_ptr is NULL are skipped, but their packed
 * bytes are still consumed (srcbuffer += w * bpc) so subsequent channels
 * remain correctly aligned in the packed stream.
 *
 * Returns EXR_ERR_SUCCESS, or EXR_ERR_INVALID_ARGUMENT from
 * UNPACK_SAMPLES on an unsupported type combination.
 */
static exr_result_t
generic_unpack (exr_decode_pipeline_t* decode)
{
    const uint8_t* srcbuffer = decode->unpacked_buffer;
    uint8_t* cdata;
    int w, bpc, ubpc;

    for (int y = 0; y < decode->chunk.height; ++y)
    {
        /* absolute scanline for y-subsampling tests below */
        int cury = y + decode->chunk.start_y;

        for (int c = 0; c < decode->channel_count; ++c)
        {
            exr_coding_channel_info_t* decc = (decode->channels + c);

            cdata = decc->decode_to_ptr;
            w = decc->width;
            bpc = decc->bytes_per_element;
            ubpc = decc->user_pixel_stride;

            if (decc->y_samples > 1)
            {
                /* subsampled channel: only present on every
                 * y_samples-th absolute scanline */
                if ((cury % decc->y_samples) != 0) continue;
                if (cdata)
                    cdata +=
                        ((uint64_t) (y / decc->y_samples) *
                         (uint64_t) decc->user_line_stride);
                else
                {
                    /* no destination: consume packed bytes and move on */
                    srcbuffer += w * bpc;
                    continue;
                }
            }
            else if (cdata)
            {
                cdata +=
                    ((uint64_t) y) * ((uint64_t) decc->user_line_stride);
            }
            else
            {
                /* no destination: consume packed bytes and move on */
                srcbuffer += w * bpc;
                continue;
            }

            /* expands to a type-dispatch loop over w samples, reading
             * srcbuffer and writing cdata (see macro above) */
            UNPACK_SAMPLES (w)
            srcbuffer += w * bpc;
        }
    }
    return EXR_ERR_SUCCESS;
}
1174
/* Deep-image unpacker for the per-pixel-pointer output layout.
 *
 * decc->decode_to_ptr is treated as an array of void* (one slot per
 * pixel); each non-NULL slot receives the unpacked samples for that
 * pixel.  Line and pixel strides are expressed in bytes and converted
 * to void* element strides here.
 *
 * Sample counts: when EXR_DECODE_SAMPLE_COUNTS_AS_INDIVIDUAL is set,
 * sample_count_table holds a per-pixel count; otherwise entries are a
 * running (cumulative) total per scanline, so the per-pixel count is
 * recovered by differencing and the last entry is the line total.
 *
 * Packed source bytes are always consumed (even for NULL destination
 * slots/channels) so the stream stays aligned.
 */
static exr_result_t
generic_unpack_deep_pointers (exr_decode_pipeline_t* decode)
{
    const uint8_t* srcbuffer = decode->unpacked_buffer;
    const int32_t* sampbuffer = decode->sample_count_table;
    void** pdata;
    int w, h, bpc, ubpc;

    w = decode->chunk.width;
    h = decode->chunk.height;

    for (int y = 0; y < h; ++y)
    {
        for (int c = 0; c < decode->channel_count; ++c)
        {
            exr_coding_channel_info_t* decc = (decode->channels + c);
            int32_t prevsamps = 0;
            size_t pixstride;
            bpc = decc->bytes_per_element;
            ubpc = decc->user_bytes_per_element;
            pdata = (void**) decc->decode_to_ptr;

            if (!pdata)
            {
                /* channel not requested: compute this scanline's total
                 * sample count so its packed bytes can be skipped */
                prevsamps = 0;
                if ((decode->decode_flags &
                     EXR_DECODE_SAMPLE_COUNTS_AS_INDIVIDUAL))
                {
                    for (int x = 0; x < w; ++x)
                        prevsamps += sampbuffer[x];
                }
                else
                    prevsamps = sampbuffer[w - 1];
                srcbuffer += ((size_t) bpc) * ((size_t) prevsamps);
                continue;
            }

            /* strides are in bytes; convert to void* element strides */
            pdata += ((size_t)y) * (((size_t)decc->user_line_stride) / sizeof (void*));
            pixstride = ((size_t)decc->user_pixel_stride) / sizeof (void*);

            for (int x = 0; x < w; ++x)
            {
                void* outpix = *pdata;
                int32_t samps = sampbuffer[x];
                if (0 == (decode->decode_flags &
                          EXR_DECODE_SAMPLE_COUNTS_AS_INDIVIDUAL))
                {
                    /* cumulative counts: difference to per-pixel count */
                    int32_t tmp = samps - prevsamps;
                    prevsamps = samps;
                    samps = tmp;
                }

                pdata += pixstride;
                if (outpix)
                {
                    uint8_t* cdata = outpix;
                    /* macro reads srcbuffer / writes cdata by name */
                    UNPACK_SAMPLES (samps)
                }
                /* consume packed bytes even when outpix is NULL */
                srcbuffer += bpc * samps;
            }
        }
        sampbuffer += w;
    }
    return EXR_ERR_SUCCESS;
}
1240
/* Deep-image unpacker for the flat (densely packed) output layout.
 *
 * Each channel's decode_to_ptr points at a contiguous per-channel sample
 * buffer; the write offset is totsamps (samples decoded so far across
 * all scanlines) times the user bytes-per-element.  totsamps is advanced
 * only while processing the last channel (incr_tot), since every channel
 * of a scanline shares the same sample counts.
 *
 * Sample counts follow the same encoding as in
 * generic_unpack_deep_pointers: per-pixel when
 * EXR_DECODE_SAMPLE_COUNTS_AS_INDIVIDUAL is set, otherwise cumulative
 * per scanline (differenced below).
 */
static exr_result_t
generic_unpack_deep (exr_decode_pipeline_t* decode)
{
    const uint8_t* srcbuffer = decode->unpacked_buffer;
    const int32_t* sampbuffer = decode->sample_count_table;
    uint8_t* cdata;
    int w, h, bpc, ubpc;
    size_t totsamps = 0;

    w = decode->chunk.width;
    h = decode->chunk.height;

    for (int y = 0; y < h; ++y)
    {
        for (int c = 0; c < decode->channel_count; ++c)
        {
            exr_coding_channel_info_t* decc = (decode->channels + c);
            int32_t prevsamps = 0;

            /* only bump the global sample total once per scanline */
            int incr_tot = ((c + 1) == decode->channel_count);

            bpc = decc->bytes_per_element;
            ubpc = decc->user_bytes_per_element;
            cdata = decc->decode_to_ptr;

            if (!cdata)
            {
                /* channel not requested: skip its packed bytes, but keep
                 * totsamps consistent if this is the last channel */
                prevsamps = 0;
                if ((decode->decode_flags &
                     EXR_DECODE_SAMPLE_COUNTS_AS_INDIVIDUAL))
                {
                    for (int x = 0; x < w; ++x)
                        prevsamps += sampbuffer[x];
                }
                else
                    prevsamps = sampbuffer[w - 1];
                srcbuffer += ((size_t) bpc) * ((size_t) prevsamps);

                if (incr_tot) totsamps += (size_t) prevsamps;

                continue;
            }
            /* offset into the dense per-channel output buffer */
            cdata += totsamps * ((size_t) ubpc);

            for (int x = 0; x < w; ++x)
            {
                int32_t samps = sampbuffer[x];
                if (0 == (decode->decode_flags &
                          EXR_DECODE_SAMPLE_COUNTS_AS_INDIVIDUAL))
                {
                    /* cumulative counts: difference to per-pixel count */
                    int32_t tmp = samps - prevsamps;
                    prevsamps = samps;
                    samps = tmp;
                }

                /* macro reads srcbuffer / writes cdata by name */
                UNPACK_SAMPLES (samps)

                srcbuffer += bpc * samps;
                if (incr_tot) totsamps += (size_t) samps;
            }
        }
        sampbuffer += w;
    }

    return EXR_ERR_SUCCESS;
}
1307
1308 /**************************************/
1309
1310 internal_exr_unpack_fn
internal_exr_match_decode(exr_decode_pipeline_t * decode,int isdeep,int chanstofill,int chanstounpack,int sametype,int sameouttype,int samebpc,int sameoutbpc,int hassampling,int hastypechange,int sameoutinc,int simpinterleave,int simpinterleaverev,int simplineoff)1311 internal_exr_match_decode (
1312 exr_decode_pipeline_t* decode,
1313 int isdeep,
1314 int chanstofill,
1315 int chanstounpack,
1316 int sametype,
1317 int sameouttype,
1318 int samebpc,
1319 int sameoutbpc,
1320 int hassampling,
1321 int hastypechange,
1322 int sameoutinc,
1323 int simpinterleave,
1324 int simpinterleaverev,
1325 int simplineoff)
1326 {
1327 static int init_cpu_check = 1;
1328 if (init_cpu_check)
1329 {
1330 choose_half_to_float_impl ();
1331 init_cpu_check = 0;
1332 }
1333
1334 if (isdeep)
1335 {
1336 if ((decode->decode_flags & EXR_DECODE_SAMPLE_COUNTS_AS_INDIVIDUAL))
1337 return &generic_unpack_deep_pointers;
1338 return &generic_unpack_deep;
1339 }
1340
1341 if (hastypechange > 0)
1342 {
1343 /* other optimizations would not be difficult, but this will
1344 * be the common one (where on encode / pack we want to do the
1345 * opposite) */
1346 if (sametype == (int) EXR_PIXEL_HALF &&
1347 sameouttype == (int) EXR_PIXEL_FLOAT)
1348 {
1349 if (simpinterleave > 0)
1350 {
1351 if (decode->channel_count == 4)
1352 return &unpack_half_to_float_4chan_interleave;
1353 if (decode->channel_count == 3)
1354 return &unpack_half_to_float_3chan_interleave;
1355 }
1356
1357 if (simpinterleaverev > 0)
1358 {
1359 if (decode->channel_count == 4)
1360 return &unpack_half_to_float_4chan_interleave_rev;
1361 if (decode->channel_count == 3)
1362 return &unpack_half_to_float_3chan_interleave_rev;
1363 }
1364
1365 if (sameoutinc == 4)
1366 {
1367 if (decode->channel_count == 4)
1368 return &unpack_half_to_float_4chan_planar;
1369 if (decode->channel_count == 3)
1370 return &unpack_half_to_float_3chan_planar;
1371 }
1372 }
1373
1374 return &generic_unpack;
1375 }
1376
1377 if (hassampling || chanstofill != decode->channel_count || samebpc <= 0 ||
1378 sameoutbpc <= 0)
1379 return &generic_unpack;
1380
1381 (void) chanstounpack;
1382 (void) simplineoff;
1383
1384 if (samebpc == 2)
1385 {
1386 if (simpinterleave > 0)
1387 {
1388 if (decode->channel_count == 4)
1389 return &unpack_16bit_4chan_interleave;
1390 if (decode->channel_count == 3)
1391 return &unpack_16bit_3chan_interleave;
1392 }
1393
1394 if (simpinterleaverev > 0)
1395 {
1396 if (decode->channel_count == 4)
1397 return &unpack_16bit_4chan_interleave_rev;
1398 if (decode->channel_count == 3)
1399 return &unpack_16bit_3chan_interleave_rev;
1400 }
1401
1402 if (sameoutinc == 2)
1403 {
1404 if (decode->channel_count == 4) return &unpack_16bit_4chan_planar;
1405 if (decode->channel_count == 3) return &unpack_16bit_3chan_planar;
1406 }
1407
1408 if (decode->channel_count == 4) return &unpack_16bit_4chan;
1409 if (decode->channel_count == 3) return &unpack_16bit_3chan;
1410
1411 return &unpack_16bit;
1412 }
1413
1414 if (samebpc == 4)
1415 {
1416 //if (decode->channel_count == 4) return &unpack_32bit_4chan;
1417 //if (decode->channel_count == 3) return &unpack_32bit_3chan;
1418 return &unpack_32bit;
1419 }
1420
1421 return &generic_unpack;
1422 }
1423