1 //
2 // Copyright 2013 The ANGLE Project Authors. All rights reserved.
3 // Use of this source code is governed by a BSD-style license that can be
4 // found in the LICENSE file.
5 //
6 
7 // angle_loadimage.cpp: Defines image loading functions.
8 
9 #include "image_util/loadimage.h"
10 
11 #include "common/mathutil.h"
12 #include "common/platform.h"
13 #include "image_util/imageformats.h"
14 
15 namespace angle
16 {
17 
LoadA8ToRGBA8(size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)18 void LoadA8ToRGBA8(size_t width,
19                    size_t height,
20                    size_t depth,
21                    const uint8_t *input,
22                    size_t inputRowPitch,
23                    size_t inputDepthPitch,
24                    uint8_t *output,
25                    size_t outputRowPitch,
26                    size_t outputDepthPitch)
27 {
28 #if defined(ANGLE_USE_SSE)
29     if (gl::supportsSSE2())
30     {
31         __m128i zeroWide = _mm_setzero_si128();
32 
33         for (size_t z = 0; z < depth; z++)
34         {
35             for (size_t y = 0; y < height; y++)
36             {
37                 const uint8_t *source =
38                     priv::OffsetDataPointer<uint8_t>(input, y, z, inputRowPitch, inputDepthPitch);
39                 uint32_t *dest = priv::OffsetDataPointer<uint32_t>(output, y, z, outputRowPitch,
40                                                                    outputDepthPitch);
41 
42                 size_t x = 0;
43 
44                 // Make output writes aligned
45                 for (; ((reinterpret_cast<intptr_t>(&dest[x]) & 0xF) != 0 && x < width); x++)
46                 {
47                     dest[x] = static_cast<uint32_t>(source[x]) << 24;
48                 }
49 
50                 for (; x + 7 < width; x += 8)
51                 {
52                     __m128i sourceData =
53                         _mm_loadl_epi64(reinterpret_cast<const __m128i *>(&source[x]));
54                     // Interleave each byte to 16bit, make the lower byte to zero
55                     sourceData = _mm_unpacklo_epi8(zeroWide, sourceData);
56                     // Interleave each 16bit to 32bit, make the lower 16bit to zero
57                     __m128i lo = _mm_unpacklo_epi16(zeroWide, sourceData);
58                     __m128i hi = _mm_unpackhi_epi16(zeroWide, sourceData);
59 
60                     _mm_store_si128(reinterpret_cast<__m128i *>(&dest[x]), lo);
61                     _mm_store_si128(reinterpret_cast<__m128i *>(&dest[x + 4]), hi);
62                 }
63 
64                 // Handle the remainder
65                 for (; x < width; x++)
66                 {
67                     dest[x] = static_cast<uint32_t>(source[x]) << 24;
68                 }
69             }
70         }
71 
72         return;
73     }
74 #endif
75 
76     for (size_t z = 0; z < depth; z++)
77     {
78         for (size_t y = 0; y < height; y++)
79         {
80             const uint8_t *source =
81                 priv::OffsetDataPointer<uint8_t>(input, y, z, inputRowPitch, inputDepthPitch);
82             uint32_t *dest =
83                 priv::OffsetDataPointer<uint32_t>(output, y, z, outputRowPitch, outputDepthPitch);
84             for (size_t x = 0; x < width; x++)
85             {
86                 dest[x] = static_cast<uint32_t>(source[x]) << 24;
87             }
88         }
89     }
90 }
91 
LoadA8ToBGRA8(size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)92 void LoadA8ToBGRA8(size_t width,
93                    size_t height,
94                    size_t depth,
95                    const uint8_t *input,
96                    size_t inputRowPitch,
97                    size_t inputDepthPitch,
98                    uint8_t *output,
99                    size_t outputRowPitch,
100                    size_t outputDepthPitch)
101 {
102     // Same as loading to RGBA
103     LoadA8ToRGBA8(width, height, depth, input, inputRowPitch, inputDepthPitch, output,
104                   outputRowPitch, outputDepthPitch);
105 }
106 
LoadA32FToRGBA32F(size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)107 void LoadA32FToRGBA32F(size_t width,
108                        size_t height,
109                        size_t depth,
110                        const uint8_t *input,
111                        size_t inputRowPitch,
112                        size_t inputDepthPitch,
113                        uint8_t *output,
114                        size_t outputRowPitch,
115                        size_t outputDepthPitch)
116 {
117     for (size_t z = 0; z < depth; z++)
118     {
119         for (size_t y = 0; y < height; y++)
120         {
121             const float *source =
122                 priv::OffsetDataPointer<float>(input, y, z, inputRowPitch, inputDepthPitch);
123             float *dest =
124                 priv::OffsetDataPointer<float>(output, y, z, outputRowPitch, outputDepthPitch);
125             for (size_t x = 0; x < width; x++)
126             {
127                 dest[4 * x + 0] = 0.0f;
128                 dest[4 * x + 1] = 0.0f;
129                 dest[4 * x + 2] = 0.0f;
130                 dest[4 * x + 3] = source[x];
131             }
132         }
133     }
134 }
135 
LoadA16FToRGBA16F(size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)136 void LoadA16FToRGBA16F(size_t width,
137                        size_t height,
138                        size_t depth,
139                        const uint8_t *input,
140                        size_t inputRowPitch,
141                        size_t inputDepthPitch,
142                        uint8_t *output,
143                        size_t outputRowPitch,
144                        size_t outputDepthPitch)
145 {
146     for (size_t z = 0; z < depth; z++)
147     {
148         for (size_t y = 0; y < height; y++)
149         {
150             const uint16_t *source =
151                 priv::OffsetDataPointer<uint16_t>(input, y, z, inputRowPitch, inputDepthPitch);
152             uint16_t *dest =
153                 priv::OffsetDataPointer<uint16_t>(output, y, z, outputRowPitch, outputDepthPitch);
154             for (size_t x = 0; x < width; x++)
155             {
156                 dest[4 * x + 0] = 0;
157                 dest[4 * x + 1] = 0;
158                 dest[4 * x + 2] = 0;
159                 dest[4 * x + 3] = source[x];
160             }
161         }
162     }
163 }
164 
LoadL8ToRGBA8(size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)165 void LoadL8ToRGBA8(size_t width,
166                    size_t height,
167                    size_t depth,
168                    const uint8_t *input,
169                    size_t inputRowPitch,
170                    size_t inputDepthPitch,
171                    uint8_t *output,
172                    size_t outputRowPitch,
173                    size_t outputDepthPitch)
174 {
175     for (size_t z = 0; z < depth; z++)
176     {
177         for (size_t y = 0; y < height; y++)
178         {
179             const uint8_t *source =
180                 priv::OffsetDataPointer<uint8_t>(input, y, z, inputRowPitch, inputDepthPitch);
181             uint8_t *dest =
182                 priv::OffsetDataPointer<uint8_t>(output, y, z, outputRowPitch, outputDepthPitch);
183             for (size_t x = 0; x < width; x++)
184             {
185                 uint8_t sourceVal = source[x];
186                 dest[4 * x + 0]   = sourceVal;
187                 dest[4 * x + 1]   = sourceVal;
188                 dest[4 * x + 2]   = sourceVal;
189                 dest[4 * x + 3]   = 0xFF;
190             }
191         }
192     }
193 }
194 
LoadL8ToBGRA8(size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)195 void LoadL8ToBGRA8(size_t width,
196                    size_t height,
197                    size_t depth,
198                    const uint8_t *input,
199                    size_t inputRowPitch,
200                    size_t inputDepthPitch,
201                    uint8_t *output,
202                    size_t outputRowPitch,
203                    size_t outputDepthPitch)
204 {
205     // Same as loading to RGBA
206     LoadL8ToRGBA8(width, height, depth, input, inputRowPitch, inputDepthPitch, output,
207                   outputRowPitch, outputDepthPitch);
208 }
209 
LoadL32FToRGBA32F(size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)210 void LoadL32FToRGBA32F(size_t width,
211                        size_t height,
212                        size_t depth,
213                        const uint8_t *input,
214                        size_t inputRowPitch,
215                        size_t inputDepthPitch,
216                        uint8_t *output,
217                        size_t outputRowPitch,
218                        size_t outputDepthPitch)
219 {
220     for (size_t z = 0; z < depth; z++)
221     {
222         for (size_t y = 0; y < height; y++)
223         {
224             const float *source =
225                 priv::OffsetDataPointer<float>(input, y, z, inputRowPitch, inputDepthPitch);
226             float *dest =
227                 priv::OffsetDataPointer<float>(output, y, z, outputRowPitch, outputDepthPitch);
228             for (size_t x = 0; x < width; x++)
229             {
230                 dest[4 * x + 0] = source[x];
231                 dest[4 * x + 1] = source[x];
232                 dest[4 * x + 2] = source[x];
233                 dest[4 * x + 3] = 1.0f;
234             }
235         }
236     }
237 }
238 
LoadL16FToRGBA16F(size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)239 void LoadL16FToRGBA16F(size_t width,
240                        size_t height,
241                        size_t depth,
242                        const uint8_t *input,
243                        size_t inputRowPitch,
244                        size_t inputDepthPitch,
245                        uint8_t *output,
246                        size_t outputRowPitch,
247                        size_t outputDepthPitch)
248 {
249     for (size_t z = 0; z < depth; z++)
250     {
251         for (size_t y = 0; y < height; y++)
252         {
253             const uint16_t *source =
254                 priv::OffsetDataPointer<uint16_t>(input, y, z, inputRowPitch, inputDepthPitch);
255             uint16_t *dest =
256                 priv::OffsetDataPointer<uint16_t>(output, y, z, outputRowPitch, outputDepthPitch);
257             for (size_t x = 0; x < width; x++)
258             {
259                 dest[4 * x + 0] = source[x];
260                 dest[4 * x + 1] = source[x];
261                 dest[4 * x + 2] = source[x];
262                 dest[4 * x + 3] = gl::Float16One;
263             }
264         }
265     }
266 }
267 
LoadLA8ToRGBA8(size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)268 void LoadLA8ToRGBA8(size_t width,
269                     size_t height,
270                     size_t depth,
271                     const uint8_t *input,
272                     size_t inputRowPitch,
273                     size_t inputDepthPitch,
274                     uint8_t *output,
275                     size_t outputRowPitch,
276                     size_t outputDepthPitch)
277 {
278     for (size_t z = 0; z < depth; z++)
279     {
280         for (size_t y = 0; y < height; y++)
281         {
282             const uint8_t *source =
283                 priv::OffsetDataPointer<uint8_t>(input, y, z, inputRowPitch, inputDepthPitch);
284             uint8_t *dest =
285                 priv::OffsetDataPointer<uint8_t>(output, y, z, outputRowPitch, outputDepthPitch);
286             for (size_t x = 0; x < width; x++)
287             {
288                 dest[4 * x + 0] = source[2 * x + 0];
289                 dest[4 * x + 1] = source[2 * x + 0];
290                 dest[4 * x + 2] = source[2 * x + 0];
291                 dest[4 * x + 3] = source[2 * x + 1];
292             }
293         }
294     }
295 }
296 
LoadLA8ToBGRA8(size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)297 void LoadLA8ToBGRA8(size_t width,
298                     size_t height,
299                     size_t depth,
300                     const uint8_t *input,
301                     size_t inputRowPitch,
302                     size_t inputDepthPitch,
303                     uint8_t *output,
304                     size_t outputRowPitch,
305                     size_t outputDepthPitch)
306 {
307     // Same as loading to RGBA
308     LoadLA8ToRGBA8(width, height, depth, input, inputRowPitch, inputDepthPitch, output,
309                    outputRowPitch, outputDepthPitch);
310 }
311 
LoadLA32FToRGBA32F(size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)312 void LoadLA32FToRGBA32F(size_t width,
313                         size_t height,
314                         size_t depth,
315                         const uint8_t *input,
316                         size_t inputRowPitch,
317                         size_t inputDepthPitch,
318                         uint8_t *output,
319                         size_t outputRowPitch,
320                         size_t outputDepthPitch)
321 {
322     for (size_t z = 0; z < depth; z++)
323     {
324         for (size_t y = 0; y < height; y++)
325         {
326             const float *source =
327                 priv::OffsetDataPointer<float>(input, y, z, inputRowPitch, inputDepthPitch);
328             float *dest =
329                 priv::OffsetDataPointer<float>(output, y, z, outputRowPitch, outputDepthPitch);
330             for (size_t x = 0; x < width; x++)
331             {
332                 dest[4 * x + 0] = source[2 * x + 0];
333                 dest[4 * x + 1] = source[2 * x + 0];
334                 dest[4 * x + 2] = source[2 * x + 0];
335                 dest[4 * x + 3] = source[2 * x + 1];
336             }
337         }
338     }
339 }
340 
LoadLA16FToRGBA16F(size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)341 void LoadLA16FToRGBA16F(size_t width,
342                         size_t height,
343                         size_t depth,
344                         const uint8_t *input,
345                         size_t inputRowPitch,
346                         size_t inputDepthPitch,
347                         uint8_t *output,
348                         size_t outputRowPitch,
349                         size_t outputDepthPitch)
350 {
351     for (size_t z = 0; z < depth; z++)
352     {
353         for (size_t y = 0; y < height; y++)
354         {
355             const uint16_t *source =
356                 priv::OffsetDataPointer<uint16_t>(input, y, z, inputRowPitch, inputDepthPitch);
357             uint16_t *dest =
358                 priv::OffsetDataPointer<uint16_t>(output, y, z, outputRowPitch, outputDepthPitch);
359             for (size_t x = 0; x < width; x++)
360             {
361                 dest[4 * x + 0] = source[2 * x + 0];
362                 dest[4 * x + 1] = source[2 * x + 0];
363                 dest[4 * x + 2] = source[2 * x + 0];
364                 dest[4 * x + 3] = source[2 * x + 1];
365             }
366         }
367     }
368 }
369 
LoadRGB8ToBGR565(size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)370 void LoadRGB8ToBGR565(size_t width,
371                       size_t height,
372                       size_t depth,
373                       const uint8_t *input,
374                       size_t inputRowPitch,
375                       size_t inputDepthPitch,
376                       uint8_t *output,
377                       size_t outputRowPitch,
378                       size_t outputDepthPitch)
379 {
380     for (size_t z = 0; z < depth; z++)
381     {
382         for (size_t y = 0; y < height; y++)
383         {
384             const uint8_t *source =
385                 priv::OffsetDataPointer<uint8_t>(input, y, z, inputRowPitch, inputDepthPitch);
386             uint16_t *dest =
387                 priv::OffsetDataPointer<uint16_t>(output, y, z, outputRowPitch, outputDepthPitch);
388             for (size_t x = 0; x < width; x++)
389             {
390                 uint8_t r8 = source[x * 3 + 0];
391                 uint8_t g8 = source[x * 3 + 1];
392                 uint8_t b8 = source[x * 3 + 2];
393                 auto r5    = static_cast<uint16_t>(r8 >> 3);
394                 auto g6    = static_cast<uint16_t>(g8 >> 2);
395                 auto b5    = static_cast<uint16_t>(b8 >> 3);
396                 dest[x]    = (r5 << 11) | (g6 << 5) | b5;
397             }
398         }
399     }
400 }
401 
LoadRGB565ToBGR565(size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)402 void LoadRGB565ToBGR565(size_t width,
403                         size_t height,
404                         size_t depth,
405                         const uint8_t *input,
406                         size_t inputRowPitch,
407                         size_t inputDepthPitch,
408                         uint8_t *output,
409                         size_t outputRowPitch,
410                         size_t outputDepthPitch)
411 {
412     for (size_t z = 0; z < depth; z++)
413     {
414         for (size_t y = 0; y < height; y++)
415         {
416             const uint16_t *source =
417                 priv::OffsetDataPointer<uint16_t>(input, y, z, inputRowPitch, inputDepthPitch);
418             uint16_t *dest =
419                 priv::OffsetDataPointer<uint16_t>(output, y, z, outputRowPitch, outputDepthPitch);
420             for (size_t x = 0; x < width; x++)
421             {
422                 // The GL type RGB is packed with with red in the MSB, while the D3D11 type BGR
423                 // is packed with red in the LSB
424                 auto rgb    = source[x];
425                 uint16_t r5 = gl::getShiftedData<5, 11>(rgb);
426                 uint16_t g6 = gl::getShiftedData<6, 5>(rgb);
427                 uint16_t b5 = gl::getShiftedData<5, 0>(rgb);
428                 dest[x]     = (r5 << 11) | (g6 << 5) | b5;
429             }
430         }
431     }
432 }
433 
LoadRGB8ToBGRX8(size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)434 void LoadRGB8ToBGRX8(size_t width,
435                      size_t height,
436                      size_t depth,
437                      const uint8_t *input,
438                      size_t inputRowPitch,
439                      size_t inputDepthPitch,
440                      uint8_t *output,
441                      size_t outputRowPitch,
442                      size_t outputDepthPitch)
443 {
444     for (size_t z = 0; z < depth; z++)
445     {
446         for (size_t y = 0; y < height; y++)
447         {
448             const uint8_t *source =
449                 priv::OffsetDataPointer<uint8_t>(input, y, z, inputRowPitch, inputDepthPitch);
450             uint8_t *dest =
451                 priv::OffsetDataPointer<uint8_t>(output, y, z, outputRowPitch, outputDepthPitch);
452             for (size_t x = 0; x < width; x++)
453             {
454                 dest[4 * x + 0] = source[x * 3 + 2];
455                 dest[4 * x + 1] = source[x * 3 + 1];
456                 dest[4 * x + 2] = source[x * 3 + 0];
457                 dest[4 * x + 3] = 0xFF;
458             }
459         }
460     }
461 }
462 
LoadRG8ToBGRX8(size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)463 void LoadRG8ToBGRX8(size_t width,
464                     size_t height,
465                     size_t depth,
466                     const uint8_t *input,
467                     size_t inputRowPitch,
468                     size_t inputDepthPitch,
469                     uint8_t *output,
470                     size_t outputRowPitch,
471                     size_t outputDepthPitch)
472 {
473     for (size_t z = 0; z < depth; z++)
474     {
475         for (size_t y = 0; y < height; y++)
476         {
477             const uint8_t *source =
478                 priv::OffsetDataPointer<uint8_t>(input, y, z, inputRowPitch, inputDepthPitch);
479             uint8_t *dest =
480                 priv::OffsetDataPointer<uint8_t>(output, y, z, outputRowPitch, outputDepthPitch);
481             for (size_t x = 0; x < width; x++)
482             {
483                 dest[4 * x + 0] = 0x00;
484                 dest[4 * x + 1] = source[x * 2 + 1];
485                 dest[4 * x + 2] = source[x * 2 + 0];
486                 dest[4 * x + 3] = 0xFF;
487             }
488         }
489     }
490 }
491 
LoadR8ToBGRX8(size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)492 void LoadR8ToBGRX8(size_t width,
493                    size_t height,
494                    size_t depth,
495                    const uint8_t *input,
496                    size_t inputRowPitch,
497                    size_t inputDepthPitch,
498                    uint8_t *output,
499                    size_t outputRowPitch,
500                    size_t outputDepthPitch)
501 {
502     for (size_t z = 0; z < depth; z++)
503     {
504         for (size_t y = 0; y < height; y++)
505         {
506             const uint8_t *source =
507                 priv::OffsetDataPointer<uint8_t>(input, y, z, inputRowPitch, inputDepthPitch);
508             uint8_t *dest =
509                 priv::OffsetDataPointer<uint8_t>(output, y, z, outputRowPitch, outputDepthPitch);
510             for (size_t x = 0; x < width; x++)
511             {
512                 dest[4 * x + 0] = 0x00;
513                 dest[4 * x + 1] = 0x00;
514                 dest[4 * x + 2] = source[x];
515                 dest[4 * x + 3] = 0xFF;
516             }
517         }
518     }
519 }
520 
LoadR5G6B5ToBGRA8(size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)521 void LoadR5G6B5ToBGRA8(size_t width,
522                        size_t height,
523                        size_t depth,
524                        const uint8_t *input,
525                        size_t inputRowPitch,
526                        size_t inputDepthPitch,
527                        uint8_t *output,
528                        size_t outputRowPitch,
529                        size_t outputDepthPitch)
530 {
531     for (size_t z = 0; z < depth; z++)
532     {
533         for (size_t y = 0; y < height; y++)
534         {
535             const uint16_t *source =
536                 priv::OffsetDataPointer<uint16_t>(input, y, z, inputRowPitch, inputDepthPitch);
537             uint8_t *dest =
538                 priv::OffsetDataPointer<uint8_t>(output, y, z, outputRowPitch, outputDepthPitch);
539             for (size_t x = 0; x < width; x++)
540             {
541                 uint16_t rgb = source[x];
542                 dest[4 * x + 0] =
543                     static_cast<uint8_t>(((rgb & 0x001F) << 3) | ((rgb & 0x001F) >> 2));
544                 dest[4 * x + 1] =
545                     static_cast<uint8_t>(((rgb & 0x07E0) >> 3) | ((rgb & 0x07E0) >> 9));
546                 dest[4 * x + 2] =
547                     static_cast<uint8_t>(((rgb & 0xF800) >> 8) | ((rgb & 0xF800) >> 13));
548                 dest[4 * x + 3] = 0xFF;
549             }
550         }
551     }
552 }
553 
LoadR5G6B5ToRGBA8(size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)554 void LoadR5G6B5ToRGBA8(size_t width,
555                        size_t height,
556                        size_t depth,
557                        const uint8_t *input,
558                        size_t inputRowPitch,
559                        size_t inputDepthPitch,
560                        uint8_t *output,
561                        size_t outputRowPitch,
562                        size_t outputDepthPitch)
563 {
564     for (size_t z = 0; z < depth; z++)
565     {
566         for (size_t y = 0; y < height; y++)
567         {
568             const uint16_t *source =
569                 priv::OffsetDataPointer<uint16_t>(input, y, z, inputRowPitch, inputDepthPitch);
570             uint8_t *dest =
571                 priv::OffsetDataPointer<uint8_t>(output, y, z, outputRowPitch, outputDepthPitch);
572             for (size_t x = 0; x < width; x++)
573             {
574                 uint16_t rgb = source[x];
575                 dest[4 * x + 0] =
576                     static_cast<uint8_t>(((rgb & 0xF800) >> 8) | ((rgb & 0xF800) >> 13));
577                 dest[4 * x + 1] =
578                     static_cast<uint8_t>(((rgb & 0x07E0) >> 3) | ((rgb & 0x07E0) >> 9));
579                 dest[4 * x + 2] =
580                     static_cast<uint8_t>(((rgb & 0x001F) << 3) | ((rgb & 0x001F) >> 2));
581                 dest[4 * x + 3] = 0xFF;
582             }
583         }
584     }
585 }
586 
LoadRGBA8ToBGRA8(size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)587 void LoadRGBA8ToBGRA8(size_t width,
588                       size_t height,
589                       size_t depth,
590                       const uint8_t *input,
591                       size_t inputRowPitch,
592                       size_t inputDepthPitch,
593                       uint8_t *output,
594                       size_t outputRowPitch,
595                       size_t outputDepthPitch)
596 {
597 #if defined(ANGLE_USE_SSE)
598     if (gl::supportsSSE2())
599     {
600         __m128i brMask = _mm_set1_epi32(0x00ff00ff);
601 
602         for (size_t z = 0; z < depth; z++)
603         {
604             for (size_t y = 0; y < height; y++)
605             {
606                 const uint32_t *source =
607                     priv::OffsetDataPointer<uint32_t>(input, y, z, inputRowPitch, inputDepthPitch);
608                 uint32_t *dest = priv::OffsetDataPointer<uint32_t>(output, y, z, outputRowPitch,
609                                                                    outputDepthPitch);
610 
611                 size_t x = 0;
612 
613                 // Make output writes aligned
614                 for (; ((reinterpret_cast<intptr_t>(&dest[x]) & 15) != 0) && x < width; x++)
615                 {
616                     uint32_t rgba = source[x];
617                     dest[x]       = (ANGLE_ROTL(rgba, 16) & 0x00ff00ff) | (rgba & 0xff00ff00);
618                 }
619 
620                 for (; x + 3 < width; x += 4)
621                 {
622                     __m128i sourceData =
623                         _mm_loadu_si128(reinterpret_cast<const __m128i *>(&source[x]));
624                     // Mask out g and a, which don't change
625                     __m128i gaComponents = _mm_andnot_si128(brMask, sourceData);
626                     // Mask out b and r
627                     __m128i brComponents = _mm_and_si128(sourceData, brMask);
628                     // Swap b and r
629                     __m128i brSwapped = _mm_shufflehi_epi16(
630                         _mm_shufflelo_epi16(brComponents, _MM_SHUFFLE(2, 3, 0, 1)),
631                         _MM_SHUFFLE(2, 3, 0, 1));
632                     __m128i result = _mm_or_si128(gaComponents, brSwapped);
633                     _mm_store_si128(reinterpret_cast<__m128i *>(&dest[x]), result);
634                 }
635 
636                 // Perform leftover writes
637                 for (; x < width; x++)
638                 {
639                     uint32_t rgba = source[x];
640                     dest[x]       = (ANGLE_ROTL(rgba, 16) & 0x00ff00ff) | (rgba & 0xff00ff00);
641                 }
642             }
643         }
644 
645         return;
646     }
647 #endif
648 
649     for (size_t z = 0; z < depth; z++)
650     {
651         for (size_t y = 0; y < height; y++)
652         {
653             const uint32_t *source =
654                 priv::OffsetDataPointer<uint32_t>(input, y, z, inputRowPitch, inputDepthPitch);
655             uint32_t *dest =
656                 priv::OffsetDataPointer<uint32_t>(output, y, z, outputRowPitch, outputDepthPitch);
657             for (size_t x = 0; x < width; x++)
658             {
659                 uint32_t rgba = source[x];
660                 dest[x]       = (ANGLE_ROTL(rgba, 16) & 0x00ff00ff) | (rgba & 0xff00ff00);
661             }
662         }
663     }
664 }
665 
LoadRGBA8ToBGRA4(size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)666 void LoadRGBA8ToBGRA4(size_t width,
667                       size_t height,
668                       size_t depth,
669                       const uint8_t *input,
670                       size_t inputRowPitch,
671                       size_t inputDepthPitch,
672                       uint8_t *output,
673                       size_t outputRowPitch,
674                       size_t outputDepthPitch)
675 {
676     for (size_t z = 0; z < depth; z++)
677     {
678         for (size_t y = 0; y < height; y++)
679         {
680             const uint32_t *source =
681                 priv::OffsetDataPointer<uint32_t>(input, y, z, inputRowPitch, inputDepthPitch);
682             uint16_t *dest =
683                 priv::OffsetDataPointer<uint16_t>(output, y, z, outputRowPitch, outputDepthPitch);
684             for (size_t x = 0; x < width; x++)
685             {
686                 uint32_t rgba8 = source[x];
687                 auto r4        = static_cast<uint16_t>((rgba8 & 0x000000FF) >> 4);
688                 auto g4        = static_cast<uint16_t>((rgba8 & 0x0000FF00) >> 12);
689                 auto b4        = static_cast<uint16_t>((rgba8 & 0x00FF0000) >> 20);
690                 auto a4        = static_cast<uint16_t>((rgba8 & 0xFF000000) >> 28);
691                 dest[x]        = (a4 << 12) | (r4 << 8) | (g4 << 4) | b4;
692             }
693         }
694     }
695 }
696 
LoadRGBA8ToRGBA4(size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)697 void LoadRGBA8ToRGBA4(size_t width,
698                       size_t height,
699                       size_t depth,
700                       const uint8_t *input,
701                       size_t inputRowPitch,
702                       size_t inputDepthPitch,
703                       uint8_t *output,
704                       size_t outputRowPitch,
705                       size_t outputDepthPitch)
706 {
707     for (size_t z = 0; z < depth; z++)
708     {
709         for (size_t y = 0; y < height; y++)
710         {
711             const uint32_t *source =
712                 priv::OffsetDataPointer<uint32_t>(input, y, z, inputRowPitch, inputDepthPitch);
713             uint16_t *dest =
714                 priv::OffsetDataPointer<uint16_t>(output, y, z, outputRowPitch, outputDepthPitch);
715             for (size_t x = 0; x < width; x++)
716             {
717                 uint32_t rgba8 = source[x];
718                 auto r4        = static_cast<uint16_t>((rgba8 & 0x000000FF) >> 4);
719                 auto g4        = static_cast<uint16_t>((rgba8 & 0x0000FF00) >> 12);
720                 auto b4        = static_cast<uint16_t>((rgba8 & 0x00FF0000) >> 20);
721                 auto a4        = static_cast<uint16_t>((rgba8 & 0xFF000000) >> 28);
722                 dest[x]        = (r4 << 12) | (g4 << 8) | (b4 << 4) | a4;
723             }
724         }
725     }
726 }
727 
LoadRGBA4ToARGB4(size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)728 void LoadRGBA4ToARGB4(size_t width,
729                       size_t height,
730                       size_t depth,
731                       const uint8_t *input,
732                       size_t inputRowPitch,
733                       size_t inputDepthPitch,
734                       uint8_t *output,
735                       size_t outputRowPitch,
736                       size_t outputDepthPitch)
737 {
738     for (size_t z = 0; z < depth; z++)
739     {
740         for (size_t y = 0; y < height; y++)
741         {
742             const uint16_t *source =
743                 priv::OffsetDataPointer<uint16_t>(input, y, z, inputRowPitch, inputDepthPitch);
744             uint16_t *dest =
745                 priv::OffsetDataPointer<uint16_t>(output, y, z, outputRowPitch, outputDepthPitch);
746             for (size_t x = 0; x < width; x++)
747             {
748                 dest[x] = ANGLE_ROTR16(source[x], 4);
749             }
750         }
751     }
752 }
753 
LoadRGBA4ToBGRA8(size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)754 void LoadRGBA4ToBGRA8(size_t width,
755                       size_t height,
756                       size_t depth,
757                       const uint8_t *input,
758                       size_t inputRowPitch,
759                       size_t inputDepthPitch,
760                       uint8_t *output,
761                       size_t outputRowPitch,
762                       size_t outputDepthPitch)
763 {
764     for (size_t z = 0; z < depth; z++)
765     {
766         for (size_t y = 0; y < height; y++)
767         {
768             const uint16_t *source =
769                 priv::OffsetDataPointer<uint16_t>(input, y, z, inputRowPitch, inputDepthPitch);
770             uint8_t *dest =
771                 priv::OffsetDataPointer<uint8_t>(output, y, z, outputRowPitch, outputDepthPitch);
772             for (size_t x = 0; x < width; x++)
773             {
774                 uint16_t rgba = source[x];
775                 dest[4 * x + 0] =
776                     static_cast<uint8_t>(((rgba & 0x00F0) << 0) | ((rgba & 0x00F0) >> 4));
777                 dest[4 * x + 1] =
778                     static_cast<uint8_t>(((rgba & 0x0F00) >> 4) | ((rgba & 0x0F00) >> 8));
779                 dest[4 * x + 2] =
780                     static_cast<uint8_t>(((rgba & 0xF000) >> 8) | ((rgba & 0xF000) >> 12));
781                 dest[4 * x + 3] =
782                     static_cast<uint8_t>(((rgba & 0x000F) << 4) | ((rgba & 0x000F) >> 0));
783             }
784         }
785     }
786 }
787 
LoadRGBA4ToRGBA8(size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)788 void LoadRGBA4ToRGBA8(size_t width,
789                       size_t height,
790                       size_t depth,
791                       const uint8_t *input,
792                       size_t inputRowPitch,
793                       size_t inputDepthPitch,
794                       uint8_t *output,
795                       size_t outputRowPitch,
796                       size_t outputDepthPitch)
797 {
798     for (size_t z = 0; z < depth; z++)
799     {
800         for (size_t y = 0; y < height; y++)
801         {
802             const uint16_t *source =
803                 priv::OffsetDataPointer<uint16_t>(input, y, z, inputRowPitch, inputDepthPitch);
804             uint8_t *dest =
805                 priv::OffsetDataPointer<uint8_t>(output, y, z, outputRowPitch, outputDepthPitch);
806             for (size_t x = 0; x < width; x++)
807             {
808                 uint16_t rgba = source[x];
809                 dest[4 * x + 0] =
810                     static_cast<uint8_t>(((rgba & 0xF000) >> 8) | ((rgba & 0xF000) >> 12));
811                 dest[4 * x + 1] =
812                     static_cast<uint8_t>(((rgba & 0x0F00) >> 4) | ((rgba & 0x0F00) >> 8));
813                 dest[4 * x + 2] =
814                     static_cast<uint8_t>(((rgba & 0x00F0) << 0) | ((rgba & 0x00F0) >> 4));
815                 dest[4 * x + 3] =
816                     static_cast<uint8_t>(((rgba & 0x000F) << 4) | ((rgba & 0x000F) >> 0));
817             }
818         }
819     }
820 }
821 
LoadBGRA4ToBGRA8(size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)822 void LoadBGRA4ToBGRA8(size_t width,
823                       size_t height,
824                       size_t depth,
825                       const uint8_t *input,
826                       size_t inputRowPitch,
827                       size_t inputDepthPitch,
828                       uint8_t *output,
829                       size_t outputRowPitch,
830                       size_t outputDepthPitch)
831 {
832     for (size_t z = 0; z < depth; z++)
833     {
834         for (size_t y = 0; y < height; y++)
835         {
836             const uint16_t *source =
837                 priv::OffsetDataPointer<uint16_t>(input, y, z, inputRowPitch, inputDepthPitch);
838             uint8_t *dest =
839                 priv::OffsetDataPointer<uint8_t>(output, y, z, outputRowPitch, outputDepthPitch);
840             for (size_t x = 0; x < width; x++)
841             {
842                 uint16_t bgra = source[x];
843                 dest[4 * x + 0] =
844                     static_cast<uint8_t>(((bgra & 0xF000) >> 8) | ((bgra & 0xF000) >> 12));
845                 dest[4 * x + 1] =
846                     static_cast<uint8_t>(((bgra & 0x0F00) >> 4) | ((bgra & 0x0F00) >> 8));
847                 dest[4 * x + 2] =
848                     static_cast<uint8_t>(((bgra & 0x00F0) << 0) | ((bgra & 0x00F0) >> 4));
849                 dest[4 * x + 3] =
850                     static_cast<uint8_t>(((bgra & 0x000F) << 4) | ((bgra & 0x000F) >> 0));
851             }
852         }
853     }
854 }
855 
LoadRGBA8ToBGR5A1(size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)856 void LoadRGBA8ToBGR5A1(size_t width,
857                        size_t height,
858                        size_t depth,
859                        const uint8_t *input,
860                        size_t inputRowPitch,
861                        size_t inputDepthPitch,
862                        uint8_t *output,
863                        size_t outputRowPitch,
864                        size_t outputDepthPitch)
865 {
866     for (size_t z = 0; z < depth; z++)
867     {
868         for (size_t y = 0; y < height; y++)
869         {
870             const uint32_t *source =
871                 priv::OffsetDataPointer<uint32_t>(input, y, z, inputRowPitch, inputDepthPitch);
872             uint16_t *dest =
873                 priv::OffsetDataPointer<uint16_t>(output, y, z, outputRowPitch, outputDepthPitch);
874             for (size_t x = 0; x < width; x++)
875             {
876                 uint32_t rgba8 = source[x];
877                 auto r5        = static_cast<uint16_t>((rgba8 & 0x000000FF) >> 3);
878                 auto g5        = static_cast<uint16_t>((rgba8 & 0x0000FF00) >> 11);
879                 auto b5        = static_cast<uint16_t>((rgba8 & 0x00FF0000) >> 19);
880                 auto a1        = static_cast<uint16_t>((rgba8 & 0xFF000000) >> 31);
881                 dest[x]        = (a1 << 15) | (r5 << 10) | (g5 << 5) | b5;
882             }
883         }
884     }
885 }
886 
LoadRGBA8ToRGB5A1(size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)887 void LoadRGBA8ToRGB5A1(size_t width,
888                        size_t height,
889                        size_t depth,
890                        const uint8_t *input,
891                        size_t inputRowPitch,
892                        size_t inputDepthPitch,
893                        uint8_t *output,
894                        size_t outputRowPitch,
895                        size_t outputDepthPitch)
896 {
897     for (size_t z = 0; z < depth; z++)
898     {
899         for (size_t y = 0; y < height; y++)
900         {
901             const uint32_t *source =
902                 priv::OffsetDataPointer<uint32_t>(input, y, z, inputRowPitch, inputDepthPitch);
903             uint16_t *dest =
904                 priv::OffsetDataPointer<uint16_t>(output, y, z, outputRowPitch, outputDepthPitch);
905             for (size_t x = 0; x < width; x++)
906             {
907                 uint32_t rgba8 = source[x];
908                 auto r5        = static_cast<uint16_t>((rgba8 & 0x000000FF) >> 3);
909                 auto g5        = static_cast<uint16_t>((rgba8 & 0x0000FF00) >> 11);
910                 auto b5        = static_cast<uint16_t>((rgba8 & 0x00FF0000) >> 19);
911                 auto a1        = static_cast<uint16_t>((rgba8 & 0xFF000000) >> 31);
912                 dest[x]        = (r5 << 11) | (g5 << 6) | (b5 << 1) | a1;
913             }
914         }
915     }
916 }
917 
LoadRGB10A2ToBGR5A1(size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)918 void LoadRGB10A2ToBGR5A1(size_t width,
919                          size_t height,
920                          size_t depth,
921                          const uint8_t *input,
922                          size_t inputRowPitch,
923                          size_t inputDepthPitch,
924                          uint8_t *output,
925                          size_t outputRowPitch,
926                          size_t outputDepthPitch)
927 {
928     for (size_t z = 0; z < depth; z++)
929     {
930         for (size_t y = 0; y < height; y++)
931         {
932             const R10G10B10A2 *source =
933                 priv::OffsetDataPointer<R10G10B10A2>(input, y, z, inputRowPitch, inputDepthPitch);
934             uint16_t *dest =
935                 priv::OffsetDataPointer<uint16_t>(output, y, z, outputRowPitch, outputDepthPitch);
936             for (size_t x = 0; x < width; x++)
937             {
938                 R10G10B10A2 rgb10a2 = source[x];
939 
940                 uint16_t r5 = static_cast<uint16_t>(rgb10a2.R >> 5u);
941                 uint16_t g5 = static_cast<uint16_t>(rgb10a2.G >> 5u);
942                 uint16_t b5 = static_cast<uint16_t>(rgb10a2.B >> 5u);
943                 uint16_t a1 = static_cast<uint16_t>(rgb10a2.A >> 1u);
944 
945                 dest[x] = (a1 << 15) | (r5 << 10) | (g5 << 5) | b5;
946             }
947         }
948     }
949 }
950 
LoadRGB10A2ToRGB5A1(size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)951 void LoadRGB10A2ToRGB5A1(size_t width,
952                          size_t height,
953                          size_t depth,
954                          const uint8_t *input,
955                          size_t inputRowPitch,
956                          size_t inputDepthPitch,
957                          uint8_t *output,
958                          size_t outputRowPitch,
959                          size_t outputDepthPitch)
960 {
961     for (size_t z = 0; z < depth; z++)
962     {
963         for (size_t y = 0; y < height; y++)
964         {
965             const R10G10B10A2 *source =
966                 priv::OffsetDataPointer<R10G10B10A2>(input, y, z, inputRowPitch, inputDepthPitch);
967             uint16_t *dest =
968                 priv::OffsetDataPointer<uint16_t>(output, y, z, outputRowPitch, outputDepthPitch);
969             for (size_t x = 0; x < width; x++)
970             {
971                 R10G10B10A2 rgb10a2 = source[x];
972 
973                 uint16_t r5 = static_cast<uint16_t>(rgb10a2.R >> 5u);
974                 uint16_t g5 = static_cast<uint16_t>(rgb10a2.G >> 5u);
975                 uint16_t b5 = static_cast<uint16_t>(rgb10a2.B >> 5u);
976                 uint16_t a1 = static_cast<uint16_t>(rgb10a2.A >> 1u);
977 
978                 dest[x] = (r5 << 11) | (g5 << 6) | (b5 << 1) | a1;
979             }
980         }
981     }
982 }
983 
LoadRGB5A1ToA1RGB5(size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)984 void LoadRGB5A1ToA1RGB5(size_t width,
985                         size_t height,
986                         size_t depth,
987                         const uint8_t *input,
988                         size_t inputRowPitch,
989                         size_t inputDepthPitch,
990                         uint8_t *output,
991                         size_t outputRowPitch,
992                         size_t outputDepthPitch)
993 {
994     for (size_t z = 0; z < depth; z++)
995     {
996         for (size_t y = 0; y < height; y++)
997         {
998             const uint16_t *source =
999                 priv::OffsetDataPointer<uint16_t>(input, y, z, inputRowPitch, inputDepthPitch);
1000             uint16_t *dest =
1001                 priv::OffsetDataPointer<uint16_t>(output, y, z, outputRowPitch, outputDepthPitch);
1002             for (size_t x = 0; x < width; x++)
1003             {
1004                 dest[x] = ANGLE_ROTR16(source[x], 1);
1005             }
1006         }
1007     }
1008 }
1009 
LoadRGB5A1ToBGR5A1(size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)1010 void LoadRGB5A1ToBGR5A1(size_t width,
1011                         size_t height,
1012                         size_t depth,
1013                         const uint8_t *input,
1014                         size_t inputRowPitch,
1015                         size_t inputDepthPitch,
1016                         uint8_t *output,
1017                         size_t outputRowPitch,
1018                         size_t outputDepthPitch)
1019 {
1020     for (size_t z = 0; z < depth; z++)
1021     {
1022         for (size_t y = 0; y < height; y++)
1023         {
1024             const uint16_t *source =
1025                 priv::OffsetDataPointer<uint16_t>(input, y, z, inputRowPitch, inputDepthPitch);
1026             uint16_t *dest =
1027                 priv::OffsetDataPointer<uint16_t>(output, y, z, outputRowPitch, outputDepthPitch);
1028             for (size_t x = 0; x < width; x++)
1029             {
1030                 uint16_t rgba = source[x];
1031                 auto r5       = static_cast<uint16_t>((rgba & 0xF800) >> 11);
1032                 auto g5       = static_cast<uint16_t>((rgba & 0x07c0) >> 6);
1033                 auto b5       = static_cast<uint16_t>((rgba & 0x003e) >> 1);
1034                 auto a1       = static_cast<uint16_t>((rgba & 0x0001));
1035                 dest[x]       = (b5 << 11) | (g5 << 6) | (r5 << 1) | a1;
1036             }
1037         }
1038     }
1039 }
1040 
LoadRGB5A1ToBGRA8(size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)1041 void LoadRGB5A1ToBGRA8(size_t width,
1042                        size_t height,
1043                        size_t depth,
1044                        const uint8_t *input,
1045                        size_t inputRowPitch,
1046                        size_t inputDepthPitch,
1047                        uint8_t *output,
1048                        size_t outputRowPitch,
1049                        size_t outputDepthPitch)
1050 {
1051     for (size_t z = 0; z < depth; z++)
1052     {
1053         for (size_t y = 0; y < height; y++)
1054         {
1055             const uint16_t *source =
1056                 priv::OffsetDataPointer<uint16_t>(input, y, z, inputRowPitch, inputDepthPitch);
1057             uint8_t *dest =
1058                 priv::OffsetDataPointer<uint8_t>(output, y, z, outputRowPitch, outputDepthPitch);
1059             for (size_t x = 0; x < width; x++)
1060             {
1061                 uint16_t rgba = source[x];
1062                 dest[4 * x + 0] =
1063                     static_cast<uint8_t>(((rgba & 0x003E) << 2) | ((rgba & 0x003E) >> 3));
1064                 dest[4 * x + 1] =
1065                     static_cast<uint8_t>(((rgba & 0x07C0) >> 3) | ((rgba & 0x07C0) >> 8));
1066                 dest[4 * x + 2] =
1067                     static_cast<uint8_t>(((rgba & 0xF800) >> 8) | ((rgba & 0xF800) >> 13));
1068                 dest[4 * x + 3] = static_cast<uint8_t>((rgba & 0x0001) ? 0xFF : 0);
1069             }
1070         }
1071     }
1072 }
1073 
LoadRGB5A1ToRGBA8(size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)1074 void LoadRGB5A1ToRGBA8(size_t width,
1075                        size_t height,
1076                        size_t depth,
1077                        const uint8_t *input,
1078                        size_t inputRowPitch,
1079                        size_t inputDepthPitch,
1080                        uint8_t *output,
1081                        size_t outputRowPitch,
1082                        size_t outputDepthPitch)
1083 {
1084     for (size_t z = 0; z < depth; z++)
1085     {
1086         for (size_t y = 0; y < height; y++)
1087         {
1088             const uint16_t *source =
1089                 priv::OffsetDataPointer<uint16_t>(input, y, z, inputRowPitch, inputDepthPitch);
1090             uint8_t *dest =
1091                 priv::OffsetDataPointer<uint8_t>(output, y, z, outputRowPitch, outputDepthPitch);
1092             for (size_t x = 0; x < width; x++)
1093             {
1094                 uint16_t rgba = source[x];
1095                 dest[4 * x + 0] =
1096                     static_cast<uint8_t>(((rgba & 0xF800) >> 8) | ((rgba & 0xF800) >> 13));
1097                 dest[4 * x + 1] =
1098                     static_cast<uint8_t>(((rgba & 0x07C0) >> 3) | ((rgba & 0x07C0) >> 8));
1099                 dest[4 * x + 2] =
1100                     static_cast<uint8_t>(((rgba & 0x003E) << 2) | ((rgba & 0x003E) >> 3));
1101                 dest[4 * x + 3] = static_cast<uint8_t>((rgba & 0x0001) ? 0xFF : 0);
1102             }
1103         }
1104     }
1105 }
1106 
LoadBGR5A1ToBGRA8(size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)1107 void LoadBGR5A1ToBGRA8(size_t width,
1108                        size_t height,
1109                        size_t depth,
1110                        const uint8_t *input,
1111                        size_t inputRowPitch,
1112                        size_t inputDepthPitch,
1113                        uint8_t *output,
1114                        size_t outputRowPitch,
1115                        size_t outputDepthPitch)
1116 {
1117     for (size_t z = 0; z < depth; z++)
1118     {
1119         for (size_t y = 0; y < height; y++)
1120         {
1121             const uint16_t *source =
1122                 priv::OffsetDataPointer<uint16_t>(input, y, z, inputRowPitch, inputDepthPitch);
1123             uint8_t *dest =
1124                 priv::OffsetDataPointer<uint8_t>(output, y, z, outputRowPitch, outputDepthPitch);
1125             for (size_t x = 0; x < width; x++)
1126             {
1127                 uint16_t bgra = source[x];
1128                 dest[4 * x + 0] =
1129                     static_cast<uint8_t>(((bgra & 0xF800) >> 8) | ((bgra & 0xF800) >> 13));
1130                 dest[4 * x + 1] =
1131                     static_cast<uint8_t>(((bgra & 0x07C0) >> 3) | ((bgra & 0x07C0) >> 8));
1132                 dest[4 * x + 2] =
1133                     static_cast<uint8_t>(((bgra & 0x003E) << 2) | ((bgra & 0x003E) >> 3));
1134                 dest[4 * x + 3] = static_cast<uint8_t>((bgra & 0x0001) ? 0xFF : 0);
1135             }
1136         }
1137     }
1138 }
1139 
LoadRGB10A2ToRGBA8(size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)1140 void LoadRGB10A2ToRGBA8(size_t width,
1141                         size_t height,
1142                         size_t depth,
1143                         const uint8_t *input,
1144                         size_t inputRowPitch,
1145                         size_t inputDepthPitch,
1146                         uint8_t *output,
1147                         size_t outputRowPitch,
1148                         size_t outputDepthPitch)
1149 {
1150     for (size_t z = 0; z < depth; z++)
1151     {
1152         for (size_t y = 0; y < height; y++)
1153         {
1154             const uint32_t *source =
1155                 priv::OffsetDataPointer<uint32_t>(input, y, z, inputRowPitch, inputDepthPitch);
1156             uint8_t *dest =
1157                 priv::OffsetDataPointer<uint8_t>(output, y, z, outputRowPitch, outputDepthPitch);
1158             for (size_t x = 0; x < width; x++)
1159             {
1160                 uint32_t rgba   = source[x];
1161                 dest[4 * x + 0] = static_cast<uint8_t>((rgba & 0x000003FF) >> 2);
1162                 dest[4 * x + 1] = static_cast<uint8_t>((rgba & 0x000FFC00) >> 12);
1163                 dest[4 * x + 2] = static_cast<uint8_t>((rgba & 0x3FF00000) >> 22);
1164                 dest[4 * x + 3] = static_cast<uint8_t>(((rgba & 0xC0000000) >> 30) * 0x55);
1165             }
1166         }
1167     }
1168 }
1169 
LoadRGB10A2ToRGB10X2(size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)1170 void LoadRGB10A2ToRGB10X2(size_t width,
1171                           size_t height,
1172                           size_t depth,
1173                           const uint8_t *input,
1174                           size_t inputRowPitch,
1175                           size_t inputDepthPitch,
1176                           uint8_t *output,
1177                           size_t outputRowPitch,
1178                           size_t outputDepthPitch)
1179 {
1180     for (size_t z = 0; z < depth; z++)
1181     {
1182         for (size_t y = 0; y < height; y++)
1183         {
1184             const uint32_t *source =
1185                 priv::OffsetDataPointer<uint32_t>(input, y, z, inputRowPitch, inputDepthPitch);
1186             uint32_t *dest =
1187                 priv::OffsetDataPointer<uint32_t>(output, y, z, outputRowPitch, outputDepthPitch);
1188             for (size_t x = 0; x < width; x++)
1189             {
1190                 dest[x] = source[x] | 0xC0000000;
1191             }
1192         }
1193     }
1194 }
1195 
LoadRGB16FToRGB9E5(size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)1196 void LoadRGB16FToRGB9E5(size_t width,
1197                         size_t height,
1198                         size_t depth,
1199                         const uint8_t *input,
1200                         size_t inputRowPitch,
1201                         size_t inputDepthPitch,
1202                         uint8_t *output,
1203                         size_t outputRowPitch,
1204                         size_t outputDepthPitch)
1205 {
1206     for (size_t z = 0; z < depth; z++)
1207     {
1208         for (size_t y = 0; y < height; y++)
1209         {
1210             const uint16_t *source =
1211                 priv::OffsetDataPointer<uint16_t>(input, y, z, inputRowPitch, inputDepthPitch);
1212             uint32_t *dest =
1213                 priv::OffsetDataPointer<uint32_t>(output, y, z, outputRowPitch, outputDepthPitch);
1214             for (size_t x = 0; x < width; x++)
1215             {
1216                 dest[x] = gl::convertRGBFloatsTo999E5(gl::float16ToFloat32(source[x * 3 + 0]),
1217                                                       gl::float16ToFloat32(source[x * 3 + 1]),
1218                                                       gl::float16ToFloat32(source[x * 3 + 2]));
1219             }
1220         }
1221     }
1222 }
1223 
LoadRGB32FToRGB9E5(size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)1224 void LoadRGB32FToRGB9E5(size_t width,
1225                         size_t height,
1226                         size_t depth,
1227                         const uint8_t *input,
1228                         size_t inputRowPitch,
1229                         size_t inputDepthPitch,
1230                         uint8_t *output,
1231                         size_t outputRowPitch,
1232                         size_t outputDepthPitch)
1233 {
1234     for (size_t z = 0; z < depth; z++)
1235     {
1236         for (size_t y = 0; y < height; y++)
1237         {
1238             const float *source =
1239                 priv::OffsetDataPointer<float>(input, y, z, inputRowPitch, inputDepthPitch);
1240             uint32_t *dest =
1241                 priv::OffsetDataPointer<uint32_t>(output, y, z, outputRowPitch, outputDepthPitch);
1242             for (size_t x = 0; x < width; x++)
1243             {
1244                 dest[x] = gl::convertRGBFloatsTo999E5(source[x * 3 + 0], source[x * 3 + 1],
1245                                                       source[x * 3 + 2]);
1246             }
1247         }
1248     }
1249 }
1250 
LoadRGB16FToRG11B10F(size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)1251 void LoadRGB16FToRG11B10F(size_t width,
1252                           size_t height,
1253                           size_t depth,
1254                           const uint8_t *input,
1255                           size_t inputRowPitch,
1256                           size_t inputDepthPitch,
1257                           uint8_t *output,
1258                           size_t outputRowPitch,
1259                           size_t outputDepthPitch)
1260 {
1261     for (size_t z = 0; z < depth; z++)
1262     {
1263         for (size_t y = 0; y < height; y++)
1264         {
1265             const uint16_t *source =
1266                 priv::OffsetDataPointer<uint16_t>(input, y, z, inputRowPitch, inputDepthPitch);
1267             uint32_t *dest =
1268                 priv::OffsetDataPointer<uint32_t>(output, y, z, outputRowPitch, outputDepthPitch);
1269             for (size_t x = 0; x < width; x++)
1270             {
1271                 dest[x] = (gl::float32ToFloat11(gl::float16ToFloat32(source[x * 3 + 0])) << 0) |
1272                           (gl::float32ToFloat11(gl::float16ToFloat32(source[x * 3 + 1])) << 11) |
1273                           (gl::float32ToFloat10(gl::float16ToFloat32(source[x * 3 + 2])) << 22);
1274             }
1275         }
1276     }
1277 }
1278 
LoadRGB32FToRG11B10F(size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)1279 void LoadRGB32FToRG11B10F(size_t width,
1280                           size_t height,
1281                           size_t depth,
1282                           const uint8_t *input,
1283                           size_t inputRowPitch,
1284                           size_t inputDepthPitch,
1285                           uint8_t *output,
1286                           size_t outputRowPitch,
1287                           size_t outputDepthPitch)
1288 {
1289     for (size_t z = 0; z < depth; z++)
1290     {
1291         for (size_t y = 0; y < height; y++)
1292         {
1293             const float *source =
1294                 priv::OffsetDataPointer<float>(input, y, z, inputRowPitch, inputDepthPitch);
1295             uint32_t *dest =
1296                 priv::OffsetDataPointer<uint32_t>(output, y, z, outputRowPitch, outputDepthPitch);
1297             for (size_t x = 0; x < width; x++)
1298             {
1299                 dest[x] = (gl::float32ToFloat11(source[x * 3 + 0]) << 0) |
1300                           (gl::float32ToFloat11(source[x * 3 + 1]) << 11) |
1301                           (gl::float32ToFloat10(source[x * 3 + 2]) << 22);
1302             }
1303         }
1304     }
1305 }
1306 
LoadG8R24ToR24G8(size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)1307 void LoadG8R24ToR24G8(size_t width,
1308                       size_t height,
1309                       size_t depth,
1310                       const uint8_t *input,
1311                       size_t inputRowPitch,
1312                       size_t inputDepthPitch,
1313                       uint8_t *output,
1314                       size_t outputRowPitch,
1315                       size_t outputDepthPitch)
1316 {
1317     for (size_t z = 0; z < depth; z++)
1318     {
1319         for (size_t y = 0; y < height; y++)
1320         {
1321             const uint32_t *source =
1322                 priv::OffsetDataPointer<uint32_t>(input, y, z, inputRowPitch, inputDepthPitch);
1323             uint32_t *dest =
1324                 priv::OffsetDataPointer<uint32_t>(output, y, z, outputRowPitch, outputDepthPitch);
1325             for (size_t x = 0; x < width; x++)
1326             {
1327                 uint32_t d = source[x] >> 8;
1328                 uint8_t s  = source[x] & 0xFF;
1329                 dest[x]    = d | (s << 24);
1330             }
1331         }
1332     }
1333 }
1334 
LoadD24S8ToD32FS8X24(size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)1335 void LoadD24S8ToD32FS8X24(size_t width,
1336                           size_t height,
1337                           size_t depth,
1338                           const uint8_t *input,
1339                           size_t inputRowPitch,
1340                           size_t inputDepthPitch,
1341                           uint8_t *output,
1342                           size_t outputRowPitch,
1343                           size_t outputDepthPitch)
1344 {
1345     for (size_t z = 0; z < depth; z++)
1346     {
1347         for (size_t y = 0; y < height; y++)
1348         {
1349             const uint32_t *source =
1350                 priv::OffsetDataPointer<uint32_t>(input, y, z, inputRowPitch, inputDepthPitch);
1351             float *destDepth =
1352                 priv::OffsetDataPointer<float>(output, y, z, outputRowPitch, outputDepthPitch);
1353             uint32_t *destStencil =
1354                 priv::OffsetDataPointer<uint32_t>(output, y, z, outputRowPitch, outputDepthPitch) +
1355                 1;
1356             for (size_t x = 0; x < width; x++)
1357             {
1358                 destDepth[x * 2]   = (source[x] & 0xFFFFFF) / static_cast<float>(0xFFFFFF);
1359                 destStencil[x * 2] = source[x] & 0xFF000000;
1360             }
1361         }
1362     }
1363 }
1364 
LoadD24S8ToD32F(size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)1365 void LoadD24S8ToD32F(size_t width,
1366                      size_t height,
1367                      size_t depth,
1368                      const uint8_t *input,
1369                      size_t inputRowPitch,
1370                      size_t inputDepthPitch,
1371                      uint8_t *output,
1372                      size_t outputRowPitch,
1373                      size_t outputDepthPitch)
1374 {
1375     for (size_t z = 0; z < depth; z++)
1376     {
1377         for (size_t y = 0; y < height; y++)
1378         {
1379             const uint32_t *source =
1380                 priv::OffsetDataPointer<uint32_t>(input, y, z, inputRowPitch, inputDepthPitch);
1381             float *destDepth =
1382                 priv::OffsetDataPointer<float>(output, y, z, outputRowPitch, outputDepthPitch);
1383             for (size_t x = 0; x < width; x++)
1384             {
1385                 uint32_t sourcePixel = (source[x] >> 8) & 0xFFFFFF;
1386                 destDepth[x]         = sourcePixel / static_cast<float>(0xFFFFFF);
1387             }
1388         }
1389     }
1390 }
1391 
LoadD32ToD32FX32(size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)1392 void LoadD32ToD32FX32(size_t width,
1393                       size_t height,
1394                       size_t depth,
1395                       const uint8_t *input,
1396                       size_t inputRowPitch,
1397                       size_t inputDepthPitch,
1398                       uint8_t *output,
1399                       size_t outputRowPitch,
1400                       size_t outputDepthPitch)
1401 {
1402     for (size_t z = 0; z < depth; z++)
1403     {
1404         for (size_t y = 0; y < height; y++)
1405         {
1406             const uint32_t *source =
1407                 priv::OffsetDataPointer<uint32_t>(input, y, z, inputRowPitch, inputDepthPitch);
1408             float *destDepth =
1409                 priv::OffsetDataPointer<float>(output, y, z, outputRowPitch, outputDepthPitch);
1410             for (size_t x = 0; x < width; x++)
1411             {
1412                 destDepth[x * 2] = source[x] / static_cast<float>(0xFFFFFFFF);
1413             }
1414         }
1415     }
1416 }
1417 
LoadD32ToD32F(size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)1418 void LoadD32ToD32F(size_t width,
1419                    size_t height,
1420                    size_t depth,
1421                    const uint8_t *input,
1422                    size_t inputRowPitch,
1423                    size_t inputDepthPitch,
1424                    uint8_t *output,
1425                    size_t outputRowPitch,
1426                    size_t outputDepthPitch)
1427 {
1428     for (size_t z = 0; z < depth; z++)
1429     {
1430         for (size_t y = 0; y < height; y++)
1431         {
1432             const uint32_t *source =
1433                 priv::OffsetDataPointer<uint32_t>(input, y, z, inputRowPitch, inputDepthPitch);
1434             float *destDepth =
1435                 priv::OffsetDataPointer<float>(output, y, z, outputRowPitch, outputDepthPitch);
1436             for (size_t x = 0; x < width; x++)
1437             {
1438                 uint32_t sourcePixel = source[x];
1439                 destDepth[x]         = sourcePixel / static_cast<float>(0xFFFFFFFF);
1440             }
1441         }
1442     }
1443 }
1444 
LoadD32FToD32F(size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)1445 void LoadD32FToD32F(size_t width,
1446                     size_t height,
1447                     size_t depth,
1448                     const uint8_t *input,
1449                     size_t inputRowPitch,
1450                     size_t inputDepthPitch,
1451                     uint8_t *output,
1452                     size_t outputRowPitch,
1453                     size_t outputDepthPitch)
1454 {
1455     for (size_t z = 0; z < depth; z++)
1456     {
1457         for (size_t y = 0; y < height; y++)
1458         {
1459             const float *source =
1460                 priv::OffsetDataPointer<float>(input, y, z, inputRowPitch, inputDepthPitch);
1461             float *dest =
1462                 priv::OffsetDataPointer<float>(output, y, z, outputRowPitch, outputDepthPitch);
1463             for (size_t x = 0; x < width; x++)
1464             {
1465                 dest[x] = gl::clamp01(source[x]);
1466             }
1467         }
1468     }
1469 }
1470 
LoadD32FS8X24ToD24S8(size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)1471 void LoadD32FS8X24ToD24S8(size_t width,
1472                           size_t height,
1473                           size_t depth,
1474                           const uint8_t *input,
1475                           size_t inputRowPitch,
1476                           size_t inputDepthPitch,
1477                           uint8_t *output,
1478                           size_t outputRowPitch,
1479                           size_t outputDepthPitch)
1480 {
1481     for (size_t z = 0; z < depth; z++)
1482     {
1483         for (size_t y = 0; y < height; y++)
1484         {
1485             const float *sourceDepth =
1486                 priv::OffsetDataPointer<float>(input, y, z, inputRowPitch, inputDepthPitch);
1487             const uint32_t *sourceStencil =
1488                 priv::OffsetDataPointer<uint32_t>(input, y, z, inputRowPitch, inputDepthPitch) + 1;
1489             uint32_t *dest =
1490                 priv::OffsetDataPointer<uint32_t>(output, y, z, outputRowPitch, outputDepthPitch);
1491             for (size_t x = 0; x < width; x++)
1492             {
1493                 uint32_t d = static_cast<uint32_t>(gl::clamp01(sourceDepth[x * 2]) * 0xFFFFFF);
1494                 uint32_t s = sourceStencil[x * 2] & 0xFF000000;
1495                 dest[x]    = d | s;
1496             }
1497         }
1498     }
1499 }
1500 
// Extracts the low 8 bits of every 32-bit source texel and writes them as one byte per texel.
// Row starts are computed directly from the byte pitches: row y of slice z begins at
// base + y * rowPitch + z * depthPitch for both the input and the output buffer.
void LoadX24S8ToS8(size_t width,
                   size_t height,
                   size_t depth,
                   const uint8_t *input,
                   size_t inputRowPitch,
                   size_t inputDepthPitch,
                   uint8_t *output,
                   size_t outputRowPitch,
                   size_t outputDepthPitch)
{
    for (size_t slice = 0; slice < depth; ++slice)
    {
        for (size_t row = 0; row < height; ++row)
        {
            const size_t inOffset  = (row * inputRowPitch) + (slice * inputDepthPitch);
            const size_t outOffset = (row * outputRowPitch) + (slice * outputDepthPitch);

            const uint32_t *texels = reinterpret_cast<const uint32_t *>(input + inOffset);
            uint8_t *stencilRow    = output + outOffset;

            for (size_t col = 0; col < width; ++col)
            {
                stencilRow[col] = static_cast<uint8_t>(texels[col] & 0xFF);
            }
        }
    }
}
1526 
// Reads source texels made of two 32-bit words each and writes one byte per texel: the low
// 8 bits of the second word (index x * 2 + 1). The first word of every texel is ignored.
// Row starts are computed directly from the byte pitches: row y of slice z begins at
// base + y * rowPitch + z * depthPitch for both the input and the output buffer.
void LoadX32S8ToS8(size_t width,
                   size_t height,
                   size_t depth,
                   const uint8_t *input,
                   size_t inputRowPitch,
                   size_t inputDepthPitch,
                   uint8_t *output,
                   size_t outputRowPitch,
                   size_t outputDepthPitch)
{
    for (size_t slice = 0; slice < depth; ++slice)
    {
        for (size_t row = 0; row < height; ++row)
        {
            const size_t inOffset  = (row * inputRowPitch) + (slice * inputDepthPitch);
            const size_t outOffset = (row * outputRowPitch) + (slice * outputDepthPitch);

            const uint32_t *words = reinterpret_cast<const uint32_t *>(input + inOffset);
            uint8_t *stencilRow   = output + outOffset;

            for (size_t col = 0; col < width; ++col)
            {
                const size_t stencilWord = (col * 2) + 1;
                stencilRow[col]          = static_cast<uint8_t>(words[stencilWord] & 0xFF);
            }
        }
    }
}
1552 
LoadD32FS8X24ToD32F(size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)1553 void LoadD32FS8X24ToD32F(size_t width,
1554                          size_t height,
1555                          size_t depth,
1556                          const uint8_t *input,
1557                          size_t inputRowPitch,
1558                          size_t inputDepthPitch,
1559                          uint8_t *output,
1560                          size_t outputRowPitch,
1561                          size_t outputDepthPitch)
1562 {
1563     for (size_t z = 0; z < depth; z++)
1564     {
1565         for (size_t y = 0; y < height; y++)
1566         {
1567             const float *sourceDepth =
1568                 priv::OffsetDataPointer<float>(input, y, z, inputRowPitch, inputDepthPitch);
1569             float *destDepth =
1570                 priv::OffsetDataPointer<float>(output, y, z, outputRowPitch, outputDepthPitch);
1571             for (size_t x = 0; x < width; x++)
1572             {
1573                 destDepth[x] = gl::clamp01(sourceDepth[x * 2]);
1574             }
1575         }
1576     }
1577 }
1578 
LoadD32FS8X24ToD32FS8X24(size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)1579 void LoadD32FS8X24ToD32FS8X24(size_t width,
1580                               size_t height,
1581                               size_t depth,
1582                               const uint8_t *input,
1583                               size_t inputRowPitch,
1584                               size_t inputDepthPitch,
1585                               uint8_t *output,
1586                               size_t outputRowPitch,
1587                               size_t outputDepthPitch)
1588 {
1589     for (size_t z = 0; z < depth; z++)
1590     {
1591         for (size_t y = 0; y < height; y++)
1592         {
1593             const float *sourceDepth =
1594                 priv::OffsetDataPointer<float>(input, y, z, inputRowPitch, inputDepthPitch);
1595             const uint32_t *sourceStencil =
1596                 priv::OffsetDataPointer<uint32_t>(input, y, z, inputRowPitch, inputDepthPitch) + 1;
1597             float *destDepth =
1598                 priv::OffsetDataPointer<float>(output, y, z, outputRowPitch, outputDepthPitch);
1599             uint32_t *destStencil =
1600                 priv::OffsetDataPointer<uint32_t>(output, y, z, outputRowPitch, outputDepthPitch) +
1601                 1;
1602             for (size_t x = 0; x < width; x++)
1603             {
1604                 destDepth[x * 2]   = gl::clamp01(sourceDepth[x * 2]);
1605                 destStencil[x * 2] = sourceStencil[x * 2] & 0xFF000000;
1606             }
1607         }
1608     }
1609 }
1610 
LoadRGB32FToRGBA16F(size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)1611 void LoadRGB32FToRGBA16F(size_t width,
1612                          size_t height,
1613                          size_t depth,
1614                          const uint8_t *input,
1615                          size_t inputRowPitch,
1616                          size_t inputDepthPitch,
1617                          uint8_t *output,
1618                          size_t outputRowPitch,
1619                          size_t outputDepthPitch)
1620 {
1621     for (size_t z = 0; z < depth; z++)
1622     {
1623         for (size_t y = 0; y < height; y++)
1624         {
1625             const float *source =
1626                 priv::OffsetDataPointer<float>(input, y, z, inputRowPitch, inputDepthPitch);
1627             uint16_t *dest =
1628                 priv::OffsetDataPointer<uint16_t>(output, y, z, outputRowPitch, outputDepthPitch);
1629             for (size_t x = 0; x < width; x++)
1630             {
1631                 dest[x * 4 + 0] = gl::float32ToFloat16(source[x * 3 + 0]);
1632                 dest[x * 4 + 1] = gl::float32ToFloat16(source[x * 3 + 1]);
1633                 dest[x * 4 + 2] = gl::float32ToFloat16(source[x * 3 + 2]);
1634                 dest[x * 4 + 3] = gl::Float16One;
1635             }
1636         }
1637     }
1638 }
1639 
LoadRGB32FToRGB16F(size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)1640 void LoadRGB32FToRGB16F(size_t width,
1641                         size_t height,
1642                         size_t depth,
1643                         const uint8_t *input,
1644                         size_t inputRowPitch,
1645                         size_t inputDepthPitch,
1646                         uint8_t *output,
1647                         size_t outputRowPitch,
1648                         size_t outputDepthPitch)
1649 {
1650     for (size_t z = 0; z < depth; z++)
1651     {
1652         for (size_t y = 0; y < height; y++)
1653         {
1654             const float *source =
1655                 priv::OffsetDataPointer<float>(input, y, z, inputRowPitch, inputDepthPitch);
1656             uint16_t *dest =
1657                 priv::OffsetDataPointer<uint16_t>(output, y, z, outputRowPitch, outputDepthPitch);
1658             for (size_t x = 0; x < width; x++)
1659             {
1660                 dest[x * 3 + 0] = gl::float32ToFloat16(source[x * 3 + 0]);
1661                 dest[x * 3 + 1] = gl::float32ToFloat16(source[x * 3 + 1]);
1662                 dest[x * 3 + 2] = gl::float32ToFloat16(source[x * 3 + 2]);
1663             }
1664         }
1665     }
1666 }
1667 
LoadR32ToR16(size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)1668 void LoadR32ToR16(size_t width,
1669                   size_t height,
1670                   size_t depth,
1671                   const uint8_t *input,
1672                   size_t inputRowPitch,
1673                   size_t inputDepthPitch,
1674                   uint8_t *output,
1675                   size_t outputRowPitch,
1676                   size_t outputDepthPitch)
1677 {
1678     for (size_t z = 0; z < depth; z++)
1679     {
1680         for (size_t y = 0; y < height; y++)
1681         {
1682             const uint32_t *source =
1683                 priv::OffsetDataPointer<uint32_t>(input, y, z, inputRowPitch, inputDepthPitch);
1684             uint16_t *dest =
1685                 priv::OffsetDataPointer<uint16_t>(output, y, z, outputRowPitch, outputDepthPitch);
1686             for (size_t x = 0; x < width; x++)
1687             {
1688                 dest[x] = source[x] >> 16;
1689             }
1690         }
1691     }
1692 }
1693 
LoadR32ToR24G8(size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)1694 void LoadR32ToR24G8(size_t width,
1695                     size_t height,
1696                     size_t depth,
1697                     const uint8_t *input,
1698                     size_t inputRowPitch,
1699                     size_t inputDepthPitch,
1700                     uint8_t *output,
1701                     size_t outputRowPitch,
1702                     size_t outputDepthPitch)
1703 {
1704     for (size_t z = 0; z < depth; z++)
1705     {
1706         for (size_t y = 0; y < height; y++)
1707         {
1708             const uint32_t *source =
1709                 priv::OffsetDataPointer<uint32_t>(input, y, z, inputRowPitch, inputDepthPitch);
1710             uint32_t *dest =
1711                 priv::OffsetDataPointer<uint32_t>(output, y, z, outputRowPitch, outputDepthPitch);
1712 
1713             for (size_t x = 0; x < width; x++)
1714             {
1715                 dest[x] = source[x] >> 8;
1716             }
1717         }
1718     }
1719 }
1720 
1721 }  // namespace angle
1722