1 //
2 // Copyright (c) 2013-2015 The ANGLE Project Authors. All rights reserved.
3 // Use of this source code is governed by a BSD-style license that can be
4 // found in the LICENSE file.
5 //
6 
7 // angle_loadimage.cpp: Defines image loading functions.
8 
9 #include "image_util/loadimage.h"
10 
11 #include "common/mathutil.h"
12 #include "common/platform.h"
13 #include "image_util/imageformats.h"
14 
15 namespace angle
16 {
17 
LoadA8ToRGBA8(size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)18 void LoadA8ToRGBA8(size_t width,
19                    size_t height,
20                    size_t depth,
21                    const uint8_t *input,
22                    size_t inputRowPitch,
23                    size_t inputDepthPitch,
24                    uint8_t *output,
25                    size_t outputRowPitch,
26                    size_t outputDepthPitch)
27 {
28 #if defined(ANGLE_USE_SSE)
29     if (gl::supportsSSE2())
30     {
31         __m128i zeroWide = _mm_setzero_si128();
32 
33         for (size_t z = 0; z < depth; z++)
34         {
35             for (size_t y = 0; y < height; y++)
36             {
37                 const uint8_t *source =
38                     priv::OffsetDataPointer<uint8_t>(input, y, z, inputRowPitch, inputDepthPitch);
39                 uint32_t *dest = priv::OffsetDataPointer<uint32_t>(output, y, z, outputRowPitch,
40                                                                    outputDepthPitch);
41 
42                 size_t x = 0;
43 
44                 // Make output writes aligned
45                 for (; ((reinterpret_cast<intptr_t>(&dest[x]) & 0xF) != 0 && x < width); x++)
46                 {
47                     dest[x] = static_cast<uint32_t>(source[x]) << 24;
48                 }
49 
50                 for (; x + 7 < width; x += 8)
51                 {
52                     __m128i sourceData =
53                         _mm_loadl_epi64(reinterpret_cast<const __m128i *>(&source[x]));
54                     // Interleave each byte to 16bit, make the lower byte to zero
55                     sourceData = _mm_unpacklo_epi8(zeroWide, sourceData);
56                     // Interleave each 16bit to 32bit, make the lower 16bit to zero
57                     __m128i lo = _mm_unpacklo_epi16(zeroWide, sourceData);
58                     __m128i hi = _mm_unpackhi_epi16(zeroWide, sourceData);
59 
60                     _mm_store_si128(reinterpret_cast<__m128i *>(&dest[x]), lo);
61                     _mm_store_si128(reinterpret_cast<__m128i *>(&dest[x + 4]), hi);
62                 }
63 
64                 // Handle the remainder
65                 for (; x < width; x++)
66                 {
67                     dest[x] = static_cast<uint32_t>(source[x]) << 24;
68                 }
69             }
70         }
71 
72         return;
73     }
74 #endif
75 
76     for (size_t z = 0; z < depth; z++)
77     {
78         for (size_t y = 0; y < height; y++)
79         {
80             const uint8_t *source =
81                 priv::OffsetDataPointer<uint8_t>(input, y, z, inputRowPitch, inputDepthPitch);
82             uint32_t *dest =
83                 priv::OffsetDataPointer<uint32_t>(output, y, z, outputRowPitch, outputDepthPitch);
84             for (size_t x = 0; x < width; x++)
85             {
86                 dest[x] = static_cast<uint32_t>(source[x]) << 24;
87             }
88         }
89     }
90 }
91 
LoadA8ToBGRA8(size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)92 void LoadA8ToBGRA8(size_t width,
93                    size_t height,
94                    size_t depth,
95                    const uint8_t *input,
96                    size_t inputRowPitch,
97                    size_t inputDepthPitch,
98                    uint8_t *output,
99                    size_t outputRowPitch,
100                    size_t outputDepthPitch)
101 {
102     // Same as loading to RGBA
103     LoadA8ToRGBA8(width, height, depth, input, inputRowPitch, inputDepthPitch, output,
104                   outputRowPitch, outputDepthPitch);
105 }
106 
LoadA32FToRGBA32F(size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)107 void LoadA32FToRGBA32F(size_t width,
108                        size_t height,
109                        size_t depth,
110                        const uint8_t *input,
111                        size_t inputRowPitch,
112                        size_t inputDepthPitch,
113                        uint8_t *output,
114                        size_t outputRowPitch,
115                        size_t outputDepthPitch)
116 {
117     for (size_t z = 0; z < depth; z++)
118     {
119         for (size_t y = 0; y < height; y++)
120         {
121             const float *source =
122                 priv::OffsetDataPointer<float>(input, y, z, inputRowPitch, inputDepthPitch);
123             float *dest =
124                 priv::OffsetDataPointer<float>(output, y, z, outputRowPitch, outputDepthPitch);
125             for (size_t x = 0; x < width; x++)
126             {
127                 dest[4 * x + 0] = 0.0f;
128                 dest[4 * x + 1] = 0.0f;
129                 dest[4 * x + 2] = 0.0f;
130                 dest[4 * x + 3] = source[x];
131             }
132         }
133     }
134 }
135 
LoadA16FToRGBA16F(size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)136 void LoadA16FToRGBA16F(size_t width,
137                        size_t height,
138                        size_t depth,
139                        const uint8_t *input,
140                        size_t inputRowPitch,
141                        size_t inputDepthPitch,
142                        uint8_t *output,
143                        size_t outputRowPitch,
144                        size_t outputDepthPitch)
145 {
146     for (size_t z = 0; z < depth; z++)
147     {
148         for (size_t y = 0; y < height; y++)
149         {
150             const uint16_t *source =
151                 priv::OffsetDataPointer<uint16_t>(input, y, z, inputRowPitch, inputDepthPitch);
152             uint16_t *dest =
153                 priv::OffsetDataPointer<uint16_t>(output, y, z, outputRowPitch, outputDepthPitch);
154             for (size_t x = 0; x < width; x++)
155             {
156                 dest[4 * x + 0] = 0;
157                 dest[4 * x + 1] = 0;
158                 dest[4 * x + 2] = 0;
159                 dest[4 * x + 3] = source[x];
160             }
161         }
162     }
163 }
164 
LoadL8ToRGBA8(size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)165 void LoadL8ToRGBA8(size_t width,
166                    size_t height,
167                    size_t depth,
168                    const uint8_t *input,
169                    size_t inputRowPitch,
170                    size_t inputDepthPitch,
171                    uint8_t *output,
172                    size_t outputRowPitch,
173                    size_t outputDepthPitch)
174 {
175     for (size_t z = 0; z < depth; z++)
176     {
177         for (size_t y = 0; y < height; y++)
178         {
179             const uint8_t *source =
180                 priv::OffsetDataPointer<uint8_t>(input, y, z, inputRowPitch, inputDepthPitch);
181             uint8_t *dest =
182                 priv::OffsetDataPointer<uint8_t>(output, y, z, outputRowPitch, outputDepthPitch);
183             for (size_t x = 0; x < width; x++)
184             {
185                 uint8_t sourceVal = source[x];
186                 dest[4 * x + 0]   = sourceVal;
187                 dest[4 * x + 1]   = sourceVal;
188                 dest[4 * x + 2]   = sourceVal;
189                 dest[4 * x + 3]   = 0xFF;
190             }
191         }
192     }
193 }
194 
LoadL8ToBGRA8(size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)195 void LoadL8ToBGRA8(size_t width,
196                    size_t height,
197                    size_t depth,
198                    const uint8_t *input,
199                    size_t inputRowPitch,
200                    size_t inputDepthPitch,
201                    uint8_t *output,
202                    size_t outputRowPitch,
203                    size_t outputDepthPitch)
204 {
205     // Same as loading to RGBA
206     LoadL8ToRGBA8(width, height, depth, input, inputRowPitch, inputDepthPitch, output,
207                   outputRowPitch, outputDepthPitch);
208 }
209 
LoadL32FToRGBA32F(size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)210 void LoadL32FToRGBA32F(size_t width,
211                        size_t height,
212                        size_t depth,
213                        const uint8_t *input,
214                        size_t inputRowPitch,
215                        size_t inputDepthPitch,
216                        uint8_t *output,
217                        size_t outputRowPitch,
218                        size_t outputDepthPitch)
219 {
220     for (size_t z = 0; z < depth; z++)
221     {
222         for (size_t y = 0; y < height; y++)
223         {
224             const float *source =
225                 priv::OffsetDataPointer<float>(input, y, z, inputRowPitch, inputDepthPitch);
226             float *dest =
227                 priv::OffsetDataPointer<float>(output, y, z, outputRowPitch, outputDepthPitch);
228             for (size_t x = 0; x < width; x++)
229             {
230                 dest[4 * x + 0] = source[x];
231                 dest[4 * x + 1] = source[x];
232                 dest[4 * x + 2] = source[x];
233                 dest[4 * x + 3] = 1.0f;
234             }
235         }
236     }
237 }
238 
LoadL16FToRGBA16F(size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)239 void LoadL16FToRGBA16F(size_t width,
240                        size_t height,
241                        size_t depth,
242                        const uint8_t *input,
243                        size_t inputRowPitch,
244                        size_t inputDepthPitch,
245                        uint8_t *output,
246                        size_t outputRowPitch,
247                        size_t outputDepthPitch)
248 {
249     for (size_t z = 0; z < depth; z++)
250     {
251         for (size_t y = 0; y < height; y++)
252         {
253             const uint16_t *source =
254                 priv::OffsetDataPointer<uint16_t>(input, y, z, inputRowPitch, inputDepthPitch);
255             uint16_t *dest =
256                 priv::OffsetDataPointer<uint16_t>(output, y, z, outputRowPitch, outputDepthPitch);
257             for (size_t x = 0; x < width; x++)
258             {
259                 dest[4 * x + 0] = source[x];
260                 dest[4 * x + 1] = source[x];
261                 dest[4 * x + 2] = source[x];
262                 dest[4 * x + 3] = gl::Float16One;
263             }
264         }
265     }
266 }
267 
LoadLA8ToRGBA8(size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)268 void LoadLA8ToRGBA8(size_t width,
269                     size_t height,
270                     size_t depth,
271                     const uint8_t *input,
272                     size_t inputRowPitch,
273                     size_t inputDepthPitch,
274                     uint8_t *output,
275                     size_t outputRowPitch,
276                     size_t outputDepthPitch)
277 {
278     for (size_t z = 0; z < depth; z++)
279     {
280         for (size_t y = 0; y < height; y++)
281         {
282             const uint8_t *source =
283                 priv::OffsetDataPointer<uint8_t>(input, y, z, inputRowPitch, inputDepthPitch);
284             uint8_t *dest =
285                 priv::OffsetDataPointer<uint8_t>(output, y, z, outputRowPitch, outputDepthPitch);
286             for (size_t x = 0; x < width; x++)
287             {
288                 dest[4 * x + 0] = source[2 * x + 0];
289                 dest[4 * x + 1] = source[2 * x + 0];
290                 dest[4 * x + 2] = source[2 * x + 0];
291                 dest[4 * x + 3] = source[2 * x + 1];
292             }
293         }
294     }
295 }
296 
LoadLA8ToBGRA8(size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)297 void LoadLA8ToBGRA8(size_t width,
298                     size_t height,
299                     size_t depth,
300                     const uint8_t *input,
301                     size_t inputRowPitch,
302                     size_t inputDepthPitch,
303                     uint8_t *output,
304                     size_t outputRowPitch,
305                     size_t outputDepthPitch)
306 {
307     // Same as loading to RGBA
308     LoadLA8ToRGBA8(width, height, depth, input, inputRowPitch, inputDepthPitch, output,
309                    outputRowPitch, outputDepthPitch);
310 }
311 
LoadLA32FToRGBA32F(size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)312 void LoadLA32FToRGBA32F(size_t width,
313                         size_t height,
314                         size_t depth,
315                         const uint8_t *input,
316                         size_t inputRowPitch,
317                         size_t inputDepthPitch,
318                         uint8_t *output,
319                         size_t outputRowPitch,
320                         size_t outputDepthPitch)
321 {
322     for (size_t z = 0; z < depth; z++)
323     {
324         for (size_t y = 0; y < height; y++)
325         {
326             const float *source =
327                 priv::OffsetDataPointer<float>(input, y, z, inputRowPitch, inputDepthPitch);
328             float *dest =
329                 priv::OffsetDataPointer<float>(output, y, z, outputRowPitch, outputDepthPitch);
330             for (size_t x = 0; x < width; x++)
331             {
332                 dest[4 * x + 0] = source[2 * x + 0];
333                 dest[4 * x + 1] = source[2 * x + 0];
334                 dest[4 * x + 2] = source[2 * x + 0];
335                 dest[4 * x + 3] = source[2 * x + 1];
336             }
337         }
338     }
339 }
340 
LoadLA16FToRGBA16F(size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)341 void LoadLA16FToRGBA16F(size_t width,
342                         size_t height,
343                         size_t depth,
344                         const uint8_t *input,
345                         size_t inputRowPitch,
346                         size_t inputDepthPitch,
347                         uint8_t *output,
348                         size_t outputRowPitch,
349                         size_t outputDepthPitch)
350 {
351     for (size_t z = 0; z < depth; z++)
352     {
353         for (size_t y = 0; y < height; y++)
354         {
355             const uint16_t *source =
356                 priv::OffsetDataPointer<uint16_t>(input, y, z, inputRowPitch, inputDepthPitch);
357             uint16_t *dest =
358                 priv::OffsetDataPointer<uint16_t>(output, y, z, outputRowPitch, outputDepthPitch);
359             for (size_t x = 0; x < width; x++)
360             {
361                 dest[4 * x + 0] = source[2 * x + 0];
362                 dest[4 * x + 1] = source[2 * x + 0];
363                 dest[4 * x + 2] = source[2 * x + 0];
364                 dest[4 * x + 3] = source[2 * x + 1];
365             }
366         }
367     }
368 }
369 
LoadRGB8ToBGR565(size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)370 void LoadRGB8ToBGR565(size_t width,
371                       size_t height,
372                       size_t depth,
373                       const uint8_t *input,
374                       size_t inputRowPitch,
375                       size_t inputDepthPitch,
376                       uint8_t *output,
377                       size_t outputRowPitch,
378                       size_t outputDepthPitch)
379 {
380     for (size_t z = 0; z < depth; z++)
381     {
382         for (size_t y = 0; y < height; y++)
383         {
384             const uint8_t *source =
385                 priv::OffsetDataPointer<uint8_t>(input, y, z, inputRowPitch, inputDepthPitch);
386             uint16_t *dest =
387                 priv::OffsetDataPointer<uint16_t>(output, y, z, outputRowPitch, outputDepthPitch);
388             for (size_t x = 0; x < width; x++)
389             {
390                 uint8_t r8 = source[x * 3 + 0];
391                 uint8_t g8 = source[x * 3 + 1];
392                 uint8_t b8 = source[x * 3 + 2];
393                 auto r5    = static_cast<uint16_t>(r8 >> 3);
394                 auto g6    = static_cast<uint16_t>(g8 >> 2);
395                 auto b5    = static_cast<uint16_t>(b8 >> 3);
396                 dest[x]    = (r5 << 11) | (g6 << 5) | b5;
397             }
398         }
399     }
400 }
401 
LoadRGB565ToBGR565(size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)402 void LoadRGB565ToBGR565(size_t width,
403                         size_t height,
404                         size_t depth,
405                         const uint8_t *input,
406                         size_t inputRowPitch,
407                         size_t inputDepthPitch,
408                         uint8_t *output,
409                         size_t outputRowPitch,
410                         size_t outputDepthPitch)
411 {
412     for (size_t z = 0; z < depth; z++)
413     {
414         for (size_t y = 0; y < height; y++)
415         {
416             const uint16_t *source =
417                 priv::OffsetDataPointer<uint16_t>(input, y, z, inputRowPitch, inputDepthPitch);
418             uint16_t *dest =
419                 priv::OffsetDataPointer<uint16_t>(output, y, z, outputRowPitch, outputDepthPitch);
420             for (size_t x = 0; x < width; x++)
421             {
422                 // The GL type RGB is packed with with red in the MSB, while the D3D11 type BGR
423                 // is packed with red in the LSB
424                 auto rgb    = source[x];
425                 uint16_t r5 = gl::getShiftedData<5, 11>(rgb);
426                 uint16_t g6 = gl::getShiftedData<6, 5>(rgb);
427                 uint16_t b5 = gl::getShiftedData<5, 0>(rgb);
428                 dest[x]     = (r5 << 11) | (g6 << 5) | b5;
429             }
430         }
431     }
432 }
433 
LoadRGB8ToBGRX8(size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)434 void LoadRGB8ToBGRX8(size_t width,
435                      size_t height,
436                      size_t depth,
437                      const uint8_t *input,
438                      size_t inputRowPitch,
439                      size_t inputDepthPitch,
440                      uint8_t *output,
441                      size_t outputRowPitch,
442                      size_t outputDepthPitch)
443 {
444     for (size_t z = 0; z < depth; z++)
445     {
446         for (size_t y = 0; y < height; y++)
447         {
448             const uint8_t *source =
449                 priv::OffsetDataPointer<uint8_t>(input, y, z, inputRowPitch, inputDepthPitch);
450             uint8_t *dest =
451                 priv::OffsetDataPointer<uint8_t>(output, y, z, outputRowPitch, outputDepthPitch);
452             for (size_t x = 0; x < width; x++)
453             {
454                 dest[4 * x + 0] = source[x * 3 + 2];
455                 dest[4 * x + 1] = source[x * 3 + 1];
456                 dest[4 * x + 2] = source[x * 3 + 0];
457                 dest[4 * x + 3] = 0xFF;
458             }
459         }
460     }
461 }
462 
LoadRG8ToBGRX8(size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)463 void LoadRG8ToBGRX8(size_t width,
464                     size_t height,
465                     size_t depth,
466                     const uint8_t *input,
467                     size_t inputRowPitch,
468                     size_t inputDepthPitch,
469                     uint8_t *output,
470                     size_t outputRowPitch,
471                     size_t outputDepthPitch)
472 {
473     for (size_t z = 0; z < depth; z++)
474     {
475         for (size_t y = 0; y < height; y++)
476         {
477             const uint8_t *source =
478                 priv::OffsetDataPointer<uint8_t>(input, y, z, inputRowPitch, inputDepthPitch);
479             uint8_t *dest =
480                 priv::OffsetDataPointer<uint8_t>(output, y, z, outputRowPitch, outputDepthPitch);
481             for (size_t x = 0; x < width; x++)
482             {
483                 dest[4 * x + 0] = 0x00;
484                 dest[4 * x + 1] = source[x * 2 + 1];
485                 dest[4 * x + 2] = source[x * 2 + 0];
486                 dest[4 * x + 3] = 0xFF;
487             }
488         }
489     }
490 }
491 
LoadR8ToBGRX8(size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)492 void LoadR8ToBGRX8(size_t width,
493                    size_t height,
494                    size_t depth,
495                    const uint8_t *input,
496                    size_t inputRowPitch,
497                    size_t inputDepthPitch,
498                    uint8_t *output,
499                    size_t outputRowPitch,
500                    size_t outputDepthPitch)
501 {
502     for (size_t z = 0; z < depth; z++)
503     {
504         for (size_t y = 0; y < height; y++)
505         {
506             const uint8_t *source =
507                 priv::OffsetDataPointer<uint8_t>(input, y, z, inputRowPitch, inputDepthPitch);
508             uint8_t *dest =
509                 priv::OffsetDataPointer<uint8_t>(output, y, z, outputRowPitch, outputDepthPitch);
510             for (size_t x = 0; x < width; x++)
511             {
512                 dest[4 * x + 0] = 0x00;
513                 dest[4 * x + 1] = 0x00;
514                 dest[4 * x + 2] = source[x];
515                 dest[4 * x + 3] = 0xFF;
516             }
517         }
518     }
519 }
520 
LoadR5G6B5ToBGRA8(size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)521 void LoadR5G6B5ToBGRA8(size_t width,
522                        size_t height,
523                        size_t depth,
524                        const uint8_t *input,
525                        size_t inputRowPitch,
526                        size_t inputDepthPitch,
527                        uint8_t *output,
528                        size_t outputRowPitch,
529                        size_t outputDepthPitch)
530 {
531     for (size_t z = 0; z < depth; z++)
532     {
533         for (size_t y = 0; y < height; y++)
534         {
535             const uint16_t *source =
536                 priv::OffsetDataPointer<uint16_t>(input, y, z, inputRowPitch, inputDepthPitch);
537             uint8_t *dest =
538                 priv::OffsetDataPointer<uint8_t>(output, y, z, outputRowPitch, outputDepthPitch);
539             for (size_t x = 0; x < width; x++)
540             {
541                 uint16_t rgb = source[x];
542                 dest[4 * x + 0] =
543                     static_cast<uint8_t>(((rgb & 0x001F) << 3) | ((rgb & 0x001F) >> 2));
544                 dest[4 * x + 1] =
545                     static_cast<uint8_t>(((rgb & 0x07E0) >> 3) | ((rgb & 0x07E0) >> 9));
546                 dest[4 * x + 2] =
547                     static_cast<uint8_t>(((rgb & 0xF800) >> 8) | ((rgb & 0xF800) >> 13));
548                 dest[4 * x + 3] = 0xFF;
549             }
550         }
551     }
552 }
553 
LoadR5G6B5ToRGBA8(size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)554 void LoadR5G6B5ToRGBA8(size_t width,
555                        size_t height,
556                        size_t depth,
557                        const uint8_t *input,
558                        size_t inputRowPitch,
559                        size_t inputDepthPitch,
560                        uint8_t *output,
561                        size_t outputRowPitch,
562                        size_t outputDepthPitch)
563 {
564     for (size_t z = 0; z < depth; z++)
565     {
566         for (size_t y = 0; y < height; y++)
567         {
568             const uint16_t *source =
569                 priv::OffsetDataPointer<uint16_t>(input, y, z, inputRowPitch, inputDepthPitch);
570             uint8_t *dest =
571                 priv::OffsetDataPointer<uint8_t>(output, y, z, outputRowPitch, outputDepthPitch);
572             for (size_t x = 0; x < width; x++)
573             {
574                 uint16_t rgb = source[x];
575                 dest[4 * x + 0] =
576                     static_cast<uint8_t>(((rgb & 0xF800) >> 8) | ((rgb & 0xF800) >> 13));
577                 dest[4 * x + 1] =
578                     static_cast<uint8_t>(((rgb & 0x07E0) >> 3) | ((rgb & 0x07E0) >> 9));
579                 dest[4 * x + 2] =
580                     static_cast<uint8_t>(((rgb & 0x001F) << 3) | ((rgb & 0x001F) >> 2));
581                 dest[4 * x + 3] = 0xFF;
582             }
583         }
584     }
585 }
586 
LoadRGBA8ToBGRA8(size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)587 void LoadRGBA8ToBGRA8(size_t width,
588                       size_t height,
589                       size_t depth,
590                       const uint8_t *input,
591                       size_t inputRowPitch,
592                       size_t inputDepthPitch,
593                       uint8_t *output,
594                       size_t outputRowPitch,
595                       size_t outputDepthPitch)
596 {
597 #if defined(ANGLE_USE_SSE)
598     if (gl::supportsSSE2())
599     {
600         __m128i brMask = _mm_set1_epi32(0x00ff00ff);
601 
602         for (size_t z = 0; z < depth; z++)
603         {
604             for (size_t y = 0; y < height; y++)
605             {
606                 const uint32_t *source =
607                     priv::OffsetDataPointer<uint32_t>(input, y, z, inputRowPitch, inputDepthPitch);
608                 uint32_t *dest = priv::OffsetDataPointer<uint32_t>(output, y, z, outputRowPitch,
609                                                                    outputDepthPitch);
610 
611                 size_t x = 0;
612 
613                 // Make output writes aligned
614                 for (; ((reinterpret_cast<intptr_t>(&dest[x]) & 15) != 0) && x < width; x++)
615                 {
616                     uint32_t rgba = source[x];
617                     dest[x]       = (ANGLE_ROTL(rgba, 16) & 0x00ff00ff) | (rgba & 0xff00ff00);
618                 }
619 
620                 for (; x + 3 < width; x += 4)
621                 {
622                     __m128i sourceData =
623                         _mm_loadu_si128(reinterpret_cast<const __m128i *>(&source[x]));
624                     // Mask out g and a, which don't change
625                     __m128i gaComponents = _mm_andnot_si128(brMask, sourceData);
626                     // Mask out b and r
627                     __m128i brComponents = _mm_and_si128(sourceData, brMask);
628                     // Swap b and r
629                     __m128i brSwapped = _mm_shufflehi_epi16(
630                         _mm_shufflelo_epi16(brComponents, _MM_SHUFFLE(2, 3, 0, 1)),
631                         _MM_SHUFFLE(2, 3, 0, 1));
632                     __m128i result = _mm_or_si128(gaComponents, brSwapped);
633                     _mm_store_si128(reinterpret_cast<__m128i *>(&dest[x]), result);
634                 }
635 
636                 // Perform leftover writes
637                 for (; x < width; x++)
638                 {
639                     uint32_t rgba = source[x];
640                     dest[x]       = (ANGLE_ROTL(rgba, 16) & 0x00ff00ff) | (rgba & 0xff00ff00);
641                 }
642             }
643         }
644 
645         return;
646     }
647 #endif
648 
649     for (size_t z = 0; z < depth; z++)
650     {
651         for (size_t y = 0; y < height; y++)
652         {
653             const uint32_t *source =
654                 priv::OffsetDataPointer<uint32_t>(input, y, z, inputRowPitch, inputDepthPitch);
655             uint32_t *dest =
656                 priv::OffsetDataPointer<uint32_t>(output, y, z, outputRowPitch, outputDepthPitch);
657             for (size_t x = 0; x < width; x++)
658             {
659                 uint32_t rgba = source[x];
660                 dest[x]       = (ANGLE_ROTL(rgba, 16) & 0x00ff00ff) | (rgba & 0xff00ff00);
661             }
662         }
663     }
664 }
665 
LoadRGBA8ToBGRA4(size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)666 void LoadRGBA8ToBGRA4(size_t width,
667                       size_t height,
668                       size_t depth,
669                       const uint8_t *input,
670                       size_t inputRowPitch,
671                       size_t inputDepthPitch,
672                       uint8_t *output,
673                       size_t outputRowPitch,
674                       size_t outputDepthPitch)
675 {
676     for (size_t z = 0; z < depth; z++)
677     {
678         for (size_t y = 0; y < height; y++)
679         {
680             const uint32_t *source =
681                 priv::OffsetDataPointer<uint32_t>(input, y, z, inputRowPitch, inputDepthPitch);
682             uint16_t *dest =
683                 priv::OffsetDataPointer<uint16_t>(output, y, z, outputRowPitch, outputDepthPitch);
684             for (size_t x = 0; x < width; x++)
685             {
686                 uint32_t rgba8 = source[x];
687                 auto r4        = static_cast<uint16_t>((rgba8 & 0x000000FF) >> 4);
688                 auto g4        = static_cast<uint16_t>((rgba8 & 0x0000FF00) >> 12);
689                 auto b4        = static_cast<uint16_t>((rgba8 & 0x00FF0000) >> 20);
690                 auto a4        = static_cast<uint16_t>((rgba8 & 0xFF000000) >> 28);
691                 dest[x]        = (a4 << 12) | (r4 << 8) | (g4 << 4) | b4;
692             }
693         }
694     }
695 }
696 
LoadRGBA4ToARGB4(size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)697 void LoadRGBA4ToARGB4(size_t width,
698                       size_t height,
699                       size_t depth,
700                       const uint8_t *input,
701                       size_t inputRowPitch,
702                       size_t inputDepthPitch,
703                       uint8_t *output,
704                       size_t outputRowPitch,
705                       size_t outputDepthPitch)
706 {
707     for (size_t z = 0; z < depth; z++)
708     {
709         for (size_t y = 0; y < height; y++)
710         {
711             const uint16_t *source =
712                 priv::OffsetDataPointer<uint16_t>(input, y, z, inputRowPitch, inputDepthPitch);
713             uint16_t *dest =
714                 priv::OffsetDataPointer<uint16_t>(output, y, z, outputRowPitch, outputDepthPitch);
715             for (size_t x = 0; x < width; x++)
716             {
717                 dest[x] = ANGLE_ROTR16(source[x], 4);
718             }
719         }
720     }
721 }
722 
LoadRGBA4ToBGRA8(size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)723 void LoadRGBA4ToBGRA8(size_t width,
724                       size_t height,
725                       size_t depth,
726                       const uint8_t *input,
727                       size_t inputRowPitch,
728                       size_t inputDepthPitch,
729                       uint8_t *output,
730                       size_t outputRowPitch,
731                       size_t outputDepthPitch)
732 {
733     for (size_t z = 0; z < depth; z++)
734     {
735         for (size_t y = 0; y < height; y++)
736         {
737             const uint16_t *source =
738                 priv::OffsetDataPointer<uint16_t>(input, y, z, inputRowPitch, inputDepthPitch);
739             uint8_t *dest =
740                 priv::OffsetDataPointer<uint8_t>(output, y, z, outputRowPitch, outputDepthPitch);
741             for (size_t x = 0; x < width; x++)
742             {
743                 uint16_t rgba = source[x];
744                 dest[4 * x + 0] =
745                     static_cast<uint8_t>(((rgba & 0x00F0) << 0) | ((rgba & 0x00F0) >> 4));
746                 dest[4 * x + 1] =
747                     static_cast<uint8_t>(((rgba & 0x0F00) >> 4) | ((rgba & 0x0F00) >> 8));
748                 dest[4 * x + 2] =
749                     static_cast<uint8_t>(((rgba & 0xF000) >> 8) | ((rgba & 0xF000) >> 12));
750                 dest[4 * x + 3] =
751                     static_cast<uint8_t>(((rgba & 0x000F) << 4) | ((rgba & 0x000F) >> 0));
752             }
753         }
754     }
755 }
756 
LoadRGBA4ToRGBA8(size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)757 void LoadRGBA4ToRGBA8(size_t width,
758                       size_t height,
759                       size_t depth,
760                       const uint8_t *input,
761                       size_t inputRowPitch,
762                       size_t inputDepthPitch,
763                       uint8_t *output,
764                       size_t outputRowPitch,
765                       size_t outputDepthPitch)
766 {
767     for (size_t z = 0; z < depth; z++)
768     {
769         for (size_t y = 0; y < height; y++)
770         {
771             const uint16_t *source =
772                 priv::OffsetDataPointer<uint16_t>(input, y, z, inputRowPitch, inputDepthPitch);
773             uint8_t *dest =
774                 priv::OffsetDataPointer<uint8_t>(output, y, z, outputRowPitch, outputDepthPitch);
775             for (size_t x = 0; x < width; x++)
776             {
777                 uint16_t rgba = source[x];
778                 dest[4 * x + 0] =
779                     static_cast<uint8_t>(((rgba & 0xF000) >> 8) | ((rgba & 0xF000) >> 12));
780                 dest[4 * x + 1] =
781                     static_cast<uint8_t>(((rgba & 0x0F00) >> 4) | ((rgba & 0x0F00) >> 8));
782                 dest[4 * x + 2] =
783                     static_cast<uint8_t>(((rgba & 0x00F0) << 0) | ((rgba & 0x00F0) >> 4));
784                 dest[4 * x + 3] =
785                     static_cast<uint8_t>(((rgba & 0x000F) << 4) | ((rgba & 0x000F) >> 0));
786             }
787         }
788     }
789 }
790 
LoadBGRA4ToBGRA8(size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)791 void LoadBGRA4ToBGRA8(size_t width,
792                       size_t height,
793                       size_t depth,
794                       const uint8_t *input,
795                       size_t inputRowPitch,
796                       size_t inputDepthPitch,
797                       uint8_t *output,
798                       size_t outputRowPitch,
799                       size_t outputDepthPitch)
800 {
801     for (size_t z = 0; z < depth; z++)
802     {
803         for (size_t y = 0; y < height; y++)
804         {
805             const uint16_t *source =
806                 priv::OffsetDataPointer<uint16_t>(input, y, z, inputRowPitch, inputDepthPitch);
807             uint8_t *dest =
808                 priv::OffsetDataPointer<uint8_t>(output, y, z, outputRowPitch, outputDepthPitch);
809             for (size_t x = 0; x < width; x++)
810             {
811                 uint16_t bgra = source[x];
812                 dest[4 * x + 0] =
813                     static_cast<uint8_t>(((bgra & 0xF000) >> 8) | ((bgra & 0xF000) >> 12));
814                 dest[4 * x + 1] =
815                     static_cast<uint8_t>(((bgra & 0x0F00) >> 4) | ((bgra & 0x0F00) >> 8));
816                 dest[4 * x + 2] =
817                     static_cast<uint8_t>(((bgra & 0x00F0) << 0) | ((bgra & 0x00F0) >> 4));
818                 dest[4 * x + 3] =
819                     static_cast<uint8_t>(((bgra & 0x000F) << 4) | ((bgra & 0x000F) >> 0));
820             }
821         }
822     }
823 }
824 
LoadRGBA8ToBGR5A1(size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)825 void LoadRGBA8ToBGR5A1(size_t width,
826                        size_t height,
827                        size_t depth,
828                        const uint8_t *input,
829                        size_t inputRowPitch,
830                        size_t inputDepthPitch,
831                        uint8_t *output,
832                        size_t outputRowPitch,
833                        size_t outputDepthPitch)
834 {
835     for (size_t z = 0; z < depth; z++)
836     {
837         for (size_t y = 0; y < height; y++)
838         {
839             const uint32_t *source =
840                 priv::OffsetDataPointer<uint32_t>(input, y, z, inputRowPitch, inputDepthPitch);
841             uint16_t *dest =
842                 priv::OffsetDataPointer<uint16_t>(output, y, z, outputRowPitch, outputDepthPitch);
843             for (size_t x = 0; x < width; x++)
844             {
845                 uint32_t rgba8 = source[x];
846                 auto r5        = static_cast<uint16_t>((rgba8 & 0x000000FF) >> 3);
847                 auto g5        = static_cast<uint16_t>((rgba8 & 0x0000FF00) >> 11);
848                 auto b5        = static_cast<uint16_t>((rgba8 & 0x00FF0000) >> 19);
849                 auto a1        = static_cast<uint16_t>((rgba8 & 0xFF000000) >> 31);
850                 dest[x]        = (a1 << 15) | (r5 << 10) | (g5 << 5) | b5;
851             }
852         }
853     }
854 }
855 
LoadRGB10A2ToBGR5A1(size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)856 void LoadRGB10A2ToBGR5A1(size_t width,
857                          size_t height,
858                          size_t depth,
859                          const uint8_t *input,
860                          size_t inputRowPitch,
861                          size_t inputDepthPitch,
862                          uint8_t *output,
863                          size_t outputRowPitch,
864                          size_t outputDepthPitch)
865 {
866     for (size_t z = 0; z < depth; z++)
867     {
868         for (size_t y = 0; y < height; y++)
869         {
870             const R10G10B10A2 *source =
871                 priv::OffsetDataPointer<R10G10B10A2>(input, y, z, inputRowPitch, inputDepthPitch);
872             uint16_t *dest =
873                 priv::OffsetDataPointer<uint16_t>(output, y, z, outputRowPitch, outputDepthPitch);
874             for (size_t x = 0; x < width; x++)
875             {
876                 R10G10B10A2 rgb10a2 = source[x];
877 
878                 uint16_t r5 = static_cast<uint16_t>(rgb10a2.R >> 5u);
879                 uint16_t g5 = static_cast<uint16_t>(rgb10a2.G >> 5u);
880                 uint16_t b5 = static_cast<uint16_t>(rgb10a2.B >> 5u);
881                 uint16_t a1 = static_cast<uint16_t>(rgb10a2.A >> 1u);
882 
883                 dest[x] = (a1 << 15) | (r5 << 10) | (g5 << 5) | b5;
884             }
885         }
886     }
887 }
888 
LoadRGB5A1ToA1RGB5(size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)889 void LoadRGB5A1ToA1RGB5(size_t width,
890                         size_t height,
891                         size_t depth,
892                         const uint8_t *input,
893                         size_t inputRowPitch,
894                         size_t inputDepthPitch,
895                         uint8_t *output,
896                         size_t outputRowPitch,
897                         size_t outputDepthPitch)
898 {
899     for (size_t z = 0; z < depth; z++)
900     {
901         for (size_t y = 0; y < height; y++)
902         {
903             const uint16_t *source =
904                 priv::OffsetDataPointer<uint16_t>(input, y, z, inputRowPitch, inputDepthPitch);
905             uint16_t *dest =
906                 priv::OffsetDataPointer<uint16_t>(output, y, z, outputRowPitch, outputDepthPitch);
907             for (size_t x = 0; x < width; x++)
908             {
909                 dest[x] = ANGLE_ROTR16(source[x], 1);
910             }
911         }
912     }
913 }
914 
LoadRGB5A1ToBGRA8(size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)915 void LoadRGB5A1ToBGRA8(size_t width,
916                        size_t height,
917                        size_t depth,
918                        const uint8_t *input,
919                        size_t inputRowPitch,
920                        size_t inputDepthPitch,
921                        uint8_t *output,
922                        size_t outputRowPitch,
923                        size_t outputDepthPitch)
924 {
925     for (size_t z = 0; z < depth; z++)
926     {
927         for (size_t y = 0; y < height; y++)
928         {
929             const uint16_t *source =
930                 priv::OffsetDataPointer<uint16_t>(input, y, z, inputRowPitch, inputDepthPitch);
931             uint8_t *dest =
932                 priv::OffsetDataPointer<uint8_t>(output, y, z, outputRowPitch, outputDepthPitch);
933             for (size_t x = 0; x < width; x++)
934             {
935                 uint16_t rgba = source[x];
936                 dest[4 * x + 0] =
937                     static_cast<uint8_t>(((rgba & 0x003E) << 2) | ((rgba & 0x003E) >> 3));
938                 dest[4 * x + 1] =
939                     static_cast<uint8_t>(((rgba & 0x07C0) >> 3) | ((rgba & 0x07C0) >> 8));
940                 dest[4 * x + 2] =
941                     static_cast<uint8_t>(((rgba & 0xF800) >> 8) | ((rgba & 0xF800) >> 13));
942                 dest[4 * x + 3] = static_cast<uint8_t>((rgba & 0x0001) ? 0xFF : 0);
943             }
944         }
945     }
946 }
947 
LoadRGB5A1ToRGBA8(size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)948 void LoadRGB5A1ToRGBA8(size_t width,
949                        size_t height,
950                        size_t depth,
951                        const uint8_t *input,
952                        size_t inputRowPitch,
953                        size_t inputDepthPitch,
954                        uint8_t *output,
955                        size_t outputRowPitch,
956                        size_t outputDepthPitch)
957 {
958     for (size_t z = 0; z < depth; z++)
959     {
960         for (size_t y = 0; y < height; y++)
961         {
962             const uint16_t *source =
963                 priv::OffsetDataPointer<uint16_t>(input, y, z, inputRowPitch, inputDepthPitch);
964             uint8_t *dest =
965                 priv::OffsetDataPointer<uint8_t>(output, y, z, outputRowPitch, outputDepthPitch);
966             for (size_t x = 0; x < width; x++)
967             {
968                 uint16_t rgba = source[x];
969                 dest[4 * x + 0] =
970                     static_cast<uint8_t>(((rgba & 0xF800) >> 8) | ((rgba & 0xF800) >> 13));
971                 dest[4 * x + 1] =
972                     static_cast<uint8_t>(((rgba & 0x07C0) >> 3) | ((rgba & 0x07C0) >> 8));
973                 dest[4 * x + 2] =
974                     static_cast<uint8_t>(((rgba & 0x003E) << 2) | ((rgba & 0x003E) >> 3));
975                 dest[4 * x + 3] = static_cast<uint8_t>((rgba & 0x0001) ? 0xFF : 0);
976             }
977         }
978     }
979 }
980 
LoadBGR5A1ToBGRA8(size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)981 void LoadBGR5A1ToBGRA8(size_t width,
982                        size_t height,
983                        size_t depth,
984                        const uint8_t *input,
985                        size_t inputRowPitch,
986                        size_t inputDepthPitch,
987                        uint8_t *output,
988                        size_t outputRowPitch,
989                        size_t outputDepthPitch)
990 {
991     for (size_t z = 0; z < depth; z++)
992     {
993         for (size_t y = 0; y < height; y++)
994         {
995             const uint16_t *source =
996                 priv::OffsetDataPointer<uint16_t>(input, y, z, inputRowPitch, inputDepthPitch);
997             uint8_t *dest =
998                 priv::OffsetDataPointer<uint8_t>(output, y, z, outputRowPitch, outputDepthPitch);
999             for (size_t x = 0; x < width; x++)
1000             {
1001                 uint16_t bgra = source[x];
1002                 dest[4 * x + 0] =
1003                     static_cast<uint8_t>(((bgra & 0xF800) >> 8) | ((bgra & 0xF800) >> 13));
1004                 dest[4 * x + 1] =
1005                     static_cast<uint8_t>(((bgra & 0x07C0) >> 3) | ((bgra & 0x07C0) >> 8));
1006                 dest[4 * x + 2] =
1007                     static_cast<uint8_t>(((bgra & 0x003E) << 2) | ((bgra & 0x003E) >> 3));
1008                 dest[4 * x + 3] = static_cast<uint8_t>((bgra & 0x0001) ? 0xFF : 0);
1009             }
1010         }
1011     }
1012 }
1013 
LoadRGB10A2ToRGBA8(size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)1014 void LoadRGB10A2ToRGBA8(size_t width,
1015                         size_t height,
1016                         size_t depth,
1017                         const uint8_t *input,
1018                         size_t inputRowPitch,
1019                         size_t inputDepthPitch,
1020                         uint8_t *output,
1021                         size_t outputRowPitch,
1022                         size_t outputDepthPitch)
1023 {
1024     for (size_t z = 0; z < depth; z++)
1025     {
1026         for (size_t y = 0; y < height; y++)
1027         {
1028             const uint32_t *source =
1029                 priv::OffsetDataPointer<uint32_t>(input, y, z, inputRowPitch, inputDepthPitch);
1030             uint8_t *dest =
1031                 priv::OffsetDataPointer<uint8_t>(output, y, z, outputRowPitch, outputDepthPitch);
1032             for (size_t x = 0; x < width; x++)
1033             {
1034                 uint32_t rgba   = source[x];
1035                 dest[4 * x + 0] = static_cast<uint8_t>((rgba & 0x000003FF) >> 2);
1036                 dest[4 * x + 1] = static_cast<uint8_t>((rgba & 0x000FFC00) >> 12);
1037                 dest[4 * x + 2] = static_cast<uint8_t>((rgba & 0x3FF00000) >> 22);
1038                 dest[4 * x + 3] = static_cast<uint8_t>(((rgba & 0xC0000000) >> 30) * 0x55);
1039             }
1040         }
1041     }
1042 }
1043 
LoadRGB16FToRGB9E5(size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)1044 void LoadRGB16FToRGB9E5(size_t width,
1045                         size_t height,
1046                         size_t depth,
1047                         const uint8_t *input,
1048                         size_t inputRowPitch,
1049                         size_t inputDepthPitch,
1050                         uint8_t *output,
1051                         size_t outputRowPitch,
1052                         size_t outputDepthPitch)
1053 {
1054     for (size_t z = 0; z < depth; z++)
1055     {
1056         for (size_t y = 0; y < height; y++)
1057         {
1058             const uint16_t *source =
1059                 priv::OffsetDataPointer<uint16_t>(input, y, z, inputRowPitch, inputDepthPitch);
1060             uint32_t *dest =
1061                 priv::OffsetDataPointer<uint32_t>(output, y, z, outputRowPitch, outputDepthPitch);
1062             for (size_t x = 0; x < width; x++)
1063             {
1064                 dest[x] = gl::convertRGBFloatsTo999E5(gl::float16ToFloat32(source[x * 3 + 0]),
1065                                                       gl::float16ToFloat32(source[x * 3 + 1]),
1066                                                       gl::float16ToFloat32(source[x * 3 + 2]));
1067             }
1068         }
1069     }
1070 }
1071 
LoadRGB32FToRGB9E5(size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)1072 void LoadRGB32FToRGB9E5(size_t width,
1073                         size_t height,
1074                         size_t depth,
1075                         const uint8_t *input,
1076                         size_t inputRowPitch,
1077                         size_t inputDepthPitch,
1078                         uint8_t *output,
1079                         size_t outputRowPitch,
1080                         size_t outputDepthPitch)
1081 {
1082     for (size_t z = 0; z < depth; z++)
1083     {
1084         for (size_t y = 0; y < height; y++)
1085         {
1086             const float *source =
1087                 priv::OffsetDataPointer<float>(input, y, z, inputRowPitch, inputDepthPitch);
1088             uint32_t *dest =
1089                 priv::OffsetDataPointer<uint32_t>(output, y, z, outputRowPitch, outputDepthPitch);
1090             for (size_t x = 0; x < width; x++)
1091             {
1092                 dest[x] = gl::convertRGBFloatsTo999E5(source[x * 3 + 0], source[x * 3 + 1],
1093                                                       source[x * 3 + 2]);
1094             }
1095         }
1096     }
1097 }
1098 
LoadRGB16FToRG11B10F(size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)1099 void LoadRGB16FToRG11B10F(size_t width,
1100                           size_t height,
1101                           size_t depth,
1102                           const uint8_t *input,
1103                           size_t inputRowPitch,
1104                           size_t inputDepthPitch,
1105                           uint8_t *output,
1106                           size_t outputRowPitch,
1107                           size_t outputDepthPitch)
1108 {
1109     for (size_t z = 0; z < depth; z++)
1110     {
1111         for (size_t y = 0; y < height; y++)
1112         {
1113             const uint16_t *source =
1114                 priv::OffsetDataPointer<uint16_t>(input, y, z, inputRowPitch, inputDepthPitch);
1115             uint32_t *dest =
1116                 priv::OffsetDataPointer<uint32_t>(output, y, z, outputRowPitch, outputDepthPitch);
1117             for (size_t x = 0; x < width; x++)
1118             {
1119                 dest[x] = (gl::float32ToFloat11(gl::float16ToFloat32(source[x * 3 + 0])) << 0) |
1120                           (gl::float32ToFloat11(gl::float16ToFloat32(source[x * 3 + 1])) << 11) |
1121                           (gl::float32ToFloat10(gl::float16ToFloat32(source[x * 3 + 2])) << 22);
1122             }
1123         }
1124     }
1125 }
1126 
LoadRGB32FToRG11B10F(size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)1127 void LoadRGB32FToRG11B10F(size_t width,
1128                           size_t height,
1129                           size_t depth,
1130                           const uint8_t *input,
1131                           size_t inputRowPitch,
1132                           size_t inputDepthPitch,
1133                           uint8_t *output,
1134                           size_t outputRowPitch,
1135                           size_t outputDepthPitch)
1136 {
1137     for (size_t z = 0; z < depth; z++)
1138     {
1139         for (size_t y = 0; y < height; y++)
1140         {
1141             const float *source =
1142                 priv::OffsetDataPointer<float>(input, y, z, inputRowPitch, inputDepthPitch);
1143             uint32_t *dest =
1144                 priv::OffsetDataPointer<uint32_t>(output, y, z, outputRowPitch, outputDepthPitch);
1145             for (size_t x = 0; x < width; x++)
1146             {
1147                 dest[x] = (gl::float32ToFloat11(source[x * 3 + 0]) << 0) |
1148                           (gl::float32ToFloat11(source[x * 3 + 1]) << 11) |
1149                           (gl::float32ToFloat10(source[x * 3 + 2]) << 22);
1150             }
1151         }
1152     }
1153 }
1154 
LoadG8R24ToR24G8(size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)1155 void LoadG8R24ToR24G8(size_t width,
1156                       size_t height,
1157                       size_t depth,
1158                       const uint8_t *input,
1159                       size_t inputRowPitch,
1160                       size_t inputDepthPitch,
1161                       uint8_t *output,
1162                       size_t outputRowPitch,
1163                       size_t outputDepthPitch)
1164 {
1165     for (size_t z = 0; z < depth; z++)
1166     {
1167         for (size_t y = 0; y < height; y++)
1168         {
1169             const uint32_t *source =
1170                 priv::OffsetDataPointer<uint32_t>(input, y, z, inputRowPitch, inputDepthPitch);
1171             uint32_t *dest =
1172                 priv::OffsetDataPointer<uint32_t>(output, y, z, outputRowPitch, outputDepthPitch);
1173             for (size_t x = 0; x < width; x++)
1174             {
1175                 uint32_t d = source[x] >> 8;
1176                 uint8_t s  = source[x] & 0xFF;
1177                 dest[x]    = d | (s << 24);
1178             }
1179         }
1180     }
1181 }
1182 
LoadD32FToD32F(size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)1183 void LoadD32FToD32F(size_t width,
1184                     size_t height,
1185                     size_t depth,
1186                     const uint8_t *input,
1187                     size_t inputRowPitch,
1188                     size_t inputDepthPitch,
1189                     uint8_t *output,
1190                     size_t outputRowPitch,
1191                     size_t outputDepthPitch)
1192 {
1193     for (size_t z = 0; z < depth; z++)
1194     {
1195         for (size_t y = 0; y < height; y++)
1196         {
1197             const float *source =
1198                 priv::OffsetDataPointer<float>(input, y, z, inputRowPitch, inputDepthPitch);
1199             float *dest =
1200                 priv::OffsetDataPointer<float>(output, y, z, outputRowPitch, outputDepthPitch);
1201             for (size_t x = 0; x < width; x++)
1202             {
1203                 dest[x] = gl::clamp01(source[x]);
1204             }
1205         }
1206     }
1207 }
1208 
LoadD32FS8X24ToD32FS8X24(size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)1209 void LoadD32FS8X24ToD32FS8X24(size_t width,
1210                               size_t height,
1211                               size_t depth,
1212                               const uint8_t *input,
1213                               size_t inputRowPitch,
1214                               size_t inputDepthPitch,
1215                               uint8_t *output,
1216                               size_t outputRowPitch,
1217                               size_t outputDepthPitch)
1218 {
1219     for (size_t z = 0; z < depth; z++)
1220     {
1221         for (size_t y = 0; y < height; y++)
1222         {
1223             const float *sourceDepth =
1224                 priv::OffsetDataPointer<float>(input, y, z, inputRowPitch, inputDepthPitch);
1225             const uint32_t *sourceStencil =
1226                 priv::OffsetDataPointer<uint32_t>(input, y, z, inputRowPitch, inputDepthPitch) + 1;
1227             float *destDepth =
1228                 priv::OffsetDataPointer<float>(output, y, z, outputRowPitch, outputDepthPitch);
1229             uint32_t *destStencil =
1230                 priv::OffsetDataPointer<uint32_t>(output, y, z, outputRowPitch, outputDepthPitch) +
1231                 1;
1232             for (size_t x = 0; x < width; x++)
1233             {
1234                 destDepth[x * 2]   = gl::clamp01(sourceDepth[x * 2]);
1235                 destStencil[x * 2] = sourceStencil[x * 2] & 0xFF000000;
1236             }
1237         }
1238     }
1239 }
1240 
LoadRGB32FToRGBA16F(size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)1241 void LoadRGB32FToRGBA16F(size_t width,
1242                          size_t height,
1243                          size_t depth,
1244                          const uint8_t *input,
1245                          size_t inputRowPitch,
1246                          size_t inputDepthPitch,
1247                          uint8_t *output,
1248                          size_t outputRowPitch,
1249                          size_t outputDepthPitch)
1250 {
1251     for (size_t z = 0; z < depth; z++)
1252     {
1253         for (size_t y = 0; y < height; y++)
1254         {
1255             const float *source =
1256                 priv::OffsetDataPointer<float>(input, y, z, inputRowPitch, inputDepthPitch);
1257             uint16_t *dest =
1258                 priv::OffsetDataPointer<uint16_t>(output, y, z, outputRowPitch, outputDepthPitch);
1259             for (size_t x = 0; x < width; x++)
1260             {
1261                 dest[x * 4 + 0] = gl::float32ToFloat16(source[x * 3 + 0]);
1262                 dest[x * 4 + 1] = gl::float32ToFloat16(source[x * 3 + 1]);
1263                 dest[x * 4 + 2] = gl::float32ToFloat16(source[x * 3 + 2]);
1264                 dest[x * 4 + 3] = gl::Float16One;
1265             }
1266         }
1267     }
1268 }
1269 
LoadR32ToR16(size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)1270 void LoadR32ToR16(size_t width,
1271                   size_t height,
1272                   size_t depth,
1273                   const uint8_t *input,
1274                   size_t inputRowPitch,
1275                   size_t inputDepthPitch,
1276                   uint8_t *output,
1277                   size_t outputRowPitch,
1278                   size_t outputDepthPitch)
1279 {
1280     for (size_t z = 0; z < depth; z++)
1281     {
1282         for (size_t y = 0; y < height; y++)
1283         {
1284             const uint32_t *source =
1285                 priv::OffsetDataPointer<uint32_t>(input, y, z, inputRowPitch, inputDepthPitch);
1286             uint16_t *dest =
1287                 priv::OffsetDataPointer<uint16_t>(output, y, z, outputRowPitch, outputDepthPitch);
1288             for (size_t x = 0; x < width; x++)
1289             {
1290                 dest[x] = source[x] >> 16;
1291             }
1292         }
1293     }
1294 }
1295 
LoadR32ToR24G8(size_t width,size_t height,size_t depth,const uint8_t * input,size_t inputRowPitch,size_t inputDepthPitch,uint8_t * output,size_t outputRowPitch,size_t outputDepthPitch)1296 void LoadR32ToR24G8(size_t width,
1297                     size_t height,
1298                     size_t depth,
1299                     const uint8_t *input,
1300                     size_t inputRowPitch,
1301                     size_t inputDepthPitch,
1302                     uint8_t *output,
1303                     size_t outputRowPitch,
1304                     size_t outputDepthPitch)
1305 {
1306     for (size_t z = 0; z < depth; z++)
1307     {
1308         for (size_t y = 0; y < height; y++)
1309         {
1310             const uint32_t *source =
1311                 priv::OffsetDataPointer<uint32_t>(input, y, z, inputRowPitch, inputDepthPitch);
1312             uint32_t *dest =
1313                 priv::OffsetDataPointer<uint32_t>(output, y, z, outputRowPitch, outputDepthPitch);
1314 
1315             for (size_t x = 0; x < width; x++)
1316             {
1317                 dest[x] = source[x] >> 8;
1318             }
1319         }
1320     }
1321 }
1322 
1323 }  // namespace angle
1324