1 /**************************************************************************
2  *
3  * Copyright 2010 VMware, Inc.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the
8  * "Software"), to deal in the Software without restriction, including
9  * without limitation the rights to use, copy, modify, merge, publish,
10  * distribute, sub license, and/or sell copies of the Software, and to
11  * permit persons to whom the Software is furnished to do so, subject to
12  * the following conditions:
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
18  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
19  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20  * USE OR OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * The above copyright notice and this permission notice (including the
23  * next paragraph) shall be included in all copies or substantial portions
24  * of the Software.
25  *
26  **************************************************************************/
27 
28 
29 #include "util/format/u_format_zs.h"
30 #include "util/u_math.h"
31 
32 
/*
 * z32_unorm conversion functions
 */
36 
/**
 * Narrow a 32-bit unorm depth value to 16 bits.
 *
 * Only the upper 16 bits of the input survive; the lower half is discarded
 * without rounding.
 */
static inline uint16_t
z32_unorm_to_z16_unorm(uint32_t z)
{
   /* Shift-based stand-in for z * 0xffff / 0xffffffff. */
   const uint32_t upper_half = z >> 16;

   return (uint16_t)upper_half;
}
43 
/**
 * Widen a 16-bit unorm depth value to 32 bits.
 *
 * The 16-bit pattern is placed in both halves of the result, so 0 maps to 0
 * and 0xffff maps to 0xffffffff.
 */
static inline uint32_t
z16_unorm_to_z32_unorm(uint16_t z)
{
   /* Bit replication stands in for z * 0xffffffff / 0xffff. */
   const uint32_t wide = z;

   return (wide << 16) | wide;
}
50 
/**
 * Narrow a 32-bit unorm depth value to 24 bits.
 *
 * The low 8 bits of the input are dropped without rounding; the result
 * occupies the low 24 bits of the returned word.
 */
static inline uint32_t
z32_unorm_to_z24_unorm(uint32_t z)
{
   /* Shift-based stand-in for z * 0xffffff / 0xffffffff. */
   const uint32_t upper_24 = z >> 8;

   return upper_24;
}
57 
/**
 * Widen a 24-bit unorm depth value to 32 bits.
 *
 * The value is moved into the top 24 bits and its own top 8 bits are
 * replicated into the low byte, so 0xffffff maps to 0xffffffff.
 * NOTE(review): assumes the upper 8 bits of the argument are zero, as the
 * callers visible in this file mask or shift before calling.
 */
static inline uint32_t
z24_unorm_to_z32_unorm(uint32_t z)
{
   /* Bit replication stands in for z * 0xffffffff / 0xffffff. */
   const uint32_t high_part = z << 8;
   const uint32_t low_fill = z >> 16;

   return high_part | low_fill;
}
64 
65 
/*
 * z32_float conversion functions
 */
69 
/**
 * Convert a float depth value to 16-bit unorm.
 *
 * The input is scaled by 65535 and biased by 0.5 before the truncating
 * integer cast, which rounds to nearest. No clamping happens here.
 * NOTE(review): presumably callers pass values in [0, 1] -- confirm.
 */
static inline uint16_t
z32_float_to_z16_unorm(float z)
{
   const float max_z16 = 0xffff;

   return (uint16_t)(z * max_z16 + 0.5f);
}
76 
/**
 * Convert a 16-bit unorm depth value to float.
 *
 * The reciprocal of 65535 is computed in double precision and then stored
 * as a float; the multiplication itself is carried out in float.
 */
static inline float
z16_unorm_to_z32_float(uint16_t z)
{
   const float inv_max_z16 = 1.0 / 0xffff;

   return (float)(z * inv_max_z16);
}
83 
/**
 * Convert a float depth value to 24-bit unorm.
 *
 * The scaling is done in double precision and the result is truncated
 * (no rounding bias), then masked to the low 24 bits.
 * NOTE(review): presumably callers pass values in [0, 1] -- confirm.
 */
static inline uint32_t
z32_float_to_z24_unorm(float z)
{
   const double max_z24 = 0xffffff;
   const uint32_t truncated = (uint32_t)(z * max_z24);

   return truncated & 0xffffff;
}
90 
/**
 * Convert a 24-bit unorm depth value to float.
 *
 * The product is formed in double precision and narrowed to float only on
 * return.
 */
static inline float
z24_unorm_to_z32_float(uint32_t z)
{
   const double inv_max_z24 = 1.0 / 0xffffff;

   return (float)(z * inv_max_z24);
}
97 
/**
 * Convert a float depth value to 32-bit unorm.
 *
 * The scaling is done in double precision so that 1.0 maps exactly to
 * 0xffffffff; the fractional part is truncated. No clamping happens here.
 * NOTE(review): presumably callers pass values in [0, 1] -- confirm.
 */
static inline uint32_t
z32_float_to_z32_unorm(float z)
{
   const double max_z32 = 0xffffffff;

   return (uint32_t)(z * max_z32);
}
104 
/**
 * Convert a 32-bit unorm depth value to float.
 *
 * The product is formed in double precision and narrowed to float only on
 * return.
 */
static inline float
z32_unorm_to_z32_float(uint32_t z)
{
   const double inv_max_z32 = 1.0 / 0xffffffff;

   return (float)(z * inv_max_z32);
}
111 
112 
/**
 * Copy 8-bit values row by row from src_row to dst_row.
 *
 * Each of the `height` rows copies `width` bytes; the row pointers then
 * advance by their respective strides (one unit per byte, since both rows
 * are uint8_t).
 */
void
util_format_s8_uint_unpack_s_8uint(uint8_t *restrict dst_row, unsigned dst_stride,
                                   const uint8_t *restrict src_row, unsigned src_stride,
                                   unsigned width, unsigned height)
{
   for (unsigned row = 0; row < height; row++) {
      memcpy(dst_row, src_row, width);
      dst_row += dst_stride / sizeof(uint8_t);
      src_row += src_stride / sizeof(uint8_t);
   }
}
125 
/**
 * Copy 8-bit values row by row from src_row to dst_row.
 *
 * The source and destination layouts are both one byte per pixel, so every
 * row is a plain `width`-byte copy followed by a stride advance.
 */
void
util_format_s8_uint_pack_s_8uint(uint8_t *restrict dst_row, unsigned dst_stride,
                                 const uint8_t *restrict src_row, unsigned src_stride,
                                 unsigned width, unsigned height)
{
   for (unsigned row = 0; row < height; row++) {
      memcpy(dst_row, src_row, width);
      dst_row += dst_stride / sizeof(uint8_t);
      src_row += src_stride / sizeof(uint8_t);
   }
}
138 
/**
 * Unpack rows of 16-bit unorm depth values into floats.
 *
 * Each source row is read as an array of uint16_t and every element is
 * converted with z16_unorm_to_z32_float(). Strides are divided by the
 * element size of the corresponding row pointer before advancing.
 */
void
util_format_z16_unorm_unpack_z_float(float *restrict dst_row, unsigned dst_stride,
                                     const uint8_t *restrict src_row, unsigned src_stride,
                                     unsigned width, unsigned height)
{
   for (unsigned row = 0; row < height; row++) {
      const uint16_t *packed = (const uint16_t *)src_row;

      for (unsigned col = 0; col < width; col++) {
         dst_row[col] = z16_unorm_to_z32_float(packed[col]);
      }

      src_row += src_stride / sizeof(uint8_t);
      dst_row += dst_stride / sizeof(float);
   }
}
155 
/**
 * Pack rows of float depth values into 16-bit unorm.
 *
 * Each destination row is written as an array of uint16_t, one element per
 * source float, converted with z32_float_to_z16_unorm().
 */
void
util_format_z16_unorm_pack_z_float(uint8_t *restrict dst_row, unsigned dst_stride,
                                   const float *restrict src_row, unsigned src_stride,
                                   unsigned width, unsigned height)
{
   for (unsigned row = 0; row < height; row++) {
      uint16_t *packed = (uint16_t *)dst_row;

      for (unsigned col = 0; col < width; col++) {
         packed[col] = z32_float_to_z16_unorm(src_row[col]);
      }

      dst_row += dst_stride / sizeof(uint8_t);
      src_row += src_stride / sizeof(float);
   }
}
172 
/**
 * Unpack rows of 16-bit unorm depth values into 32-bit unorm.
 *
 * Each source row is read as an array of uint16_t and widened with
 * z16_unorm_to_z32_unorm().
 */
void
util_format_z16_unorm_unpack_z_32unorm(uint32_t *restrict dst_row, unsigned dst_stride,
                                       const uint8_t *restrict src_row, unsigned src_stride,
                                       unsigned width, unsigned height)
{
   for (unsigned row = 0; row < height; row++) {
      const uint16_t *packed = (const uint16_t *)src_row;

      for (unsigned col = 0; col < width; col++) {
         dst_row[col] = z16_unorm_to_z32_unorm(packed[col]);
      }

      src_row += src_stride / sizeof(uint8_t);
      dst_row += dst_stride / sizeof(uint32_t);
   }
}
189 
/**
 * Pack rows of 32-bit unorm depth values into 16-bit unorm.
 *
 * Each destination row is written as an array of uint16_t, narrowed from
 * the source with z32_unorm_to_z16_unorm().
 */
void
util_format_z16_unorm_pack_z_32unorm(uint8_t *restrict dst_row, unsigned dst_stride,
                                     const uint32_t *restrict src_row, unsigned src_stride,
                                     unsigned width, unsigned height)
{
   for (unsigned row = 0; row < height; row++) {
      uint16_t *packed = (uint16_t *)dst_row;

      for (unsigned col = 0; col < width; col++) {
         packed[col] = z32_unorm_to_z16_unorm(src_row[col]);
      }

      dst_row += dst_stride / sizeof(uint8_t);
      src_row += src_stride / sizeof(uint32_t);
   }
}
206 
/**
 * Unpack rows of 32-bit unorm depth values into floats.
 *
 * Each source row is read as an array of uint32_t and converted with
 * z32_unorm_to_z32_float().
 */
void
util_format_z32_unorm_unpack_z_float(float *restrict dst_row, unsigned dst_stride,
                                     const uint8_t *restrict src_row, unsigned src_stride,
                                     unsigned width, unsigned height)
{
   for (unsigned row = 0; row < height; row++) {
      const uint32_t *packed = (const uint32_t *)src_row;

      for (unsigned col = 0; col < width; col++) {
         dst_row[col] = z32_unorm_to_z32_float(packed[col]);
      }

      src_row += src_stride / sizeof(uint8_t);
      dst_row += dst_stride / sizeof(float);
   }
}
223 
/**
 * Pack rows of float depth values into 32-bit unorm.
 *
 * Each destination row is written as an array of uint32_t, converted from
 * the source with z32_float_to_z32_unorm(). The input is not clamped here.
 */
void
util_format_z32_unorm_pack_z_float(uint8_t *restrict dst_row, unsigned dst_stride,
                                   const float *restrict src_row, unsigned src_stride,
                                   unsigned width, unsigned height)
{
   for (unsigned row = 0; row < height; row++) {
      uint32_t *packed = (uint32_t *)dst_row;

      for (unsigned col = 0; col < width; col++) {
         packed[col] = z32_float_to_z32_unorm(src_row[col]);
      }

      dst_row += dst_stride / sizeof(uint8_t);
      src_row += src_stride / sizeof(float);
   }
}
240 
/**
 * Copy rows of 32-bit values from src_row to dst_row unchanged.
 *
 * Source and destination share the same 4-byte-per-pixel representation,
 * so each row is a `width * 4`-byte memcpy followed by a stride advance.
 */
void
util_format_z32_unorm_unpack_z_32unorm(uint32_t *restrict dst_row, unsigned dst_stride,
                                       const uint8_t *restrict src_row, unsigned src_stride,
                                       unsigned width, unsigned height)
{
   const unsigned row_bytes = width * 4;

   for (unsigned row = 0; row < height; row++) {
      memcpy(dst_row, src_row, row_bytes);
      dst_row += dst_stride / sizeof(uint32_t);
      src_row += src_stride / sizeof(uint8_t);
   }
}
253 
/**
 * Copy rows of 32-bit values from src_row to dst_row unchanged.
 *
 * Each row copies `width * 4` bytes; the row pointers then advance by
 * their strides divided by their own element size.
 */
void
util_format_z32_unorm_pack_z_32unorm(uint8_t *restrict dst_row, unsigned dst_stride,
                                     const uint32_t *restrict src_row, unsigned src_stride,
                                     unsigned width, unsigned height)
{
   const unsigned row_bytes = width * 4;

   for (unsigned row = 0; row < height; row++) {
      memcpy(dst_row, src_row, row_bytes);
      dst_row += dst_stride / sizeof(uint8_t);
      src_row += src_stride / sizeof(uint32_t);
   }
}
266 
/**
 * Copy rows of 32-bit float values from src_row to dst_row unchanged.
 *
 * Each row copies `width * 4` bytes with memcpy; no conversion is applied.
 */
void
util_format_z32_float_unpack_z_float(float *restrict dst_row, unsigned dst_stride,
                                     const uint8_t *restrict src_row, unsigned src_stride,
                                     unsigned width, unsigned height)
{
   const unsigned row_bytes = width * 4;

   for (unsigned row = 0; row < height; row++) {
      memcpy(dst_row, src_row, row_bytes);
      dst_row += dst_stride / sizeof(float);
      src_row += src_stride / sizeof(uint8_t);
   }
}
279 
/**
 * Copy rows of 32-bit float values from src_row to dst_row unchanged.
 *
 * Each row copies `width * 4` bytes with memcpy; no conversion is applied.
 */
void
util_format_z32_float_pack_z_float(uint8_t *restrict dst_row, unsigned dst_stride,
                                   const float *restrict src_row, unsigned src_stride,
                                   unsigned width, unsigned height)
{
   const unsigned row_bytes = width * 4;

   for (unsigned row = 0; row < height; row++) {
      memcpy(dst_row, src_row, row_bytes);
      dst_row += dst_stride / sizeof(uint8_t);
      src_row += src_stride / sizeof(float);
   }
}
292 
/**
 * Unpack rows of float depth values into 32-bit unorm.
 *
 * Each source row is read as an array of float. Every value is clamped to
 * [0, 1] with CLAMP() before being converted by z32_float_to_z32_unorm().
 */
void
util_format_z32_float_unpack_z_32unorm(uint32_t *restrict dst_row, unsigned dst_stride,
                                       const uint8_t *restrict src_row, unsigned src_stride,
                                       unsigned width, unsigned height)
{
   for (unsigned row = 0; row < height; row++) {
      const float *depth = (const float *)src_row;

      for (unsigned col = 0; col < width; col++) {
         /* Read once into a local: CLAMP() is a macro and may evaluate
          * its argument more than once. */
         const float z = depth[col];
         dst_row[col] = z32_float_to_z32_unorm(CLAMP(z, 0.0f, 1.0f));
      }

      src_row += src_stride / sizeof(uint8_t);
      dst_row += dst_stride / sizeof(uint32_t);
   }
}
310 
/**
 * Pack rows of 32-bit unorm depth values into floats.
 *
 * Each destination row is written as an array of float, converted from the
 * source with z32_unorm_to_z32_float().
 */
void
util_format_z32_float_pack_z_32unorm(uint8_t *restrict dst_row, unsigned dst_stride,
                                     const uint32_t *restrict src_row, unsigned src_stride,
                                     unsigned width, unsigned height)
{
   for (unsigned row = 0; row < height; row++) {
      float *depth = (float *)dst_row;

      for (unsigned col = 0; col < width; col++) {
         depth[col] = z32_unorm_to_z32_float(src_row[col]);
      }

      dst_row += dst_stride / sizeof(uint8_t);
      src_row += src_stride / sizeof(uint32_t);
   }
}
327 
/**
 * Placeholder: float depth unpacking for z16_unorm_s8_uint.
 *
 * The body only invokes unreachable() with a not-implemented message; none
 * of the arguments are read and nothing is written.
 */
void
util_format_z16_unorm_s8_uint_unpack_z_float(float *restrict dst_row, unsigned dst_stride,
                                             const uint8_t *restrict src_row, unsigned src_stride,
                                             unsigned width, unsigned height)
{
   (void)dst_row;
   (void)dst_stride;
   (void)src_row;
   (void)src_stride;
   (void)width;
   (void)height;

   unreachable("z16_s8 packing/unpacking is not implemented.");
}
335 
/**
 * Placeholder: float depth packing for z16_unorm_s8_uint.
 *
 * The body only invokes unreachable() with a not-implemented message; none
 * of the arguments are read and nothing is written.
 */
void
util_format_z16_unorm_s8_uint_pack_z_float(uint8_t *restrict dst_row, unsigned dst_stride,
                                           const float *restrict src_row, unsigned src_stride,
                                           unsigned width, unsigned height)
{
   (void)dst_row;
   (void)dst_stride;
   (void)src_row;
   (void)src_stride;
   (void)width;
   (void)height;

   unreachable("z16_s8 packing/unpacking is not implemented.");
}
343 
/**
 * Placeholder: 32-bit unorm depth unpacking for z16_unorm_s8_uint.
 *
 * The body only invokes unreachable() with a not-implemented message; none
 * of the arguments are read and nothing is written.
 */
void
util_format_z16_unorm_s8_uint_unpack_z_32unorm(uint32_t *restrict dst_row, unsigned dst_stride,
                                               const uint8_t *restrict src_row, unsigned src_stride,
                                               unsigned width, unsigned height)
{
   (void)dst_row;
   (void)dst_stride;
   (void)src_row;
   (void)src_stride;
   (void)width;
   (void)height;

   unreachable("z16_s8 packing/unpacking is not implemented.");
}
351 
/**
 * Placeholder: 32-bit unorm depth packing for z16_unorm_s8_uint.
 *
 * The body only invokes unreachable() with a not-implemented message; none
 * of the arguments are read and nothing is written.
 */
void
util_format_z16_unorm_s8_uint_pack_z_32unorm(uint8_t *restrict dst_row, unsigned dst_stride,
                                             const uint32_t *restrict src_row, unsigned src_stride,
                                             unsigned width, unsigned height)
{
   (void)dst_row;
   (void)dst_stride;
   (void)src_row;
   (void)src_stride;
   (void)width;
   (void)height;

   unreachable("z16_s8 packing/unpacking is not implemented.");
}
359 
/**
 * Placeholder: 8-bit stencil unpacking for z16_unorm_s8_uint.
 *
 * The body only invokes unreachable() with a not-implemented message; none
 * of the arguments are read and nothing is written.
 */
void
util_format_z16_unorm_s8_uint_unpack_s_8uint(uint8_t *restrict dst_row, unsigned dst_stride,
                                             const uint8_t *restrict src_row, unsigned src_stride,
                                             unsigned width, unsigned height)
{
   (void)dst_row;
   (void)dst_stride;
   (void)src_row;
   (void)src_stride;
   (void)width;
   (void)height;

   unreachable("z16_s8 packing/unpacking is not implemented.");
}
367 
/**
 * Placeholder: 8-bit stencil packing for z16_unorm_s8_uint.
 *
 * The body only invokes unreachable() with a not-implemented message; none
 * of the arguments are read and nothing is written.
 */
void
util_format_z16_unorm_s8_uint_pack_s_8uint(uint8_t *restrict dst_row, unsigned dst_stride,
                                           const uint8_t *restrict src_row, unsigned src_stride,
                                           unsigned width, unsigned height)
{
   (void)dst_row;
   (void)dst_stride;
   (void)src_row;
   (void)src_stride;
   (void)width;
   (void)height;

   unreachable("z16_s8 packing/unpacking is not implemented.");
}
375 
/**
 * Unpack the 24-bit depth part of 32-bit words into floats.
 *
 * Each source row is read as an array of uint32_t. The low 24 bits of each
 * word are isolated and converted with z24_unorm_to_z32_float(); the top
 * byte is ignored.
 */
void
util_format_z24_unorm_s8_uint_unpack_z_float(float *restrict dst_row, unsigned dst_stride,
                                             const uint8_t *restrict src_row, unsigned src_stride,
                                             unsigned width, unsigned height)
{
   for (unsigned row = 0; row < height; row++) {
      const uint32_t *words = (const uint32_t *)src_row;

      for (unsigned col = 0; col < width; col++) {
         const uint32_t z24 = words[col] & 0xffffff;
         dst_row[col] = z24_unorm_to_z32_float(z24);
      }

      src_row += src_stride / sizeof(uint8_t);
      dst_row += dst_stride / sizeof(float);
   }
}
392 
/**
 * Pack float depth values into the low 24 bits of 32-bit words.
 *
 * This is a read-modify-write: the top byte already present in each
 * destination word is kept, and only the low 24 bits are replaced with the
 * result of z32_float_to_z24_unorm().
 */
void
util_format_z24_unorm_s8_uint_pack_z_float(uint8_t *restrict dst_row, unsigned dst_stride,
                                           const float *restrict src_row, unsigned src_stride,
                                           unsigned width, unsigned height)
{
   for (unsigned row = 0; row < height; row++) {
      uint32_t *words = (uint32_t *)dst_row;

      for (unsigned col = 0; col < width; col++) {
         const uint32_t kept_top_byte = words[col] & 0xff000000;
         words[col] = kept_top_byte | z32_float_to_z24_unorm(src_row[col]);
      }

      dst_row += dst_stride / sizeof(uint8_t);
      src_row += src_stride / sizeof(float);
   }
}
412 
413 
/**
 * Extract the low 24 bits of each 32-bit source word.
 *
 * Both rows are accessed as arrays of uint32_t; every destination word
 * receives the source word with its top byte cleared.
 */
void
util_format_z24_unorm_s8_uint_unpack_z24(uint8_t *restrict dst_row, unsigned dst_stride,
                                         const uint8_t *restrict src_row, unsigned src_stride,
                                         unsigned width, unsigned height)
{
   for (unsigned row = 0; row < height; row++) {
      const uint32_t *in_words = (const uint32_t *)src_row;
      uint32_t *out_words = (uint32_t *)dst_row;

      for (unsigned col = 0; col < width; col++) {
         out_words[col] = in_words[col] & 0xffffff;
      }

      src_row += src_stride / sizeof(uint8_t);
      dst_row += dst_stride / sizeof(uint8_t);
   }
}
430 
/**
 * Store the low 24 bits of each source word into the destination word.
 *
 * This is a read-modify-write: the top byte already present in each
 * destination word is kept, and the low 24 bits are replaced with the low
 * 24 bits of the source word.
 */
void
util_format_z24_unorm_s8_uint_pack_z24(uint8_t *restrict dst_row, unsigned dst_stride,
                                       const uint8_t *restrict src_row, unsigned src_stride,
                                       unsigned width, unsigned height)
{
   for (unsigned row = 0; row < height; row++) {
      const uint32_t *in_words = (const uint32_t *)src_row;
      uint32_t *out_words = (uint32_t *)dst_row;

      for (unsigned col = 0; col < width; col++) {
         const uint32_t kept_top_byte = out_words[col] & 0xff000000;
         const uint32_t new_low_bits = in_words[col] & 0xffffff;
         out_words[col] = kept_top_byte | new_low_bits;
      }

      dst_row += dst_stride / sizeof(uint8_t);
      src_row += src_stride / sizeof(uint8_t);
   }
}
451 
/**
 * Unpack the 24-bit depth part of 32-bit words into 32-bit unorm.
 *
 * Each source row is read as an array of uint32_t. The low 24 bits of each
 * word are isolated and widened with z24_unorm_to_z32_unorm().
 */
void
util_format_z24_unorm_s8_uint_unpack_z_32unorm(uint32_t *restrict dst_row, unsigned dst_stride,
                                               const uint8_t *restrict src_row, unsigned src_stride,
                                               unsigned width, unsigned height)
{
   for (unsigned row = 0; row < height; row++) {
      const uint32_t *words = (const uint32_t *)src_row;

      for (unsigned col = 0; col < width; col++) {
         const uint32_t z24 = words[col] & 0xffffff;
         dst_row[col] = z24_unorm_to_z32_unorm(z24);
      }

      src_row += src_stride / sizeof(uint8_t);
      dst_row += dst_stride / sizeof(uint32_t);
   }
}
468 
/**
 * Pack 32-bit unorm depth values into the low 24 bits of 32-bit words.
 *
 * This is a read-modify-write: the top byte already present in each
 * destination word is kept, and the low 24 bits are replaced with the
 * result of z32_unorm_to_z24_unorm().
 */
void
util_format_z24_unorm_s8_uint_pack_z_32unorm(uint8_t *restrict dst_row, unsigned dst_stride,
                                             const uint32_t *restrict src_row, unsigned src_stride,
                                             unsigned width, unsigned height)
{
   for (unsigned row = 0; row < height; row++) {
      uint32_t *words = (uint32_t *)dst_row;

      for (unsigned col = 0; col < width; col++) {
         const uint32_t kept_top_byte = words[col] & 0xff000000;
         words[col] = kept_top_byte | z32_unorm_to_z24_unorm(src_row[col]);
      }

      dst_row += dst_stride / sizeof(uint8_t);
      src_row += src_stride / sizeof(uint32_t);
   }
}
488 
/**
 * Extract the top byte of each 32-bit source word into a byte array.
 *
 * Each source row is read as an array of uint32_t; the destination gets
 * one uint8_t per word, taken from bits 24..31.
 */
void
util_format_z24_unorm_s8_uint_unpack_s_8uint(uint8_t *restrict dst_row, unsigned dst_stride,
                                             const uint8_t *restrict src_row, unsigned src_stride,
                                             unsigned width, unsigned height)
{
   for (unsigned row = 0; row < height; row++) {
      const uint32_t *words = (const uint32_t *)src_row;

      for (unsigned col = 0; col < width; col++) {
         dst_row[col] = words[col] >> 24;
      }

      src_row += src_stride / sizeof(uint8_t);
      dst_row += dst_stride / sizeof(uint8_t);
   }
}
505 
/**
 * Store 8-bit values into the top byte of 32-bit destination words.
 *
 * This is a read-modify-write: the low 24 bits already present in each
 * destination word are kept, and bits 24..31 are replaced with the source
 * byte.
 *
 * The destination word is read and written in CPU byte order, matching the
 * other z24_unorm_s8_uint pack/unpack routines in this file. Converting only
 * the read (and not the store) would leave the two accesses disagreeing on
 * byte order.
 *
 * \param dst_row     first destination row, accessed as uint32_t per pixel
 * \param dst_stride  destination row pitch, applied to the byte pointer
 * \param src_row     first source row, one uint8_t per pixel
 * \param src_stride  source row pitch, applied to the byte pointer
 * \param width       pixels per row
 * \param height      number of rows
 */
void
util_format_z24_unorm_s8_uint_pack_s_8uint(uint8_t *restrict dst_row, unsigned dst_stride,
                                           const uint8_t *restrict src_row, unsigned src_stride,
                                           unsigned width, unsigned height)
{
   unsigned x, y;
   for (y = 0; y < height; ++y) {
      const uint8_t *src = src_row;
      uint32_t *dst = (uint32_t *)dst_row;
      for (x = 0; x < width; ++x) {
         uint32_t value = *dst;
         value &= 0x00ffffff;                 /* keep the low 24 bits */
         value |= (uint32_t)*src++ << 24;     /* insert the new top byte */
         *dst++ = value;
      }
      dst_row += dst_stride / sizeof(*dst_row);
      src_row += src_stride / sizeof(*src_row);
   }
}
525 
/**
 * Combine separate depth and stencil rows into single 32-bit words.
 *
 * Every destination word is built from the low 24 bits of the depth source
 * word and the stencil source byte placed in bits 24..31. The previous
 * contents of the destination are overwritten, not merged.
 */
void
util_format_z24_unorm_s8_uint_pack_separate(uint8_t *restrict dst_row, unsigned dst_stride,
                                            const uint32_t *z_src_row, unsigned z_src_stride,
                                            const uint8_t *s_src_row, unsigned s_src_stride,
                                            unsigned width, unsigned height)
{
   for (unsigned row = 0; row < height; row++) {
      uint32_t *words = (uint32_t *)dst_row;

      for (unsigned col = 0; col < width; col++) {
         const uint32_t depth_bits = z_src_row[col] & 0x00ffffff;
         const uint32_t stencil_bits = (uint32_t)s_src_row[col] << 24;
         words[col] = depth_bits | stencil_bits;
      }

      dst_row += dst_stride / sizeof(uint8_t);
      z_src_row += z_src_stride / sizeof(uint32_t);
      s_src_row += s_src_stride / sizeof(uint8_t);
   }
}
545 
/**
 * Unpack the upper 24 bits of 32-bit words into floats.
 *
 * Each source row is read as an array of uint32_t. Every word is shifted
 * right by 8 (dropping the low byte) and the remaining 24-bit value is
 * converted with z24_unorm_to_z32_float().
 */
void
util_format_s8_uint_z24_unorm_unpack_z_float(float *restrict dst_row, unsigned dst_stride,
                                             const uint8_t *restrict src_row, unsigned src_stride,
                                             unsigned width, unsigned height)
{
   for (unsigned row = 0; row < height; row++) {
      const uint32_t *words = (const uint32_t *)src_row;

      for (unsigned col = 0; col < width; col++) {
         const uint32_t z24 = words[col] >> 8;
         dst_row[col] = z24_unorm_to_z32_float(z24);
      }

      src_row += src_stride / sizeof(uint8_t);
      dst_row += dst_stride / sizeof(float);
   }
}
562 
/**
 * Pack float depth values into the upper 24 bits of 32-bit words.
 *
 * This is a read-modify-write: the low byte already present in each
 * destination word is kept, and bits 8..31 are replaced with the result of
 * z32_float_to_z24_unorm() shifted left by 8.
 */
void
util_format_s8_uint_z24_unorm_pack_z_float(uint8_t *restrict dst_row, unsigned dst_stride,
                                           const float *restrict src_row, unsigned src_stride,
                                           unsigned width, unsigned height)
{
   for (unsigned row = 0; row < height; row++) {
      uint32_t *words = (uint32_t *)dst_row;

      for (unsigned col = 0; col < width; col++) {
         const uint32_t kept_low_byte = words[col] & 0x000000ff;
         words[col] = kept_low_byte | (z32_float_to_z24_unorm(src_row[col]) << 8);
      }

      dst_row += dst_stride / sizeof(uint8_t);
      src_row += src_stride / sizeof(float);
   }
}
582 
/**
 * Unpack the upper 24 bits of 32-bit words into 32-bit unorm.
 *
 * Each source row is read as an array of uint32_t. Every word is shifted
 * right by 8 (dropping the low byte) and the remaining 24-bit value is
 * widened with z24_unorm_to_z32_unorm().
 */
void
util_format_s8_uint_z24_unorm_unpack_z_32unorm(uint32_t *restrict dst_row, unsigned dst_stride,
                                               const uint8_t *restrict src_row, unsigned src_stride,
                                               unsigned width, unsigned height)
{
   for (unsigned row = 0; row < height; row++) {
      const uint32_t *words = (const uint32_t *)src_row;

      for (unsigned col = 0; col < width; col++) {
         const uint32_t z24 = words[col] >> 8;
         dst_row[col] = z24_unorm_to_z32_unorm(z24);
      }

      src_row += src_stride / sizeof(uint8_t);
      dst_row += dst_stride / sizeof(uint32_t);
   }
}
600 
/**
 * Pack 32-bit unorm depth values into the upper 24 bits of 32-bit words.
 *
 * This is a read-modify-write: the low byte already present in each
 * destination word is kept, and bits 8..31 are taken directly from the top
 * 24 bits of the source word.
 */
void
util_format_s8_uint_z24_unorm_pack_z_32unorm(uint8_t *restrict dst_row, unsigned dst_stride,
                                             const uint32_t *restrict src_row, unsigned src_stride,
                                             unsigned width, unsigned height)
{
   for (unsigned row = 0; row < height; row++) {
      uint32_t *words = (uint32_t *)dst_row;

      for (unsigned col = 0; col < width; col++) {
         const uint32_t kept_low_byte = words[col] & 0x000000ff;
         const uint32_t depth_bits = src_row[col] & 0xffffff00;
         words[col] = kept_low_byte | depth_bits;
      }

      dst_row += dst_stride / sizeof(uint8_t);
      src_row += src_stride / sizeof(uint32_t);
   }
}
620 
/**
 * Extract the low byte of each 32-bit source word into a byte array.
 *
 * Each source row is read as an array of uint32_t; the destination gets
 * one uint8_t per word, taken from bits 0..7.
 */
void
util_format_s8_uint_z24_unorm_unpack_s_8uint(uint8_t *restrict dst_row, unsigned dst_stride,
                                             const uint8_t *restrict src_row, unsigned src_stride,
                                             unsigned width, unsigned height)
{
   for (unsigned row = 0; row < height; row++) {
      const uint32_t *words = (const uint32_t *)src_row;

      for (unsigned col = 0; col < width; col++) {
         dst_row[col] = words[col] & 0xff;
      }

      src_row += src_stride / sizeof(uint8_t);
      dst_row += dst_stride / sizeof(uint8_t);
   }
}
637 
/**
 * Store 8-bit values into the low byte of 32-bit destination words.
 *
 * This is a read-modify-write: bits 8..31 already present in each
 * destination word are kept, and bits 0..7 are replaced with the source
 * byte.
 */
void
util_format_s8_uint_z24_unorm_pack_s_8uint(uint8_t *restrict dst_row, unsigned dst_stride,
                                           const uint8_t *restrict src_row, unsigned src_stride,
                                           unsigned width, unsigned height)
{
   for (unsigned row = 0; row < height; row++) {
      uint32_t *words = (uint32_t *)dst_row;

      for (unsigned col = 0; col < width; col++) {
         const uint32_t kept_upper_bits = words[col] & 0xffffff00;
         words[col] = kept_upper_bits | src_row[col];
      }

      dst_row += dst_stride / sizeof(uint8_t);
      src_row += src_stride / sizeof(uint8_t);
   }
}
657 
/**
 * Unpack the low 24 bits of 32-bit words into floats.
 *
 * Each source row is read as an array of uint32_t. The top byte of every
 * word is masked off and the remaining 24-bit value is converted with
 * z24_unorm_to_z32_float().
 */
void
util_format_z24x8_unorm_unpack_z_float(float *restrict dst_row, unsigned dst_stride,
                                       const uint8_t *restrict src_row, unsigned src_stride,
                                       unsigned width, unsigned height)
{
   for (unsigned row = 0; row < height; row++) {
      const uint32_t *words = (const uint32_t *)src_row;

      for (unsigned col = 0; col < width; col++) {
         const uint32_t z24 = words[col] & 0xffffff;
         dst_row[col] = z24_unorm_to_z32_float(z24);
      }

      src_row += src_stride / sizeof(uint8_t);
      dst_row += dst_stride / sizeof(float);
   }
}
674 
/**
 * Pack float depth values into 32-bit words holding a 24-bit unorm.
 *
 * Every destination word is overwritten with the result of
 * z32_float_to_z24_unorm(); unlike the z24_unorm_s8_uint variant, the
 * previous contents of the destination are not preserved.
 */
void
util_format_z24x8_unorm_pack_z_float(uint8_t *restrict dst_row, unsigned dst_stride,
                                     const float *restrict src_row, unsigned src_stride,
                                     unsigned width, unsigned height)
{
   for (unsigned row = 0; row < height; row++) {
      uint32_t *words = (uint32_t *)dst_row;

      for (unsigned col = 0; col < width; col++) {
         words[col] = z32_float_to_z24_unorm(src_row[col]);
      }

      dst_row += dst_stride / sizeof(uint8_t);
      src_row += src_stride / sizeof(float);
   }
}
691 
/*
 * Unpack rows of 32-bit pixels to 32-bit unorm depth.
 *
 * For every pixel the low 24 bits of the source word are extracted and
 * widened with z24_unorm_to_z32_unorm(); the upper 8 bits are ignored.
 * Both strides are divided by the element size of their row pointer
 * before being applied, i.e. they are byte strides.
 */
void
util_format_z24x8_unorm_unpack_z_32unorm(uint32_t *restrict dst_row, unsigned dst_stride,
                                         const uint8_t *restrict src_row, unsigned src_stride,
                                         unsigned width, unsigned height)
{
   const unsigned dst_step = dst_stride / sizeof(*dst_row);
   const unsigned src_step = src_stride / sizeof(*src_row);

   for (unsigned row = 0; row < height; ++row) {
      const uint32_t *in = (const uint32_t *)src_row;
      uint32_t *out = dst_row;

      for (unsigned col = 0; col < width; ++col) {
         /* Keep only the 24 low bits before converting. */
         out[col] = z24_unorm_to_z32_unorm(in[col] & 0xffffff);
      }

      dst_row += dst_step;
      src_row += src_step;
   }
}
708 
/*
 * Pack rows of 32-bit unorm depth into 32-bit pixels.
 *
 * Each destination word is overwritten with the value returned by
 * z32_unorm_to_z24_unorm() for the matching source word.  Both strides
 * are divided by the element size of their row pointer before being
 * applied, i.e. they are byte strides.
 */
void
util_format_z24x8_unorm_pack_z_32unorm(uint8_t *restrict dst_row, unsigned dst_stride,
                                       const uint32_t *restrict src_row, unsigned src_stride,
                                       unsigned width, unsigned height)
{
   const unsigned dst_step = dst_stride / sizeof(*dst_row);
   const unsigned src_step = src_stride / sizeof(*src_row);

   for (unsigned row = 0; row < height; ++row) {
      uint32_t *out = (uint32_t *)dst_row;
      const uint32_t *in = src_row;

      for (unsigned col = 0; col < width; ++col) {
         out[col] = z32_unorm_to_z24_unorm(in[col]);
      }

      dst_row += dst_step;
      src_row += src_step;
   }
}
725 
/*
 * Unpack rows of 32-bit pixels to float depth.
 *
 * For every pixel the upper 24 bits of the source word (word >> 8) are
 * converted with z24_unorm_to_z32_float(); the low 8 bits are discarded.
 * Both strides are divided by the element size of their row pointer
 * before being applied, i.e. they are byte strides.
 */
void
util_format_x8z24_unorm_unpack_z_float(float *restrict dst_row, unsigned dst_stride,
                                       const uint8_t *restrict src_row, unsigned src_stride,
                                       unsigned width, unsigned height)
{
   unsigned x, y;
   for(y = 0; y < height; ++y) {
      float *dst = dst_row;
      /* Cast keeps the const qualifier of src_row, like the sibling
       * unpack functions do. */
      const uint32_t *src = (const uint32_t *)src_row;
      for(x = 0; x < width; ++x) {
         *dst++ = z24_unorm_to_z32_float((*src++) >> 8);
      }
      src_row += src_stride/sizeof(*src_row);
      dst_row += dst_stride/sizeof(*dst_row);
   }
}
742 
/*
 * Pack rows of float depth into 32-bit pixels.
 *
 * Each destination word is overwritten with the result of
 * z32_float_to_z24_unorm() shifted left by 8 bits.  Both strides are
 * divided by the element size of their row pointer before being applied,
 * i.e. they are byte strides.
 */
void
util_format_x8z24_unorm_pack_z_float(uint8_t *restrict dst_row, unsigned dst_stride,
                                     const float *restrict src_row, unsigned src_stride,
                                     unsigned width, unsigned height)
{
   const unsigned dst_step = dst_stride / sizeof(*dst_row);
   const unsigned src_step = src_stride / sizeof(*src_row);

   for (unsigned row = 0; row < height; ++row) {
      uint32_t *out = (uint32_t *)dst_row;
      const float *in = src_row;

      for (unsigned col = 0; col < width; ++col) {
         /* Converted depth goes above the low byte. */
         out[col] = z32_float_to_z24_unorm(in[col]) << 8;
      }

      dst_row += dst_step;
      src_row += src_step;
   }
}
759 
/*
 * Unpack rows of 32-bit pixels to 32-bit unorm depth.
 *
 * For every pixel the upper 24 bits of the source word (word >> 8) are
 * widened with z24_unorm_to_z32_unorm(); the low 8 bits are discarded.
 * Both strides are divided by the element size of their row pointer
 * before being applied, i.e. they are byte strides.
 */
void
util_format_x8z24_unorm_unpack_z_32unorm(uint32_t *restrict dst_row, unsigned dst_stride,
                                         const uint8_t *restrict src_row, unsigned src_stride,
                                         unsigned width, unsigned height)
{
   const unsigned dst_step = dst_stride / sizeof(*dst_row);
   const unsigned src_step = src_stride / sizeof(*src_row);

   for (unsigned row = 0; row < height; ++row) {
      const uint32_t *in = (const uint32_t *)src_row;
      uint32_t *out = dst_row;

      for (unsigned col = 0; col < width; ++col) {
         /* Drop the low byte, leaving the 24-bit depth value. */
         out[col] = z24_unorm_to_z32_unorm(in[col] >> 8);
      }

      dst_row += dst_step;
      src_row += src_step;
   }
}
776 
/*
 * Pack rows of 32-bit unorm depth into 32-bit pixels.
 *
 * Each destination word is overwritten with the result of
 * z32_unorm_to_z24_unorm() shifted left by 8 bits.  Both strides are
 * divided by the element size of their row pointer before being applied,
 * i.e. they are byte strides.
 */
void
util_format_x8z24_unorm_pack_z_32unorm(uint8_t *restrict dst_row, unsigned dst_stride,
                                       const uint32_t *restrict src_row, unsigned src_stride,
                                       unsigned width, unsigned height)
{
   const unsigned dst_step = dst_stride / sizeof(*dst_row);
   const unsigned src_step = src_stride / sizeof(*src_row);

   for (unsigned row = 0; row < height; ++row) {
      uint32_t *out = (uint32_t *)dst_row;
      const uint32_t *in = src_row;

      for (unsigned col = 0; col < width; ++col) {
         /* Converted depth goes above the low byte. */
         out[col] = z32_unorm_to_z24_unorm(in[col]) << 8;
      }

      dst_row += dst_step;
      src_row += src_step;
   }
}
793 
/*
 * Unpack float depth from rows of two-word pixels.
 *
 * The source is walked as an array of floats, two per pixel; the first
 * float of each pair is copied verbatim to the destination and the second
 * is skipped.  Both strides are divided by the element size of their row
 * pointer before being applied, i.e. they are byte strides.
 */
void
util_format_z32_float_s8x24_uint_unpack_z_float(float *restrict dst_row, unsigned dst_stride,
                                                const uint8_t *restrict src_row, unsigned src_stride,
                                                unsigned width, unsigned height)
{
   const unsigned dst_step = dst_stride / sizeof(*dst_row);
   const unsigned src_step = src_stride / sizeof(*src_row);

   for (unsigned row = 0; row < height; ++row) {
      const float *in = (const float *)src_row;
      float *out = dst_row;

      /* Source advances two floats per pixel, destination one. */
      for (unsigned col = 0; col < width; ++col, in += 2) {
         *out++ = *in;
      }

      dst_row += dst_step;
      src_row += src_step;
   }
}
812 
/*
 * Pack float depth into rows of two-word pixels.
 *
 * The destination is walked as an array of floats, two per pixel; each
 * source float is stored verbatim in the first float of the pair and the
 * second is left untouched.  Both strides are divided by the element size
 * of their row pointer before being applied, i.e. they are byte strides.
 */
void
util_format_z32_float_s8x24_uint_pack_z_float(uint8_t *restrict dst_row, unsigned dst_stride,
                                              const float *restrict src_row, unsigned src_stride,
                                              unsigned width, unsigned height)
{
   const unsigned dst_step = dst_stride / sizeof(*dst_row);
   const unsigned src_step = src_stride / sizeof(*src_row);

   for (unsigned row = 0; row < height; ++row) {
      float *out = (float *)dst_row;
      const float *in = src_row;

      /* Destination advances two floats per pixel, source one. */
      for (unsigned col = 0; col < width; ++col, out += 2) {
         *out = *in++;
      }

      dst_row += dst_step;
      src_row += src_step;
   }
}
831 
/*
 * Unpack 32-bit unorm depth from rows of two-word pixels.
 *
 * The source is walked as an array of floats, two per pixel; the first
 * float of each pair is clamped to [0, 1] with CLAMP() and converted with
 * z32_float_to_z32_unorm().  Both strides are divided by the element size
 * of their row pointer before being applied, i.e. they are byte strides.
 */
void
util_format_z32_float_s8x24_uint_unpack_z_32unorm(uint32_t *restrict dst_row, unsigned dst_stride,
                                                  const uint8_t *restrict src_row, unsigned src_stride,
                                                  unsigned width, unsigned height)
{
   const unsigned dst_step = dst_stride / sizeof(*dst_row);
   const unsigned src_step = src_stride / sizeof(*src_row);

   for (unsigned row = 0; row < height; ++row) {
      const float *in = (const float *)src_row;
      uint32_t *out = dst_row;

      /* Source advances two floats per pixel, destination one word. */
      for (unsigned col = 0; col < width; ++col, in += 2) {
         *out++ = z32_float_to_z32_unorm(CLAMP(*in, 0.0f, 1.0f));
      }

      dst_row += dst_step;
      src_row += src_step;
   }
}
850 
/*
 * Pack 32-bit unorm depth into rows of two-word pixels.
 *
 * The destination is walked as an array of floats, two per pixel; each
 * source word is converted with z32_unorm_to_z32_float() and stored in the
 * first float of the pair, the second is left untouched.  Both strides are
 * divided by the element size of their row pointer before being applied,
 * i.e. they are byte strides.
 */
void
util_format_z32_float_s8x24_uint_pack_z_32unorm(uint8_t *restrict dst_row, unsigned dst_stride,
                                                const uint32_t *restrict src_row, unsigned src_stride,
                                                unsigned width, unsigned height)
{
   const unsigned dst_step = dst_stride / sizeof(*dst_row);
   const unsigned src_step = src_stride / sizeof(*src_row);

   for (unsigned row = 0; row < height; ++row) {
      float *out = (float *)dst_row;
      const uint32_t *in = src_row;

      /* Destination advances two floats per pixel, source one word. */
      for (unsigned col = 0; col < width; ++col, out += 2) {
         *out = z32_unorm_to_z32_float(in[col]);
      }

      dst_row += dst_step;
      src_row += src_step;
   }
}
868 
/*
 * Unpack 8-bit stencil from rows of two-word (8-byte) pixels.
 *
 * For every pixel the 32-bit word located 4 bytes into the pixel is read
 * and truncated to its low 8 bits.  Both strides are divided by the
 * element size of their row pointer (one byte each) before being applied.
 */
void
util_format_z32_float_s8x24_uint_unpack_s_8uint(uint8_t *restrict dst_row, unsigned dst_stride,
                                                const uint8_t *restrict src_row, unsigned src_stride,
                                                unsigned width, unsigned height)
{
   unsigned x, y;
   for(y = 0; y < height; ++y) {
      uint8_t *dst = dst_row;
      /* Start at the second word of the first pixel; the cast keeps the
       * const qualifier of src_row instead of silently dropping it. */
      const uint32_t *src = (const uint32_t *)(src_row + 4);
      for(x = 0; x < width; ++x) {
         /* Only the low byte of the word survives the narrowing store. */
         *dst = (uint8_t)*src;
         src += 2;
         dst += 1;
      }
      src_row += src_stride/sizeof(*src_row);
      dst_row += dst_stride/sizeof(*dst_row);
   }
}
887 
/*
 * Pack 8-bit stencil into rows of two-word pixels.
 *
 * For every pixel the source byte is zero-extended and stored as the
 * whole second 32-bit word of the pixel; the first word is left
 * untouched.  Both strides are divided by the element size of their row
 * pointer (one byte each) before being applied.
 */
void
util_format_z32_float_s8x24_uint_pack_s_8uint(uint8_t *restrict dst_row, unsigned dst_stride,
                                              const uint8_t *restrict src_row, unsigned src_stride,
                                              unsigned width, unsigned height)
{
   const unsigned dst_step = dst_stride / sizeof(*dst_row);
   const unsigned src_step = src_stride / sizeof(*src_row);

   for (unsigned row = 0; row < height; ++row) {
      /* Point at the second word of the first pixel in this row. */
      uint32_t *out = (uint32_t *)dst_row + 1;
      const uint8_t *in = src_row;

      for (unsigned col = 0; col < width; ++col, out += 2) {
         *out = in[col];
      }

      dst_row += dst_step;
      src_row += src_step;
   }
}
906 
907 
/*
 * Stencil unpack for X24S8_UINT: forwards all arguments unchanged to
 * util_format_z24_unorm_s8_uint_unpack_s_8uint().
 */
void
util_format_x24s8_uint_unpack_s_8uint(uint8_t *restrict dst_row, unsigned dst_stride,
                                      const uint8_t *restrict src_row, unsigned src_stride,
                                      unsigned width, unsigned height)
{
   util_format_z24_unorm_s8_uint_unpack_s_8uint(dst_row, dst_stride,
                                                src_row, src_stride,
                                                width, height);
}
915 
/*
 * Stencil pack for X24S8_UINT: forwards all arguments unchanged to
 * util_format_z24_unorm_s8_uint_pack_s_8uint().
 */
void
util_format_x24s8_uint_pack_s_8uint(uint8_t *restrict dst_row, unsigned dst_stride,
                                    const uint8_t *restrict src_row, unsigned src_stride,
                                    unsigned width, unsigned height)
{
   util_format_z24_unorm_s8_uint_pack_s_8uint(dst_row, dst_stride,
                                              src_row, src_stride,
                                              width, height);
}
923 
/*
 * Stencil unpack for S8X24_UINT: forwards all arguments unchanged to
 * util_format_s8_uint_z24_unorm_unpack_s_8uint().
 */
void
util_format_s8x24_uint_unpack_s_8uint(uint8_t *restrict dst_row, unsigned dst_stride,
                                      const uint8_t *restrict src_row, unsigned src_stride,
                                      unsigned width, unsigned height)
{
   util_format_s8_uint_z24_unorm_unpack_s_8uint(dst_row, dst_stride,
                                                src_row, src_stride,
                                                width, height);
}
931 
/*
 * Stencil pack for S8X24_UINT: forwards all arguments unchanged to
 * util_format_s8_uint_z24_unorm_pack_s_8uint().
 */
void
util_format_s8x24_uint_pack_s_8uint(uint8_t *restrict dst_row, unsigned dst_stride,
                                    const uint8_t *restrict src_row, unsigned src_stride,
                                    unsigned width, unsigned height)
{
   util_format_s8_uint_z24_unorm_pack_s_8uint(dst_row, dst_stride,
                                              src_row, src_stride,
                                              width, height);
}
939 
/*
 * Stencil unpack for X32_S8X24_UINT: forwards all arguments unchanged to
 * util_format_z32_float_s8x24_uint_unpack_s_8uint().
 */
void
util_format_x32_s8x24_uint_unpack_s_8uint(uint8_t *restrict dst_row, unsigned dst_stride,
                                          const uint8_t *restrict src_row, unsigned src_stride,
                                          unsigned width, unsigned height)
{
   util_format_z32_float_s8x24_uint_unpack_s_8uint(dst_row, dst_stride,
                                                   src_row, src_stride,
                                                   width, height);
}
950 
/*
 * Stencil pack for X32_S8X24_UINT: forwards all arguments unchanged to
 * util_format_z32_float_s8x24_uint_pack_s_8uint().
 */
void
util_format_x32_s8x24_uint_pack_s_8uint(uint8_t *restrict dst_row, unsigned dst_stride,
                                        const uint8_t *restrict src_row, unsigned src_stride,
                                        unsigned width, unsigned height)
{
   util_format_z32_float_s8x24_uint_pack_s_8uint(dst_row, dst_stride,
                                                 src_row, src_stride,
                                                 width, height);
}
960