1 /*
2 * This file is part of libc_utils.
3 *
4 * libc_utils is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * libc_utils is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with libc_utils; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18
19 /*
20 * libc_utils is being developed at the Max-Planck-Institut fuer Astrophysik
21 * and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
22 * (DLR).
23 */
24
25 /*
26 * Utilities for conversion between coordinates, Morton, and Peano indices
27 *
28 * Copyright (C) 2015-2020 Max-Planck-Society
29 * Author: Martin Reinecke
30 */
31
32 #include "ducc0/math/space_filling.h"
33
34 namespace ducc0 {
35
36 #ifndef DUCC0_USE_PDEP_PEXT
37
38 #if 1
39
40 namespace {
41
42 const uint16_t utab[] = {
43 #define Z(a) 0x##a##0, 0x##a##1, 0x##a##4, 0x##a##5
44 #define Y(a) Z(a##0), Z(a##1), Z(a##4), Z(a##5)
45 #define X(a) Y(a##0), Y(a##1), Y(a##4), Y(a##5)
46 X(0),X(1),X(4),X(5)
47 #undef X
48 #undef Y
49 #undef Z
50 };
51 static const uint16_t ctab[] = {
52 #define Z(a) a,a+1,a+256,a+257
53 #define Y(a) Z(a),Z(a+2),Z(a+512),Z(a+514)
54 #define X(a) Y(a),Y(a+4),Y(a+1024),Y(a+1028)
55 X(0),X(8),X(2048),X(2056)
56 #undef X
57 #undef Y
58 #undef Z
59 };
60
61 }
62
spread_bits_2D_32(uint32_t v)63 uint32_t spread_bits_2D_32 (uint32_t v)
64 {
65 using I=uint32_t;
66 return I(utab[ v &0xff]) | (I(utab[(v>> 8)&0xff])<<16);
67 }
spread_bits_2D_64(uint64_t v)68 uint64_t spread_bits_2D_64 (uint64_t v)
69 {
70 using I=uint64_t;
71 return I(utab[ v &0xff]) | (I(utab[(v>> 8)&0xff])<<16)
72 | (I(utab[(v>>16)&0xff])<<32) | (I(utab[(v>>24)&0xff])<<48);
73 }
block2morton2D_32(uint32_t v)74 uint32_t block2morton2D_32 (uint32_t v)
75 {
76 using I=uint32_t;
77 return I(utab[ v &0xff]) | (I(utab[(v>> 8)&0xff])<<16)
78 | (I(utab[(v>>16)&0xff])<<1) | (I(utab[(v>>24)&0xff])<<17);
79 }
coord2morton2D_32(std::array<uint32_t,2> xy)80 uint32_t coord2morton2D_32 (std::array<uint32_t,2> xy)
81 {
82 using I=uint32_t;
83 return I(utab[xy[0]&0xff]) | (I(utab[(xy[0]>>8)&0xff])<<16)
84 | (I(utab[xy[1]&0xff])<<1) | (I(utab[(xy[1]>>8)&0xff])<<17);
85 }
morton2block2D_32(uint32_t v)86 uint32_t morton2block2D_32 (uint32_t v)
87 {
88 using I=uint32_t;
89 I raw1 = v&0x55555555, raw2 = (v>>1)&0x55555555;
90 raw1|=raw1>>15;
91 raw2|=raw2>>15;
92 return I(ctab[raw1&0xff] ) | I(ctab[(raw1>>8)&0xff]<< 4)
93 | I(ctab[raw2&0xff]<<16) | I(ctab[(raw2>>8)&0xff]<<20);
94 }
morton2coord2D_32(uint32_t v)95 std::array<uint32_t,2> morton2coord2D_32 (uint32_t v)
96 {
97 using I=uint32_t;
98 I raw1 = v&0x55555555u, raw2 = (v>>1)&0x55555555u;
99 raw1|=raw1>>15;
100 raw2|=raw2>>15;
101 return {I(ctab[raw1&0xff]) | I(ctab[(raw1>>8)&0xff]<<4),
102 I(ctab[raw2&0xff]) | I(ctab[(raw2>>8)&0xff]<<4)};
103 }
block2morton2D_64(uint64_t v)104 uint64_t block2morton2D_64 (uint64_t v)
105 {
106 using I=uint64_t;
107 return I(utab[ v &0xff]) | (I(utab[(v>> 8)&0xff])<<16)
108 | (I(utab[(v>>16)&0xff])<<32) | (I(utab[(v>>24)&0xff])<<48)
109 | (I(utab[(v>>32)&0xff])<< 1) | (I(utab[(v>>40)&0xff])<<17)
110 | (I(utab[(v>>48)&0xff])<<33) | (I(utab[(v>>56)&0xff])<<49);
111 }
coord2morton2D_64(std::array<uint64_t,2> xy)112 uint64_t coord2morton2D_64 (std::array<uint64_t,2> xy)
113 {
114 using I=uint64_t;
115 return I(utab[ xy[0] &0xff]) | (I(utab[(xy[0]>> 8)&0xff])<<16)
116 | (I(utab[(xy[0]>>16)&0xff])<<32) | (I(utab[(xy[0]>>24)&0xff])<<48)
117 | (I(utab[ xy[1] &0xff])<< 1) | (I(utab[(xy[1]>> 8)&0xff])<<17)
118 | (I(utab[(xy[1]>>16)&0xff])<<33) | (I(utab[(xy[1]>>24)&0xff])<<49);
119 }
morton2block2D_64(uint64_t v)120 uint64_t morton2block2D_64 (uint64_t v)
121 {
122 using I=uint64_t;
123 I raw1 = v&0x5555555555555555, raw2 = (v>>1)&0x5555555555555555;
124 raw1|=raw1>>15;
125 raw2|=raw2>>15;
126 return I(ctab[ raw1 &0xff]) | (I(ctab[(raw1>> 8)&0xff])<< 4)
127 | (I(ctab[(raw1>>32)&0xff])<<16) | (I(ctab[(raw1>>40)&0xff])<<20)
128 | (I(ctab[ raw2 &0xff])<<32) | (I(ctab[(raw2>> 8)&0xff])<<36)
129 | (I(ctab[(raw2>>32)&0xff])<<48) | (I(ctab[(raw2>>40)&0xff])<<52);
130 }
morton2coord2D_64(uint64_t v)131 std::array<uint64_t,2> morton2coord2D_64 (uint64_t v)
132 {
133 using I=uint64_t;
134 I raw1 = v&0x5555555555555555, raw2 = (v>>1)&0x5555555555555555;
135 raw1|=raw1>>15;
136 raw2|=raw2>>15;
137 return { I(ctab[ raw1 &0xff]) | (I(ctab[(raw1>> 8)&0xff])<< 4)
138 | (I(ctab[(raw1>>32)&0xff])<<16) | (I(ctab[(raw1>>40)&0xff])<<20),
139 I(ctab[ raw2 &0xff]) | (I(ctab[(raw2>> 8)&0xff])<< 4)
140 | (I(ctab[(raw2>>32)&0xff])<<16) | (I(ctab[(raw2>>40)&0xff])<<20)};
141 }
142
143 #else
144 // alternative implementation, usually slower
145
146 static inline uint64_t spread2D_64 (uint64_t v)
147 {
148 v&=0xffffffffu;
149 v = (v|(v<<16)) & 0x0000ffff0000ffffu;
150 v = (v|(v<< 8)) & 0x00ff00ff00ff00ffu;
151 v = (v|(v<< 4)) & 0x0f0f0f0f0f0f0f0fu;
152 v = (v|(v<< 2)) & 0x3333333333333333u;
153 v = (v|(v<< 1)) & 0x5555555555555555u;
154 return v;
155 }
156 static inline uint64_t compress2D_64 (uint64_t v)
157 {
158 v&=0x5555555555555555u;
159 v = (v|(v>> 1)) & 0x3333333333333333u;
160 v = (v|(v>> 2)) & 0x0f0f0f0f0f0f0f0fu;
161 v = (v|(v>> 4)) & 0x00ff00ff00ff00ffu;
162 v = (v|(v>> 8)) & 0x0000ffff0000ffffu;
163 v = (v|(v>>16)) & 0x00000000ffffffffu;
164 return v;
165 }
166
167 uint32_t block2morton2D_32 (uint32_t v)
168 { uint64_t t=spread2D_64(v); return (t | (t>>31)) & 0xffffffffu; }
169 uint32_t coord2morton2D_32 (uint32_t x, uint32_t y)
170 {
171 uint64_t t=spread2D_64((x&0xffff)|(y<<16));
172 return (t | (t>>31)) & 0xffffffffu;
173 }
174 uint32_t morton2block2D_32 (uint32_t v)
175 { uint64_t t=v; t|=t<<31; t=compress2D_64(t); return t; }
176 void morton2coord2D_32 (uint32_t v, uint32_t *x, uint32_t *y)
177 {
178 uint64_t t=v; t|=t<<31; t=compress2D_64(t);
179 *x = t&0xffff;
180 *y = t>>16;
181 }
182 uint64_t block2morton2D_64 (uint64_t v)
183 { return spread2D_64(v) | (spread2D_64(v>>32)<<1); }
184 uint64_t coord2morton2D_64 (uint64_t x, uint64_t y)
185 { return spread2D_64(x) | (spread2D_64(y)<<1); }
186 uint64_t morton2block2D_64 (uint64_t v)
187 { return compress2D_64(v) | (compress2D_64(v>>1)<<32); }
188 void morton2coord2D_64 (uint64_t v, uint64_t *x, uint64_t *y)
189 { *x = compress2D_64(v); *y = compress2D_64(v>>1); }
190
191 #endif
192
193 namespace {
194
spread3D_32(uint32_t v)195 inline uint32_t spread3D_32 (uint32_t v)
196 {
197 v&=0x3ff;
198 v = (v|(v<< 8)|(v<<16)) & 0x0f00f00fu;
199 v = (v|(v<< 4)) & 0xc30c30c3u;
200 v = (v|(v<< 2)) & 0x49249249u;
201 return v;
202 }
compress3D_32(uint32_t v)203 inline uint32_t compress3D_32 (uint32_t v)
204 {
205 v&=0x9249249u;
206 v = (v|(v>> 2)) & 0xc30c30c3u;
207 v = (v|(v>> 4)) & 0x0f00f00fu;
208 v = (v|(v>> 8)|(v>>16)) & 0x3ffu;
209 return v;
210 }
spread3D_64(uint64_t v)211 inline uint64_t spread3D_64 (uint64_t v)
212 {
213 v&=0x1fffff;
214 v = (v|(v<<16)|(v<<32)) & 0x00ff0000ff0000ffu;
215 v = (v|(v<< 8)) & 0xf00f00f00f00f00fu;
216 v = (v|(v<< 4)) & 0x30c30c30c30c30c3u;
217 v = (v|(v<< 2)) & 0x9249249249249249u;
218 return v;
219 }
compress3D_64(uint64_t v)220 inline uint64_t compress3D_64 (uint64_t v)
221 {
222 v&=0x1249249249249249u;
223 v=(v|(v>> 2)) & 0x30c30c30c30c30c3u;
224 v=(v|(v>> 4)) & 0xf00f00f00f00f00fu;
225 v=(v|(v>> 8)) & 0x00ff0000ff0000ffu;
226 v=(v|(v>>16)|(v>>32)) & 0x1fffffu;
227 return v;
228 }
229
230 }
231
block2morton3D_32(uint32_t v)232 uint32_t block2morton3D_32 (uint32_t v)
233 {
234 uint32_t v2=v&0xfffff;
235 uint64_t v3=spread3D_64(v2);
236 v3=(v3|(v3>>29))&0x1fffffff;
237 return uint32_t(v3)|(spread3D_32(v>>20)<<2);
238 }
coord2morton3D_32(std::array<uint32_t,3> xyz)239 uint32_t coord2morton3D_32 (std::array<uint32_t,3> xyz)
240 {
241 uint32_t v2=(xyz[0]&0x3ff)|((xyz[1]&0x3ff)<<10);
242 uint64_t v3=spread3D_64(v2);
243 v3=(v3|(v3>>29))&0x1fffffff;
244 return uint32_t(v3)|(spread3D_32(xyz[2]&0x3ff)<<2);
245 }
morton2block3D_32(uint32_t v)246 uint32_t morton2block3D_32 (uint32_t v)
247 {
248 return compress3D_32(v)
249 | (compress3D_32(v>>1)<<10)
250 | (compress3D_32(v>>2)<<20);
251 }
morton2coord3D_32(uint32_t v)252 std::array<uint32_t,3> morton2coord3D_32 (uint32_t v)
253 {
254 return {compress3D_32(v),
255 compress3D_32(v>>1),
256 compress3D_32(v>>2)};
257 }
258
block2morton3D_64(uint64_t v)259 uint64_t block2morton3D_64 (uint64_t v)
260 {
261 return spread3D_64(v)
262 | (spread3D_64(v>>21)<<1)
263 | (spread3D_64(v>>42)<<2);
264 }
coord2morton3D_64(std::array<uint64_t,3> xyz)265 uint64_t coord2morton3D_64 (std::array<uint64_t,3> xyz)
266 {
267 return spread3D_64(xyz[0])
268 | (spread3D_64(xyz[1])<<1)
269 | (spread3D_64(xyz[2])<<2);
270 }
morton2block3D_64(uint64_t v)271 uint64_t morton2block3D_64 (uint64_t v)
272 {
273 return compress3D_64(v)
274 | (compress3D_64(v>>1)<<21)
275 | (compress3D_64(v>>2)<<42);
276 }
morton2coord3D_64(uint64_t v)277 std::array<uint64_t,3> morton2coord3D_64 (uint64_t v)
278 {
279 return { compress3D_64(v),
280 compress3D_64(v>>1),
281 compress3D_64(v>>2)};
282 }
283
284 #endif
285
286 namespace {
287
288 const uint8_t m2p3D[24][8]={
289 {144,119, 97,110, 43, 44, 98,109},
290 { 36, 35,101,106,127,152,102,105},
291 { 96, 27,135, 28,161,162,182,181},
292 {170,169,189,190, 19,104, 20,143},
293 {134,137,151,112,133,138, 12, 11},
294 {130,141, 3, 4,129,142,120,159},
295 {174,173,185,186,103, 60,128, 59},
296 { 52,111, 51,136,165,166,178,177},
297 {118,167,117, 92,153,168,154, 91},
298 {160,145, 83,146,175,126, 84,125},
299 {114, 75,113,176,157, 76,158,191},
300 { 68,149,183,150, 67,122,184,121},
301 { 16,139, 1, 2, 55,140, 14, 13},
302 {132, 63, 5, 6,131, 24, 10, 9},
303 { 70, 7, 81, 32, 69,124, 82,123},
304 {116, 77,115, 90, 15, 78, 40, 89},
305 { 38, 37, 23,108, 41, 42, 48,107},
306 { 34, 33, 99, 56, 45, 46,100, 31},
307 { 0, 73, 39, 94,155, 74,156, 93},
308 { 66,147, 85,148, 65, 8, 86, 47},
309 { 72, 71,187,188, 17, 62, 18, 61},
310 { 54, 25, 53, 26, 79, 64,180,179},
311 {172,171, 95, 80, 21, 58, 22, 57},
312 { 50, 29, 49, 30,163,164, 88, 87}};
313
314 const uint8_t p2m3D[24][8]={
315 {144, 98,102, 44, 45,111,107,113},
316 {157,111,107, 33, 32, 98,102,124},
317 { 96,164,165, 25, 27,183,182,130},
318 {109,169,168, 20, 22,186,187,143},
319 {115,137,141, 15, 14,132,128,146},
320 {126,132,128, 2, 3,137,141,159},
321 {134,186,187, 63, 61,169,168,100},
322 {139,183,182, 50, 48,164,165,105},
323 {173,156,158, 95, 91,114,112,161},
324 {160,145,147, 82, 86,127,125,172},
325 {179,114,112, 73, 77,156,158,191},
326 {190,127,125, 68, 64,145,147,178},
327 { 16, 2, 3,137,141, 15, 14, 52},
328 { 29, 15, 14,132,128, 2, 3, 57},
329 { 35, 82, 86,127,125, 68, 64, 1},
330 { 46, 95, 91,114,112, 73, 77, 12},
331 { 54, 44, 45,111,107, 33, 32, 18},
332 { 59, 33, 32, 98,102, 44, 45, 31},
333 { 0, 73, 77,156,158, 95, 91, 34},
334 { 13, 68, 64,145,147, 82, 86, 47},
335 { 72, 20, 22,186,187, 63, 61, 65},
336 { 69, 25, 27,183,182, 50, 48, 76},
337 { 83, 63, 61,169,168, 20, 22, 90},
338 { 94, 50, 48,164,165, 25, 27, 87}};
339
340 const uint8_t m2p2D_1[4][4] = {
341 { 4, 1, 11, 2},{0,15, 5, 6},{10,9,3,12},{14,7,13,8}};
342 uint8_t m2p2D_3[4][64];
343 const uint8_t p2m2D_1[4][4] = {
344 { 4, 1, 3, 10},{0,6,7,13},{15,9,8,2},{11,14,12,5}};
345 uint8_t p2m2D_3[4][64];
346 bool peano2d_done=false;
347
init_peano2d(void)348 void init_peano2d (void)
349 {
350 peano2d_done=true;
351
352 for (unsigned d=0; d<4; ++d)
353 for (uint32_t p=0; p<64; ++p)
354 {
355 unsigned rot = d;
356 uint32_t v = p<<26;
357 uint32_t res = 0;
358 for (unsigned i=0; i<3; ++i)
359 {
360 unsigned tab=m2p2D_1[rot][v>>30];
361 v<<=2;
362 res = (res<<2) | (tab&0x3);
363 rot = tab>>2;
364 }
365 m2p2D_3[d][p]=res|(rot<<6);
366 }
367 for (unsigned d=0; d<4; ++d)
368 for (uint32_t p=0; p<64; ++p)
369 {
370 unsigned rot = d;
371 uint32_t v = p<<26;
372 uint32_t res = 0;
373 for (unsigned i=0; i<3; ++i)
374 {
375 unsigned tab=p2m2D_1[rot][v>>30];
376 v<<=2;
377 res = (res<<2) | (tab&0x3);
378 rot = tab>>2;
379 }
380 p2m2D_3[d][p]=res|(rot<<6);
381 }
382 }
383
384 }
385
morton2peano3D_32(uint32_t v,unsigned bits)386 uint32_t morton2peano3D_32(uint32_t v, unsigned bits)
387 {
388 unsigned rot = 0;
389 uint32_t res = 0;
390 v<<=3*(10-bits)+2;
391 for (unsigned i=0; i<bits; ++i)
392 {
393 unsigned tab=m2p3D[rot][v>>29];
394 v<<=3;
395 res = (res<<3) | (tab&0x7);
396 rot = tab>>3;
397 }
398 return res;
399 }
peano2morton3D_32(uint32_t v,unsigned bits)400 uint32_t peano2morton3D_32(uint32_t v, unsigned bits)
401 {
402 unsigned rot = 0;
403 uint32_t res = 0;
404 v<<=3*(10-bits)+2;
405 for (unsigned i=0; i<bits; ++i)
406 {
407 unsigned tab=p2m3D[rot][v>>29];
408 v<<=3;
409 res = (res<<3) | (tab&0x7);
410 rot = tab>>3;
411 }
412 return res;
413 }
414
morton2peano3D_64(uint64_t v,unsigned bits)415 uint64_t morton2peano3D_64(uint64_t v, unsigned bits)
416 {
417 unsigned rot = 0;
418 uint64_t res = 0;
419 v<<=3*(21-bits)+1;
420 for (unsigned i=0; i<bits; ++i)
421 {
422 unsigned tab=m2p3D[rot][v>>61];
423 v<<=3;
424 res = (res<<3) | (tab&0x7);
425 rot = tab>>3;
426 }
427 return res;
428 }
peano2morton3D_64(uint64_t v,unsigned bits)429 uint64_t peano2morton3D_64(uint64_t v, unsigned bits)
430 {
431 unsigned rot = 0;
432 uint64_t res = 0;
433 v<<=3*(21-bits)+1;
434 for (unsigned i=0; i<bits; ++i)
435 {
436 unsigned tab=p2m3D[rot][v>>61];
437 v<<=3;
438 res = (res<<3) | (tab&0x7);
439 rot = tab>>3;
440 }
441 return res;
442 }
443
morton2peano2D_32(uint32_t v,unsigned bits)444 uint32_t morton2peano2D_32(uint32_t v, unsigned bits)
445 {
446 if (!peano2d_done) init_peano2d();
447 unsigned rot = 0;
448 uint32_t res = 0;
449 v<<=32-(bits<<1);
450 unsigned i=0;
451 for (; i+2<bits; i+=3)
452 {
453 unsigned tab=m2p2D_3[rot][v>>26];
454 v<<=6;
455 res = (res<<6) | (tab&0x3fu);
456 rot = tab>>6;
457 }
458 for (; i<bits; ++i)
459 {
460 unsigned tab=m2p2D_1[rot][v>>30];
461 v<<=2;
462 res = (res<<2) | (tab&0x3);
463 rot = tab>>2;
464 }
465 return res;
466 }
peano2morton2D_32(uint32_t v,unsigned bits)467 uint32_t peano2morton2D_32(uint32_t v, unsigned bits)
468 {
469 if (!peano2d_done) init_peano2d();
470 unsigned rot = 0;
471 uint32_t res = 0;
472 v<<=32-(bits<<1);
473 unsigned i=0;
474 for (; i+2<bits; i+=3)
475 {
476 unsigned tab=p2m2D_3[rot][v>>26];
477 v<<=6;
478 res = (res<<6) | (tab&0x3fu);
479 rot = tab>>6;
480 }
481 for (; i<bits; ++i)
482 {
483 unsigned tab=p2m2D_1[rot][v>>30];
484 v<<=2;
485 res = (res<<2) | (tab&0x3);
486 rot = tab>>2;
487 }
488 return res;
489 }
morton2peano2D_64(uint64_t v,unsigned bits)490 uint64_t morton2peano2D_64(uint64_t v, unsigned bits)
491 {
492 if (!peano2d_done) init_peano2d();
493 unsigned rot = 0;
494 uint64_t res = 0;
495 v<<=64-(bits<<1);
496 unsigned i=0;
497 for (; i+2<bits; i+=3)
498 {
499 unsigned tab=m2p2D_3[rot][v>>58];
500 v<<=6;
501 res = (res<<6) | (tab&0x3fu);
502 rot = tab>>6;
503 }
504 for (; i<bits; ++i)
505 {
506 unsigned tab=m2p2D_1[rot][v>>62];
507 v<<=2;
508 res = (res<<2) | (tab&0x3);
509 rot = tab>>2;
510 }
511 return res;
512 }
peano2morton2D_64(uint64_t v,unsigned bits)513 uint64_t peano2morton2D_64(uint64_t v, unsigned bits)
514 {
515 if (!peano2d_done) init_peano2d();
516 unsigned rot = 0;
517 uint64_t res = 0;
518 v<<=64-(bits<<1);
519 unsigned i=0;
520 for (; i+2<bits; i+=3)
521 {
522 unsigned tab=p2m2D_3[rot][v>>58];
523 v<<=6;
524 res = (res<<6) | (tab&0x3fu);
525 rot = tab>>6;
526 }
527 for (; i<bits; ++i)
528 {
529 unsigned tab=p2m2D_1[rot][v>>62];
530 v<<=2;
531 res = (res<<2) | (tab&0x3);
532 rot = tab>>2;
533 }
534 return res;
535 }
536
537 }
538