1 /*
2  *  This file is part of libc_utils.
3  *
4  *  libc_utils is free software; you can redistribute it and/or modify
5  *  it under the terms of the GNU General Public License as published by
6  *  the Free Software Foundation; either version 2 of the License, or
7  *  (at your option) any later version.
8  *
9  *  libc_utils is distributed in the hope that it will be useful,
10  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
11  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  *  GNU General Public License for more details.
13  *
14  *  You should have received a copy of the GNU General Public License
15  *  along with libc_utils; if not, write to the Free Software
16  *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17  */
18 
19 /*
20  *  libc_utils is being developed at the Max-Planck-Institut fuer Astrophysik
21  *  and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
22  *  (DLR).
23  */
24 
25 /*
26  *  Utilities for conversion between coordinates, Morton, and Peano indices
27  *
28  *  Copyright (C) 2015-2020 Max-Planck-Society
29  *  Author: Martin Reinecke
30  */
31 
32 #include "ducc0/math/space_filling.h"
33 
34 namespace ducc0 {
35 
36 #ifndef DUCC0_USE_PDEP_PEXT
37 
38 #if 1
39 
40 namespace {
41 
42 const uint16_t utab[] = {
43 #define Z(a) 0x##a##0, 0x##a##1, 0x##a##4, 0x##a##5
44 #define Y(a) Z(a##0), Z(a##1), Z(a##4), Z(a##5)
45 #define X(a) Y(a##0), Y(a##1), Y(a##4), Y(a##5)
46 X(0),X(1),X(4),X(5)
47 #undef X
48 #undef Y
49 #undef Z
50 };
51 static const uint16_t ctab[] = {
52 #define Z(a) a,a+1,a+256,a+257
53 #define Y(a) Z(a),Z(a+2),Z(a+512),Z(a+514)
54 #define X(a) Y(a),Y(a+4),Y(a+1024),Y(a+1028)
55 X(0),X(8),X(2048),X(2056)
56 #undef X
57 #undef Y
58 #undef Z
59 };
60 
61 }
62 
spread_bits_2D_32(uint32_t v)63 uint32_t spread_bits_2D_32 (uint32_t v)
64   {
65   using I=uint32_t;
66   return  I(utab[ v     &0xff])     | (I(utab[(v>> 8)&0xff])<<16);
67   }
spread_bits_2D_64(uint64_t v)68 uint64_t spread_bits_2D_64 (uint64_t v)
69   {
70   using I=uint64_t;
71   return  I(utab[ v     &0xff])      | (I(utab[(v>> 8)&0xff])<<16)
72        | (I(utab[(v>>16)&0xff])<<32) | (I(utab[(v>>24)&0xff])<<48);
73   }
block2morton2D_32(uint32_t v)74 uint32_t block2morton2D_32 (uint32_t v)
75   {
76   using I=uint32_t;
77   return  I(utab[ v     &0xff])     | (I(utab[(v>> 8)&0xff])<<16)
78        | (I(utab[(v>>16)&0xff])<<1) | (I(utab[(v>>24)&0xff])<<17);
79   }
coord2morton2D_32(std::array<uint32_t,2> xy)80 uint32_t coord2morton2D_32 (std::array<uint32_t,2> xy)
81   {
82   using I=uint32_t;
83   return  I(utab[xy[0]&0xff])     | (I(utab[(xy[0]>>8)&0xff])<<16)
84        | (I(utab[xy[1]&0xff])<<1) | (I(utab[(xy[1]>>8)&0xff])<<17);
85   }
morton2block2D_32(uint32_t v)86 uint32_t morton2block2D_32 (uint32_t v)
87   {
88   using I=uint32_t;
89   I raw1 = v&0x55555555, raw2 = (v>>1)&0x55555555;
90   raw1|=raw1>>15;
91   raw2|=raw2>>15;
92   return I(ctab[raw1&0xff]    ) | I(ctab[(raw1>>8)&0xff]<< 4)
93        | I(ctab[raw2&0xff]<<16) | I(ctab[(raw2>>8)&0xff]<<20);
94   }
morton2coord2D_32(uint32_t v)95 std::array<uint32_t,2> morton2coord2D_32 (uint32_t v)
96   {
97   using I=uint32_t;
98   I raw1 = v&0x55555555u, raw2 = (v>>1)&0x55555555u;
99   raw1|=raw1>>15;
100   raw2|=raw2>>15;
101   return {I(ctab[raw1&0xff]) | I(ctab[(raw1>>8)&0xff]<<4),
102           I(ctab[raw2&0xff]) | I(ctab[(raw2>>8)&0xff]<<4)};
103   }
block2morton2D_64(uint64_t v)104 uint64_t block2morton2D_64 (uint64_t v)
105   {
106   using I=uint64_t;
107   return  I(utab[ v     &0xff])      | (I(utab[(v>> 8)&0xff])<<16)
108        | (I(utab[(v>>16)&0xff])<<32) | (I(utab[(v>>24)&0xff])<<48)
109        | (I(utab[(v>>32)&0xff])<< 1) | (I(utab[(v>>40)&0xff])<<17)
110        | (I(utab[(v>>48)&0xff])<<33) | (I(utab[(v>>56)&0xff])<<49);
111   }
coord2morton2D_64(std::array<uint64_t,2> xy)112 uint64_t coord2morton2D_64 (std::array<uint64_t,2> xy)
113   {
114   using I=uint64_t;
115   return  I(utab[ xy[0]     &0xff])      | (I(utab[(xy[0]>> 8)&0xff])<<16)
116        | (I(utab[(xy[0]>>16)&0xff])<<32) | (I(utab[(xy[0]>>24)&0xff])<<48)
117        | (I(utab[ xy[1]     &0xff])<< 1) | (I(utab[(xy[1]>> 8)&0xff])<<17)
118        | (I(utab[(xy[1]>>16)&0xff])<<33) | (I(utab[(xy[1]>>24)&0xff])<<49);
119   }
morton2block2D_64(uint64_t v)120 uint64_t morton2block2D_64 (uint64_t v)
121   {
122   using I=uint64_t;
123   I raw1 = v&0x5555555555555555, raw2 = (v>>1)&0x5555555555555555;
124   raw1|=raw1>>15;
125   raw2|=raw2>>15;
126   return I(ctab[ raw1     &0xff])      | (I(ctab[(raw1>> 8)&0xff])<< 4)
127       | (I(ctab[(raw1>>32)&0xff])<<16) | (I(ctab[(raw1>>40)&0xff])<<20)
128       | (I(ctab[ raw2     &0xff])<<32) | (I(ctab[(raw2>> 8)&0xff])<<36)
129       | (I(ctab[(raw2>>32)&0xff])<<48) | (I(ctab[(raw2>>40)&0xff])<<52);
130   }
morton2coord2D_64(uint64_t v)131 std::array<uint64_t,2> morton2coord2D_64 (uint64_t v)
132   {
133   using I=uint64_t;
134   I raw1 = v&0x5555555555555555, raw2 = (v>>1)&0x5555555555555555;
135   raw1|=raw1>>15;
136   raw2|=raw2>>15;
137   return { I(ctab[ raw1     &0xff])      | (I(ctab[(raw1>> 8)&0xff])<< 4)
138         | (I(ctab[(raw1>>32)&0xff])<<16) | (I(ctab[(raw1>>40)&0xff])<<20),
139            I(ctab[ raw2     &0xff])      | (I(ctab[(raw2>> 8)&0xff])<< 4)
140         | (I(ctab[(raw2>>32)&0xff])<<16) | (I(ctab[(raw2>>40)&0xff])<<20)};
141   }
142 
143 #else
144 // alternative implementation, usually slower
145 
146 static inline uint64_t spread2D_64 (uint64_t v)
147   {
148   v&=0xffffffffu;
149   v = (v|(v<<16)) & 0x0000ffff0000ffffu;
150   v = (v|(v<< 8)) & 0x00ff00ff00ff00ffu;
151   v = (v|(v<< 4)) & 0x0f0f0f0f0f0f0f0fu;
152   v = (v|(v<< 2)) & 0x3333333333333333u;
153   v = (v|(v<< 1)) & 0x5555555555555555u;
154   return v;
155   }
156 static inline uint64_t compress2D_64 (uint64_t v)
157   {
158   v&=0x5555555555555555u;
159   v = (v|(v>> 1)) & 0x3333333333333333u;
160   v = (v|(v>> 2)) & 0x0f0f0f0f0f0f0f0fu;
161   v = (v|(v>> 4)) & 0x00ff00ff00ff00ffu;
162   v = (v|(v>> 8)) & 0x0000ffff0000ffffu;
163   v = (v|(v>>16)) & 0x00000000ffffffffu;
164   return v;
165   }
166 
167 uint32_t block2morton2D_32 (uint32_t v)
168   { uint64_t t=spread2D_64(v); return (t | (t>>31)) & 0xffffffffu; }
169 uint32_t coord2morton2D_32 (uint32_t x, uint32_t y)
170   {
171   uint64_t t=spread2D_64((x&0xffff)|(y<<16));
172   return (t | (t>>31)) & 0xffffffffu;
173   }
174 uint32_t morton2block2D_32 (uint32_t v)
175   { uint64_t t=v; t|=t<<31; t=compress2D_64(t); return t; }
176 void morton2coord2D_32 (uint32_t v, uint32_t *x, uint32_t *y)
177   {
178   uint64_t t=v; t|=t<<31; t=compress2D_64(t);
179   *x = t&0xffff;
180   *y = t>>16;
181   }
182 uint64_t block2morton2D_64 (uint64_t v)
183   { return spread2D_64(v) | (spread2D_64(v>>32)<<1); }
184 uint64_t coord2morton2D_64 (uint64_t x, uint64_t y)
185   { return spread2D_64(x) | (spread2D_64(y)<<1); }
186 uint64_t morton2block2D_64 (uint64_t v)
187   { return compress2D_64(v) | (compress2D_64(v>>1)<<32); }
188 void morton2coord2D_64 (uint64_t v, uint64_t *x, uint64_t *y)
189   { *x = compress2D_64(v); *y = compress2D_64(v>>1); }
190 
191 #endif
192 
193 namespace {
194 
spread3D_32(uint32_t v)195 inline uint32_t spread3D_32 (uint32_t v)
196   {
197   v&=0x3ff;
198   v = (v|(v<< 8)|(v<<16)) & 0x0f00f00fu;
199   v = (v|(v<< 4)) & 0xc30c30c3u;
200   v = (v|(v<< 2)) & 0x49249249u;
201   return v;
202   }
compress3D_32(uint32_t v)203 inline uint32_t compress3D_32 (uint32_t v)
204   {
205   v&=0x9249249u;
206   v = (v|(v>> 2)) & 0xc30c30c3u;
207   v = (v|(v>> 4)) & 0x0f00f00fu;
208   v = (v|(v>> 8)|(v>>16)) & 0x3ffu;
209   return v;
210   }
spread3D_64(uint64_t v)211 inline uint64_t spread3D_64 (uint64_t v)
212   {
213   v&=0x1fffff;
214   v = (v|(v<<16)|(v<<32)) & 0x00ff0000ff0000ffu;
215   v = (v|(v<< 8)) & 0xf00f00f00f00f00fu;
216   v = (v|(v<< 4)) & 0x30c30c30c30c30c3u;
217   v = (v|(v<< 2)) & 0x9249249249249249u;
218   return v;
219   }
compress3D_64(uint64_t v)220 inline uint64_t compress3D_64 (uint64_t v)
221   {
222   v&=0x1249249249249249u;
223   v=(v|(v>> 2)) & 0x30c30c30c30c30c3u;
224   v=(v|(v>> 4)) & 0xf00f00f00f00f00fu;
225   v=(v|(v>> 8)) & 0x00ff0000ff0000ffu;
226   v=(v|(v>>16)|(v>>32)) & 0x1fffffu;
227   return v;
228   }
229 
230 }
231 
block2morton3D_32(uint32_t v)232 uint32_t block2morton3D_32 (uint32_t v)
233   {
234   uint32_t v2=v&0xfffff;
235   uint64_t v3=spread3D_64(v2);
236   v3=(v3|(v3>>29))&0x1fffffff;
237   return uint32_t(v3)|(spread3D_32(v>>20)<<2);
238   }
coord2morton3D_32(std::array<uint32_t,3> xyz)239 uint32_t coord2morton3D_32 (std::array<uint32_t,3> xyz)
240   {
241   uint32_t v2=(xyz[0]&0x3ff)|((xyz[1]&0x3ff)<<10);
242   uint64_t v3=spread3D_64(v2);
243   v3=(v3|(v3>>29))&0x1fffffff;
244   return uint32_t(v3)|(spread3D_32(xyz[2]&0x3ff)<<2);
245   }
morton2block3D_32(uint32_t v)246 uint32_t morton2block3D_32 (uint32_t v)
247   {
248   return compress3D_32(v)
249       | (compress3D_32(v>>1)<<10)
250       | (compress3D_32(v>>2)<<20);
251   }
morton2coord3D_32(uint32_t v)252 std::array<uint32_t,3> morton2coord3D_32 (uint32_t v)
253   {
254   return {compress3D_32(v),
255           compress3D_32(v>>1),
256           compress3D_32(v>>2)};
257   }
258 
block2morton3D_64(uint64_t v)259 uint64_t block2morton3D_64 (uint64_t v)
260   {
261   return spread3D_64(v)
262       | (spread3D_64(v>>21)<<1)
263       | (spread3D_64(v>>42)<<2);
264   }
coord2morton3D_64(std::array<uint64_t,3> xyz)265 uint64_t coord2morton3D_64 (std::array<uint64_t,3> xyz)
266   {
267   return spread3D_64(xyz[0])
268       | (spread3D_64(xyz[1])<<1)
269       | (spread3D_64(xyz[2])<<2);
270   }
morton2block3D_64(uint64_t v)271 uint64_t morton2block3D_64 (uint64_t v)
272   {
273   return compress3D_64(v)
274       | (compress3D_64(v>>1)<<21)
275       | (compress3D_64(v>>2)<<42);
276   }
morton2coord3D_64(uint64_t v)277 std::array<uint64_t,3> morton2coord3D_64 (uint64_t v)
278   {
279   return { compress3D_64(v),
280            compress3D_64(v>>1),
281            compress3D_64(v>>2)};
282   }
283 
284 #endif
285 
286 namespace {
287 
288 const uint8_t m2p3D[24][8]={
289 {144,119, 97,110, 43, 44, 98,109},
290 { 36, 35,101,106,127,152,102,105},
291 { 96, 27,135, 28,161,162,182,181},
292 {170,169,189,190, 19,104, 20,143},
293 {134,137,151,112,133,138, 12, 11},
294 {130,141,  3,  4,129,142,120,159},
295 {174,173,185,186,103, 60,128, 59},
296 { 52,111, 51,136,165,166,178,177},
297 {118,167,117, 92,153,168,154, 91},
298 {160,145, 83,146,175,126, 84,125},
299 {114, 75,113,176,157, 76,158,191},
300 { 68,149,183,150, 67,122,184,121},
301 { 16,139,  1,  2, 55,140, 14, 13},
302 {132, 63,  5,  6,131, 24, 10,  9},
303 { 70,  7, 81, 32, 69,124, 82,123},
304 {116, 77,115, 90, 15, 78, 40, 89},
305 { 38, 37, 23,108, 41, 42, 48,107},
306 { 34, 33, 99, 56, 45, 46,100, 31},
307 {  0, 73, 39, 94,155, 74,156, 93},
308 { 66,147, 85,148, 65,  8, 86, 47},
309 { 72, 71,187,188, 17, 62, 18, 61},
310 { 54, 25, 53, 26, 79, 64,180,179},
311 {172,171, 95, 80, 21, 58, 22, 57},
312 { 50, 29, 49, 30,163,164, 88, 87}};
313 
314 const uint8_t p2m3D[24][8]={
315 {144, 98,102, 44, 45,111,107,113},
316 {157,111,107, 33, 32, 98,102,124},
317 { 96,164,165, 25, 27,183,182,130},
318 {109,169,168, 20, 22,186,187,143},
319 {115,137,141, 15, 14,132,128,146},
320 {126,132,128,  2,  3,137,141,159},
321 {134,186,187, 63, 61,169,168,100},
322 {139,183,182, 50, 48,164,165,105},
323 {173,156,158, 95, 91,114,112,161},
324 {160,145,147, 82, 86,127,125,172},
325 {179,114,112, 73, 77,156,158,191},
326 {190,127,125, 68, 64,145,147,178},
327 { 16,  2,  3,137,141, 15, 14, 52},
328 { 29, 15, 14,132,128,  2,  3, 57},
329 { 35, 82, 86,127,125, 68, 64,  1},
330 { 46, 95, 91,114,112, 73, 77, 12},
331 { 54, 44, 45,111,107, 33, 32, 18},
332 { 59, 33, 32, 98,102, 44, 45, 31},
333 {  0, 73, 77,156,158, 95, 91, 34},
334 { 13, 68, 64,145,147, 82, 86, 47},
335 { 72, 20, 22,186,187, 63, 61, 65},
336 { 69, 25, 27,183,182, 50, 48, 76},
337 { 83, 63, 61,169,168, 20, 22, 90},
338 { 94, 50, 48,164,165, 25, 27, 87}};
339 
340 const uint8_t m2p2D_1[4][4] = {
341 { 4, 1, 11, 2},{0,15, 5, 6},{10,9,3,12},{14,7,13,8}};
342 uint8_t m2p2D_3[4][64];
343 const uint8_t p2m2D_1[4][4] = {
344 { 4, 1, 3, 10},{0,6,7,13},{15,9,8,2},{11,14,12,5}};
345 uint8_t p2m2D_3[4][64];
346 bool peano2d_done=false;
347 
init_peano2d(void)348 void init_peano2d (void)
349   {
350   peano2d_done=true;
351 
352   for (unsigned d=0; d<4; ++d)
353     for (uint32_t p=0; p<64; ++p)
354       {
355       unsigned rot = d;
356       uint32_t v = p<<26;
357       uint32_t res = 0;
358       for (unsigned i=0; i<3; ++i)
359         {
360         unsigned tab=m2p2D_1[rot][v>>30];
361         v<<=2;
362         res = (res<<2) | (tab&0x3);
363         rot = tab>>2;
364         }
365       m2p2D_3[d][p]=res|(rot<<6);
366       }
367   for (unsigned d=0; d<4; ++d)
368     for (uint32_t p=0; p<64; ++p)
369       {
370       unsigned rot = d;
371       uint32_t v = p<<26;
372       uint32_t res = 0;
373       for (unsigned i=0; i<3; ++i)
374         {
375         unsigned tab=p2m2D_1[rot][v>>30];
376         v<<=2;
377         res = (res<<2) | (tab&0x3);
378         rot = tab>>2;
379         }
380       p2m2D_3[d][p]=res|(rot<<6);
381       }
382   }
383 
384 }
385 
morton2peano3D_32(uint32_t v,unsigned bits)386 uint32_t morton2peano3D_32(uint32_t v, unsigned bits)
387   {
388   unsigned rot = 0;
389   uint32_t res = 0;
390   v<<=3*(10-bits)+2;
391   for (unsigned i=0; i<bits; ++i)
392     {
393     unsigned tab=m2p3D[rot][v>>29];
394     v<<=3;
395     res = (res<<3) | (tab&0x7);
396     rot = tab>>3;
397     }
398   return res;
399   }
peano2morton3D_32(uint32_t v,unsigned bits)400 uint32_t peano2morton3D_32(uint32_t v, unsigned bits)
401   {
402   unsigned rot = 0;
403   uint32_t res = 0;
404   v<<=3*(10-bits)+2;
405   for (unsigned i=0; i<bits; ++i)
406     {
407     unsigned tab=p2m3D[rot][v>>29];
408     v<<=3;
409     res = (res<<3) | (tab&0x7);
410     rot = tab>>3;
411     }
412   return res;
413   }
414 
morton2peano3D_64(uint64_t v,unsigned bits)415 uint64_t morton2peano3D_64(uint64_t v, unsigned bits)
416   {
417   unsigned rot = 0;
418   uint64_t res = 0;
419   v<<=3*(21-bits)+1;
420   for (unsigned i=0; i<bits; ++i)
421     {
422     unsigned tab=m2p3D[rot][v>>61];
423     v<<=3;
424     res = (res<<3) | (tab&0x7);
425     rot = tab>>3;
426     }
427   return res;
428   }
peano2morton3D_64(uint64_t v,unsigned bits)429 uint64_t peano2morton3D_64(uint64_t v, unsigned bits)
430   {
431   unsigned rot = 0;
432   uint64_t res = 0;
433   v<<=3*(21-bits)+1;
434   for (unsigned i=0; i<bits; ++i)
435     {
436     unsigned tab=p2m3D[rot][v>>61];
437     v<<=3;
438     res = (res<<3) | (tab&0x7);
439     rot = tab>>3;
440     }
441   return res;
442   }
443 
morton2peano2D_32(uint32_t v,unsigned bits)444 uint32_t morton2peano2D_32(uint32_t v, unsigned bits)
445   {
446   if (!peano2d_done) init_peano2d();
447   unsigned rot = 0;
448   uint32_t res = 0;
449   v<<=32-(bits<<1);
450   unsigned i=0;
451   for (; i+2<bits; i+=3)
452     {
453     unsigned tab=m2p2D_3[rot][v>>26];
454     v<<=6;
455     res = (res<<6) | (tab&0x3fu);
456     rot = tab>>6;
457     }
458   for (; i<bits; ++i)
459     {
460     unsigned tab=m2p2D_1[rot][v>>30];
461     v<<=2;
462     res = (res<<2) | (tab&0x3);
463     rot = tab>>2;
464     }
465   return res;
466   }
peano2morton2D_32(uint32_t v,unsigned bits)467 uint32_t peano2morton2D_32(uint32_t v, unsigned bits)
468   {
469   if (!peano2d_done) init_peano2d();
470   unsigned rot = 0;
471   uint32_t res = 0;
472   v<<=32-(bits<<1);
473   unsigned i=0;
474   for (; i+2<bits; i+=3)
475     {
476     unsigned tab=p2m2D_3[rot][v>>26];
477     v<<=6;
478     res = (res<<6) | (tab&0x3fu);
479     rot = tab>>6;
480     }
481   for (; i<bits; ++i)
482     {
483     unsigned tab=p2m2D_1[rot][v>>30];
484     v<<=2;
485     res = (res<<2) | (tab&0x3);
486     rot = tab>>2;
487     }
488   return res;
489   }
morton2peano2D_64(uint64_t v,unsigned bits)490 uint64_t morton2peano2D_64(uint64_t v, unsigned bits)
491   {
492   if (!peano2d_done) init_peano2d();
493   unsigned rot = 0;
494   uint64_t res = 0;
495   v<<=64-(bits<<1);
496   unsigned i=0;
497   for (; i+2<bits; i+=3)
498     {
499     unsigned tab=m2p2D_3[rot][v>>58];
500     v<<=6;
501     res = (res<<6) | (tab&0x3fu);
502     rot = tab>>6;
503     }
504   for (; i<bits; ++i)
505     {
506     unsigned tab=m2p2D_1[rot][v>>62];
507     v<<=2;
508     res = (res<<2) | (tab&0x3);
509     rot = tab>>2;
510     }
511   return res;
512   }
peano2morton2D_64(uint64_t v,unsigned bits)513 uint64_t peano2morton2D_64(uint64_t v, unsigned bits)
514   {
515   if (!peano2d_done) init_peano2d();
516   unsigned rot = 0;
517   uint64_t res = 0;
518   v<<=64-(bits<<1);
519   unsigned i=0;
520   for (; i+2<bits; i+=3)
521     {
522     unsigned tab=p2m2D_3[rot][v>>58];
523     v<<=6;
524     res = (res<<6) | (tab&0x3fu);
525     rot = tab>>6;
526     }
527   for (; i<bits; ++i)
528     {
529     unsigned tab=p2m2D_1[rot][v>>62];
530     v<<=2;
531     res = (res<<2) | (tab&0x3);
532     rot = tab>>2;
533     }
534   return res;
535   }
536 
537 }
538