1 /* 2 =============================================================================== 3 4 FILE: io.hpp 5 6 CONTENTS: 7 LAZ io 8 9 PROGRAMMERS: 10 11 martin.isenburg@rapidlasso.com - http://rapidlasso.com 12 uday.karan@gmail.com - Hobu, Inc. 13 14 COPYRIGHT: 15 16 (c) 2007-2014, martin isenburg, rapidlasso - tools to catch reality 17 (c) 2014, Uday Verma, Hobu, Inc. 18 19 This is free software; you can redistribute and/or modify it under the 20 terms of the GNU Lesser General Licence as published by the Free Software 21 Foundation. See the COPYING file for more information. 22 23 This software is distributed WITHOUT ANY WARRANTY and without even the 24 implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 25 26 CHANGE HISTORY: 27 28 =============================================================================== 29 */ 30 31 #ifndef __io_hpp__ 32 #define __io_hpp__ 33 34 #include <fstream> 35 #include <functional> 36 #include <limits> 37 #include <string.h> 38 #include <mutex> 39 40 #include "formats.hpp" 41 #include "excepts.hpp" 42 #include "factory.hpp" 43 #include "decoder.hpp" 44 #include "encoder.hpp" 45 #include "util.hpp" 46 #include "portable_endian.hpp" 47 48 namespace laszip { 49 // A simple datastructure to get input from the user 50 template< 51 typename T 52 > 53 struct vector3 { 54 T x, y, z; 55 vector3laszip::vector356 vector3() : x(0), y(0), z(0) {} vector3laszip::vector357 vector3(const T& _x, const T& _y, const T& _z) : 58 x(_x), y(_y), z(_z) { 59 } 60 61 }; 62 63 #define DefaultChunkSize 50000 64 65 namespace io { 66 // LAZ file header 67 #pragma pack(push, 1) 68 struct header { 69 char magic[4]; 70 unsigned short file_source_id; 71 unsigned short global_encoding; 72 char guid[16]; 73 74 struct { 75 unsigned char major; 76 unsigned char minor; 77 } version; 78 79 char system_identifier[32]; 80 char generating_software[32]; 81 82 struct { 83 unsigned short day; 84 unsigned short year; 85 } creation; 86 87 unsigned short header_size; 88 unsigned int point_offset; 89 unsigned int vlr_count; 90 91 unsigned char point_format_id; 92 unsigned short point_record_length; 93 94 unsigned int point_count; 95 unsigned int points_by_return[5]; 96 97 struct { 98 double x, y, z; 99 } scale; 100 101 struct { 102 double x, y, z; 103 } offset; 104 105 struct { 106 double x, y, z; 107 } minimum; 108 109 struct { 110 double x, y, z; 111 } maximum; 112 }; 113 114 // A Single LAZ Item representation 115 struct laz_item { 116 unsigned short type, 117 size, 118 version; 119 }; 120 121 struct laz_vlr { 122 uint16_t compressor; 123 uint16_t coder; 124 125 struct { 126 unsigned char major; 127 unsigned char minor; 128 uint16_t revision; 129 } version; 130 131 uint32_t options; 132 uint32_t chunk_size; 133 134 int64_t num_points, 135 num_bytes; 136 137 uint16_t num_items; 138 laz_item *items; laz_vlrlaszip::io::laz_vlr139 laz_vlr() : num_items(0), items(NULL) {} ~laz_vlrlaszip::io::laz_vlr140 ~laz_vlr() { 141 delete [] items; 142 } 143 laz_vlrlaszip::io::laz_vlr144 laz_vlr(const char *data) { 145 items = NULL; 146 fill(data); 147 } 148 sizelaszip::io::laz_vlr149 size_t size() const { 150 return sizeof(laz_vlr) - sizeof(laz_item *) + 151 (num_items * sizeof(laz_item)); 152 } 153 laz_vlrlaszip::io::laz_vlr154 laz_vlr(const laz_vlr& rhs) { 155 compressor = rhs.compressor; 156 coder = rhs.coder; 157 158 // the version we're compatible with 159 version.major = rhs.version.major; 160 version.minor = rhs.version.minor; 161 version.revision = rhs.version.revision; 162 163 options = rhs.options; 164 chunk_size = rhs.chunk_size; 165 166 num_points = rhs.num_points; 167 num_bytes = rhs.num_bytes; 168 169 num_items = rhs.num_items; 170 if (rhs.items) { 171 items = new laz_item[num_items]; 172 for (int i = 0 ; i < num_items ; i ++) { 173 items[i] = rhs.items[i]; 174 } 175 } 176 } 177 operator =laszip::io::laz_vlr178 laz_vlr& operator = (const laz_vlr& rhs) { 179 if (this == &rhs) 180 return *this; 181 182 compressor = rhs.compressor; 183 coder = rhs.coder; 184 185 // the version we're compatible with 186 version.major = rhs.version.major; 187 version.minor = rhs.version.minor; 188 version.revision = rhs.version.revision; 189 190 options = rhs.options; 191 chunk_size = rhs.chunk_size; 192 193 num_points = rhs.num_points; 194 num_bytes = rhs.num_bytes; 195 196 num_items = rhs.num_items; 197 if (rhs.items) { 198 items = new laz_item[num_items]; 199 for (int i = 0 ; i < num_items ; i ++) { 200 items[i] = rhs.items[i]; 201 } 202 } 203 204 return *this; 205 } 206 filllaszip::io::laz_vlr207 void fill(const char *data) { 208 std::copy(data, data + sizeof(compressor), (char *)&compressor); 209 compressor = le16toh(compressor); 210 data += sizeof(compressor); 211 212 std::copy(data, data + sizeof(coder), (char *)&coder); 213 coder = le16toh(coder); 214 data += sizeof(coder); 215 216 version.major = *(const unsigned char *)data++; 217 version.minor = *(const unsigned char *)data++; 218 219 std::copy(data, data + sizeof(version.revision), (char *)&version.revision); 220 version.revision = le16toh(version.revision); 221 data += sizeof(version.revision); 222 223 std::copy(data, data + sizeof(options), (char *)&options); 224 options = le32toh(options); 225 data += sizeof(options); 226 227 std::copy(data, data + sizeof(chunk_size), (char *)&chunk_size); 228 chunk_size = le32toh(chunk_size); 229 data += sizeof(chunk_size); 230 231 std::copy(data, data + sizeof(num_points), (char *)&num_points); 232 num_points = le64toh(num_points); 233 data += sizeof(num_points); 234 235 std::copy(data, data + sizeof(num_bytes), (char *)&num_bytes); 236 num_bytes = le64toh(num_bytes); 237 data += sizeof(num_bytes); 238 239 std::copy(data, data + sizeof(num_items), (char *)&num_items); 240 num_items = le16toh(num_items); 241 data += sizeof(num_items); 242 243 delete [] items; 244 items = new laz_item[num_items]; 245 for (int i = 0 ; i < num_items ; i ++) { 246 laz_item& item = items[i]; 247 248 std::copy(data, data + sizeof(item.type), (char *)&item.type); 249 item.type = le16toh(item.type); 250 data += sizeof(item.type); 251 252 std::copy(data, data + sizeof(item.size), (char *)&item.size); 253 item.size = le16toh(item.size); 254 data += sizeof(item.size); 255 256 std::copy(data, data + sizeof(item.version), (char *)&item.version); 257 item.version = le16toh(item.version); 258 data += sizeof(item.version); 259 } 260 } 261 extractlaszip::io::laz_vlr262 void extract(char *data) { 263 uint16_t s; 264 uint32_t i; 265 uint64_t ll; 266 char *src; 267 268 s = htole16(compressor); 269 src = (char *)&s; 270 std::copy(src, src + sizeof(compressor), data); 271 data += sizeof(compressor); 272 273 s = htole16(coder); 274 src = (char *)&s; 275 std::copy(src, src + sizeof(coder), data); 276 data += sizeof(coder); 277 278 *data++ = version.major; 279 *data++ = version.minor; 280 281 s = htole16(version.revision); 282 src = (char *)&s; 283 std::copy(src, src + sizeof(version.revision), data); 284 data += sizeof(version.revision); 285 286 i = htole32(options); 287 src = (char *)&i; 288 std::copy(src, src + sizeof(options), data); 289 data += sizeof(options); 290 291 i = htole32(chunk_size); 292 src = (char *)&i; 293 std::copy(src, src + sizeof(chunk_size), data); 294 data += sizeof(chunk_size); 295 296 ll = htole64(num_points); 297 src = (char *)≪ 298 std::copy(src, src + sizeof(num_points), data); 299 data += sizeof(num_points); 300 301 ll = htole64(num_bytes); 302 src = (char *)≪ 303 std::copy(src, src + sizeof(num_bytes), data); 304 data += sizeof(num_bytes); 305 306 s = htole16(num_items); 307 src = (char *)&s; 308 std::copy(src, src + sizeof(num_items), data); 309 data += sizeof(num_items); 310 311 for (int k = 0 ; k < num_items ; k ++) { 312 laz_item& item = items[k]; 313 314 s = htole16(item.type); 315 src = (char *)&s; 316 std::copy(src, src + sizeof(item.type), data); 317 data += sizeof(item.type); 318 319 s = htole16(item.size); 320 src = (char *)&s; 321 std::copy(src, src + sizeof(item.size), data); 322 data += sizeof(item.size); 323 324 s = htole16(item.version); 325 src = (char *)&s; 326 std::copy(src, src + sizeof(item.version), data); 327 data += sizeof(item.version); 328 } 329 } 330 from_schemalaszip::io::laz_vlr331 static laz_vlr from_schema(const factory::record_schema& s, uint32_t chunksize = DefaultChunkSize) { 332 laz_vlr r; 333 334 // We only do pointwise chunking. 335 r.compressor = 2; 336 r.coder = 0; 337 338 // the version we're compatible with 339 r.version.major = 2; 340 r.version.minor = 2; 341 r.version.revision = 0; 342 343 r.options = 0; 344 r.chunk_size = chunksize; 345 346 r.num_points = -1; 347 r.num_bytes = -1; 348 349 r.num_items = static_cast<unsigned short>(s.records.size()); 350 r.items = new laz_item[s.records.size()]; 351 for (size_t i = 0 ; i < s.records.size() ; i ++) { 352 laz_item& item = r.items[i]; 353 const factory::record_item& rec = s.records.at(i); 354 355 item.type = static_cast<unsigned short>(rec.type); 356 item.size = static_cast<unsigned short>(rec.size); 357 item.version = static_cast<unsigned short>(rec.version); 358 } 359 360 return r; 361 } 362 to_schemalaszip::io::laz_vlr363 static factory::record_schema to_schema(const laz_vlr& vlr, int point_len) { 364 // convert the laszip items into record schema to be used by 365 // compressor/decompressor 366 367 using namespace factory; 368 factory::record_schema schema; 369 370 for(auto i = 0 ; i < vlr.num_items ; i++) { 371 laz_item& item = vlr.items[i]; 372 schema.push(factory::record_item(item.type, item.size, 373 item.version)); 374 point_len -= item.size; 375 } 376 if (point_len < 0) 377 throw laszip_format_unsupported(); 378 // Add extra bytes information 379 if (point_len) 380 schema.push(factory::record_item(record_item::BYTE, 381 point_len, 2)); 382 return schema; 383 } 384 385 #ifdef _WIN32 to_schemalaszip::io::laz_vlr386 __declspec(deprecated) static factory::record_schema to_schema(const laz_vlr& vlr) 387 #else 388 static factory::record_schema to_schema(const laz_vlr& vlr) __attribute__ ((deprecated)) 389 #endif 390 { 391 // convert the laszip items into record schema to be used by 392 // compressor/decompressor 393 394 using namespace factory; 395 factory::record_schema schema; 396 397 for(auto i = 0 ; i < vlr.num_items ; i++) { 398 laz_item& item = vlr.items[i]; 399 schema.push(factory::record_item(item.type, item.size, 400 item.version)); 401 } 402 return schema; 403 } 404 }; 405 #pragma pack(pop) 406 407 // cache line 408 #define BUF_SIZE (1 << 20) 409 410 template<typename StreamType> 411 struct __ifstream_wrapper { __ifstream_wrapperlaszip::io::__ifstream_wrapper412 __ifstream_wrapper(StreamType& f) : f_(f), offset(0), have(0), 413 buf_((char*)utils::aligned_malloc(BUF_SIZE)) { 414 } 415 ~__ifstream_wrapperlaszip::io::__ifstream_wrapper416 ~__ifstream_wrapper() { 417 utils::aligned_free(buf_); 418 } 419 420 __ifstream_wrapper(const __ifstream_wrapper<StreamType>&) = delete; 421 __ifstream_wrapper& operator = (const __ifstream_wrapper<StreamType>&) = delete; 422 fillit_laszip::io::__ifstream_wrapper423 inline void fillit_() { 424 offset = 0; 425 f_.read(buf_, BUF_SIZE); 426 have = f_.gcount(); 427 if (have == 0) 428 throw end_of_file(); // this is an exception since we shouldn't be hitting eof 429 } 430 resetlaszip::io::__ifstream_wrapper431 inline void reset() { 432 offset = have = 0; // when a file is seeked, reset this 433 } 434 getBytelaszip::io::__ifstream_wrapper435 inline unsigned char getByte() { 436 if (offset >= have) 437 fillit_(); 438 return static_cast<unsigned char>(buf_[offset++]); 439 } 440 getByteslaszip::io::__ifstream_wrapper441 inline void getBytes(unsigned char *buf, size_t request) { 442 // Use what's left in the buffer, if anything. 443 size_t fetchable = (std::min)((size_t)(have - offset), request); 444 std::copy(buf_ + offset, buf_ + offset + fetchable, buf); 445 offset += fetchable; 446 request -= fetchable; 447 448 // If we couldn't fetch everything requested, fill buffer 449 // and go again. We assume fillit_() satisfies any request. 450 if (request) 451 { 452 fillit_(); 453 std::copy(buf_ + offset, buf_ + offset + request, buf + fetchable); 454 offset += request; 455 } 456 } 457 458 StreamType& f_; 459 std::streamsize offset, have; 460 char *buf_; 461 }; 462 463 template<typename StreamType> 464 struct __ofstream_wrapper { __ofstream_wrapperlaszip::io::__ofstream_wrapper465 __ofstream_wrapper(StreamType& f) : f_(f) {} 466 putByteslaszip::io::__ofstream_wrapper467 void putBytes(const unsigned char *b, size_t len) { 468 f_.write(reinterpret_cast<const char*>(b), len); 469 } 470 putBytelaszip::io::__ofstream_wrapper471 void putByte(unsigned char b) { 472 f_.put((char)b); 473 } 474 475 __ofstream_wrapper(const __ofstream_wrapper&) = delete; 476 __ofstream_wrapper& operator = (const __ofstream_wrapper&) = delete; 477 478 StreamType& f_; 479 }; 480 481 namespace reader { 482 template <typename StreamType> 483 class basic_file { 484 typedef std::function<void (header&)> validator_type; 485 486 public: basic_file(StreamType & st)487 basic_file(StreamType& st) : f_(st), wrapper_(f_) { 488 _open(); 489 } 490 ~basic_file()491 ~basic_file() { 492 } 493 get_header() const494 const header& get_header() const { 495 return header_; 496 } 497 get_laz_vlr() const498 const laz_vlr& get_laz_vlr() const { 499 return laz_; 500 } 501 get_schema() const502 const factory::record_schema& get_schema() const { 503 return schema_; 504 } 505 readPoint(char * out)506 void readPoint(char *out) { 507 // read the next point in 508 if (chunk_state_.points_read == laz_.chunk_size || 509 !pdecomperssor_ || !pdecoder_) { 510 // Its time to (re)init the decoder 511 // 512 pdecomperssor_.reset(); 513 pdecoder_.reset(); 514 515 pdecoder_.reset(new decoders::arithmetic<__ifstream_wrapper<StreamType> >(wrapper_)); 516 pdecomperssor_ = factory::build_decompressor(*pdecoder_, schema_); 517 518 // reset chunk state 519 chunk_state_.current++; 520 chunk_state_.points_read = 0; 521 } 522 523 pdecomperssor_->decompress(out); 524 chunk_state_.points_read ++; 525 } 526 527 private: _open()528 void _open() { 529 // Make sure our header is correct 530 // 531 char magic[4]; 532 f_.read(magic, sizeof(magic)); 533 534 if (std::string(magic, magic+4) != "LASF") 535 throw invalid_magic(); 536 537 // Read the header in 538 f_.seekg(0); 539 f_.read((char*)&header_, sizeof(header_)); 540 541 // The mins and maxes are in a weird order, fix them 542 _fixMinMax(header_); 543 544 // make sure everything is valid with the header, note that validators are allowed 545 // to manipulate the header, since certain validators depend on a header's orignial state 546 // to determine what its final stage is going to be 547 for (auto f : _validators()) 548 f(header_); 549 550 // things look fine, move on with VLR extraction 551 _parseLASZIP(); 552 553 // parse the chunk table offset 554 _parseChunkTable(); 555 556 // set the file pointer to the beginning of data to start reading 557 f_.clear(); // may have treaded past the EOL, so reset everything before we start reading:w 558 f_.seekg(header_.point_offset + sizeof(int64_t)); 559 560 wrapper_.reset(); 561 } 562 _fixMinMax(header & h)563 void _fixMinMax(header& h) { 564 double mx, my, mz, nx, ny, nz; 565 566 mx = h.minimum.x; nx = h.minimum.y; 567 my = h.minimum.z; ny = h.maximum.x; 568 mz = h.maximum.y; nz = h.maximum.z; 569 570 h.minimum.x = nx; h.maximum.x = mx; 571 h.minimum.y = ny; h.maximum.y = my; 572 h.minimum.z = nz; h.maximum.z = mz; 573 } 574 _parseLASZIP()575 void _parseLASZIP() { 576 // move the pointer to the begining of the VLRs 577 f_.seekg(header_.header_size); 578 579 #pragma pack(push, 1) 580 struct { 581 unsigned short reserved; 582 char user_id[16]; 583 unsigned short record_id; 584 unsigned short record_length; 585 char desc[32]; 586 } vlr_header; 587 #pragma pack(pop) 588 589 size_t count = 0; 590 bool laszipFound = false; 591 while(count < header_.vlr_count && f_.good() && !f_.eof()) { 592 f_.read((char*)&vlr_header, sizeof(vlr_header)); 593 594 const char *user_id = "laszip encoded"; 595 596 if (std::equal(vlr_header.user_id, vlr_header.user_id + 14, user_id) && 597 vlr_header.record_id == 22204) { 598 // this is the laszip VLR 599 // 600 laszipFound = true; 601 602 std::unique_ptr<char[]> buffer( 603 new char[vlr_header.record_length]); 604 605 f_.read(buffer.get(), vlr_header.record_length); 606 _parseLASZIPVLR(buffer.get()); 607 608 break; // no need to keep iterating 609 } 610 611 f_.seekg(vlr_header.record_length, std::ios::cur); // jump foward 612 count++; 613 } 614 615 if (!laszipFound) 616 throw no_laszip_vlr(); 617 618 schema_ = laz_vlr::to_schema(laz_, header_.point_record_length); 619 } 620 binPrint(const char * buf,int len)621 void binPrint(const char *buf, int len) { 622 for (int i = 0 ; i < len ; i ++) { 623 char b[256]; 624 sprintf(b, "%02X", buf[i] & 0xFF); 625 std::cout << b << " "; 626 } 627 628 std::cout << std::endl; 629 } 630 _parseLASZIPVLR(const char * buf)631 void _parseLASZIPVLR(const char *buf) { 632 laz_.fill(buf); 633 634 if (laz_.compressor != 2) 635 throw laszip_format_unsupported(); 636 } 637 _parseChunkTable()638 void _parseChunkTable() { 639 // Move to the begining of the data 640 // 641 f_.seekg(header_.point_offset); 642 643 int64_t chunkoffset = 0; 644 f_.read((char*)&chunkoffset, sizeof(chunkoffset)); 645 if (!f_.good()) 646 throw chunk_table_read_error(); 647 648 if (chunkoffset == -1) 649 throw not_supported("Chunk table offset == -1 is not supported at this time"); 650 651 // Go to the chunk offset and read in the table 652 // 653 f_.seekg(chunkoffset); 654 if (!f_.good()) 655 throw chunk_table_read_error(); 656 657 // Now read in the chunk table 658 struct { 659 unsigned int version, 660 chunk_count; 661 } chunk_table_header; 662 663 f_.read((char *)&chunk_table_header, sizeof(chunk_table_header)); 664 if (!f_.good()) 665 throw chunk_table_read_error(); 666 667 if (chunk_table_header.version != 0) 668 throw unknown_chunk_table_format(); 669 670 // start pushing in chunk table offsets 671 chunk_table_offsets_.clear(); 672 673 if (laz_.chunk_size == (std::numeric_limits<unsigned int>::max)()) 674 throw not_supported("chunk_size == uint.max is not supported at this time."); 675 676 // Allocate enough room for our chunk 677 chunk_table_offsets_.resize(chunk_table_header.chunk_count + 1); 678 679 // Add The first one 680 chunk_table_offsets_[0] = header_.point_offset + sizeof(uint64_t); 681 682 if (chunk_table_header.chunk_count > 1) { 683 // decode the index out 684 // 685 __ifstream_wrapper<StreamType> w(f_); 686 687 decoders::arithmetic<__ifstream_wrapper<StreamType> > decoder(w); 688 decompressors::integer decomp(32, 2); 689 690 // start decoder 691 decoder.readInitBytes(); 692 decomp.init(); 693 694 for (size_t i = 1 ; i <= chunk_table_header.chunk_count ; i ++) { 695 chunk_table_offsets_[i] = static_cast<uint64_t>(decomp.decompress(decoder, (i > 1) ? static_cast<I32>(chunk_table_offsets_[i - 1]) : 0, 1)); 696 } 697 698 for (size_t i = 1 ; i < chunk_table_offsets_.size() ; i ++) { 699 chunk_table_offsets_[i] += chunk_table_offsets_[i-1]; 700 } 701 } 702 } 703 704 _validators()705 static const std::vector<validator_type>& _validators() { 706 static std::vector<validator_type> v; // static collection of validators 707 static std::mutex lock; 708 709 // To remain thread safe we need to make sure we have appropriate guards here 710 // 711 if (v.empty()) { 712 lock.lock(); 713 // Double check here if we're still empty, the first empty just makes sure 714 // we have a quick way out where validators are already filled up (for all calls 715 // except the first one), for two threads competing to fill out the validators 716 // only one of the will get here first, and the second one will bail if the v 717 // is not empty, and hence the double check 718 // 719 if (v.empty()) { 720 // TODO: Fill all validators here 721 // 722 v.push_back( 723 // Make sure that the header indicates that file is compressed 724 // 725 [](header& h) { 726 int bit_7 = (h.point_format_id >> 7) & 1, 727 bit_6 = (h.point_format_id >> 6) & 1; 728 729 if (bit_7 == 1 && bit_6 == 1) 730 throw old_style_compression(); 731 732 if ((bit_7 ^ bit_6) == 0) 733 throw not_compressed(); 734 735 h.point_format_id &= 0x3f; 736 } 737 ); 738 } 739 740 lock.unlock(); 741 } 742 743 return v; 744 } 745 746 // The file object is not copyable or copy constructible 747 basic_file(const basic_file<StreamType>&) = delete; 748 basic_file<StreamType>& operator = (const basic_file<StreamType>&) = delete; 749 750 StreamType& f_; 751 __ifstream_wrapper<StreamType> wrapper_; 752 753 header header_; 754 laz_vlr laz_; 755 std::vector<uint64_t> chunk_table_offsets_; 756 757 factory::record_schema schema_; // the schema of this file, the LAZ items converted into factory recognizable description, 758 759 // Our decompressor 760 std::shared_ptr<decoders::arithmetic<__ifstream_wrapper<StreamType> > > pdecoder_; 761 formats::dynamic_decompressor::ptr pdecomperssor_; 762 763 // Establish our current state as we iterate through the file 764 struct __chunk_state{ 765 int64_t current; 766 int64_t points_read; 767 int64_t current_index; 768 __chunk_statelaszip::io::reader::basic_file::__chunk_state769 __chunk_state() : current(0u), points_read(0u), current_index(-1) {} 770 } chunk_state_; 771 }; 772 773 typedef basic_file<std::ifstream> file; 774 } 775 776 namespace writer { 777 778 // An object to encapsulate what gets passed to 779 struct config { 780 vector3<double> scale, offset; 781 unsigned int chunk_size; 782 configlaszip::io::writer::config783 explicit config() : scale(1.0, 1.0, 1.0), offset(0.0, 0.0, 0.0), chunk_size(DefaultChunkSize) {} configlaszip::io::writer::config784 config(const vector3<double>& s, const vector3<double>& o, unsigned int cs = DefaultChunkSize) : 785 scale(s), offset(o), chunk_size(cs) {} configlaszip::io::writer::config786 config(const header& h) : scale(h.scale.x, h.scale.y, h.scale.z), offset(h.offset.x, h.offset.y, h.offset.z), 787 chunk_size(DefaultChunkSize) {} 788 to_headerlaszip::io::writer::config789 header to_header() const { 790 header h; memset(&h, 0, sizeof(h)); // clear out header 791 h.minimum = { (std::numeric_limits<double>::max)(), (std::numeric_limits<double>::max)(), 792 (std::numeric_limits<double>::max)() }; 793 h.maximum = { std::numeric_limits<double>::lowest(), std::numeric_limits<double>::lowest(), 794 std::numeric_limits<double>::lowest()}; 795 796 h.offset.x = offset.x; 797 h.offset.y = offset.y; 798 h.offset.z = offset.z; 799 800 h.scale.x = scale.x; 801 h.scale.y = scale.y; 802 h.scale.z = scale.z; 803 804 return h; 805 } 806 }; 807 808 class file { 809 public: file()810 file() : 811 wrapper_(f_) {} 812 file(const std::string & filename,const factory::record_schema & s,const config & config)813 file(const std::string& filename, 814 const factory::record_schema& s, 815 const config& config) : 816 wrapper_(f_), 817 schema_(s), 818 header_(config.to_header()), 819 chunk_size_(config.chunk_size) { 820 open(filename, s, config); 821 } 822 open(const std::string & filename,const factory::record_schema & s,const config & c)823 void open(const std::string& filename, const factory::record_schema& s, const config& c) { 824 // open the file and move to offset of data, we'll write 825 // headers and all other things on file close 826 f_.open(filename, std::ios::binary | std::ios::trunc); 827 if (!f_.good()) 828 throw write_open_failed(); 829 830 schema_ = s; 831 header_ = c.to_header(); 832 chunk_size_ = c.chunk_size; 833 834 // write junk to our prelude, we'll overwrite this with 835 // awesome data later 836 // 837 size_t preludeSize = 838 sizeof(header) + // the LAS header 839 54 + // size of one vlr header 840 (34 + s.records.size() * 6) + // the LAZ vlr size 841 sizeof(int64_t); // chunk table offset 842 843 char *junk = new char[preludeSize]; 844 std::fill(junk, junk + preludeSize, 0); 845 f_.write(junk, preludeSize); 846 delete [] junk; 847 848 // the first chunk begins at the end of prelude 849 } 850 writePoint(const char * p)851 void writePoint(const char *p) { 852 if (chunk_state_.points_in_chunk == chunk_size_ || 853 !pcompressor_ || !pencoder_) { 854 // Time to (re)init the encoder 855 // 856 pcompressor_.reset(); 857 if (pencoder_) { 858 pencoder_->done(); // make sure we flush it out 859 pencoder_.reset(); 860 } 861 862 // reset chunk state 863 // 864 chunk_state_.current_chunk_index ++; 865 chunk_state_.points_in_chunk = 0; 866 867 // take note of the current offset 868 std::streamsize offset = f_.tellp(); 869 if (chunk_state_.current_chunk_index > 0) { 870 // When we hit this point the first time around, we don't do anything since we are just 871 // starting to write out our first chunk. 872 chunk_sizes_.push_back(offset - chunk_state_.last_chunk_write_offset); 873 } 874 875 chunk_state_.last_chunk_write_offset = offset; 876 877 // reinit stuff 878 pencoder_.reset(new encoders::arithmetic<__ofstream_wrapper<std::ofstream> >(wrapper_)); 879 pcompressor_ = factory::build_compressor(*pencoder_, schema_); 880 } 881 882 // now write the point 883 pcompressor_->compress(p); 884 chunk_state_.total_written ++; 885 chunk_state_.points_in_chunk ++; 886 887 888 _update_min_max(*(reinterpret_cast<const formats::las::point10*>(p))); 889 } 890 close()891 void close() { 892 _flush(); 893 894 if (f_.is_open()) 895 f_.close(); 896 } 897 898 private: _update_min_max(const formats::las::point10 & p)899 void _update_min_max(const formats::las::point10& p) { 900 double x = p.x * header_.scale.x + header_.offset.x, 901 y = p.y * header_.scale.y + header_.offset.y, 902 z = p.z * header_.scale.z + header_.offset.z; 903 904 header_.minimum.x = (std::min)(x, header_.minimum.x); 905 header_.minimum.y = (std::min)(y, header_.minimum.y); 906 header_.minimum.z = (std::min)(z, header_.minimum.z); 907 908 header_.maximum.x = (std::max)(x, header_.maximum.x); 909 header_.maximum.y = (std::max)(y, header_.maximum.y); 910 header_.maximum.z = (std::max)(z, header_.maximum.z); 911 } 912 _flush()913 void _flush() { 914 // flush out the encoder 915 pencoder_->done(); 916 917 // Note down the size of the offset of this last chunk 918 chunk_sizes_.push_back((std::streamsize)f_.tellp() - chunk_state_.last_chunk_write_offset); 919 920 // Time to write our header 921 // Fill up things not filled up by our header 922 // 923 header_.magic[0] = 'L'; header_.magic[1] = 'A'; 924 header_.magic[2] = 'S'; header_.magic[3] = 'F'; 925 926 header_.version.major = 1; 927 header_.version.minor = 2; 928 929 header_.header_size = sizeof(header_); 930 header_.point_offset = sizeof(header) + 54 + (34 + static_cast<unsigned int>(schema_.records.size()) * 6); // 54 is the size of one vlr header 931 header_.vlr_count = 1; 932 933 header_.point_format_id = schema_.format(); 934 header_.point_format_id |= (1 << 7); 935 header_.point_record_length = static_cast<unsigned short>(schema_.size_in_bytes()); 936 header_.point_count = static_cast<unsigned int>(chunk_state_.total_written); 937 938 // make sure we re-arrange mins and maxs for writing 939 // 940 double mx, my, mz, nx, ny, nz; 941 nx = header_.minimum.x; mx = header_.maximum.x; 942 ny = header_.minimum.y; my = header_.maximum.y; 943 nz = header_.minimum.z; mz = header_.maximum.z; 944 945 header_.minimum.x = mx; header_.minimum.y = nx; 946 header_.minimum.z = my; header_.maximum.x = ny; 947 header_.maximum.y = mz; header_.maximum.z = nz; 948 949 f_.seekp(0); 950 f_.write(reinterpret_cast<char*>(&header_), sizeof(header_)); 951 952 // before we can write the VLR, we need to write the LAS VLR definition 953 // for it 954 // 955 #pragma pack(push, 1) 956 struct { 957 unsigned short reserved; 958 char user_id[16]; 959 unsigned short record_id; 960 unsigned short record_length_after_header; 961 char description[32]; 962 } las_vlr_header; 963 #pragma pack(pop) 964 965 las_vlr_header.reserved = 0; 966 las_vlr_header.record_id = 22204; 967 las_vlr_header.record_length_after_header = static_cast<unsigned short>(34 + (schema_.records.size() * 6)); 968 969 strcpy(las_vlr_header.user_id, "laszip encoded"); 970 strcpy(las_vlr_header.description, "laz-perf variant"); 971 972 // write the las vlr header 973 f_.write(reinterpret_cast<char*>(&las_vlr_header), sizeof(las_vlr_header)); 974 975 976 // prep our VLR so we can write it 977 // 978 laz_vlr vlr = laz_vlr::from_schema(schema_, chunk_size_); 979 980 std::unique_ptr<char[]> vlrbuf(new char[vlr.size()]); 981 vlr.extract(vlrbuf.get()); 982 f_.write(vlrbuf.get(), vlr.size()); 983 984 // TODO: Write chunk table 985 // 986 _writeChunks(); 987 } 988 _writeChunks()989 void _writeChunks() { 990 // move to the end of the file to start emitting our compresed table 991 f_.seekp(0, std::ios::end); 992 993 // take note of where we're writing the chunk table, we need this later 994 int64_t chunk_table_offset = static_cast<int64_t>(f_.tellp()); 995 996 // write out the chunk table header (version and total chunks) 997 #pragma pack(push, 1) 998 struct { 999 unsigned int version, 1000 chunks_count; 1001 } chunk_table_header = { 0, static_cast<unsigned int>(chunk_sizes_.size()) }; 1002 #pragma pack(pop) 1003 1004 f_.write(reinterpret_cast<char*>(&chunk_table_header), 1005 sizeof(chunk_table_header)); 1006 1007 1008 // Now compress and write the chunk table 1009 // 1010 __ofstream_wrapper<std::ofstream> w(f_); 1011 1012 encoders::arithmetic<__ofstream_wrapper<std::ofstream> > encoder(w); 1013 compressors::integer comp(32, 2); 1014 1015 comp.init(); 1016 1017 for (size_t i = 0 ; i < chunk_sizes_.size() ; i ++) { 1018 comp.compress(encoder, 1019 i ? static_cast<int>(chunk_sizes_[i-1]) : 0, 1020 static_cast<int>(chunk_sizes_[i]), 1); 1021 } 1022 1023 encoder.done(); 1024 1025 // go back to where we're supposed to write chunk table offset 1026 f_.seekp(header_.point_offset); 1027 f_.write(reinterpret_cast<char*>(&chunk_table_offset), sizeof(chunk_table_offset)); 1028 } 1029 1030 std::ofstream f_; 1031 __ofstream_wrapper<std::ofstream> wrapper_; 1032 1033 formats::dynamic_compressor::ptr pcompressor_; 1034 std::shared_ptr<encoders::arithmetic<__ofstream_wrapper<std::ofstream> > > pencoder_; 1035 1036 factory::record_schema schema_; 1037 1038 header header_; 1039 unsigned int chunk_size_; 1040 1041 struct __chunk_state { 1042 int64_t total_written; // total points written 1043 int64_t current_chunk_index; // the current chunk index we're compressing 1044 unsigned int points_in_chunk; 1045 std::streamsize last_chunk_write_offset; __chunk_statelaszip::io::writer::file::__chunk_state1046 __chunk_state() : total_written(0), current_chunk_index(-1), points_in_chunk(0), last_chunk_write_offset(0) {} 1047 } chunk_state_; 1048 1049 std::vector<int64_t> chunk_sizes_; // all the places where chunks begin 1050 }; 1051 } 1052 } 1053 } 1054 1055 #endif // __io_hpp__ 1056