1# 2# $Id: laola.pl,v 0.5.1.5 1997/07/01 00:06:42 schwartz Rel $ 3# 4# laola.pl, LAOLA filesystem. 5# 6# This perl 4 library gives raw access to "Ole/Com" documents. These are 7# documents like created by Microsoft Word 6.0+ or newer Star Divisions 8# Word by using so called "Structured Storage" technology. Write access 9# still is nearly not supported, but will be done one day. This library 10# is part of LAOLA, a distribution this file should have come along with. 11# It can be found at: 12# 13# http://wwwwbs.cs.tu-berlin.de/~schwartz/pmh/index.html 14# or 15# http://www.cs.tu-berlin.de/~schwartz/pmh/index.html 16# 17# Copyright (C) 1996, 1997 Martin Schwartz 18# 19# This program is free software; you can redistribute it and/or modify 20# it under the terms of the GNU General Public License as published by 21# the Free Software Foundation; either version 2 of the License, or 22# (at your option) any later version. 23# 24# This program is distributed in the hope that it will be useful, 25# but WITHOUT ANY WARRANTY; without even the implied warranty of 26# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 27# GNU General Public License for more details. 28# 29# You should have received a copy of the GNU General Public License 30# along with this program; if not, you should find it at: 31# 32# http://wwwwbs.cs.tu-berlin.de/~schwartz/pmh/COPYING 33# 34# Diese Ver�ffentlichung erfolgt ohne Ber�cksichtigung eines eventuellen 35# Patentschutzes. Warennamen werden ohne Gew�hrleistung einer freien 36# Verwendung benutzt. ;-) 37# 38# Contact: schwartz@cs.tu-berlin.de 39# 40 41# 42# Really important topics still MISSING until now: 43# 44# - human rights and civil rights where _you_live_ 45# - Reformfraktion president for Technische Universit�t Berlin 46# 47# - creating documents 48# - sensible error handling... 49# - many property set things: 50# * documentation of variable types 51# * code page support 52# - opening multiple documents at a time 53# - consistant name giving, checked against MS' 54# 55# Please refer to the Quick Reference at Laolas home page for further 56# explanations. 57# 58 59# 60# Abbreviations 61# 62# bbd Big Block Depot 63# pps Property Storage 64# ppset Property Set 65# ppss Property Set Storage 66# sb Start Block 67# sbd Small Block Depot 68# tss Time Stamp Seconds 69# tsd Time Stamp Days 70# 71 72## 73## "public" 74## 75 76sub laola_open_document { &laola'laola_open_document; } 77sub laola_close_document { &laola'laola_close_document; } 78 79sub laola_pps_get_name { &laola'laola_pps_get_name; } 80sub laola_pps_get_date { &laola'laola_pps_get_date; } 81 82sub laola_is_directory { &laola'laola_is_directory; } 83sub laola_get_directory { &laola'laola_get_directory; } 84sub laola_get_dirhandles { &laola'laola_get_dirhandles; } 85 86sub laola_is_file { &laola'laola_is_file; } 87sub laola_get_filesize { &laola'laola_get_filesize; } 88sub laola_get_file { &laola'laola_get_file; } 89 90sub laola_is_root { &laola'laola_is_root; } 91 92# 93# writing 94# 95sub laola_modify_file { &laola'laola_modify_file; } 96 97# 98# property set handling 99# 100sub laola_is_file_ppset { &laola'laola_is_file_ppset; } 101sub laola_ppset_get_dictionary { &laola'laola_ppset_get_dictionary; } 102sub laola_ppset_get_idset { &laola'laola_ppset_get_idset; } 103sub laola_ppset_get_property { &laola'laola_ppset_get_property; } 104 105# 106# trash handling 107# 108sub laola_get_trashsize { &laola'laola_get_trashsize; } 109sub laola_get_trash { &laola'laola_get_trash; } 110sub laola_modify_trash { &laola'laola_modify_trash; } 111 112 113package laola; 114 115$laola_date = "03/25/97"; 116 117changable_options: { 118 $optional_do_iobuf=0; # 0: don't cache 1: cache whole compound document 119 $optional_do_debug=0; # 0: don't debug 1: print some debugging information 120} 121 122## 123## File and directory handling 124## 125 126sub laola_open_document { ## 127# 128# "ok"||$error = laola_open_document($filename [,$openmode [,$streambuf]]); 129# 130# openmode bitmask (0 is default): 131# 132# Bit 0: 0 read only 1 read and write 133# Bit 4: 0 file mode 1 buffer mode 134# 135 local($status)=""; 136 open_doc1: { 137 &init_vars(); 138 if ( ($status=&init_io(@_)) ne "ok") { 139 last; 140 } 141 if ( ($status=&init_doc()) ne "ok") { 142 &laola_close_document(); 143 last; 144 } 145 return "ok"; 146 } 147 $status; 148} 149 150sub laola_close_document { ## 151# 152# "ok" = laola_close_document([streambuf]) 153# 154 if ($openmode & 0x10) { 155 if (defined $_[0]) { 156 $_[0]=$iobuf; 157 } 158 } else { 159 &flush_cache(); 160 &clean_file(); 161 } 162 &init_vars(); 163 return "ok"; 164} 165 166sub laola_is_directory { ## 167# 168# 1||0 = laola_is_directory($pps) 169# 170 local($pps)=shift; 171 (!$pps || ($pps_type[$pps] == 1)); 172} 173 174sub laola_is_file { ## 175# 176# 1||0 = laola_is_file($pps) 177# 178 ($pps_type[shift] == 2); 179} 180 181sub laola_is_root { ## 182# 183# 1||0 = laola_is_root($pps) 184# 185 ($pps_type[shift] == 5); 186} 187 188sub laola_get_dirhandles { ## 189# 190# @pps = laola_get_dirhandles($pps); 191# 192 local($start)=shift; 193 194 local (@chain) = (); 195 local (%chaincontrol) = (); 196 197 (!$start || &laola_is_directory($start)) 198 && &get_ppss_chain($pps_dir[$start]) 199 ; 200 201 @chain; 202} 203 204sub laola_get_directory { ## 205# 206# %pps_names = laola_get_directory($pps); 207# 208 local(%pps_namehandle)=(); 209 for (&laola_get_dirhandles) { 210 $pps_namehandle{&laola_pps_get_name($_)} = $_; 211 } 212 %pps_namehandle; 213} 214 215sub laola_pps_get_name { ## 216# 217# $name_of_pps = laola_pps_get_name($pps); 218# 219 $pps_name[shift]; 220} 221 222sub laola_pps_get_date { ## 223# 224# ($day,$month,$year,$hour,$min,$sec)||0 = laola_pps_get_date($pps) 225# (1..31, 1..12, 1601..., 0..23, 0..59, 0.x .. 59.x) 226# 227 local($pps)=shift; 228 &laola_is_directory($pps) 229 && &filetime_to_time($pps_ts2s[$pps], $pps_ts2d[$pps]); 230} 231 232sub laola_get_filesize { ## 233# 234# $filesize || 0 = laola_get_filesize($pps); 235# 236 local($pps)=shift; 237 &laola_is_file($pps) && $pps_size[$pps]; 238} 239 240sub laola_get_file { ## 241# 242# "ok"||$error = laola_get_file($pps, extern $buf [,$offset, $size]); 243# 244 &rw_file("r", @_); 245} 246 247sub laola_modify_file { ## 248# 249# "ok"||$error = laola_modify_file($pps,extern $buf, $offset, $size); 250# 251 return "Laola: File is write protected!" if !io_writable; 252 &rw_file("w", @_); 253} 254 255 256## 257## Property set handling 258## 259 260sub laola_is_file_ppset { ## 261# 262# ppset_type || 0 = laola_is_file_ppset($pps) 263# ppset_type e {1, 5} 264# 265 local($pps)=shift; 266 (&laola_is_file($pps)) 267 && ( (&laola_pps_get_name($pps) =~ /^\05/) && 5 268 || (&laola_pps_get_name($pps) =~ /^\01CompObj$/) && 1 269 ); 270} 271 272sub laola_ppset_get_dictionary { ## 273# 274# ("ok", %dictionary)||$error = laola_ppset_get_dictionary($pps) 275# 276 local($pps)=shift; 277 local($status) = &load_propertyset($pps); 278 if ($status ne "ok") { 279 return $status; 280 } else { 281 return ("ok", %ppset_dictionary); 282 } 283} 284 285sub laola_ppset_get_idset { ## 286# 287# ("ok", %ppset_idset) || $error = laola_ppset_get_idset($pps); 288# 289 local($pps)=shift; 290 local($status) = &load_propertyset($pps); 291 return $status if $status ne "ok"; 292 293 local(%ts)=(); 294 foreach $key (keys %ppset_fido) { 295 $ts{$key} = $ppset_dictionary{$key}; 296 } 297 ("ok", %ts); 298} 299 300sub laola_ppset_get_property { ## 301# 302# ($type,@mixed)||("error",$error)=laola_ppset_get_property($pps, $id) 303# 304 local($pps, $id)=@_; 305 local($type, $l, $var, @var); 306 local($o, $n); 307 308 local($status)= &load_propertyset($pps); 309 return ("error", $status) if $status ne "ok"; 310 311 return "" if !defined $ppset_fido{$id}; 312 $n = int($id / 0x1000); 313 $o = $ppset_o[$n]+$ppset_fido{$id}; 314 315 if ($ppset_type == 5) { 316 #return ("error", "Property Identifier is invalid.") if $id < 2; 317 ($type, $l, @var) = &ppset_get_property($o); 318 return ($type, @var); 319 320 } elsif ($ppset_type == 1) { 321 ($l, $var) = &ppset_get_var(0x1e, $o); 322 return (0x1e, $var); 323 } 324} 325 326 327## 328## Trash handling 329## 330 331sub laola_get_trashsize { ## 332# 333# $sizeof_trash_section = laola_get_trashsize($type) 334# 335 &get_trash_size(@_); 336} 337 338sub laola_get_trash { ## 339# 340# "ok"||$error = laola_get_trash ($type, extern $buf [,$offset,$size]); 341# 342 &rw_trash("r", @_); 343} 344 345sub laola_modify_trash { ## 346# 347# "ok"||$error = laola_modify_trash ($type, extern $buf [,$offset,$size]); 348# 349 return "Laola: File is write protected!" if !io_writable; 350 &rw_trash("w", @_); 351} 352 353 354## 355## "private" 356## 357 358global_init: { 359 &var_init(); 360 &filetime_init(); 361 &propertyset_type_init(); 362 $[=0; 363} 364 365# 366# laola_open_document -> 367# 368 369sub init_vars { 370 # laola_open_document->init_vars 371 # laola_close_document->init_vars 372 internal: { 373 $infilename=undef; 374 $filesize=undef; 375 $openmode=undef; 376 $io_writable=undef; 377 378 $curfile=undef; 379 @curfile_iolist = (); 380 381 $iobuf=undef; 382 @iobuf_modify_a=(); 383 @iobuf_modify_l=(); 384 } 385 386 &init_propertyset(); 387 388 OLEstructure: { 389 # unknown header things that matter: 390 # ? $version=undef; # word(1a) 391 # ? $revision=undef; # word(18) 392 # ? $bigunknown=undef; # byte(1e) 393 394 # known header things that matter: 395 $header_size=0x200; 396 $big_block_size=undef; # word(1e) 397 $small_block_size=undef; # word(20) 398 $num_of_bbd_blocks=undef; # long(2c) 399 $root_startblock=undef; # long(30) 400 $sbd_startblock=undef; # long(3c) 401 $ext_startblock=undef; # long(44) 402 $num_of_ext_blocks=undef; # long(48) 403 404 # property storage things 405 @pps_name=(); # 0 .. pps_sizeofname 406 #pps_sizeofname=(); # word(40) 407 @pps_type=(); # byte(42) 408 @pps_uk0=(); # byte(43) 409 @pps_prev=(); # long(44) 410 @pps_next=(); # long(48) 411 @pps_dir=(); # long(4c) 412 @pps_ts1s=(); # long(64) 413 @pps_ts1d=(); # long(68) 414 @pps_ts2s=(); # long(6c) 415 @pps_ts2d=(); # long(70) 416 @pps_sb=(); # long(74) 417 @pps_size=(); # long(78) 418 } 419 420 various: { 421 $maxblock=undef; 422 $maxsmallblock=undef; 423 424 # block depot blocks 425 # - these blocks are building the block depots 426 @bbd_list=(); 427 @sbd_list=(); 428 429 # block depot tables 430 @bbd=(); 431 @sbd=(); 432 433 # contents blocks 434 @root_list=(); 435 @sb_list=(); 436 437 blockusage: { 438 @bb_usage=(); # big blocks usage 439 @sb_usage=(); # small blocks usage 440 $usage_known=undef; 441 } 442 443 trash: { 444 %trashsize=(); 445 @trash1_o=(); @trash1_l=(); 446 @trash2_o=(); @trash2_l=(); 447 @trash3_o=(); @trash3_l=(); 448 @trash4_o=(); @trash4_l=(); 449 $trash_known=undef; 450 } 451 } 452} 453 454 455sub init_io { 456 ($infilename, $openmode) = @_; 457 458 if ($openmode & 0x10) { 459 return &init_stream; 460 } else { 461 return &init_file; 462 } 463} 464 465sub init_stream { 466 return "No stream data available!" if !defined $_[2]; 467 #$openmode &= 0xfffffffe; # clear writeable flag 468 $optional_do_iobuf=1; 469 $iobuf = $_[2]; 470 $filesize = length($iobuf); 471 if ( (&read_long(0) != 0xe011cfd0) || 472 (&read_long(4) != 0xe11ab1a1) ) { 473 return "\"$infilename\" is no Ole / Compound Document!\n"; 474 } 475 "ok"; 476} 477 478sub init_file { 479 local($status); 480 return "\"$infilename\" does not exist!" if ! -e $infilename; 481 return "\"$infilename\" is a directory!" if -d $infilename; 482 return "\"$infilename\" is no proper file!" if ! -f $infilename; 483 return "Cannot read \"$infilename\"!" if ! -r $infilename; 484 if ($openmode & 1) { 485 return "\"$infilename\" is write protected!" if ! -w $infilename; 486 $io_writable = 1; 487 $status = open(IO, '+<'.$infilename); 488 } else { 489 $io_writable = 0; 490 $status = open(IO, $infilename); 491 } 492 return "Cannot open \"$infilename\"!" if !$status; 493 494 binmode(IO); 495 if ($io_writable) { 496 select(IO); $|=1; select(STDOUT); 497 } 498 499 if ( (&read_long(0) != 0xe011cfd0) || 500 (&read_long(4) != 0xe11ab1a1) ) { 501 return "\"$infilename\" is no Ole / Compound Document!\n"; 502 } 503 504 $filesize = -s $infilename; 505 506 read_iobuf: { 507 if ($optional_do_iobuf) { 508 if (!&myread(0, $filesize, $iobuf, 0)) { 509 undef $iobuf; 510 } 511 } 512 } 513 514 "ok"; 515} 516 517sub init_doc { 518 # read bbd, 519 # get bbd -> root-chain, get bbd -> sbd-chain 520 local($i, $tmp)=(undef, undef); 521 local(@tmp)=undef; 522 523 header_information: { 524 $big_block_size=1<<&read_word(0x1e); 525 $small_block_size=1<<&read_word(0x20); 526 $num_of_bbd_blocks=&read_long(0x2c); 527 $root_startblock=&read_long(0x30); 528 $sbd_startblock=&read_long(0x3c); 529 $ext_startblock=&read_long(0x44); 530 $num_of_ext_blocks=&read_long(0x48); 531 $maxsmallblock= int ( 532 &read_long( $header_size + $root_startblock*$big_block_size + 0x78 ) 533 / $small_block_size 534 -1 535 ); 536 } 537 538 internal: { 539 $maxblock = int ( ($filesize-$header_size) / $big_block_size -1); 540 return "Document is corrupt - size is too small." if $maxblock < 1; 541 } 542 543 # read big block depot 544 read_bbd: { 545 $max_in_header = int ( ($header_size-0x4c)/4 ); 546 547 $todo = $num_of_bbd_blocks; 548 $num = $todo; 549 $num = $max_in_header if $num_of_bbd_blocks > $max_in_header; 550 551 for ($i=0; $i<$num; $i++) { 552 push (@bbd_list, &read_long(0x4c+4*$i)); 553 } 554 $todo -= $num; 555 $next = $ext_startblock; 556 557 while ($todo > 0) { 558 $num = $todo; 559 $num = ($big_block_size-4)/4 if $todo>(($big_block_size-4)/4); 560 $o = $header_size + $next*$big_block_size; 561 for ($i=0; $i<$num; $i++) { 562 push (@bbd_list, &read_long($o+4*$i)); 563 } 564 $todo -= $num; 565 $next = &read_long($o+4*$num); 566 } 567 568 $tmp=""; 569 &rw_iolist("r", $tmp, 570 &get_iolist(3, 0, 0xffffffff, 0, @bbd_list) 571 ); 572 @bbd = unpack ($vtype{"l"}.($maxblock+1), $tmp); 573 } 574 575 # read small block depot 576 read_sbd: { 577 $tmp=""; 578 @sbd_list=&get_list_from_depot($sbd_startblock, 1); 579 &rw_iolist("r", $tmp, 580 &get_iolist(3, 0, 0xffffffff, 0, @sbd_list) 581 ); 582 @sbd = unpack ($vtype{"l"}.($maxsmallblock+1), $tmp); 583 } 584 585 root_and_sb_chains: { 586 @root_list=&get_list_from_depot($root_startblock, 1); 587 return "Document is corrupt - no root entry." if !@root_list; 588 @sb_list=&get_list_from_depot ( 589 &read_long ( $header_size + $root_startblock*$big_block_size + 0x74 ), 590 1 591 ); 592 } 593 594 read_PropertyStorages: { 595 &read_ppss(0); 596 597 # 598 # If there are many property storages, they will be loaded 599 # dynamically. If there are few (I randomly chosed 50), they 600 # all will be read (ditto for debugging). 601 # 602 last if $#root_list>50 || !$optional_do_debug; 603 604 local($buf)=""; 605 local($i, $nl); 606 &rw_iolist("r", $buf, 607 &get_iolist(3, 0, 0xffffffff, 0, @root_list) 608 ); 609 print "\n\n" 610 ."---------------------------------------------\n" 611 ."LAOLA INTERNAL start of debugging information\n\n" 612 ." n size chain typ name date\n" 613 ; 614 for ($i=0; $i<=($#root_list+1)*4; $i++) { 615 &read_ppss_buf($i, $buf); 616 &debug_report_pps($i) if $optional_do_debug; 617 } 618 print "\n" 619 ."LAOLA INTERNAL end of debugging information\n" 620 ."-------------------------------------------\n\n" 621 ; 622 } 623 624 &report_blockuse_statistic() if $optional_do_debug; 625 "ok"; 626} 627 628## 629## laola_close_document -> 630## 631 632sub clean_file { 633 close(IO); 634} 635 636 637## 638## -------------------------- File IO ------------------------------ 639## 640 641sub rw_file { 642# 643# "ok"||error = rw_file("r"||"w", $pps_handle, extern $buf [,$offset, $size]) 644# 645 local($maxarg)=$#_; 646 local($rw, $pps) = @_[0..1]; 647 return "Laola: pps is no file!" if !&laola_is_file($pps); 648 return "Laola: no method \"$rw\"!" if !($rw =~ /^[rw]$/i); 649 650 local($status, $offset, $size) = 651 &get_default_iosize($pps_size[$pps], $rw, @_[2..$maxarg]); 652 return $status if $status ne "ok"; 653 654 return "Bad document structure!" if ! &get_curfile_iolist($pps); 655 return "ok" if &rw_iolist($rw, $_[2], &get_iolist(4, $offset, $size)); 656 657 $rw =~ /^r$/i ? "Laola: read error!" : "Laola: write error!"; 658} 659 660sub get_default_iosize { 661# 662# ("ok", $offset, $size) || $error = 663# get_default_iosize (defsize, "r"||"w", extern buf, offset, size) 664# 665 local($maxarg)=$#_; 666 local($defsize, $rw) = @_[0..1]; 667 local($offset, $size) = @_[3..4]; 668 669 if (!$size) { 670 if ($rw =~ /^r$/i) { 671 if ($maxarg < 4) { 672 # read default: read trashsize 673 $offset=0; $size=$defsize; 674 } else { 675 # read zero size: no problem 676 $_[2]=""; 677 } 678 } else { 679 if ($maxarg < 4) { 680 # write default: not allowed! 681 return "Laola: write error! Unknown size."; 682 } else { 683 # write zero size: no problem 684 } 685 } 686 } 687 ("ok", $offset, $size); 688} 689 690sub get_curfile_iolist { 691# 692# 1||0 = get_curfile_iolist($pps) 693# 694# Gets the iolist for the current file $pps 695# 696 if ($curfile) { 697 return 1 if $curfile==$pps; 698 } 699 @curfile_iolist = &get_iolist( 700 $pps_size[$pps]>=0x1000, 0, $pps_size[$pps], $pps_sb[$pps] 701 ); 702 $curfile = $pps; 703 1; 704} 705 706sub get_all_filehandles { 707# 708# &get_all_filehandles(starting directory) 709# 710# !recursive! 711# Recurse over all files and directories, 712# return all file handles as @files. 713# 714 local($directory_pps)=shift; 715 local(@dir)=&laola_get_dirhandles($directory_pps); 716 local(@files)=(); 717 local(%filescontrol)=(); 718 719 foreach $entry (@dir) { 720 if (!$filescontrol{$entry}) { 721 $filescontrol{$entry} = 1; 722 if (&laola_is_file($entry)) { 723 push (@files, $entry) 724 } elsif (&laola_is_directory($entry)) { 725 push (@files, &get_all_filehandles($entry)); 726 } 727 } else { 728 print STDERR "This document is corrupt!\n"; 729 } 730 } 731 @files; 732} 733 734## 735## --------------------- Property Set Handling ------------------------- 736## 737 738sub propertyset_type_init { 739 %ppset_vtype = ( 740 0x00, "empty", 741 0x01, "null", 742 0x02, "i2", 743 0x03, "i4", 744 0x04, "r4", 745 0x05, "r8", 746 0x06, "cy", 747 0x07, "date", 748 0x08, "bstr", 749 0x0a, "error", 750 0x0b, "bool", 751 0x0c, "variant", 752 0x11, "ui1", 753 0x12, "ui2", 754 0x13, "ui4", 755 0x14, "i8", 756 0x15, "ui8", 757 0x1e, "lpstr", 758 0x1f, "lpwstr", 759 0x40, "filetime", 760 0x41, "blob", 761 0x42, "stream", 762 0x43, "storage", 763 0x44, "streamed_object", 764 0x45, "stored_object", 765 0x46, "blobobject", 766 0x48, "clsid", 767 0x49, "cf", 768 0xfff, "typemask", 769 ); 770 local(@type) = keys %ppset_vtype; 771 for (@type) { 772 $ppset_vtype{$_+0x1000} = $ppset_vtype{$_}.'[]'; 773 } 774 775 # \05 776 %ppset_SummaryInformation = ( 777 2, "title", 3, "subject", 4, "authress", 5, "keywords", 778 6, "comments", 7, "template", 8, "lastauthress", 779 9, "revnumber", 10, "edittime", 11, "lastprinted", 780 12, "create_dtm_ro", 13, "lastsave_dtm", 14, "pagecount", 781 15, "wordcount", 16, "charcount", 17, "thumbnail", 782 18, "appname", 19, "security" 783 ); 784 785 %ppset_DocumentSummaryInformation = ( 786 15, "organization" 787 ); 788 789 # \01CompObj 790 %ppset_CompObj = ( 791 0, "doc_long", 1, "doc_class", 2, "doc_spec" 792 ); 793} 794 795 796sub load_dictionary { 797# 798# "ok"||"done"||0 = load_dictionary($pps) 799# 800 local($pps)=shift; 801 &load_dictionary_defaults($pps); 802 803 local($i, $n, $o, $ps); 804 local($did, $dname, $l); 805 806 foreach $id (keys %ppset_fido_dict) { 807 next if !$ppset_fido_dict{$id}; 808 809 $ps = int($id/0x1000); 810 $o = $ppset_o[$ps]+$ppset_fido_dict{$id}; 811 $n = &get_long($o, $ppset_buf); $o+=4; 812 813 for (; $n; $n--) { 814 $did = &get_long($o, $ppset_buf); $o+=4; 815 ($l, $dname) = &ppset_get_var(0x1e, $o); $o+=$l; 816 $ppset_dictionary{$did+$ps*0x1000} = $dname; 817 } 818 } 819 return "ok"; 820} 821 822sub load_dictionary_defaults { 823 local($name)=&laola_pps_get_name($pps); 824 if ($name eq "\05SummaryInformation") { 825 %ppset_dictionary = %ppset_SummaryInformation; 826 return "ok"; 827 } elsif ($name eq "\05DocumentSummaryInformation") { 828 %ppset_dictionary = %ppset_DocumentSummaryInformation; 829 return "ok"; 830 } elsif ($name eq "\01CompObj") { 831 %ppset_dictionary = %ppset_CompObj; 832 return "ok"; 833 } 834 return 0; 835} 836 837sub load_propertyset { 838 local($pps)=shift; 839 local($status)=""; 840 841 check_current: { 842 if ($ppset_current && $pps && ($ppset_current == $pps)) { 843 $status="ok"; last; 844 } 845 if (!&laola_is_file_ppset($pps)) { 846 $status="This is not a property set handle."; last; 847 } 848 &init_propertyset(); 849 if (!&laola_get_file($pps, $ppset_buf)) { 850 $status="Cannot load property set."; 851 } 852 $ppset_type = &laola_is_file_ppset($pps); 853 } 854 return $status if $status; 855 856 if ($ppset_type == 5) { 857 $status = &load_propertyset_05($pps); 858 return $status if $status ne "ok"; 859 } elsif ($ppset_type == 1) { 860 $status = &load_propertyset_01CompObj($pps); 861 return $status if $status ne "ok"; 862 } else { 863 return "Unknown property set!"; 864 } 865 866 $status = &load_dictionary($pps); 867 return $status; 868} 869 870sub init_propertyset { 871 # !global! property set things 872 873 $ppset_current=undef; # current property storage handle 874 $ppset_type=undef; # \05, \01CompObj 875 $ppset_buf=undef; # buffer for whole property 876 %ppset_fido=(); # $ppset_fido{Identifier}=Offset; 877 # Format Pairs of $ppset_current 878 %ppset_fido_dict=(); # Dictionaries 879 %ppset_fido_cp=(); # Code pages 880 881 $ppset_codepage=undef; 882 %ppset_dictionary=(); 883 884 structure_05: { # 05 ppsets 885 # Header 886 $ppset_byteorder=undef; # word (0) {0xfffe} 887 $ppset_format=undef; # word (2) {0} 888 $ppset_osver=undef; # word (4) {lbyte=version hbyte=revision} 889 $ppset_os=undef; # word (6) {0=win16|1=mac|2=win32) 890 @ppset_clsid=(); # class identifier (8) {e.g. @0} 891 $ppset_reserved=undef; # long (18) {>=1} 892 893 # FormatIDOffset 894 @ppset_fmtid=(); # format identifier (1c) 895 @ppset_o=(); # ppset_o[0]: long (2c) 896 897 # PropertySectionHeader 898 @ppset_size=(); # word ($ppset_o[]) 899 @ppset_num=(); # long ($ppset_o[]+4) 900 } 901 902 #structure_01CompObj: { 903 #$ppset_uk1=undef; # word (0) {0x0001} 904 #$ppset_byteorder=undef; # word (2) {0xfffe} 905 #$ppset_osver=undef; # word (4) {lbyte=version hbyte=revision} 906 #$ppset_os=undef; # word (6) {0=win16|1=mac|2=win32) 907 # { ff ff ff ff 00 09 02 00 00 00 00 00 908 # c0 00 00 00 00 00 00 46 } 909 #@ppset_o=(); # 0x1c 910 #} 911} 912 913sub load_propertyset_01CompObj { 914 local($pps)=shift; 915 set_current: { 916 $ppset_current = $pps; 917 get_structure: { 918 $ppset_byteorder = &get_word(0x02, $ppset_buf); 919 $ppset_osver = &get_word(0x04, $ppset_buf); 920 $ppset_os = &get_word(0x06, $ppset_buf); 921 @ppset_o = (0x1c); 922 } 923 check_structure: { 924 if ($ppset_byteorder !=0xfffe) { 925 return "Cannot understand property set."; 926 } 927 } 928 } 929 get_offsets: { 930 local($i); 931 local($offset, $length)=(0, 0); 932 for ($i=0; $i<3; $i++) { 933 $length = &get_long($ppset_o[0] + $offset, $ppset_buf); 934 last if !$length; 935 $ppset_fido{$i} = $offset; 936 $offset = $offset + 4 + $length; 937 } 938 } 939 "ok"; 940} 941 942sub load_propertyset_05 { 943 local($pps)=shift; 944 set_current: { 945 $ppset_current = $pps; 946 get_structure: { 947 ($ppset_byteorder, $ppset_format, $ppset_osver, $ppset_os) = 948 &get_nword(4, 0, $ppset_buf) 949 ; 950 @ppset_clsid = &get_uuid(0x08, $ppset_buf); 951 $ppset_reserved = &get_long(0x18, $ppset_buf); 952 @ppset_fmtid = &get_uuid(0x1c, $ppset_buf); 953 $ppset_o[0] = &get_word(0x2c, $ppset_buf); 954 $ppset_size[0] = &get_word($ppset_o[0], $ppset_buf); 955 $ppset_num[0] = &get_word($ppset_o[0]+4, $ppset_buf); 956 } 957 check_structure: { 958 $status="Cannot understand property set."; 959 last if $ppset_byteorder != 0xfffe; 960 last if $ppset_format != 0; 961 last if $ppset_reserved < 1; 962 last if $ppset_o[0] < 0x30; 963 $status=""; 964 } 965 } 966 return $status if $status; 967 968 get_ids_and_offsets: { 969 local($i, $id, $n, $num, $fido); 970 local($o)=$ppset_o[0]; 971 for ($n=0; $n<$ppset_reserved; $n++) { 972 973 # default dictionary and codepage 974 $ppset_fido_dict{$n*0x1000+0} = 0; 975 $ppset_fido_cp{$n*0x1000+1} = 0x4e4; 976 977 $num=&get_word($o+4, $ppset_buf); 978 for ($i=0; $i<$num; $i++) { 979 $id = &get_long($o+8+$i*8, $ppset_buf); 980 if ($n) { 981 $id = $i if $id>1; # ! hacky ! 982 } 983 $fido = &get_long($o+8+$i*8+4, $ppset_buf); 984 if ($id>1) { 985 $ppset_fido{$n*0x1000+$id} = $fido; 986 } elsif ($id==1) { 987 $ppset_fido_cp{$n*0x1000+1} = $fido; 988 } elsif ($id==0) { 989 $ppset_fido_dict{$n*0x1000} = $fido; 990 } 991 } 992 $o+=&get_word($o, $ppset_buf); 993 $ppset_o[$n+1]=$o; 994 } 995 } 996 # todo: code page 997 "ok"; 998} 999 1000sub ppset_get_property { 1001# 1002# ($type, $size, @mixed)||("error", $debuginfo) = ppset_get_property($offset) 1003# 1004 local($o_begin)=$_[0]; 1005 local($o)=$o_begin; 1006 local($type) = &get_long($o, $ppset_buf); 1007 1008 if (! ($type & 0x1000)) { 1009 return ($type, &ppset_get_var($type, $o+4)); 1010 } else { 1011 local(@mixed)=(); 1012 local($n)=&get_long($o+4, $ppset_buf); $o+=8; 1013 local($t, $l, @var); 1014 for (; $n; $n--) { 1015 @var=(); 1016 ($l, @var) = &ppset_get_var($type^0x1000, $o); 1017 push (@mixed, 1+($#var+1), $type^0x1000, @var); 1018 $o+=$l; 1019 } 1020 return ($type, $o-$o_begin, @mixed); 1021 } 1022} 1023 1024sub ppset_get_var { 1025# 1026# ($size, @var) = &ppset_get_var($type, $offset); 1027# 1028 local($type, $o)=@_; 1029 if (!$type || $type == 0x01) { # empty, null 1030 return (0, ""); 1031 } elsif ($type == 0x02) { # i2 1032 local($tmp) = &get_word($o, $ppset_buf); 1033 $tmp = - (($tmp^0xffff) +1) if ($tmp & 0x8000); 1034 return (2, $tmp); 1035 } elsif ($type == 0x03) { # i4 1036 local($tmp) = &get_long($o, $ppset_buf); 1037 $tmp = - (($tmp^0xffffffff) +1) if ($tmp & 0x80000000); 1038 return (4, $tmp); 1039 } elsif ($type == 0x04) { # real 1040 return (4, unpack("f", substr($ppset_buf, $o, 4)) ); 1041 } elsif ($type == 0x05) { # double 1042 return (8, unpack("d", substr($ppset_buf, $o, 8)) ); 1043 } elsif ($type == 0x0a) { # error 1044 return (4, &get_word($o, $ppset_buf)); 1045 } elsif ($type == 0x0b) { # bool (0==false, -1==true) 1046 return (4, &get_long($o, $ppset_buf)); 1047 } elsif ($type == 0x0c) { # variant 1048 local($t, $l, @var); 1049 $t = &get_long($o, $ppset_buf); 1050 ($l, @var) = &ppset_get_var($t, $o+4); 1051 return (4+$l, $t, @var); 1052 } elsif ($type == 0x11) { # ui1 1053 return (1, &get_byte($o, $ppset_buf)); 1054 } elsif ($type == 0x12) { # ui2 1055 return (2, &get_word($o, $ppset_buf)); 1056 } elsif ($type == 0x13) { # ui4 1057 return (4, &get_long($o, $ppset_buf)); 1058 } elsif ($type == 0x1e) { # lpstr 1059 local($l)=&get_long($o, $ppset_buf); 1060 if ($l) { 1061 return (4+$l, substr($ppset_buf, $o+4, $l-1)); 1062 } else { 1063 return (4, ""); 1064 } 1065 } elsif ($type==0x40) { # filetime 1066 return (8, &filetime_to_time(&get_nlong(2, $o, $ppset_buf)) ); 1067 } else { 1068 return ( 1069 "error", 1070 sprintf("(offset=%x, type=%x, buf[0]=%x)", 1071 $o, $type, &get_long($o+4, $ppset_buf) 1072 ) 1073 ); 1074 } 1075} 1076 1077## 1078## Basic laola data types 1079## 1080 1081sub var_init { 1082# 1083# At this work I still don't trust in signed integers, therefore I 1084# prefer the unsigned 0xffffffff to -1 (don't beat me) 1085# 1086 $vtype{"c"}="C"; $vsize{"c"}=1; # unsigned char 1087 $vtype{"w"}="v"; $vsize{"w"}=2; # 0xfe21 == 21 fe 1088 $vtype{"l"}="V"; $vsize{"l"}=4; # 0xfe21abde == de ab 21 fe 1089} 1090 1091sub get_chars { 1092# 1093# get_chars ($offset, $number, extern $sourcebuf); 1094# 1095 substr($_[2], $_[0], $_[1]); 1096} 1097 1098sub read_chars { 1099# 1100# read_chars ($offset, $number); 1101# 1102 local($tmp)=""; 1103 &myread($_[0], $_[1], $tmp) && $tmp; 1104} 1105 1106# get_thing ($offset, extern $buf); 1107sub get_byte { &get_var("c", @_); } 1108sub get_word { &get_var("w", @_); } 1109sub get_long { &get_var("l", @_); } 1110sub get_var { 1111 unpack ($vtype{$_[0]}, substr($_[2], $_[1], $vsize{$_[0]})); 1112} 1113 1114# get_nthing ($n, $offset, extern $buf); 1115sub get_nbyte { &get_nvar("c", @_); } 1116sub get_nword { &get_nvar("w", @_); } 1117sub get_nlong { &get_nvar("l", @_); } 1118sub get_nvar { 1119 unpack ($vtype{$_[0]}.$_[1], substr($_[3], $_[2], $vsize{$_[0]}*$_[1])); 1120} 1121 1122# read_thing ($offset); 1123sub read_byte { &read_var("c", @_); } 1124sub read_word { &read_var("w", @_); } 1125sub read_long { &read_var("l", @_); } 1126sub read_var { 1127 unpack ($vtype{$_[0]}, &read_chars($_[1], $vsize{$_[0]})); 1128} 1129 1130# read_nthing ($n, $offset); 1131sub read_nbyte { &read_nvar("c", @_); } 1132sub read_nword { &read_nvar("w", @_); } 1133sub read_nlong { &read_nvar("l", @_); } 1134sub read_nvar { 1135 unpack ($vtype{$_[0]}.$_[1], &read_chars($_[2], $vsize{$_[0]}*$_[1])); 1136} 1137 1138## 1139## --------------------------- IO handling ------------------------------ 1140## 1141 1142sub myio { 1143# 1144# 1||0= myio("r"||"w", $file_offset, $num_of_chars, $extern_var [,$var_offset]) 1145# 1146 $_ = shift; 1147 /^r$/i ? &myread : /^w$/i ? &mywrite : 0; 1148} 1149 1150sub myread { 1151# 1152# 1||0 = myread($file_offset, $num_of_chars, $extern_var [,$var_offset]) 1153# 1154 local($varoffset)= $_[3] || 0; 1155 if ($optional_do_iobuf && $iobuf) { 1156 substr($_[2], $varoffset, $_[1])=substr($iobuf, $_[0], $_[1]); 1157 return 1; 1158 } else { 1159 seek(IO, $_[0], 0) && (read(IO,$_[2],$_[1],$varoffset) == $_[1]); 1160 } 1161} 1162 1163sub mywrite { 1164# 1165# 1||0 = mywrite($file_offset, $num_of_chars, $extern_var [,$var_offset]) 1166# 1167 return 0 if !$io_writable; 1168 1169 local($varoffset)= $_[3] || 0; 1170 local($tmp) = substr($_[2], $varoffset, $_[1]); 1171 $tmp .= "\00" x ($_[1]-length($tmp)); 1172 if ($optional_do_iobuf && $iobuf) { 1173 substr($iobuf, $_[0], $_[1]) = $tmp; 1174 push(@iobuf_modify_a, $_[0]); 1175 push(@iobuf_modify_l, $_[1]); 1176 return 1; 1177 } else { 1178 seek(IO, $_[0], 0) && print IO $tmp; 1179 } 1180} 1181 1182sub flush_cache { 1183# 1184# void = flush_cache() 1185# 1186# flush io cache, if caching is turned on 1187# 1188 return if !($optional_do_iobuf && $iobuf); 1189 1190 &rw_iolist("w", $iobuf, 1191 &aggregate_iolist(2, @iobuf_modify_a, @iobuf_modify_l) 1192 ); 1193 1194 @iobuf_modify_a=(); @iobuf_modify_l=(); 1195} 1196 1197## 1198## The "logical" core of laola 1199## 1200 1201sub get_ppss_chain { 1202# 1203# @blocks = get_ppss_chain($ppss) 1204# 1205# !recursive! 1206# 1207 local($ppss) = @_; 1208 return if $ppss == 0xffffffff; 1209 1210 if ($chaincontrol{$ppss}) { 1211 # Recursive entry! 1212 @chain = (); 1213 print STDERR "This document is corrupt!\n"; 1214 return; 1215 } else { 1216 &read_ppss($ppss); 1217 $chaincontrol{$ppss}=1; 1218 } 1219 1220 &get_ppss_chain ( $pps_prev[$ppss] ); 1221 1222 push(@chain, $ppss); 1223 1224 &get_ppss_chain ( $pps_next[$ppss] ); 1225} 1226 1227sub read_ppss_buf { 1228# 1229# "ok" = read_ppss_buf ($i, extern $buf) 1230# 1231 local($i)=$_[0]; 1232 local($nl); 1233 return "ok" if $pps_name[$i]; 1234 return if ! ($nl = &get_word($i*0x80+0x40, $_[1])); 1235 1236 $pps_name[$i] = &pps_name_to_string($i*0x80, $nl, $_[1]); 1237 1238 ($pps_type[$i], $pps_uk0[$i], 1239 $pps_prev[$i], $pps_next[$i], $pps_dir[$i]) = 1240 unpack($vtype{"c"}."2".$vtype{"l"}."3", 1241 substr($_[1], $i*0x80+0x42, $vsize{"c"}*2+$vsize{"l"}*3)) 1242 ; 1243 1244 ($pps_ts1s[$i], $pps_ts1d[$i], $pps_ts2s[$i], $pps_ts2d[$i], 1245 $pps_sb[$i], $pps_size[$i]) = 1246 &get_nlong(6, $i*0x80+0x64, $_[1]) 1247 ; 1248 1249 "ok"; 1250} 1251 1252sub read_ppss { 1253# 1254# "ok" = read_ppss ($i) 1255# 1256 local($i)=shift; 1257 return "ok" if $pps_name[$i]; 1258 1259 local($buf)=""; 1260 &rw_iolist("r", $buf, &get_iolist(3, $i*0x80, 0x80, 0, @root_list)); 1261 1262 local($nl); 1263 return if ! ($nl = &get_word(0x40, $buf)); 1264 $pps_name[$i] = &pps_name_to_string(0, $nl, $buf); 1265 ($pps_type[$i], $pps_uk0[$i], $pps_prev[$i], $pps_next[$i], $pps_dir[$i])= 1266 unpack($vtype{"c"}."2".$vtype{"l"}."3", 1267 substr($buf, 0x42, $vsize{"c"}*2+$vsize{"l"}*3) 1268 ) 1269 ; 1270 1271 ($pps_ts1s[$i], $pps_ts1d[$i], $pps_ts2s[$i], $pps_ts2d[$i], 1272 $pps_sb[$i], $pps_size[$i]) = unpack( 1273 $vtype{"l"}."6", substr($buf, 0x64, $vsize{"l"}*6) 1274 ); 1275 1276 "ok"; 1277} 1278 1279 1280sub get_list_from_depot { 1281# 1282# @blocks = get_list_from_depot ($start, depottype) 1283# 1284# Read a block chain starting with block $start out of a either 1285# depot @bbd (for $t) or depot @sbd (for !$t). 1286# 1287 local($start, $t)=@_; 1288 local(@chain)=(); 1289 return @chain if $start == 0xfffffffe; 1290 1291 push (@chain, $start); 1292 while ( ($start = $t?$bbd[$start]:$sbd[$start]) != 0xfffffffe ) { 1293 push(@chain, $start); 1294 } 1295 @chain; 1296} 1297 1298sub get_iolist { 1299# 1300# @iolist = get_iolist ($depottype, $offset, $size, $startblock [,@depot]) 1301# 1302# This is the main IO logic. Returns the iolist for a data stream according 1303# to depot type $t. The stream may start at offset $offset and can have a 1304# size $size. If size is bigger than the total size of the stream according 1305# to its depot, it will be cut correctly. (So if you want to read until the 1306# files end without knowing how many bytes that are, take 0xffffffff as size). 1307# 1308# depottype $t: 1309# 0 small block (for @sbd) small block depot 1310# 1 big block (for @bbd) big block depot 1311# 2 small block (for @_[4..$#]) some small blocks 1312# 3 big block (for @_[4..$#]) some big blocks 1313# 4 variable (for @curfile_iolist) iolist of current file 1314# 5 variable (for @_[4..$#] == (@o, @l)) some iolist 1315# 1316 local($t, $offset, $size, $sb) = (shift||0, shift||0, shift||0, shift||0); 1317 local($di); 1318 local($bs, $max); 1319 1320 local(@empty)=(); 1321 return @empty if !$size; 1322 1323 local($begin, $done, $len); 1324 local(@o)=(); local(@l)=(); 1325 1326 $bs = ($t==1 || $t==3) ? $big_block_size : $small_block_size; 1327 1328 if ($t<2) { 1329 # To skip these offsets, stream chains would have to be resolved 1330 # before. 1331 } elsif ($t<4) { 1332 $max = $#_; 1333 # Skip whole blocks, when offset given 1334 $sb += int ($offset / $bs); 1335 $offset -= int ($offset / $bs) * $bs; 1336 } elsif ($t==4) { 1337 $max = ($#curfile_iolist-1)/2; 1338 } elsif ($t==5) { 1339 $max = ($#_-1)/2; 1340 } else { 1341 return @empty; 1342 } 1343 1344 $done = 0; 1345 for ( $di=$sb; 1346 ($t<2) ? ($di!=0xfffffffe): ($di<=$max); 1347 $di=&next_dl 1348 ) { 1349 last if ($done == $size); 1350 if ($t==4) { 1351 $bs = $curfile_iolist[$max+1+$di]; 1352 } elsif ($t==5) { 1353 $bs = $_[$max+1+$di]; 1354 } 1355 if ($offset) { 1356 if ($bs <= $offset) { 1357 $offset -= $bs; 1358 next; 1359 } else { 1360 $begin = &depot_offset + $offset; 1361 $len = $bs - $offset; 1362 $offset = 0; 1363 } 1364 } else { 1365 $begin = &depot_offset; 1366 $len = $bs; 1367 } 1368 if ( ($done+$len) > $size ) { 1369 $len = $size - $done; 1370 } 1371 if ( !@o || ($o[$#o]+$l[$#l])!=$begin ) { 1372 push(@o, $begin); 1373 push(@l, $len); 1374 } else { 1375 $l[$#l]+=$len; 1376 } 1377 $done += $len; 1378 } 1379 (@o, @l); 1380} 1381sub next_dl { # get_iolist:next_dl 1382# 1383# index = depot ($di==index, $t==depothandle) 1384# 1385# Returns next chain link of depot @bbd ($t) or @sbd (!$t) 1386# 1387 return $sbd[$di] if !$t; 1388 return $bbd[$di] if $t==1; 1389 $di+1; 1390} 1391sub depot_offset { # get_iolist:depot_offset 1392# 1393# offset = depot_offset ($di==index, $t==depottype) 1394# 1395 return (($sb_list[$di/8]+1)*8 + ($di%8))*$small_block_size if $t==0; 1396 return $header_size + $di*$big_block_size if $t==1; 1397 return (($sb_list[$_[$di]/8]+1)*8 + ($_[$di]%8))*$small_block_size if $t==2; 1398 return $header_size + $_[$di]*$big_block_size if $t==3; 1399 return ($curfile_iolist[$di]) if $t==4; 1400 return ($_[$di]) if $t==5; 1401} 1402 1403 1404sub aggregate_iolist { 1405# 1406# (@offsets, @lengths)||() = aggregate_iolist(method,@offsets,@lengths) 1407# 1408# method: 1409# 1 @offsets shall be sorted, no overlap allowed 1410# 2 @offsets shall be sorted, overlap is allowed 1411# 3 @offsets are sorted, no overlap allowed 1412# 4 @offsets are sorted, overlap is allowed 1413# 1414 local($method)=shift; 1415 local(@empty)=(); 1416 return @empty if ($method<1)||($method>4); # Don't know method! 1417 1418 local($max)=int(($#_+1)/2); 1419 1420 local($i, $j); 1421 local(@o_in)=(); local(@l_in)=(); 1422 local(%o_in)=(); 1423 local(@o_out)=(); local(@l_out)=(); 1424 local($offset, $len); 1425 1426 # 1427 # Sort 1428 # 1429 if ( ($method==1) || ($method==2)) { 1430 # sort offsets 1431 for ($i=0; $i<$max; $i++) { 1432 next if !$_[$max+$i]; 1433 if ($o_in{$_[$i]}) { 1434 return @empty if $method==1; # Data chunks overlap! 1435 $o_in{$_[$i]}=$i if $_[$max+$i]>$o_in{$_[$i]}; 1436 } else { 1437 $o_in{$_[$i]}=$i; 1438 } 1439 } 1440 foreach $key (sort {$a <=> $b} keys %o_in) { 1441 push(@o_in, $_[$o_in{$key}]); 1442 push(@l_in, $_[$max + $o_in{$key}]); 1443 } 1444 } else { 1445 @o_in=@_[0..($max-1)]; 1446 @l_in=@_[$max..$#_]; 1447 } 1448 1449 # 1450 # Aggregate 1451 # 1452 $offset=$o_in[0]; 1453 $len=$l_in[0]; 1454 1455 for ($i=1; $i<=($#o_in+1); $i++) { 1456 if ( ($i==($#o_in+1)) 1457 || ($o_in[$i]<$offset) 1458 || ($o_in[$i]>($offset+$len)) 1459 ) { 1460 push(@o_out, $offset); 1461 push(@l_out, $len); 1462 $offset=$o_in[$i]; 1463 $len=$l_in[$i]; 1464 } elsif ($o_in[$i]<($offset+$len)) { 1465 return @empty if ($type==1 || $type==3); # Data chunks overlap! 1466 if ( ($o_in[$i]+$l_in[$i]) > ($offset+$len) ) { 1467 $len=$o_in[$i]+$l_in[$i]-$offset; 1468 } 1469 } else { 1470 $len += $l_in[$i]; 1471 } 1472 } 1473 (@o_out, @l_out); 1474} 1475 1476sub rw_iolist { 1477 # 1478 # 1||0 = rw_iolist("r"||"w", extern buf, @offsets, @lengths); 1479 # . read or write global chunklist 1480 # 1481 local($done, $i, $l); 1482 local($max) = int(($#_-2+1)/2); 1483 1484 $done=0; 1485 for ($i=0; $i<$max; $i++) { 1486 next if ! ($l = $_[2+$i+$max]); 1487 if (&myio($_[0], $_[2+$i], $l, $_[1], $done)) { 1488 $done += $l; 1489 } else { 1490 # io error! 1491 return 0; 1492 } 1493 } 1494 1; 1495} 1496 1497## 1498## ---------------------- Property Set Handling -------------------------- 1499## 1500 1501sub pps_name_to_string { 1502# 1503# $string = pps_name_to_string($offset, $pps_name_len, extern $buf) 1504# 1505 local($l)=$_[1]-2; 1506 local($i); 1507 local($tmp)=""; 1508 for ($i=0; $i<$l; $i+=2) { 1509 $tmp.=substr($_[2], $_[0]+$i, 1); 1510 } 1511 $tmp; 1512} 1513 1514sub learn_guids { 1515 @guids = ("dsi", "si"); 1516 $guid_dsi="\0x5DocumentSummaryInformation"; 1517 @guid_dsi=( 0xd5cdd502, 0x2e9c, 0x101b, 1518 "\0x93\0x97\0x08\0x00\0x2b\0x2c\0xf9\0xae" ); 1519 $guid_si="\0x5SummaryInformation"; 1520 @guid_si=( 0xf29f85e0, 0x4ff9, 0x1068, 1521 "\0xab\0x91\0x08\0x00\0x2b\0x27\0xb3\0xd9" ); 1522} 1523 1524sub get_uuid { 1525 local($o)=$_[0]; 1526 ( &get_long($o, $_[1]), 1527 &get_word($o+4, $_[1]), 1528 &get_word($o+6, $_[1]), 1529 &get_chars($o+8, 8, $_[1]) 1530 ); 1531} 1532 1533# 1534# This section refers to pps_ts2 and pps_ts1, the one ore two timestamps 1535# used for each "Storage" Property Set. It seems, that the second timestamp 1536# gets actualized, when changing the storage. The first stamp is sometimes 1537# used, sometimes unused. 1538# 1539# The stamp is a 64 bit ulong. It counts every second 10 * 10 ^ 6, 1540# starting at 01/01/1601. When the 64 bit int gets evaluated as 1541# two 32 bit integers, the faster running ("least significant long") 1542# can hold just 0x100000000 / 10000000.0 (about 429.5) seconds. So the 1543# slower running ("most significant long") increments every 429.5 seconds. 1544# 1545 1546sub filetime_init { 1547 @monsum = ( 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 1548 0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335 ); 1549 $a_minute = 60 * 10000000.0 / (0x10000000 * 16); 1550} 1551 1552sub is_schaltjahr { 1553 local($year)=shift; 1554 !($year%4) && ($year%100 || !($year%400) ) && 1; 1555} 1556 1557sub filetime_years_to_days { 1558 local($year)=shift; 1559 int($year-1600) * 365 1560 + int( ($year-1600) / 4 ) 1561 - int( ($year-1600) / 100 ) 1562 + int( ($year-1600) / 400 ) 1563 ; 1564} 1565 1566sub filetime_to_time { 1567 local($ds, $dd)=@_; 1568 local($day, $month, $year, $hour, $min, $sec); 1569 local($i, $m, $d, $dsum, $tmpsec); 1570 1571 $dsum = $dd + ($ds / (0x10000000 * 16.0)); 1572 1573 $d= int( $dsum/($a_minute*60*24) )+1; 1574 $m= $dsum - ($d-1)*$a_minute*60*24; 1575 1576 $year = int( $d/365.2425 ) + 1601; 1577 $d -= &filetime_years_to_days($year-1); 1578 1579 for( $i=11; $i && ($d <= $monsum[$i+&is_schaltjahr($year)*12]); $i--) {} 1580 $month = $i+1; 1581 $day = $d - $monsum[$i+&is_schaltjahr($year)*12]; 1582 1583 $hour = int( $m / ($a_minute*60) ); 1584 $min = int( $m/$a_minute - $hour*60 ); 1585 $sec = ( ($m/$a_minute - $hour*60 - $min) * 60); 1586 1587 ($day, $month, $year, $hour, $min, $sec); 1588} 1589 1590sub time_to_filetime { 1591 local($day, $month, $year, $hour, $min, $sec)=@_; 1592 local($d, $tss, $tsd); 1593 1594 $d = &filetime_years_to_days($year-1) 1595 + $monsum[$month-1 + &is_schaltjahr($year)*12] 1596 + $day-1; 1597 1598 $tsd = (24*60*$d + 60*$hour +$min +$sec/60.0) * $a_minute; 1599 1600 $tss = ($tsd-int($tsd)) * 0x10000000 * 16; 1601 1602 ( int($tss), int($tsd) ); 1603} 1604 1605 1606## 1607## ------------------------- Trash Handling ------------------------------ 1608## 1609 1610sub make_blockuse_statistic { 1611 # 1612 # block statistic: 1613 # 0 == irregular free (block depot entry != -1) (== undef) 1614 # 1 == regular free (block depot entry == -1) 1615 # 2 == used for ole system 1616 # 3 == used for ole application 1617 # 1618 return 1 if $usage_known; 1619 local($i, @list); 1620 1621 # default: all small and big blocks are undef 1622 1623 # 1624 # regular system data 1625 # 1626 1627 # ole system blocks 1628 for (@bbd_list, @sbd_list, @root_list, @sb_list) { 1629 $bb_usage[$_]=2; 1630 } 1631 1632 # free blocks according to block depots 1633 for (@bbd) { 1634 $bb_usage[$_]=1 if $bbd[$_]==0xffffffff; 1635 } 1636 for (@sbd) { 1637 $sb_usage[$_]=1 if $sbd[$_]==0xffffffff; 1638 } 1639 1640 # 1641 # OLE application blocks 1642 # 1643 foreach $file (&get_all_filehandles(0)) { 1644 if ($pps_size[$file]>=0x1000) { 1645 for (&get_list_from_depot($pps_sb[$file], 1)) { $bb_usage[$_]=3; } 1646 } else { 1647 for (&get_list_from_depot($pps_sb[$file], 0)) { $sb_usage[$_]=3; } 1648 } 1649 } 1650 1651 $usage_known=1; 1652} 1653 1654sub get_trash_info { 1655# 1656# void get_trash_info(); 1657# 1658# Trash types: 1659# 1660# 0 == all 1661# 1 == unused big blocks 1662# 2 == unused small blocks 1663# 4 == unused file space, according to sizeof pps_size (incl. root_entry) 1664# 8 == unused system space (header, sb_table, bb_table) 1665# 1666 return 1 if $trash_known; 1667 &make_blockuse_statistic(); 1668 1669 local(@o, @l); 1670 local(@list); 1671 local($size, $m); 1672 local($i); 1673 local($begin, $len); 1674 1675 unused_big_blocks: { 1676 $size=0; @list=(); 1677 for ($i=0; $i<=$maxblock; $i++) { 1678 push(@list, $i) if $bb_usage[$i]<=1; 1679 } 1680 @trash1_o = &get_iolist(3, 0, 0xfffffff, 0, @list); 1681 @trash1_l = splice(@trash1_o, ($#trash1_o+1)/2); 1682 $m=$#trash1_o; for ($i=0; $i<=$m; $i++) { $size+=$trash1_l[$i]; } 1683 $trashsize{1}=$size; 1684 } 1685 1686 unused_small_blocks: { 1687 $size=0; @list=(); 1688 for ($i=0; $i<=$maxsmallblock; $i++) { 1689 push(@list, $i) if $sb_usage[$i]<=1; 1690 } 1691 @trash2_o = &get_iolist(2, 0, 0xfffffff, 0, @list); 1692 @trash2_l = splice(@trash2_o, ($#trash2_o+1)/2); 1693 $m=$#trash2_o; for ($i=0; $i<=$m; $i++) { $size+=$trash2_l[$i]; } 1694 $trashsize{2}=$size; 1695 } 1696 1697 unused_file_space: { 1698 $size=0; 1699 1700 # 3.1. normal files 1701 foreach $file (&get_all_filehandles(0)) { 1702 @o = &get_iolist( 1703 $pps_size[$file]>=0x1000 && 1, 1704 $pps_size[$file], 0xffffffff, $pps_sb[$file] 1705 ); 1706 push(@trash3_l, splice(@o, ($#o+1)/2)); 1707 push(@trash3_o, @o); 1708 } 1709 $m=$#trash3_o; for ($i=0; $i<=$m; $i++) { $size+=$trash3_l[$i]; } 1710 1711 # 3.2. system file of root_entry (small block file) 1712 @list = (); 1713 while (($#list+$#sbd+2) % 8) { 1714 push(@list, $#list+$#sbd+2); 1715 } 1716 @o = &get_iolist(2, 0, 0xfffffff, 0, @list); 1717 @l = splice(@o, ($#o+1)/2); 1718 push(@trash3_o, @o); push(@trash3_l, @l); 1719 $m=$#o; for ($i=0; $i<=$m; $i++) { $size+=$l[$i]; } 1720 1721 $trashsize{3}=$size; 1722 } 1723 1724 unused_system_space: { 1725 $size=0; 1726 1727 # 4.1. header block 1728 $begin = 0x4c + $num_of_bbd_blocks*4; 1729 $len = $header_size - $begin; 1730 push(@trash4_o, $begin); push(@trash4_l, $len); 1731 $size+=$len; 1732 1733 # 4.2. big block depot 1734 @o = &get_iolist(3, ($maxblock+1)*4, 0xffffffff, 0, @bbd_list); 1735 @l = splice(@o, ($#o+1)/2); 1736 push(@trash4_o, @o); push(@trash4_l, @l); 1737 $m=$#o; for ($i=0; $i<=$m; $i++) { $size+=$l[$i]; } 1738 1739 # 4.3. small block depot 1740 @o = &get_iolist(3, ($maxsmallblock+1)*4, 0xffffffff, 0, @sbd_list); 1741 @l = splice(@o, ($#o+1)/2); 1742 push(@trash4_o, @o); push(@trash4_l, @l); 1743 $m=$#o; for ($i=0; $i<=$m; $i++) { $size+=$l[$i]; } 1744 1745 $trashsize{4}=$size; 1746 } 1747 1748 $trash_known=1; 1749} 1750 1751sub get_trash_size { 1752 local($type)=shift; 1753 $type = (1|2|4|8) if !$type; 1754 &get_trash_info(); 1755 1756 local($trashsize)=0; 1757 $trashsize += $trashsize{1} if $type & 1; 1758 $trashsize += $trashsize{2} if $type & 2; 1759 $trashsize += $trashsize{3} if $type & 4; 1760 $trashsize += $trashsize{4} if $type & 8; 1761 1762 $trashsize; 1763} 1764 1765sub rw_trash { 1766# 1767# "ok"||error = rw_trash("r"||"w", $type, extern $buf [,$offset,$size]) 1768# 1769 local($maxarg)=$#_; 1770 &get_trash_info(); 1771 1772 local($rw, $type) = @_[0..1]; 1773 $type = (1|2|4|8) if !$type; 1774 1775 local($status, $offset, $size) = 1776 &get_default_iosize(&laola_get_trashsize($type), $rw, @_[2..$maxarg]); 1777 return $status if $status ne "ok"; 1778 1779 local(@o)=(); local(@l)=(); 1780 if ($type & 1) { push (@o, @trash1_o); push (@l, @trash1_l); } 1781 if ($type & 2) { push (@o, @trash2_o); push (@l, @trash2_l); } 1782 if ($type & 4) { push (@o, @trash3_o); push (@l, @trash3_l); } 1783 if ($type & 8) { push (@o, @trash4_o); push (@l, @trash4_l); } 1784 1785 return "ok" if &rw_iolist( 1786 $rw, $_[2], 1787 &get_iolist(5, $offset, $size, 0, &aggregate_iolist(1, @o, @l)) 1788 ); 1789 1790 "Laola: IO Error!"; 1791} 1792 1793 1794## 1795## ----------------------------- Debugging ------------------------------- 1796## 1797 1798# 1799# Some debug information. Switch it on via $optional_do_debug=1 1800# Information will be shown directly after opening any document. 1801# 1802 1803sub debug_report_pps { 1804 local($i)=shift; 1805 local($out)=""; 1806 local($tmp, $tmp2)=""; 1807 1808 return if !$pps_name[$i]; 1809 1810 $out = sprintf ("%2x", $i); 1811 $out .= $pps_uk0[$i]==1 ? ": " : sprintf ("#%-2x", $pps_uk0[$i]); 1812 1813 if (&laola_is_directory($i)) { 1814 $out .= "--> "; 1815 } elsif (&laola_is_file($i)) { 1816 $out .= sprintf ("%-5x ", 1817 &laola_get_filesize($i)); 1818 } else { 1819 $out .= " "; 1820 } 1821 1822 if ($pps_prev[$i]==0xffffffff) { $out .= " ."; 1823 } else { $out .= sprintf ("%3x", $pps_prev[$i]); } 1824 if ($pps_next[$i]==0xffffffff) { $out .= " ."; 1825 } else { $out .= sprintf ("%3x", $pps_next[$i]); } 1826 if ($pps_dir[$i]==0xffffffff) { $out .= " ."; 1827 } else { $out .= sprintf ("%3x", $pps_dir[$i]); } 1828 1829 if (&laola_is_file_ppset($i)) { 1830 $out .= " set"; 1831 } else { 1832 $out .= " pp "; 1833 } 1834 1835 ($tmp=$pps_name[$i]) =~ s/[^_a-zA-Z0-9]/ /g; 1836 $out .= sprintf (" \"%s\"",$tmp); 1837 1838 $out .= " " x (50 - length($out)); 1839 1840 if ($pps_ts2d[$i]) { 1841 $out .= sprintf (" %d.%d.%d %02d.%02d:%02d", 1842 &filetime_to_time($pps_ts2s[$i], $pps_ts2d[$i]) 1843 ); 1844 } 1845 1846 print "$out\n"; 1847} 1848 1849sub report_blockuse_statistic { 1850 return 1; 1851 print "--- LAOLA internal, begin block statistic ---\n\n"; 1852 &make_blockuse_statistic(); 1853 local($i, $j, $m); 1854 local(@o, @l); 1855 print "Big blocks:\n"; 1856 for ($i=0; $i<4; $i++) { 1857 @o=(); @l=(); 1858 $m=$#bb_usage; for ($j=0; $j<=$m; $j++) { 1859 next if $bb_usage[$j]!=$i; 1860 push(@o, $j); push(@l, 1); 1861 } 1862 &report_blockuse_list($i, &aggregate_iolist(1, @o, @l)); 1863 } 1864 print "Small blocks:\n"; 1865 for ($i=0; $i<4; $i++) { 1866 @o=(); @l=(); 1867 $m=$#sb_usage; for ($j=0; $j<=$m; $j++) { 1868 next if $sb_usage[$j]!=$i; 1869 push(@o, $j); push(@l, 1); 1870 } 1871 &report_blockuse_list($i, &aggregate_iolist(1, @o, @l)); 1872 } 1873 print "\n--- LAOLA internal, end block statistic ---\n\n"; 1874} 1875 1876sub report_blockuse_list { 1877 local($type)=shift; 1878 return if !@_; 1879 local(%info)=(0, "Trash", 1, "Free", 2, "System", 3, "Application"); 1880 local($max)=($#_+1)/2; 1881 local($i); local($o, $l); 1882 print "Type $type {$info{$type}} = ("; 1883 for ($i=0; $i<$max; $i++) { 1884 $o=$_[$i]; $l=$_[$max+$i]; 1885 if ($l==1) { 1886 printf (" %x ", $o); 1887 } else { 1888 printf (" %x-%x ", $o, $o+$l-1); 1889 } 1890 } 1891 print ")\n"; 1892} 1893 1894sub report_trash_statistic { 1895 return; 1896 &get_trash_info(); 1897 print "Trash statistic.\n"; 1898 print "Free big block chunks: (\n"; 1899 &report_trash_list($trashsize{1}, @trash1_o, @trash1_l); 1900 print "\nFree small block chunks: (\n"; 1901 &report_trash_list($trashsize{2}, @trash2_o, @trash2_l); 1902 print "\nUnused file space: (\n"; 1903 &report_trash_list($trashsize{3}, @trash3_o, @trash3_l); 1904 print "\nUnused system space: (\n"; 1905 &report_trash_list($trashsize{4}, @trash4_o, @trash4_l); 1906 1907 print "\nSummary: (\n"; 1908 &report_trash_list( 1909 $trashsize{1}+$trashsize{2}+$trashsize{3}+$trashsize{4}, 1910 &aggregate_iolist( 1, 1911 @trash1_o, @trash2_o, @trash3_o, @trash4_o, 1912 @trash1_l, @trash2_l, @trash3_l, @trash4_l 1913 ) 1914 ); 1915} 1916 1917sub report_trash_list { 1918 local($size)=shift; 1919 local(@o)=@_; 1920 local(@l)=splice(@o, ($#o+1)/2); 1921 local($i, $m); 1922 printf (" %d elements, size=%x\n", $#o+1, $size); 1923 $m=$#o; for ($i=0; $i<=$m; $i++) { 1924 printf (" offset %5x (len %x)\n", $o[$i], $l[$i]); 1925 } 1926 print ")\n"; 1927} 1928 1929sub print_iolist { 1930 local(@o)=@_; 1931 local(@l)=splice(@o, ($#o+1)/2); 1932 local($i); 1933 $m=$#o; for ($i=0; $i<=$m; $i++) { 1934 printf(" o=%6x (%x)\n", $o[$i], $l[$i]); 1935 } 1936} 1937 1938"Atomkraft? Nein, danke!" 1939 1940