1#
2# $Id: laola.pl,v 0.5.1.5 1997/07/01 00:06:42 schwartz Rel $
3#
4# laola.pl, LAOLA filesystem.
5#
6# This perl 4 library gives raw access to "Ole/Com" documents. These are
7# documents like created by Microsoft Word 6.0+ or newer Star Divisions
8# Word by using so called "Structured Storage" technology. Write access
9# still is nearly not supported, but will be done one day. This library
10# is part of LAOLA, a distribution this file should have come along with.
11# It can be found at:
12#
13#    http://wwwwbs.cs.tu-berlin.de/~schwartz/pmh/index.html
14# or
15#    http://www.cs.tu-berlin.de/~schwartz/pmh/index.html
16#
17# Copyright (C) 1996, 1997 Martin Schwartz
18#
19#    This program is free software; you can redistribute it and/or modify
20#    it under the terms of the GNU General Public License as published by
21#    the Free Software Foundation; either version 2 of the License, or
22#    (at your option) any later version.
23#
24#    This program is distributed in the hope that it will be useful,
25#    but WITHOUT ANY WARRANTY; without even the implied warranty of
26#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
27#    GNU General Public License for more details.
28#
29#    You should have received a copy of the GNU General Public License
30#    along with this program; if not, you should find it at:
31#
32#    http://wwwwbs.cs.tu-berlin.de/~schwartz/pmh/COPYING
33#
34# Diese Ver�ffentlichung erfolgt ohne Ber�cksichtigung eines eventuellen
35# Patentschutzes. Warennamen werden ohne Gew�hrleistung einer freien
36# Verwendung benutzt. ;-)
37#
38# Contact: schwartz@cs.tu-berlin.de
39#
40
41#
42# Really important topics still MISSING until now:
43#
44#   - human rights and civil rights where _you_live_
45#   - Reformfraktion president for Technische Universit�t Berlin
46#
47#   - creating documents
48#   - sensible error handling...
49#   - many property set things:
50#     *  documentation of variable types
51#     *  code page support
52#   - opening multiple documents at a time
53#   - consistant name giving, checked against MS'
54#
55# Please refer to the Quick Reference at Laolas home page for further
56# explanations.
57#
58
59#
60# Abbreviations
61#
62#    bbd    Big Block Depot
63#    pps    Property Storage
64#    ppset  Property Set
65#    ppss   Property Set Storage
66#    sb     Start Block
67#    sbd    Small Block Depot
68#    tss    Time Stamp Seconds
69#    tsd    Time Stamp Days
70#
71
72##
73## "public"
74##
75
76sub laola_open_document        { &laola'laola_open_document; }
77sub laola_close_document       { &laola'laola_close_document; }
78
79sub laola_pps_get_name         { &laola'laola_pps_get_name; }
80sub laola_pps_get_date         { &laola'laola_pps_get_date; }
81
82sub laola_is_directory         { &laola'laola_is_directory; }
83sub laola_get_directory        { &laola'laola_get_directory; }
84sub laola_get_dirhandles       { &laola'laola_get_dirhandles; }
85
86sub laola_is_file              { &laola'laola_is_file; }
87sub laola_get_filesize         { &laola'laola_get_filesize; }
88sub laola_get_file             { &laola'laola_get_file; }
89
90sub laola_is_root              { &laola'laola_is_root; }
91
92#
93# writing
94#
95sub laola_modify_file          { &laola'laola_modify_file; }
96
97#
98# property set handling
99#
100sub laola_is_file_ppset        { &laola'laola_is_file_ppset; }
101sub laola_ppset_get_dictionary { &laola'laola_ppset_get_dictionary; }
102sub laola_ppset_get_idset      { &laola'laola_ppset_get_idset; }
103sub laola_ppset_get_property   { &laola'laola_ppset_get_property; }
104
105#
106# trash handling
107#
108sub laola_get_trashsize        { &laola'laola_get_trashsize; }
109sub laola_get_trash            { &laola'laola_get_trash; }
110sub laola_modify_trash         { &laola'laola_modify_trash; }
111
112
113package laola;
114
115$laola_date = "03/25/97";
116
117changable_options: {
118   $optional_do_iobuf=0;  # 0: don't cache  1: cache whole compound document
119   $optional_do_debug=0;  # 0: don't debug  1: print some debugging information
120}
121
122##
123## File and directory handling
124##
125
126sub laola_open_document { ##
127#
128# "ok"||$error = laola_open_document($filename [,$openmode [,$streambuf]]);
129#
130# openmode bitmask (0 is default):
131#
132# Bit 0:  0 read only   1 read and write
133# Bit 4:  0 file mode   1 buffer mode
134#
135   local($status)="";
136   open_doc1: {
137      &init_vars();
138      if ( ($status=&init_io(@_)) ne "ok") {
139         last;
140      }
141      if ( ($status=&init_doc()) ne "ok") {
142         &laola_close_document();
143         last;
144      }
145      return "ok";
146   }
147   $status;
148}
149
150sub laola_close_document { ##
151#
152# "ok" = laola_close_document([streambuf])
153#
154   if ($openmode & 0x10) {
155      if (defined $_[0]) {
156         $_[0]=$iobuf;
157      }
158   } else {
159      &flush_cache();
160      &clean_file();
161   }
162   &init_vars();
163   return "ok";
164}
165
166sub laola_is_directory { ##
167#
168# 1||0 = laola_is_directory($pps)
169#
170   local($pps)=shift;
171   (!$pps || ($pps_type[$pps] == 1));
172}
173
174sub laola_is_file { ##
175#
176# 1||0 = laola_is_file($pps)
177#
178   ($pps_type[shift] == 2);
179}
180
181sub laola_is_root { ##
182#
183# 1||0 = laola_is_root($pps)
184#
185   ($pps_type[shift] == 5);
186}
187
188sub laola_get_dirhandles { ##
189#
190# @pps = laola_get_dirhandles($pps);
191#
192   local($start)=shift;
193
194   local (@chain) = ();
195   local (%chaincontrol) = ();
196
197   (!$start || &laola_is_directory($start))
198      && &get_ppss_chain($pps_dir[$start])
199   ;
200
201   @chain;
202}
203
204sub laola_get_directory { ##
205#
206# %pps_names = laola_get_directory($pps);
207#
208   local(%pps_namehandle)=();
209   for (&laola_get_dirhandles) {
210      $pps_namehandle{&laola_pps_get_name($_)} = $_;
211   }
212   %pps_namehandle;
213}
214
215sub laola_pps_get_name { ##
216#
217# $name_of_pps = laola_pps_get_name($pps);
218#
219   $pps_name[shift];
220}
221
222sub laola_pps_get_date { ##
223#
224# ($day,$month,$year,$hour,$min,$sec)||0 = laola_pps_get_date($pps)
225# (1..31, 1..12, 1601..., 0..23, 0..59, 0.x .. 59.x)
226#
227   local($pps)=shift;
228   &laola_is_directory($pps)
229      && &filetime_to_time($pps_ts2s[$pps], $pps_ts2d[$pps]);
230}
231
232sub laola_get_filesize { ##
233#
234# $filesize || 0 = laola_get_filesize($pps);
235#
236   local($pps)=shift;
237   &laola_is_file($pps) && $pps_size[$pps];
238}
239
240sub laola_get_file { ##
241#
242# "ok"||$error = laola_get_file($pps, extern $buf [,$offset, $size]);
243#
244   &rw_file("r", @_);
245}
246
247sub laola_modify_file { ##
248#
249# "ok"||$error = laola_modify_file($pps,extern $buf, $offset, $size);
250#
251   return "Laola: File is write protected!" if !io_writable;
252   &rw_file("w", @_);
253}
254
255
256##
257## Property set handling
258##
259
260sub laola_is_file_ppset { ##
261#
262# ppset_type || 0 = laola_is_file_ppset($pps)
263# ppset_type e {1, 5}
264#
265   local($pps)=shift;
266   (&laola_is_file($pps))
267   && ( (&laola_pps_get_name($pps) =~ /^\05/) && 5
268        || (&laola_pps_get_name($pps) =~ /^\01CompObj$/) && 1
269   );
270}
271
272sub laola_ppset_get_dictionary { ##
273#
274# ("ok", %dictionary)||$error = laola_ppset_get_dictionary($pps)
275#
276   local($pps)=shift;
277   local($status) = &load_propertyset($pps);
278   if ($status ne "ok") {
279      return $status;
280   } else {
281      return ("ok", %ppset_dictionary);
282   }
283}
284
285sub laola_ppset_get_idset { ##
286#
287# ("ok", %ppset_idset) || $error = laola_ppset_get_idset($pps);
288#
289   local($pps)=shift;
290   local($status) = &load_propertyset($pps);
291   return $status if $status ne "ok";
292
293   local(%ts)=();
294   foreach $key (keys %ppset_fido) {
295      $ts{$key} = $ppset_dictionary{$key};
296   }
297   ("ok", %ts);
298}
299
300sub laola_ppset_get_property { ##
301#
302# ($type,@mixed)||("error",$error)=laola_ppset_get_property($pps, $id)
303#
304   local($pps, $id)=@_;
305   local($type, $l, $var, @var);
306   local($o, $n);
307
308   local($status)= &load_propertyset($pps);
309   return ("error", $status) if $status ne "ok";
310
311   return "" if !defined $ppset_fido{$id};
312   $n = int($id / 0x1000);
313   $o = $ppset_o[$n]+$ppset_fido{$id};
314
315   if ($ppset_type == 5) {
316      #return ("error", "Property Identifier is invalid.") if $id < 2;
317      ($type, $l, @var) = &ppset_get_property($o);
318      return ($type, @var);
319
320   } elsif ($ppset_type == 1) {
321      ($l, $var) = &ppset_get_var(0x1e, $o);
322      return (0x1e, $var);
323   }
324}
325
326
327##
328## Trash handling
329##
330
331sub laola_get_trashsize { ##
332#
333# $sizeof_trash_section = laola_get_trashsize($type)
334#
335   &get_trash_size(@_);
336}
337
338sub laola_get_trash { ##
339#
340# "ok"||$error = laola_get_trash ($type, extern $buf [,$offset,$size]);
341#
342   &rw_trash("r", @_);
343}
344
345sub laola_modify_trash { ##
346#
347# "ok"||$error = laola_modify_trash ($type, extern $buf [,$offset,$size]);
348#
349   return "Laola: File is write protected!" if !io_writable;
350   &rw_trash("w", @_);
351}
352
353
354##
355## "private"
356##
357
358global_init: {
359   &var_init();
360   &filetime_init();
361   &propertyset_type_init();
362   $[=0;
363}
364
365#
366# laola_open_document ->
367#
368
369sub init_vars {
370   # laola_open_document->init_vars
371   # laola_close_document->init_vars
372   internal: {
373      $infilename=undef;
374      $filesize=undef;
375      $openmode=undef;
376      $io_writable=undef;
377
378      $curfile=undef;
379      @curfile_iolist = ();
380
381      $iobuf=undef;
382      @iobuf_modify_a=();
383      @iobuf_modify_l=();
384   }
385
386   &init_propertyset();
387
388   OLEstructure: {
389      # unknown header things that matter:
390      # ? $version=undef;       # word(1a)
391      # ? $revision=undef;      # word(18)
392      # ? $bigunknown=undef;    # byte(1e)
393
394      # known header things that matter:
395      $header_size=0x200;
396      $big_block_size=undef;    # word(1e)
397      $small_block_size=undef;  # word(20)
398      $num_of_bbd_blocks=undef; # long(2c)
399      $root_startblock=undef;   # long(30)
400      $sbd_startblock=undef;    # long(3c)
401      $ext_startblock=undef;    # long(44)
402      $num_of_ext_blocks=undef; # long(48)
403
404      # property storage things
405      @pps_name=();		# 0 .. pps_sizeofname
406      #pps_sizeofname=();	# word(40)
407      @pps_type=();		# byte(42)
408      @pps_uk0=();		# byte(43)
409      @pps_prev=();		# long(44)
410      @pps_next=();		# long(48)
411      @pps_dir=();		# long(4c)
412      @pps_ts1s=();		# long(64)
413      @pps_ts1d=();		# long(68)
414      @pps_ts2s=();		# long(6c)
415      @pps_ts2d=();		# long(70)
416      @pps_sb=();		# long(74)
417      @pps_size=();		# long(78)
418   }
419
420   various: {
421      $maxblock=undef;
422      $maxsmallblock=undef;
423
424      # block depot blocks
425      # - these blocks are building the block depots
426      @bbd_list=();
427      @sbd_list=();
428
429      # block depot tables
430      @bbd=();
431      @sbd=();
432
433      # contents blocks
434      @root_list=();
435      @sb_list=();
436
437      blockusage: {
438         @bb_usage=();       # big blocks usage
439         @sb_usage=();       # small blocks usage
440         $usage_known=undef;
441      }
442
443      trash: {
444         %trashsize=();
445         @trash1_o=(); @trash1_l=();
446         @trash2_o=(); @trash2_l=();
447         @trash3_o=(); @trash3_l=();
448         @trash4_o=(); @trash4_l=();
449         $trash_known=undef;
450      }
451   }
452}
453
454
455sub init_io {
456   ($infilename, $openmode) = @_;
457
458   if ($openmode & 0x10) {
459      return &init_stream;
460   } else {
461      return &init_file;
462   }
463}
464
465sub init_stream {
466   return "No stream data available!" if !defined $_[2];
467   #$openmode &= 0xfffffffe; # clear writeable flag
468   $optional_do_iobuf=1;
469   $iobuf = $_[2];
470   $filesize = length($iobuf);
471   if ( (&read_long(0) != 0xe011cfd0) ||
472        (&read_long(4) != 0xe11ab1a1) ) {
473      return "\"$infilename\" is no Ole / Compound Document!\n";
474   }
475   "ok";
476}
477
478sub init_file {
479   local($status);
480   return "\"$infilename\" does not exist!"    if ! -e $infilename;
481   return "\"$infilename\" is a directory!"    if -d $infilename;
482   return "\"$infilename\" is no proper file!" if ! -f $infilename;
483   return "Cannot read \"$infilename\"!"       if ! -r $infilename;
484   if ($openmode & 1) {
485      return "\"$infilename\" is write protected!" if ! -w $infilename;
486      $io_writable = 1;
487      $status = open(IO, '+<'.$infilename);
488   } else {
489      $io_writable = 0;
490      $status = open(IO, $infilename);
491   }
492   return "Cannot open \"$infilename\"!" if !$status;
493
494   binmode(IO);
495   if ($io_writable) {
496      select(IO); $|=1; select(STDOUT);
497   }
498
499   if ( (&read_long(0) != 0xe011cfd0) ||
500        (&read_long(4) != 0xe11ab1a1) ) {
501      return "\"$infilename\" is no Ole / Compound Document!\n";
502   }
503
504   $filesize = -s $infilename;
505
506   read_iobuf: {
507      if ($optional_do_iobuf) {
508         if (!&myread(0, $filesize, $iobuf, 0)) {
509            undef $iobuf;
510         }
511      }
512   }
513
514   "ok";
515}
516
517sub init_doc {
518   # read bbd,
519   # get bbd -> root-chain,  get bbd -> sbd-chain
520   local($i, $tmp)=(undef, undef);
521   local(@tmp)=undef;
522
523   header_information: {
524      $big_block_size=1<<&read_word(0x1e);
525      $small_block_size=1<<&read_word(0x20);
526      $num_of_bbd_blocks=&read_long(0x2c);
527      $root_startblock=&read_long(0x30);
528      $sbd_startblock=&read_long(0x3c);
529      $ext_startblock=&read_long(0x44);
530      $num_of_ext_blocks=&read_long(0x48);
531      $maxsmallblock= int (
532        &read_long( $header_size + $root_startblock*$big_block_size + 0x78 )
533           / $small_block_size
534        -1
535      );
536   }
537
538   internal: {
539      $maxblock = int ( ($filesize-$header_size) / $big_block_size -1);
540      return "Document is corrupt - size is too small." if $maxblock < 1;
541   }
542
543   # read big block depot
544   read_bbd: {
545      $max_in_header = int ( ($header_size-0x4c)/4 );
546
547      $todo = $num_of_bbd_blocks;
548      $num = $todo;
549      $num = $max_in_header if $num_of_bbd_blocks > $max_in_header;
550
551      for ($i=0; $i<$num; $i++) {
552         push (@bbd_list, &read_long(0x4c+4*$i));
553      }
554      $todo -= $num;
555      $next = $ext_startblock;
556
557      while ($todo > 0) {
558         $num = $todo;
559         $num = ($big_block_size-4)/4 if $todo>(($big_block_size-4)/4);
560         $o = $header_size + $next*$big_block_size;
561         for ($i=0; $i<$num; $i++) {
562            push (@bbd_list, &read_long($o+4*$i));
563         }
564         $todo -= $num;
565         $next = &read_long($o+4*$num);
566      }
567
568      $tmp="";
569      &rw_iolist("r", $tmp,
570         &get_iolist(3, 0, 0xffffffff, 0, @bbd_list)
571      );
572      @bbd = unpack ($vtype{"l"}.($maxblock+1), $tmp);
573   }
574
575   # read small block depot
576   read_sbd: {
577      $tmp="";
578      @sbd_list=&get_list_from_depot($sbd_startblock, 1);
579      &rw_iolist("r", $tmp,
580         &get_iolist(3, 0, 0xffffffff, 0, @sbd_list)
581      );
582      @sbd = unpack ($vtype{"l"}.($maxsmallblock+1), $tmp);
583   }
584
585   root_and_sb_chains: {
586      @root_list=&get_list_from_depot($root_startblock, 1);
587      return "Document is corrupt - no root entry." if !@root_list;
588      @sb_list=&get_list_from_depot (
589         &read_long ( $header_size + $root_startblock*$big_block_size + 0x74 ),
590         1
591      );
592   }
593
594   read_PropertyStorages: {
595      &read_ppss(0);
596
597      #
598      # If there are many property storages, they will be loaded
599      # dynamically. If there are few (I randomly chosed 50), they
600      # all will be read (ditto for debugging).
601      #
602      last if $#root_list>50 || !$optional_do_debug;
603
604      local($buf)="";
605      local($i, $nl);
606      &rw_iolist("r", $buf,
607         &get_iolist(3, 0, 0xffffffff, 0, @root_list)
608      );
609      print "\n\n"
610         ."---------------------------------------------\n"
611         ."LAOLA INTERNAL start of debugging information\n\n"
612         ." n   size    chain     typ name                    date\n"
613      ;
614      for ($i=0; $i<=($#root_list+1)*4; $i++) {
615         &read_ppss_buf($i, $buf);
616         &debug_report_pps($i) if $optional_do_debug;
617      }
618      print "\n"
619         ."LAOLA INTERNAL end of debugging information\n"
620         ."-------------------------------------------\n\n"
621      ;
622   }
623
624   &report_blockuse_statistic() if $optional_do_debug;
625   "ok";
626}
627
628##
629## laola_close_document ->
630##
631
632sub clean_file {
633   close(IO);
634}
635
636
637##
638## -------------------------- File IO ------------------------------
639##
640
641sub rw_file {
642#
643# "ok"||error = rw_file("r"||"w", $pps_handle, extern $buf [,$offset, $size])
644#
645   local($maxarg)=$#_;
646   local($rw, $pps) = @_[0..1];
647   return "Laola: pps is no file!" if !&laola_is_file($pps);
648   return "Laola: no method \"$rw\"!" if !($rw =~ /^[rw]$/i);
649
650   local($status, $offset, $size) =
651      &get_default_iosize($pps_size[$pps], $rw, @_[2..$maxarg]);
652   return $status if $status ne "ok";
653
654   return "Bad document structure!" if ! &get_curfile_iolist($pps);
655   return "ok" if &rw_iolist($rw, $_[2], &get_iolist(4, $offset, $size));
656
657   $rw =~ /^r$/i ? "Laola: read error!" : "Laola: write error!";
658}
659
660sub get_default_iosize {
661#
662# ("ok", $offset, $size) || $error =
663#    get_default_iosize (defsize, "r"||"w", extern buf, offset, size)
664#
665   local($maxarg)=$#_;
666   local($defsize, $rw) = @_[0..1];
667   local($offset, $size) = @_[3..4];
668
669   if (!$size) {
670      if ($rw =~ /^r$/i) {
671         if ($maxarg < 4) {
672            # read default: read trashsize
673            $offset=0; $size=$defsize;
674         } else {
675            # read zero size: no problem
676            $_[2]="";
677         }
678      } else {
679         if ($maxarg < 4) {
680            # write default: not allowed!
681            return "Laola: write error! Unknown size.";
682         } else {
683            # write zero size: no problem
684         }
685      }
686   }
687   ("ok", $offset, $size);
688}
689
690sub get_curfile_iolist {
691#
692# 1||0 = get_curfile_iolist($pps)
693#
694# Gets the iolist for the current file $pps
695#
696   if ($curfile) {
697      return 1 if $curfile==$pps;
698   }
699   @curfile_iolist = &get_iolist(
700      $pps_size[$pps]>=0x1000, 0, $pps_size[$pps], $pps_sb[$pps]
701   );
702   $curfile = $pps;
703   1;
704}
705
706sub get_all_filehandles {
707#
708# &get_all_filehandles(starting directory)
709#
710# !recursive!
711# Recurse over all files and directories,
712# return all file handles as @files.
713#
714   local($directory_pps)=shift;
715   local(@dir)=&laola_get_dirhandles($directory_pps);
716   local(@files)=();
717   local(%filescontrol)=();
718
719   foreach $entry (@dir) {
720      if (!$filescontrol{$entry}) {
721         $filescontrol{$entry} = 1;
722         if (&laola_is_file($entry)) {
723            push (@files, $entry)
724         } elsif (&laola_is_directory($entry)) {
725            push (@files, &get_all_filehandles($entry));
726         }
727      } else {
728         print STDERR "This document is corrupt!\n";
729      }
730   }
731   @files;
732}
733
734##
735## --------------------- Property Set Handling -------------------------
736##
737
738sub propertyset_type_init {
739   %ppset_vtype = (
740      0x00, "empty",
741      0x01, "null",
742      0x02, "i2",
743      0x03, "i4",
744      0x04, "r4",
745      0x05, "r8",
746      0x06, "cy",
747      0x07, "date",
748      0x08, "bstr",
749      0x0a, "error",
750      0x0b, "bool",
751      0x0c, "variant",
752      0x11, "ui1",
753      0x12, "ui2",
754      0x13, "ui4",
755      0x14, "i8",
756      0x15, "ui8",
757      0x1e, "lpstr",
758      0x1f, "lpwstr",
759      0x40, "filetime",
760      0x41, "blob",
761      0x42, "stream",
762      0x43, "storage",
763      0x44, "streamed_object",
764      0x45, "stored_object",
765      0x46, "blobobject",
766      0x48, "clsid",
767      0x49, "cf",
768      0xfff, "typemask",
769   );
770   local(@type) = keys %ppset_vtype;
771   for (@type) {
772      $ppset_vtype{$_+0x1000} = $ppset_vtype{$_}.'[]';
773   }
774
775   # \05
776   %ppset_SummaryInformation = (
777     2, "title", 3, "subject", 4, "authress", 5, "keywords",
778     6, "comments", 7, "template", 8, "lastauthress",
779     9, "revnumber", 10, "edittime", 11, "lastprinted",
780    12, "create_dtm_ro", 13, "lastsave_dtm", 14, "pagecount",
781    15, "wordcount", 16, "charcount", 17, "thumbnail",
782    18, "appname", 19, "security"
783   );
784
785   %ppset_DocumentSummaryInformation = (
786    15, "organization"
787   );
788
789   # \01CompObj
790   %ppset_CompObj = (
791      0, "doc_long", 1, "doc_class", 2, "doc_spec"
792   );
793}
794
795
796sub load_dictionary {
797#
798# "ok"||"done"||0 = load_dictionary($pps)
799#
800   local($pps)=shift;
801   &load_dictionary_defaults($pps);
802
803   local($i, $n, $o, $ps);
804   local($did, $dname, $l);
805
806   foreach $id (keys %ppset_fido_dict) {
807      next if !$ppset_fido_dict{$id};
808
809      $ps = int($id/0x1000);
810      $o = $ppset_o[$ps]+$ppset_fido_dict{$id};
811      $n = &get_long($o, $ppset_buf); $o+=4;
812
813      for (; $n; $n--) {
814         $did = &get_long($o, $ppset_buf); $o+=4;
815         ($l, $dname) = &ppset_get_var(0x1e, $o); $o+=$l;
816         $ppset_dictionary{$did+$ps*0x1000} = $dname;
817      }
818   }
819   return "ok";
820}
821
822sub load_dictionary_defaults {
823   local($name)=&laola_pps_get_name($pps);
824   if ($name eq "\05SummaryInformation") {
825      %ppset_dictionary = %ppset_SummaryInformation;
826      return "ok";
827   } elsif ($name eq "\05DocumentSummaryInformation") {
828      %ppset_dictionary = %ppset_DocumentSummaryInformation;
829      return "ok";
830   } elsif ($name eq "\01CompObj") {
831      %ppset_dictionary = %ppset_CompObj;
832      return "ok";
833   }
834   return 0;
835}
836
837sub load_propertyset {
838   local($pps)=shift;
839   local($status)="";
840
841   check_current: {
842      if ($ppset_current && $pps && ($ppset_current == $pps)) {
843         $status="ok"; last;
844      }
845      if (!&laola_is_file_ppset($pps)) {
846         $status="This is not a property set handle."; last;
847      }
848      &init_propertyset();
849      if (!&laola_get_file($pps, $ppset_buf)) {
850         $status="Cannot load property set.";
851      }
852      $ppset_type = &laola_is_file_ppset($pps);
853   }
854   return $status if $status;
855
856   if ($ppset_type == 5) {
857      $status = &load_propertyset_05($pps);
858      return $status if $status ne "ok";
859   } elsif ($ppset_type == 1) {
860      $status = &load_propertyset_01CompObj($pps);
861      return $status if $status ne "ok";
862   } else {
863      return "Unknown property set!";
864   }
865
866   $status = &load_dictionary($pps);
867   return $status;
868}
869
870sub init_propertyset {
871   # !global! property set things
872
873   $ppset_current=undef;      # current property storage handle
874   $ppset_type=undef;         # \05, \01CompObj
875   $ppset_buf=undef;          # buffer for whole property
876   %ppset_fido=();            # $ppset_fido{Identifier}=Offset;
877                              #    Format Pairs of $ppset_current
878   %ppset_fido_dict=();       # Dictionaries
879   %ppset_fido_cp=();         # Code pages
880
881   $ppset_codepage=undef;
882   %ppset_dictionary=();
883
884   structure_05: { # 05 ppsets
885      # Header
886      $ppset_byteorder=undef; # word (0)  {0xfffe}
887      $ppset_format=undef;    # word (2)  {0}
888      $ppset_osver=undef;     # word (4)  {lbyte=version  hbyte=revision}
889      $ppset_os=undef;        # word (6)  {0=win16|1=mac|2=win32)
890      @ppset_clsid=();        # class identifier (8) {e.g. @0}
891      $ppset_reserved=undef;  # long (18) {>=1}
892
893      # FormatIDOffset
894      @ppset_fmtid=();        # format identifier (1c)
895      @ppset_o=();            # ppset_o[0]: long (2c)
896
897      # PropertySectionHeader
898      @ppset_size=();         # word ($ppset_o[])
899      @ppset_num=();          # long ($ppset_o[]+4)
900   }
901
902   #structure_01CompObj: {
903      #$ppset_uk1=undef;       # word (0)  {0x0001}
904      #$ppset_byteorder=undef; # word (2)  {0xfffe}
905      #$ppset_osver=undef;     # word (4)  {lbyte=version  hbyte=revision}
906      #$ppset_os=undef;        # word (6)  {0=win16|1=mac|2=win32)
907                               # { ff ff ff ff  00 09 02 00  00 00 00 00
908                               #   c0 00 00 00  00 00 00 46 }
909      #@ppset_o=();            # 0x1c
910   #}
911}
912
913sub load_propertyset_01CompObj {
914   local($pps)=shift;
915   set_current: {
916      $ppset_current = $pps;
917      get_structure: {
918         $ppset_byteorder = &get_word(0x02, $ppset_buf);
919         $ppset_osver =     &get_word(0x04, $ppset_buf);
920         $ppset_os =        &get_word(0x06, $ppset_buf);
921         @ppset_o =         (0x1c);
922      }
923      check_structure: {
924         if ($ppset_byteorder !=0xfffe) {
925            return "Cannot understand property set.";
926         }
927      }
928   }
929   get_offsets: {
930      local($i);
931      local($offset, $length)=(0, 0);
932      for ($i=0; $i<3; $i++) {
933         $length = &get_long($ppset_o[0] + $offset, $ppset_buf);
934         last if !$length;
935         $ppset_fido{$i} = $offset;
936         $offset = $offset + 4 + $length;
937      }
938   }
939   "ok";
940}
941
942sub load_propertyset_05 {
943   local($pps)=shift;
944   set_current: {
945      $ppset_current = $pps;
946      get_structure: {
947         ($ppset_byteorder, $ppset_format, $ppset_osver, $ppset_os) =
948            &get_nword(4, 0, $ppset_buf)
949         ;
950         @ppset_clsid =     &get_uuid(0x08, $ppset_buf);
951         $ppset_reserved =  &get_long(0x18, $ppset_buf);
952         @ppset_fmtid =     &get_uuid(0x1c, $ppset_buf);
953         $ppset_o[0] =      &get_word(0x2c, $ppset_buf);
954         $ppset_size[0] =   &get_word($ppset_o[0], $ppset_buf);
955         $ppset_num[0] =    &get_word($ppset_o[0]+4, $ppset_buf);
956      }
957      check_structure: {
958         $status="Cannot understand property set.";
959         last if $ppset_byteorder != 0xfffe;
960         last if $ppset_format != 0;
961         last if $ppset_reserved < 1;
962         last if $ppset_o[0] < 0x30;
963         $status="";
964      }
965   }
966   return $status if $status;
967
968   get_ids_and_offsets: {
969      local($i, $id, $n, $num, $fido);
970      local($o)=$ppset_o[0];
971      for ($n=0; $n<$ppset_reserved; $n++) {
972
973         # default dictionary and codepage
974         $ppset_fido_dict{$n*0x1000+0} = 0;
975         $ppset_fido_cp{$n*0x1000+1} = 0x4e4;
976
977         $num=&get_word($o+4, $ppset_buf);
978         for ($i=0; $i<$num; $i++) {
979            $id = &get_long($o+8+$i*8, $ppset_buf);
980            if ($n) {
981               $id = $i if $id>1; # ! hacky !
982            }
983            $fido = &get_long($o+8+$i*8+4, $ppset_buf);
984            if ($id>1) {
985               $ppset_fido{$n*0x1000+$id} = $fido;
986            } elsif ($id==1) {
987               $ppset_fido_cp{$n*0x1000+1} = $fido;
988            } elsif ($id==0) {
989               $ppset_fido_dict{$n*0x1000} = $fido;
990            }
991         }
992         $o+=&get_word($o, $ppset_buf);
993         $ppset_o[$n+1]=$o;
994      }
995   }
996   # todo: code page
997   "ok";
998}
999
1000sub ppset_get_property {
1001#
1002# ($type, $size, @mixed)||("error", $debuginfo) = ppset_get_property($offset)
1003#
1004   local($o_begin)=$_[0];
1005   local($o)=$o_begin;
1006   local($type) = &get_long($o, $ppset_buf);
1007
1008   if (! ($type & 0x1000)) {
1009      return ($type, &ppset_get_var($type, $o+4));
1010   } else {
1011      local(@mixed)=();
1012      local($n)=&get_long($o+4, $ppset_buf); $o+=8;
1013      local($t, $l, @var);
1014      for (; $n; $n--) {
1015         @var=();
1016         ($l, @var) = &ppset_get_var($type^0x1000, $o);
1017         push (@mixed, 1+($#var+1), $type^0x1000, @var);
1018         $o+=$l;
1019      }
1020      return ($type, $o-$o_begin, @mixed);
1021   }
1022}
1023
1024sub ppset_get_var {
1025#
1026# ($size, @var) = &ppset_get_var($type, $offset);
1027#
1028   local($type, $o)=@_;
1029   if (!$type || $type == 0x01) { # empty, null
1030      return (0, "");
1031   } elsif ($type == 0x02) {  # i2
1032      local($tmp) = &get_word($o, $ppset_buf);
1033      $tmp = - (($tmp^0xffff) +1) if ($tmp & 0x8000);
1034      return (2, $tmp);
1035   } elsif ($type == 0x03) {  # i4
1036      local($tmp) = &get_long($o, $ppset_buf);
1037      $tmp = - (($tmp^0xffffffff) +1) if ($tmp & 0x80000000);
1038      return (4, $tmp);
1039   } elsif ($type == 0x04) {  # real
1040      return (4, unpack("f", substr($ppset_buf, $o, 4)) );
1041   } elsif ($type == 0x05) {  # double
1042      return (8, unpack("d", substr($ppset_buf, $o, 8)) );
1043   } elsif ($type == 0x0a) {  # error
1044      return (4, &get_word($o, $ppset_buf));
1045   } elsif ($type == 0x0b) {  # bool (0==false, -1==true)
1046      return (4, &get_long($o, $ppset_buf));
1047   } elsif ($type == 0x0c) {  # variant
1048      local($t, $l, @var);
1049      $t = &get_long($o, $ppset_buf);
1050      ($l, @var) = &ppset_get_var($t, $o+4);
1051      return (4+$l, $t, @var);
1052   } elsif ($type == 0x11) {  # ui1
1053      return (1, &get_byte($o, $ppset_buf));
1054   } elsif ($type == 0x12) {  # ui2
1055      return (2, &get_word($o, $ppset_buf));
1056   } elsif ($type == 0x13) {  # ui4
1057      return (4, &get_long($o, $ppset_buf));
1058   } elsif ($type == 0x1e) {  # lpstr
1059      local($l)=&get_long($o, $ppset_buf);
1060      if ($l) {
1061         return (4+$l, substr($ppset_buf, $o+4, $l-1));
1062      } else {
1063         return (4, "");
1064      }
1065   } elsif ($type==0x40) {    # filetime
1066      return (8, &filetime_to_time(&get_nlong(2, $o, $ppset_buf)) );
1067   } else {
1068      return (
1069         "error",
1070         sprintf("(offset=%x, type=%x, buf[0]=%x)",
1071            $o, $type, &get_long($o+4, $ppset_buf)
1072         )
1073      );
1074   }
1075}
1076
1077##
1078## Basic laola data types
1079##
1080
1081sub var_init {
1082#
1083# At this work I still don't trust in signed integers, therefore I
1084# prefer the unsigned 0xffffffff to -1 (don't beat me)
1085#
1086   $vtype{"c"}="C"; $vsize{"c"}=1;    # unsigned char
1087   $vtype{"w"}="v"; $vsize{"w"}=2;    # 0xfe21     == 21 fe
1088   $vtype{"l"}="V"; $vsize{"l"}=4;    # 0xfe21abde == de ab 21 fe
1089}
1090
1091sub get_chars {
1092#
1093# get_chars ($offset, $number, extern $sourcebuf);
1094#
1095   substr($_[2], $_[0], $_[1]);
1096}
1097
1098sub read_chars {
1099#
1100# read_chars ($offset, $number);
1101#
1102   local($tmp)="";
1103   &myread($_[0], $_[1], $tmp) && $tmp;
1104}
1105
1106# get_thing ($offset, extern $buf);
1107sub get_byte { &get_var("c", @_); }
1108sub get_word { &get_var("w", @_); }
1109sub get_long { &get_var("l", @_); }
1110sub get_var {
1111   unpack ($vtype{$_[0]}, substr($_[2], $_[1], $vsize{$_[0]}));
1112}
1113
1114# get_nthing ($n, $offset, extern $buf);
1115sub get_nbyte { &get_nvar("c", @_); }
1116sub get_nword { &get_nvar("w", @_); }
1117sub get_nlong { &get_nvar("l", @_); }
1118sub get_nvar {
1119   unpack ($vtype{$_[0]}.$_[1], substr($_[3], $_[2], $vsize{$_[0]}*$_[1]));
1120}
1121
1122# read_thing ($offset);
1123sub read_byte { &read_var("c", @_); }
1124sub read_word { &read_var("w", @_); }
1125sub read_long { &read_var("l", @_); }
1126sub read_var {
1127   unpack ($vtype{$_[0]}, &read_chars($_[1], $vsize{$_[0]}));
1128}
1129
1130# read_nthing ($n, $offset);
1131sub read_nbyte { &read_nvar("c", @_); }
1132sub read_nword { &read_nvar("w", @_); }
1133sub read_nlong { &read_nvar("l", @_); }
1134sub read_nvar {
1135   unpack ($vtype{$_[0]}.$_[1], &read_chars($_[2], $vsize{$_[0]}*$_[1]));
1136}
1137
1138##
1139## --------------------------- IO handling ------------------------------
1140##
1141
1142sub myio {
1143#
1144# 1||0= myio("r"||"w", $file_offset, $num_of_chars, $extern_var [,$var_offset])
1145#
1146   $_ = shift;
1147   /^r$/i ? &myread : /^w$/i ? &mywrite : 0;
1148}
1149
1150sub myread {
1151#
1152# 1||0 = myread($file_offset, $num_of_chars, $extern_var [,$var_offset])
1153#
1154   local($varoffset)= $_[3] || 0;
1155   if ($optional_do_iobuf && $iobuf) {
1156      substr($_[2], $varoffset, $_[1])=substr($iobuf, $_[0], $_[1]);
1157      return 1;
1158   } else {
1159      seek(IO, $_[0], 0) && (read(IO,$_[2],$_[1],$varoffset) == $_[1]);
1160   }
1161}
1162
1163sub mywrite {
1164#
1165# 1||0 = mywrite($file_offset, $num_of_chars, $extern_var [,$var_offset])
1166#
1167   return 0 if !$io_writable;
1168
1169   local($varoffset)= $_[3] || 0;
1170   local($tmp) = substr($_[2], $varoffset, $_[1]);
1171         $tmp .= "\00" x ($_[1]-length($tmp));
1172   if ($optional_do_iobuf && $iobuf) {
1173      substr($iobuf, $_[0], $_[1]) = $tmp;
1174      push(@iobuf_modify_a, $_[0]);
1175      push(@iobuf_modify_l, $_[1]);
1176      return 1;
1177   } else {
1178      seek(IO, $_[0], 0) && print IO $tmp;
1179   }
1180}
1181
1182sub flush_cache {
1183#
1184# void = flush_cache()
1185#
1186# flush io cache, if caching is turned on
1187#
1188   return if !($optional_do_iobuf && $iobuf);
1189
1190   &rw_iolist("w", $iobuf,
1191      &aggregate_iolist(2, @iobuf_modify_a, @iobuf_modify_l)
1192   );
1193
1194   @iobuf_modify_a=(); @iobuf_modify_l=();
1195}
1196
1197##
1198## The "logical" core of laola
1199##
1200
1201sub get_ppss_chain {
1202#
1203# @blocks = get_ppss_chain($ppss)
1204#
1205# !recursive!
1206#
1207   local($ppss) = @_;
1208   return if $ppss == 0xffffffff;
1209
1210   if ($chaincontrol{$ppss}) {
1211      # Recursive entry!
1212      @chain = ();
1213      print STDERR "This document is corrupt!\n";
1214      return;
1215   } else {
1216      &read_ppss($ppss);
1217      $chaincontrol{$ppss}=1;
1218   }
1219
1220   &get_ppss_chain ( $pps_prev[$ppss] );
1221
1222   push(@chain, $ppss);
1223
1224   &get_ppss_chain ( $pps_next[$ppss] );
1225}
1226
1227sub read_ppss_buf {
1228#
1229# "ok" = read_ppss_buf ($i, extern $buf)
1230#
1231   local($i)=$_[0];
1232   local($nl);
1233   return "ok" if $pps_name[$i];
1234   return if ! ($nl = &get_word($i*0x80+0x40, $_[1]));
1235
1236   $pps_name[$i] = &pps_name_to_string($i*0x80, $nl, $_[1]);
1237
1238   ($pps_type[$i], $pps_uk0[$i],
1239    $pps_prev[$i], $pps_next[$i], $pps_dir[$i]) =
1240      unpack($vtype{"c"}."2".$vtype{"l"}."3",
1241             substr($_[1], $i*0x80+0x42, $vsize{"c"}*2+$vsize{"l"}*3))
1242   ;
1243
1244   ($pps_ts1s[$i], $pps_ts1d[$i], $pps_ts2s[$i], $pps_ts2d[$i],
1245    $pps_sb[$i], $pps_size[$i]) =
1246      &get_nlong(6, $i*0x80+0x64, $_[1])
1247   ;
1248
1249   "ok";
1250}
1251
1252sub read_ppss {
1253#
1254# "ok" = read_ppss ($i)
1255#
1256   local($i)=shift;
1257   return "ok" if $pps_name[$i];
1258
1259   local($buf)="";
1260   &rw_iolist("r", $buf, &get_iolist(3, $i*0x80, 0x80, 0, @root_list));
1261
1262   local($nl);
1263   return if ! ($nl = &get_word(0x40, $buf));
1264   $pps_name[$i] = &pps_name_to_string(0, $nl, $buf);
1265   ($pps_type[$i], $pps_uk0[$i], $pps_prev[$i], $pps_next[$i], $pps_dir[$i])=
1266      unpack($vtype{"c"}."2".$vtype{"l"}."3",
1267             substr($buf, 0x42, $vsize{"c"}*2+$vsize{"l"}*3)
1268      )
1269   ;
1270
1271   ($pps_ts1s[$i], $pps_ts1d[$i], $pps_ts2s[$i], $pps_ts2d[$i],
1272    $pps_sb[$i], $pps_size[$i]) = unpack(
1273      $vtype{"l"}."6", substr($buf, 0x64, $vsize{"l"}*6)
1274   );
1275
1276   "ok";
1277}
1278
1279
1280sub get_list_from_depot {
1281#
1282# @blocks = get_list_from_depot ($start, depottype)
1283#
1284# Read a block chain starting with block $start out of a either
1285# depot @bbd (for $t) or depot @sbd (for !$t).
1286#
1287   local($start, $t)=@_;
1288   local(@chain)=();
1289   return @chain if $start == 0xfffffffe;
1290
1291   push (@chain, $start);
1292   while ( ($start = $t?$bbd[$start]:$sbd[$start]) != 0xfffffffe ) {
1293      push(@chain, $start);
1294   }
1295   @chain;
1296}
1297
1298sub get_iolist {
1299#
1300# @iolist = get_iolist ($depottype, $offset, $size, $startblock [,@depot])
1301#
1302# This is the main IO logic. Returns the iolist for a data stream according
1303# to depot type $t. The stream may start at offset $offset and can have a
1304# size $size. If size is bigger than the total size of the stream according
1305# to its depot, it will be cut correctly. (So if you want to read until the
1306# files end without knowing how many bytes that are, take 0xffffffff as size).
1307#
1308# depottype $t:
1309#    0 small block (for @sbd)                    small block depot
1310#    1 big block   (for @bbd)                    big block depot
1311#    2 small block (for @_[4..$#])               some small blocks
1312#    3 big block   (for @_[4..$#])               some big blocks
1313#    4 variable    (for @curfile_iolist)         iolist of current file
1314#    5 variable    (for @_[4..$#] == (@o, @l))   some iolist
1315#
1316   local($t, $offset, $size, $sb) = (shift||0, shift||0, shift||0, shift||0);
1317   local($di);
1318   local($bs, $max);
1319
1320   local(@empty)=();
1321   return @empty if !$size;
1322
1323   local($begin, $done, $len);
1324   local(@o)=(); local(@l)=();
1325
1326   $bs = ($t==1 || $t==3) ? $big_block_size : $small_block_size;
1327
1328   if ($t<2) {
1329      # To skip these offsets, stream chains would have to be resolved
1330      # before.
1331   } elsif ($t<4) {
1332      $max = $#_;
1333      # Skip whole blocks, when offset given
1334      $sb += int ($offset / $bs);
1335      $offset -= int ($offset / $bs) * $bs;
1336   } elsif ($t==4) {
1337      $max = ($#curfile_iolist-1)/2;
1338   } elsif ($t==5) {
1339      $max = ($#_-1)/2;
1340   } else {
1341      return @empty;
1342   }
1343
1344   $done = 0;
1345   for ( $di=$sb;
1346         ($t<2) ? ($di!=0xfffffffe): ($di<=$max);
1347         $di=&next_dl
1348   ) {
1349      last if ($done == $size);
1350      if ($t==4) {
1351         $bs = $curfile_iolist[$max+1+$di];
1352      } elsif ($t==5) {
1353         $bs = $_[$max+1+$di];
1354      }
1355      if ($offset) {
1356         if ($bs <= $offset) {
1357            $offset -= $bs;
1358            next;
1359         } else {
1360            $begin = &depot_offset + $offset;
1361            $len   = $bs - $offset;
1362            $offset = 0;
1363         }
1364      } else {
1365         $begin = &depot_offset;
1366         $len   = $bs;
1367      }
1368      if ( ($done+$len) > $size ) {
1369         $len = $size - $done;
1370      }
1371      if ( !@o || ($o[$#o]+$l[$#l])!=$begin ) {
1372         push(@o, $begin);
1373         push(@l, $len);
1374      } else {
1375         $l[$#l]+=$len;
1376      }
1377      $done += $len;
1378   }
1379   (@o, @l);
1380}
1381sub next_dl { # get_iolist:next_dl
1382#
1383# index = depot ($di==index, $t==depothandle)
1384#
1385# Returns next chain link of depot @bbd ($t) or @sbd (!$t)
1386#
1387   return $sbd[$di] if !$t;
1388   return $bbd[$di] if $t==1;
1389   $di+1;
1390}
1391sub depot_offset { # get_iolist:depot_offset
1392#
1393# offset = depot_offset ($di==index, $t==depottype)
1394#
1395   return (($sb_list[$di/8]+1)*8 + ($di%8))*$small_block_size if $t==0;
1396   return $header_size + $di*$big_block_size if $t==1;
1397   return (($sb_list[$_[$di]/8]+1)*8 + ($_[$di]%8))*$small_block_size if $t==2;
1398   return $header_size + $_[$di]*$big_block_size if $t==3;
1399   return ($curfile_iolist[$di]) if $t==4;
1400   return ($_[$di]) if $t==5;
1401}
1402
1403
1404sub aggregate_iolist {
1405#
1406# (@offsets, @lengths)||() = aggregate_iolist(method,@offsets,@lengths)
1407#
1408# method:
1409#    1  @offsets shall be sorted, no overlap allowed
1410#    2  @offsets shall be sorted, overlap is allowed
1411#    3  @offsets are sorted, no overlap allowed
1412#    4  @offsets are sorted, overlap is allowed
1413#
1414   local($method)=shift;
1415   local(@empty)=();
1416   return @empty if ($method<1)||($method>4); # Don't know method!
1417
1418   local($max)=int(($#_+1)/2);
1419
1420   local($i, $j);
1421   local(@o_in)=(); local(@l_in)=();
1422   local(%o_in)=();
1423   local(@o_out)=(); local(@l_out)=();
1424   local($offset, $len);
1425
1426   #
1427   # Sort
1428   #
1429   if ( ($method==1) || ($method==2)) {
1430      # sort offsets
1431      for ($i=0; $i<$max; $i++) {
1432         next if !$_[$max+$i];
1433         if ($o_in{$_[$i]}) {
1434            return @empty if $method==1; # Data chunks overlap!
1435            $o_in{$_[$i]}=$i if $_[$max+$i]>$o_in{$_[$i]};
1436         } else {
1437            $o_in{$_[$i]}=$i;
1438         }
1439      }
1440      foreach $key (sort {$a <=> $b} keys %o_in) {
1441         push(@o_in, $_[$o_in{$key}]);
1442         push(@l_in, $_[$max + $o_in{$key}]);
1443      }
1444   } else {
1445      @o_in=@_[0..($max-1)];
1446      @l_in=@_[$max..$#_];
1447   }
1448
1449   #
1450   # Aggregate
1451   #
1452   $offset=$o_in[0];
1453   $len=$l_in[0];
1454
1455   for ($i=1; $i<=($#o_in+1); $i++) {
1456      if ( ($i==($#o_in+1))
1457           || ($o_in[$i]<$offset)
1458           || ($o_in[$i]>($offset+$len))
1459         ) {
1460         push(@o_out, $offset);
1461         push(@l_out, $len);
1462         $offset=$o_in[$i];
1463         $len=$l_in[$i];
1464      } elsif ($o_in[$i]<($offset+$len)) {
1465         return @empty if ($type==1 || $type==3); # Data chunks overlap!
1466         if ( ($o_in[$i]+$l_in[$i]) > ($offset+$len) ) {
1467            $len=$o_in[$i]+$l_in[$i]-$offset;
1468         }
1469      } else {
1470         $len += $l_in[$i];
1471      }
1472   }
1473   (@o_out, @l_out);
1474}
1475
1476sub rw_iolist {
1477   #
1478   # 1||0 = rw_iolist("r"||"w", extern buf, @offsets, @lengths);
1479   # . read or write global chunklist
1480   #
1481   local($done, $i, $l);
1482   local($max) = int(($#_-2+1)/2);
1483
1484   $done=0;
1485   for ($i=0; $i<$max; $i++) {
1486      next if ! ($l = $_[2+$i+$max]);
1487      if (&myio($_[0], $_[2+$i], $l, $_[1], $done)) {
1488         $done += $l;
1489      } else {
1490         # io error!
1491         return 0;
1492      }
1493   }
1494   1;
1495}
1496
1497##
1498## ---------------------- Property Set Handling --------------------------
1499##
1500
1501sub pps_name_to_string {
1502#
1503# $string = pps_name_to_string($offset, $pps_name_len, extern $buf)
1504#
1505   local($l)=$_[1]-2;
1506   local($i);
1507   local($tmp)="";
1508   for ($i=0; $i<$l; $i+=2) {
1509      $tmp.=substr($_[2], $_[0]+$i, 1);
1510   }
1511   $tmp;
1512}
1513
1514sub learn_guids {
1515    @guids = ("dsi", "si");
1516    $guid_dsi="\0x5DocumentSummaryInformation";
1517    @guid_dsi=( 0xd5cdd502, 0x2e9c, 0x101b,
1518                "\0x93\0x97\0x08\0x00\0x2b\0x2c\0xf9\0xae" );
1519    $guid_si="\0x5SummaryInformation";
1520    @guid_si=( 0xf29f85e0, 0x4ff9, 0x1068,
1521                "\0xab\0x91\0x08\0x00\0x2b\0x27\0xb3\0xd9" );
1522}
1523
1524sub get_uuid {
1525   local($o)=$_[0];
1526   ( &get_long($o, $_[1]),
1527     &get_word($o+4, $_[1]),
1528     &get_word($o+6, $_[1]),
1529     &get_chars($o+8, 8, $_[1])
1530   );
1531}
1532
1533#
1534# This section refers to pps_ts2 and pps_ts1, the one ore two timestamps
1535# used for each "Storage" Property Set. It seems, that the second timestamp
1536# gets actualized, when changing the storage. The first stamp is sometimes
1537# used, sometimes unused.
1538#
1539# The stamp is a 64 bit ulong. It counts every second 10 * 10 ^ 6,
1540# starting at 01/01/1601. When the 64 bit int gets evaluated as
1541# two 32 bit integers, the faster running ("least significant long")
1542# can hold just 0x100000000 / 10000000.0 (about 429.5) seconds. So the
1543# slower running ("most significant long") increments every 429.5 seconds.
1544#
1545
1546sub filetime_init {
1547   @monsum  = ( 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334,
1548                0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335 );
1549   $a_minute = 60 * 10000000.0 / (0x10000000 * 16);
1550}
1551
1552sub is_schaltjahr {
1553   local($year)=shift;
1554   !($year%4) && ($year%100 || !($year%400) ) && 1;
1555}
1556
1557sub filetime_years_to_days {
1558   local($year)=shift;
1559   int($year-1600) * 365
1560     + int( ($year-1600) / 4 )
1561     - int( ($year-1600) / 100 )
1562     + int( ($year-1600) / 400 )
1563     ;
1564}
1565
1566sub filetime_to_time {
1567   local($ds, $dd)=@_;
1568   local($day, $month, $year, $hour, $min, $sec);
1569   local($i, $m, $d, $dsum, $tmpsec);
1570
1571   $dsum = $dd + ($ds / (0x10000000 * 16.0));
1572
1573   $d= int( $dsum/($a_minute*60*24) )+1;
1574   $m= $dsum - ($d-1)*$a_minute*60*24;
1575
1576   $year  = int( $d/365.2425 ) + 1601;
1577   $d -= &filetime_years_to_days($year-1);
1578
1579   for( $i=11; $i && ($d <= $monsum[$i+&is_schaltjahr($year)*12]); $i--) {}
1580   $month = $i+1;
1581   $day   = $d - $monsum[$i+&is_schaltjahr($year)*12];
1582
1583   $hour  = int( $m / ($a_minute*60) );
1584   $min   = int( $m/$a_minute - $hour*60 );
1585   $sec   =    ( ($m/$a_minute - $hour*60 - $min) * 60);
1586
1587   ($day, $month, $year, $hour, $min, $sec);
1588}
1589
1590sub time_to_filetime {
1591   local($day, $month, $year, $hour, $min, $sec)=@_;
1592   local($d, $tss, $tsd);
1593
1594   $d = &filetime_years_to_days($year-1)
1595   + $monsum[$month-1 + &is_schaltjahr($year)*12]
1596   + $day-1;
1597
1598   $tsd = (24*60*$d + 60*$hour +$min +$sec/60.0) * $a_minute;
1599
1600   $tss = ($tsd-int($tsd)) * 0x10000000 * 16;
1601
1602   ( int($tss), int($tsd) );
1603}
1604
1605
1606##
1607## ------------------------- Trash Handling ------------------------------
1608##
1609
1610sub make_blockuse_statistic {
1611   #
1612   # block statistic:
1613   #    0 == irregular free (block depot entry != -1) (== undef)
1614   #    1 == regular free (block depot entry == -1)
1615   #    2 == used for ole system
1616   #    3 == used for ole application
1617   #
1618   return 1 if $usage_known;
1619   local($i, @list);
1620
1621   # default: all small and big blocks are undef
1622
1623   #
1624   # regular system data
1625   #
1626
1627   # ole system blocks
1628   for (@bbd_list, @sbd_list, @root_list, @sb_list) {
1629      $bb_usage[$_]=2;
1630   }
1631
1632   # free blocks according to block depots
1633   for (@bbd) {
1634      $bb_usage[$_]=1 if $bbd[$_]==0xffffffff;
1635   }
1636   for (@sbd) {
1637      $sb_usage[$_]=1 if $sbd[$_]==0xffffffff;
1638   }
1639
1640   #
1641   # OLE application blocks
1642   #
1643   foreach $file (&get_all_filehandles(0)) {
1644      if ($pps_size[$file]>=0x1000) {
1645         for (&get_list_from_depot($pps_sb[$file], 1)) { $bb_usage[$_]=3; }
1646      } else {
1647         for (&get_list_from_depot($pps_sb[$file], 0)) { $sb_usage[$_]=3; }
1648      }
1649   }
1650
1651   $usage_known=1;
1652}
1653
1654sub get_trash_info {
1655#
1656# void get_trash_info();
1657#
1658# Trash types:
1659#
1660#    0 == all
1661#    1 == unused big blocks
1662#    2 == unused small blocks
1663#    4 == unused file space, according to sizeof pps_size (incl. root_entry)
1664#    8 == unused system space (header, sb_table, bb_table)
1665#
1666   return 1 if $trash_known;
1667   &make_blockuse_statistic();
1668
1669   local(@o, @l);
1670   local(@list);
1671   local($size, $m);
1672   local($i);
1673   local($begin, $len);
1674
1675   unused_big_blocks: {
1676      $size=0; @list=();
1677      for ($i=0; $i<=$maxblock; $i++) {
1678         push(@list, $i) if $bb_usage[$i]<=1;
1679      }
1680      @trash1_o = &get_iolist(3, 0, 0xfffffff, 0, @list);
1681      @trash1_l = splice(@trash1_o, ($#trash1_o+1)/2);
1682      $m=$#trash1_o; for ($i=0; $i<=$m; $i++) { $size+=$trash1_l[$i]; }
1683      $trashsize{1}=$size;
1684   }
1685
1686   unused_small_blocks: {
1687      $size=0; @list=();
1688      for ($i=0; $i<=$maxsmallblock; $i++) {
1689         push(@list, $i) if $sb_usage[$i]<=1;
1690      }
1691      @trash2_o = &get_iolist(2, 0, 0xfffffff, 0, @list);
1692      @trash2_l = splice(@trash2_o, ($#trash2_o+1)/2);
1693      $m=$#trash2_o; for ($i=0; $i<=$m; $i++) { $size+=$trash2_l[$i]; }
1694      $trashsize{2}=$size;
1695   }
1696
1697   unused_file_space: {
1698      $size=0;
1699
1700      # 3.1. normal files
1701      foreach $file (&get_all_filehandles(0)) {
1702         @o = &get_iolist(
1703            $pps_size[$file]>=0x1000 && 1,
1704            $pps_size[$file], 0xffffffff, $pps_sb[$file]
1705         );
1706         push(@trash3_l, splice(@o, ($#o+1)/2));
1707         push(@trash3_o, @o);
1708      }
1709      $m=$#trash3_o; for ($i=0; $i<=$m; $i++) { $size+=$trash3_l[$i]; }
1710
1711      # 3.2. system file of root_entry (small block file)
1712      @list = ();
1713      while (($#list+$#sbd+2) % 8) {
1714         push(@list, $#list+$#sbd+2);
1715      }
1716      @o = &get_iolist(2, 0, 0xfffffff, 0, @list);
1717      @l = splice(@o, ($#o+1)/2);
1718      push(@trash3_o, @o); push(@trash3_l, @l);
1719      $m=$#o; for ($i=0; $i<=$m; $i++) { $size+=$l[$i]; }
1720
1721      $trashsize{3}=$size;
1722   }
1723
1724   unused_system_space: {
1725      $size=0;
1726
1727      # 4.1. header block
1728      $begin = 0x4c + $num_of_bbd_blocks*4;
1729      $len = $header_size - $begin;
1730      push(@trash4_o, $begin); push(@trash4_l, $len);
1731      $size+=$len;
1732
1733      # 4.2. big block depot
1734      @o = &get_iolist(3, ($maxblock+1)*4, 0xffffffff, 0, @bbd_list);
1735      @l = splice(@o, ($#o+1)/2);
1736      push(@trash4_o, @o); push(@trash4_l, @l);
1737      $m=$#o; for ($i=0; $i<=$m; $i++) { $size+=$l[$i]; }
1738
1739      # 4.3. small block depot
1740      @o = &get_iolist(3, ($maxsmallblock+1)*4, 0xffffffff, 0, @sbd_list);
1741      @l = splice(@o, ($#o+1)/2);
1742      push(@trash4_o, @o); push(@trash4_l, @l);
1743      $m=$#o; for ($i=0; $i<=$m; $i++) { $size+=$l[$i]; }
1744
1745      $trashsize{4}=$size;
1746   }
1747
1748   $trash_known=1;
1749}
1750
1751sub get_trash_size {
1752   local($type)=shift;
1753   $type = (1|2|4|8) if !$type;
1754   &get_trash_info();
1755
1756   local($trashsize)=0;
1757   $trashsize += $trashsize{1} if $type & 1;
1758   $trashsize += $trashsize{2} if $type & 2;
1759   $trashsize += $trashsize{3} if $type & 4;
1760   $trashsize += $trashsize{4} if $type & 8;
1761
1762   $trashsize;
1763}
1764
1765sub rw_trash {
1766#
1767# "ok"||error = rw_trash("r"||"w", $type, extern $buf [,$offset,$size])
1768#
1769   local($maxarg)=$#_;
1770   &get_trash_info();
1771
1772   local($rw, $type) = @_[0..1];
1773   $type = (1|2|4|8) if !$type;
1774
1775   local($status, $offset, $size) =
1776      &get_default_iosize(&laola_get_trashsize($type), $rw, @_[2..$maxarg]);
1777   return $status if $status ne "ok";
1778
1779   local(@o)=(); local(@l)=();
1780   if ($type & 1) { push (@o, @trash1_o); push (@l, @trash1_l); }
1781   if ($type & 2) { push (@o, @trash2_o); push (@l, @trash2_l); }
1782   if ($type & 4) { push (@o, @trash3_o); push (@l, @trash3_l); }
1783   if ($type & 8) { push (@o, @trash4_o); push (@l, @trash4_l); }
1784
1785   return "ok" if &rw_iolist(
1786      $rw, $_[2],
1787      &get_iolist(5, $offset, $size, 0, &aggregate_iolist(1, @o, @l))
1788   );
1789
1790   "Laola: IO Error!";
1791}
1792
1793
1794##
1795## ----------------------------- Debugging -------------------------------
1796##
1797
1798#
1799# Some debug information. Switch it on via $optional_do_debug=1
1800# Information will be shown directly after opening any document.
1801#
1802
1803sub debug_report_pps {
1804   local($i)=shift;
1805   local($out)="";
1806   local($tmp, $tmp2)="";
1807
1808   return if !$pps_name[$i];
1809
1810   $out = sprintf ("%2x", $i);
1811   $out .= $pps_uk0[$i]==1 ? ":  " : sprintf ("#%-2x", $pps_uk0[$i]);
1812
1813   if (&laola_is_directory($i)) {
1814      $out .= "-->    ";
1815   } elsif (&laola_is_file($i)) {
1816      $out .= sprintf ("%-5x  ",
1817                       &laola_get_filesize($i));
1818   } else {
1819      $out .= "       ";
1820   }
1821
1822   if ($pps_prev[$i]==0xffffffff) { $out .= "  .";
1823      } else { $out .= sprintf ("%3x", $pps_prev[$i]); }
1824   if ($pps_next[$i]==0xffffffff) { $out .= "  .";
1825      } else { $out .= sprintf ("%3x", $pps_next[$i]); }
1826   if ($pps_dir[$i]==0xffffffff) { $out .= "  .";
1827      } else { $out .= sprintf ("%3x", $pps_dir[$i]); }
1828
1829   if (&laola_is_file_ppset($i)) {
1830      $out .= "  set";
1831   } else {
1832      $out .= "  pp ";
1833   }
1834
1835   ($tmp=$pps_name[$i]) =~ s/[^_a-zA-Z0-9]/ /g;
1836   $out .= sprintf (" \"%s\"",$tmp);
1837
1838   $out .= " " x (50 - length($out));
1839
1840   if ($pps_ts2d[$i]) {
1841      $out .= sprintf (" %d.%d.%d %02d.%02d:%02d",
1842        &filetime_to_time($pps_ts2s[$i], $pps_ts2d[$i])
1843      );
1844   }
1845
1846   print "$out\n";
1847}
1848
1849sub report_blockuse_statistic {
1850   return 1;
1851   print "--- LAOLA internal, begin block statistic ---\n\n";
1852   &make_blockuse_statistic();
1853   local($i, $j, $m);
1854   local(@o, @l);
1855   print "Big blocks:\n";
1856   for ($i=0; $i<4; $i++) {
1857      @o=(); @l=();
1858      $m=$#bb_usage; for ($j=0; $j<=$m; $j++) {
1859         next if $bb_usage[$j]!=$i;
1860         push(@o, $j); push(@l, 1);
1861      }
1862      &report_blockuse_list($i, &aggregate_iolist(1, @o, @l));
1863   }
1864   print "Small blocks:\n";
1865   for ($i=0; $i<4; $i++) {
1866      @o=(); @l=();
1867      $m=$#sb_usage; for ($j=0; $j<=$m; $j++) {
1868         next if $sb_usage[$j]!=$i;
1869         push(@o, $j); push(@l, 1);
1870      }
1871      &report_blockuse_list($i, &aggregate_iolist(1, @o, @l));
1872   }
1873   print "\n--- LAOLA internal, end block statistic ---\n\n";
1874}
1875
1876sub report_blockuse_list {
1877   local($type)=shift;
1878   return if !@_;
1879   local(%info)=(0, "Trash", 1, "Free", 2, "System", 3, "Application");
1880   local($max)=($#_+1)/2;
1881   local($i); local($o, $l);
1882   print "Type $type {$info{$type}} = (";
1883   for ($i=0; $i<$max; $i++) {
1884      $o=$_[$i]; $l=$_[$max+$i];
1885      if ($l==1) {
1886         printf (" %x ", $o);
1887      } else {
1888         printf (" %x-%x ", $o, $o+$l-1);
1889      }
1890   }
1891   print ")\n";
1892}
1893
1894sub report_trash_statistic {
1895   return;
1896   &get_trash_info();
1897   print "Trash statistic.\n";
1898   print "Free big block chunks: (\n";
1899   &report_trash_list($trashsize{1}, @trash1_o, @trash1_l);
1900   print "\nFree small block chunks: (\n";
1901   &report_trash_list($trashsize{2}, @trash2_o, @trash2_l);
1902   print "\nUnused file space: (\n";
1903   &report_trash_list($trashsize{3}, @trash3_o, @trash3_l);
1904   print "\nUnused system space: (\n";
1905   &report_trash_list($trashsize{4}, @trash4_o, @trash4_l);
1906
1907   print "\nSummary: (\n";
1908   &report_trash_list(
1909      $trashsize{1}+$trashsize{2}+$trashsize{3}+$trashsize{4},
1910      &aggregate_iolist( 1,
1911         @trash1_o, @trash2_o, @trash3_o, @trash4_o,
1912         @trash1_l, @trash2_l, @trash3_l, @trash4_l
1913      )
1914   );
1915}
1916
1917sub report_trash_list {
1918   local($size)=shift;
1919   local(@o)=@_;
1920   local(@l)=splice(@o, ($#o+1)/2);
1921   local($i, $m);
1922   printf ("   %d elements, size=%x\n", $#o+1, $size);
1923   $m=$#o; for ($i=0; $i<=$m; $i++) {
1924      printf ("   offset %5x (len %x)\n", $o[$i], $l[$i]);
1925   }
1926   print ")\n";
1927}
1928
1929sub print_iolist {
1930   local(@o)=@_;
1931   local(@l)=splice(@o, ($#o+1)/2);
1932   local($i);
1933   $m=$#o; for ($i=0; $i<=$m; $i++) {
1934      printf("   o=%6x (%x)\n", $o[$i], $l[$i]);
1935   }
1936}
1937
1938"Atomkraft? Nein, danke!"
1939
1940