#!/usr/local/bin/perl -w
use strict;   # we at least try to ;)
use Class::Struct;

# This file is part of the wvWare 2 project
# Copyright (C) 2001-2003 Werner Trobin

# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Library General Public
# License version 2 as published by the Free Software Foundation.

# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Library General Public License for more details.

# You should have received a copy of the GNU Library General Public License
# along with this library; see the file COPYING.LIB. If not, write to
# the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
# Boston, MA 02111-1307, USA.

# A small utility to generate the basic classes needed to
# read and write primitive Word structures.
# Usage: perl generate.pl input_file.html Word97
# The input_file.html is the document we want to process,
# 'Word97' is used for various things:
#   - The namespace is called Word97 and all the generated
#     code lives in there
#   - word97_generated.cpp and word97_generated.h are the
#     filenames (note the case)

# A few notes about the form of the HTML document:
# 1) We expect all seven fields in the tables:
#    b10, b16, field, type, size, bitfield, comment
#    If any of them is absent just add an empty table cell.
# 2) If you want to set an initial value for a (plain!) variable
#    you can add an HTML comment of the form <!-- initial="..." --> to the
#    "entry" (preferably after the "field" tag).
#    Note: It has to be on a separate line, else it won't get
#          picked up!
#    Note 2: We don't check the value, we just assign it, so make
#            sure that this is legal C++ (e.g. initial="true", initial="42+42")!
#    Note 3: Everything else will be set to 0
# 3) In some cases the row tag has to be right after the last cell tag
#    (the exact tag names were lost from this copy of the comment;
#    presumably the closing row tag and the last closing cell tag),
#    so better do that everywhere :)
# 4) An array with a dynamic size can easily be created by editing
#    the "type" field. If you add, say "U8[foo]" then this means:
#      - we create a dynamic array of size "foo", where "foo" is
#        some variable of that structure we have already read.
#        Note: We don't do any error checking here, so be careful
#              not to use uninitialized values we didn't read at
#              the time we create the array!
#        Note2: You can even put plain expressions there, or a
#               call to a function you include in the template!
#               Just make sure that it's legal C++ and that it
#               doesn't contain any '[' or ']' as it will probably
#               confuse the parser.
#      - if foo=="", i.e. if you just have "U32[]" then we will
#        just create a plain pointer for you and initialize it with 0.
#        Note: Plain destruction will work as we just delete [] it.
#        Attention: Copy CTOR and assignment operator won't work!!!!
#                   (as we can't know the length) What we do is what
#                   C++ does by default - copy the pointer :}
#    To allow proper comparisons (operator==) we have to know the length
#    of the dynamic structure. Therefore you should add an HTML comment
#    to such items, specifying a way to perform that check.
#    (e.g. <!-- compareSizeLHS="lhs.cb" compareSizeRHS="rhs.cb" -->)
#    Everything between the quotes will be copied verbatim to an if statement
#    (e.g. if((lhs.cb)!=(rhs.cb)) ).
#    If you decide to call a function please ensure it returns something
#    useful we can compare :)
# 5) For all structures which need a way to "apply" a grpprl (e.g. PAP, CHP)
#    we provide a special method you can reimplement if you want to. In the
#    header we simply add a declaration.
# 6) If you need the possibility to share a structure (e.g. SEP, PAP, CHP,...)
#    you can add it to the list in sub selectShared()
# 7) In case you want to use the structure in any of the PL(C)F templates
#    you have to add the "sizeof" comment to the .htm file between the name of
#    the structure and the table (like for the DTTM struct)

# If you want to ignore certain structures, please add them to
# the 'cleanStructures' sub.

# If you need the possibility to read a structure from a plain
# pointer, too, please add it to the %wantsReadPtr list in parseStructures

# This structure holds one "variable"
struct Item => {
    name => '$',            # The name of this variable
    type => '$',            # The type (e.g. U16, S32[42],...)
    bits => '$',            # The amount of bits (e.g. 3), if any
    comment => '$',         # The comment for this variable
    initial => '$',         # The initial value of this field, if any
    len => '$',             # If the item is a dynamic array we store its length
                            # here. length can be a plain C++ expression.
    compareSizeLHS => '$',  # If the item is a dynamic array we need to compare the
                            # left-hand-side (lhs) and the rhs in their size. This
                            # is a plain C++ expression returning the size of the LHS.
    compareSizeRHS => '$',  # If the item is a dynamic array we need to compare the
                            # left-hand-side (lhs) and the rhs in their size. This
                            # is a plain C++ expression returning the size of the RHS.
    startNew => '$',        # This field is used for debugging purposes. It
                            # is set to 1 if this variable should start a new
                            # bitfield (and close the last one). We simply
                            # check whether we filled the last field completely here
};

struct Structure => {
    name => '$',      # The name of the structure
    comment => '$',   # The comment for this struct
    items => '@',     # All the data members
    hidden => '$',    # Set to "//" if we want to comment that structure out
    dynamic => '$',   # Do we have dynamic memory? Then we need a Copy CTOR,
                      # DTOR, assignment op, op==,...
    readPtr => '$',   # Do we want to be able to construct/read from a pointer?
    shared => '$',    # Whether this structure should be derived from wvWare::Shared
    sizeOf => '$',    # The size of the structure (not padded, as in the file!)
    dumpCode => '$',  # Whether dumping code should be generated
};

# This array of strings contains the whole HTML
# documentation file.
# All the parsing subs will read/modify that global array
# Note: All the tags we use are already converted to
# uppercase.
my @document;
# The current index in the document-array (used during parsing)
my $i;
# This string holds the name of the namespace to create
my $namespace;

# This array holds all the structures we want to write out
# It's filled during parsing and used heavily afterwards
my @structs;
# The current struct we're working on (only used during parsing)
my $struct;
# The current item we're working on (only used during parsing)
my $item;

# Parses all the structures.
# Walks @document from the top. Every line closing a heading starts a new
# Structure: the heading text becomes its comment and the last parenthesised
# part of the heading becomes its name (whitespace replaced by underscores).
# The lines up to the next table are scanned for a "sizeOf" HTML comment and
# the table itself is handed to parseStructure(). Structures which parsed
# successfully are appended to @structs.
sub parseStructures {
    my ($tmp, $prev);

    # NOTE(review): the HTML tag text inside the original patterns was lost
    # from this copy of the file. The four patterns below are reconstructions;
    # the heading level (H3) and the exact syntax of the sizeOf comment could
    # not be recovered from the remaining text -- confirm both against a
    # pristine copy of this script before relying on the output.
    my $headingOpen  = qr,\<H3\>,;
    my $headingClose = qr,\</H3\>,;
    my $tableOpen    = qr,\<TABLE\b,;
    # Deliberately tolerant: accepts sizeOf=4, sizeOf="4", sizeOf(4), sizeOf 4
    my $sizeOfTag    = qr,\<\!--\s*sizeOf\W*?(\w+)\W*--\>,i;

    # We want that readPtr function for these structures :)
    my %wantsReadPtr = map { $_ => 1 }
        qw(BRC SHD DCS DTTM PHE TLP ANLD ANLV OLST TC PCD PRM NUMRM);

    print "Parsing...\n";
    $i=0;
    while($i<=$#document) {
        if($document[$i] =~ $headingClose) {
            # A heading end in the very first line has no predecessor; don't
            # let the index -1 wrap around to the last line of the document.
            $prev = $i>0 ? $document[$i-1] : "";

            if($prev =~ $headingOpen) {
                # looks okay: the heading started on the previous line
                $struct=Structure->new();  # create a new structure element
                $document[$i] =~ m,^(.*)${headingClose},;
                $struct->comment($1);
            }
            elsif($document[$i] =~ m,${headingOpen}(.*)${headingClose},) {
                # looks okay, too: the whole heading is on one line
                $struct=Structure->new();  # create a new structure element
                $struct->comment($1);
            }
            else {
                if($prev !~ m/Algorithm/) {
                    # huh? Shouldn't happen at all
                    print "####### ERROR #######\n";
                    print $prev, "\n", $document[$i], "\n";
                }
                $i++; # don't forget that one here :))
                next;
            }

            # get the name of the structure (last "(...)" of the heading)
            if($struct->comment =~ m,.*\((.*)\),) {
                $tmp=$1;
            }
            else {
                # Without this check a stale $1 of an earlier match would
                # silently become the name of the structure.
                print "####### ERROR #######\n";
                print " No structure name found in: '", $struct->comment, "'\n";
                $i++;
                next;
            }
            $tmp =~ s/\s/_/g;       # replace all the spaces with underscores
            $struct->name($tmp);    # ...and set it as name
            #print "found: name: '", $struct->name, "' comment: '", $struct->comment, "'\n";
            $struct->hidden("");    # initialize that with a sane value

            if($wantsReadPtr{$struct->name}) {
                $struct->readPtr(1);
            }

            # Look for the table, picking up a sizeOf comment on the way.
            # Bounded, as a heading without a table would loop forever.
            while($i<=$#document && $document[$i] !~ $tableOpen) {
                if($document[$i] =~ $sizeOfTag) {
                    #print "found a sizeOf tag for structure " . $struct->name . ": " . $1 . "\n";
                    $struct->sizeOf($1);
                }
                $i++;
            }
            if($i>$#document) {
                print "####### ERROR #######\n";
                print " No table found for structure '", $struct->name, "'\n";
                last;
            }

            # parse the table we found
            if(parseStructure()) {
                push(@structs, $struct); # append the new structure
            }
            else {
                print "####### ERROR #######\n";
                print " name: '", $struct->name, "' comment: '", $struct->comment, "'\n";
            }
        }
        $i++;
    }
    # print "Number of structures: ", $#structs+1, "\n";
    print "Done.\n";
}

# NOTE(review): the HTML tag text inside the patterns of parseStructure and
# parseItem was lost from this copy of the file. TR/TD/TABLE/BR below are
# reconstructed from the surrounding comments and messages ("row of the
# table", "table end", "newlines") and from the note that all tags are
# upper-cased beforehand -- confirm against a pristine copy of this script.

# Parses one structure (one table of the document), starting at the
# current position $i. Returns 1 on success, 0 on failure.
sub parseStructure {
    # eat the first row (headline)
    while($i<=$#document && $document[$i] !~ m,^\<TR\>$,) {
        $i++;
    }
    while($i<=$#document && $document[$i] !~ m,^\</TR\>$,) {
        $i++;
    }
    # parse all the variables till we encounter the end of the table
    while($i<=$#document && $document[$i] !~ m,^\</TABLE\>$,) {
        if(parseItem()) {
            push(@{$struct->items}, $item);
            $i++;
        }
        else {
            print "####### ERROR #######\n";
            print " Error while parsing an item!\n";
            return 0; # uh-oh :}
        }
    }
    if($i>$#document) {
        # ran off the end of the document instead of looping forever
        print "####### ERROR #######\n";
        print " Unexpected end of the document while parsing a table!\n";
        return 0;
    }
    #print "count: ", $#{$struct->items}+1, "\n";
    return 1;  # success
}

# Parses one row of the table to get one data item out of it and stores it
# in $item. Does some trivial error checking: returns true only if all
# seven cells of the row were seen.
sub parseItem {
    my ($myState, $tmp);

    $myState=0;   # index of the cell we expect next
    while($i<=$#document && $document[$i] !~ m,^\<TR\>$,) {
        $i++;
    }
    $item=Item->new();
    while($i<=$#document && $document[$i] !~ m,^\</TR\>$,) {
        if($document[$i] =~ m,^\<TD\>(.*)\</TD\>$,) {
            if($myState==0) { # this is used for debugging/sanity checking
                $item->startNew($1);
                #print " startNew: ", $1, "\n";
            }
            # yes, I left out $myState==1 on purpose
            elsif($myState==2) {
                $item->name($1);
                #print " name: ", $1, "\n";
            }
            elsif($myState==3) {
                $item->type($1);
                #print " type: ", $1, "\n";
            }
            elsif($myState==4) {
                $tmp=$1;
                if($tmp =~ m/^:(.*)/) {
                    $item->bits($1);
                    #print " bits: ", $1, "\n";
                }
                else {
                    #print " no bits but a plain size attribute!\n";
                }
            }
            # yes, I left out $myState==5 on purpose
            elsif($myState==6) {
                $item->comment($1);
                #print " (short) comment: ", $1, "\n";
            }
            $myState++;
        }
        # The comment can expand across several lines
        elsif($document[$i] =~ m,^\<TD\>(.*)$, && $myState==6) {
            $tmp=$1;
            # Insert a line break tag for "newlines" (consistency)
            if($i+1>$#document || $document[$i+1] !~ m,\<BR\>,) {
                $tmp .= "<BR>";
            }
            $i++;
            while($i<=$#document && $document[$i] !~ m,(.*)\</TD\>$,) {
                $tmp .= $document[$i];
                # Insert a line break tag for "newlines" (consistency)
                if($i+1>$#document || $document[$i+1] !~ m,\<BR\>,) {
                    $tmp .= "<BR>";
                }
                $i++;
            }
            if($i>$#document) {
                print "Error: Found the end of the document inside a comment!\n";
                return 0;
            }
            $document[$i] =~ m,(.*)\</TD\>$,;
            $tmp .= $1;
            $item->comment($tmp);
            #print " (long) comment: ", $tmp, "\n";
            $myState++;
        }
        elsif($document[$i] =~ m,\<\!--\s*initial=\"(.*?)\"\s*--\>,) {
            #print "initial found: ", $document[$i], " filtered: ", $1, "\n";
            $item->initial($1);
        }
        elsif($document[$i] =~ m,\<\!--\s+compareSizeLHS=\"(.*?)\"\s+compareSizeRHS=\"(.*?)\"\s+--\>,) {
            #print "compareSize found: ", $document[$i], " filtered: ", $1, ", ", $2, "\n";
            $item->compareSizeLHS($1);
            $item->compareSizeRHS($2);
        }
        elsif($document[$i] =~ m,^\</TABLE\>$,) {
            print "Error: Found a table end where I didn't expect it!\n";
            return 0;
        }
        $i++;
    }
    #print "$myState==7 ? ", $myState==7, "\n";
    return $myState==7;
}

# Returns the index of the first structure in @structs with the given
# name, or -1 if there is no such structure.
sub _findStructIndex {
    my ($name)=@_;

    foreach my $index (0..$#structs) {
        return $index if $structs[$index]->name eq $name;
    }
    return -1;
}

# Removes some structures we can't generate easily.
# Note: We write out the struct in the header and just
# comment it out (that you can copy it for a proper impl.).
sub cleanStructures {
    my($index, @clean);

    print "Cleaning up...\n";
    # Feel free to add your "favorites" here
    # The goal, however, should be to have as much as possible
    # generated, so try to fix the HTML ;)
    @clean=("PAPXFKP", "CHPXFKP", "PAPX", "CHPX", "FLD", "PLCF", "STD", "FFN", "TBD");
    foreach my $name (@clean) {
        $index=_findStructIndex($name);
        next if $index<0;
        print "Removing: ", $structs[$index]->name, "\n";
        # Better not really remove, just comment it out by setting "hidden"
        # That way you can copy the declaration for a real implementation
        $structs[$index]->hidden("//");
    }
    print "Done.\n";
}

# Moves around some structures to resolve forward references
# in the generated sources
sub hoistStructures {
    my($index, @hoist);

    print "Resolving forward references...\n";
    # Feel free to add your "favorites" here
    # Note: LIFO, at least kind of (the last element here is first afterwards)
    @hoist=("TBD", "TAP", "DPPOLYLINE", "DPTXBX", "DPHEAD", "TC", "TLP", "BRC",
            "PHE", "SHD", "PRM", "PRM2", "DOPTYPOGRAPHY", "DTTM");
    foreach my $name (@hoist) {
        $index=_findStructIndex($name);
        next if $index<0;
        print "Moving: ", $structs[$index]->name, "\n";
        # take it out of its old place and put it at the front
        unshift @structs, splice(@structs, $index, 1);
    }
    print "Done.\n";
}

# Selects the structures we want to derive from wvWare::Shared.
sub selectShared {
    my($index, @shared);

    print "Selecting shared structures...\n";
    @shared=("SEP", "TAP", "PAP", "CHP", "PICF");
    foreach my $name (@shared) {
        $index=_findStructIndex($name);
        next if $index<0;
        print "Sharing: ", $structs[$index]->name, "\n";
        $structs[$index]->shared(1);
    }
    print "Done.\n";
}

# Selects the structures which should contain a dump() method
sub selectDumped {
    my($index, @dumped);

    print "Selecting structures with a dump() method...\n";
    @dumped=("SEP", "TAP", "PAP", "CHP", "OLST", "BRC", "TLP",
             "SHD", "DTTM", "PHE", "TC", "ANLV", "LSPD", "DCS",
             "NUMRM", "ANLD", "PICF", "METAFILEPICT");
    foreach my $name (@dumped) {
        $index=_findStructIndex($name);
        next if $index<0;
        print "Adding dump() to: ", $structs[$index]->name, "\n";
        $structs[$index]->dumpCode(1);
    }
    print "Done.\n";
}

# The "main" generator function for headers.
# Writes <lowercase namespace>_generated.h into the current directory, using
# the four sections returned by parseTemplate("template-<namespace>.h").
# Dies if the header can't be opened or closed.
sub generateHeader {
    my($tmp, $license, $includes, $before, $after);

    print "Generating the header file...\n";
    $tmp=lc($namespace);
    $tmp .= "_generated.h";
    open(my $header, '>', $tmp) or die "Couldn't open the header for writing: " . $!;
    ($license, $includes, $before, $after) = parseTemplate("template-$namespace.h");

    # Build the include guard. Only the extension may be rewritten, so the
    # dot has to be escaped and anchored (a plain /.h/ hits the first 'h'
    # preceded by any character).
    $tmp =~ s/\.h$/_h/;
    $tmp=uc($tmp);

    # license section...
    print {$header} $license;
    print {$header} "\n#ifndef $tmp\n#define $tmp\n\n";
    # include section...
    print {$header} "#include \"global.h\"\n";
    print {$header} "#include \"sharedptr.h\"\n";
    print {$header} "#include \"utilities.h\"\n";
    print {$header} $includes;
    print {$header} "\nnamespace wvWare {\n\n";
    print {$header} "class OLEStreamReader;\n";
    print {$header} "class OLEStreamWriter;\n";
    print {$header} "class StyleSheet;\n";
    print {$header} "class Style;\n\n";
    print {$header} "namespace $namespace {\n\n";

    # pre
    print {$header} $before . "\n";
    # Fill the empty template
    print {$header} generateHeaderStructs();
    # post
    print {$header} $after;

    print {$header} "\n} // namespace $namespace\n\n";
    print {$header} "} // namespace wvWare\n\n";
    print {$header} "#endif // $tmp\n";
    close($header) or die $!;
    print "Done.\n";
}

# This subroutine generates the header file's structures and returns them
# as one string (empty if there are no structures).
# NOTE(review): runs of blanks inside the emitted string literals may have
# been collapsed in this copy of the file; they are kept as found, which
# only affects the indentation of the generated header.
sub generateHeaderStructs {
    my $string="";

    foreach my $index (0..$#structs) {
        my $n=$structs[$index]->name;
        my $h=$structs[$index]->hidden;
        # Prefixes every given line with the "hidden" marker of this struct
        my $emit=sub { return join("", map { $h . $_ } @_); };
        my $tmp;

        $string .= "/**\n * " . $structs[$index]->comment . "\n */\n";
        if($h ne "") {
            $string .= "/* This structure has been commented out because we can't handle it correctly\n";
            $string .= " * Please don't try to fix it here in this file, but rather copy this broken\n";
            $string .= " * structure definition and fix it in some auxilliary file. If you want to\n";
            $string .= " * include that aux. file here, please change the template file.\n */\n";
        }
        $string .= $h . "struct $n ";
        if(defined($structs[$index]->shared)) {
            $string .= ": public Shared ";
        }
        $string .= "{\n";
        $string .= $emit->(" /**\n",
                           " * Creates an empty $n structure and sets the defaults\n",
                           " */\n",
                           " $n();\n",
                           " /**\n",
                           " * Simply calls read(...)\n",
                           " */\n",
                           " $n(OLEStreamReader *stream, bool preservePos=false);\n");
        if(defined($structs[$index]->readPtr)) {
            $string .= $emit->(" /**\n",
                               " * Simply calls readPtr(...)\n",
                               " */\n",
                               " $n(const U8 *ptr);\n");
        }

        # From here on we first put the text into a temporary variable, as
        # we might have to insert some code at this place. The reason is
        # that we need DTOR, Copy CTOR,... if we have pointers in our struct.
        # Unfortunately we find that out in generateHeaderData and don't know
        # it here.
        $tmp = "\n";
        $tmp .= $emit->(" /**\n",
                        " * This method reads the $n structure from the stream.\n",
                        " * If preservePos is true we push/pop the position of\n",
                        " * the stream to save the state. If it's false the state\n",
                        " * of stream will be changed!\n",
                        " */\n",
                        " bool read(OLEStreamReader *stream, bool preservePos=false);\n\n");

        # Special readPtr() method for all the "ultra primitive" structs
        # we sometimes have to read from memory (SPRM parameter,...)
        if(defined($structs[$index]->readPtr)) {
            $tmp .= $emit->(" /**\n",
                            " * This method reads the struct from a pointer\n",
                            " */\n",
                            " void readPtr(const U8 *ptr);\n\n");
        }

        $tmp .= $emit->(" /**\n",
                        " * Same as reading :)\n",
                        " */\n",
                        " bool write(OLEStreamWriter *stream, bool preservePos=false) const;\n\n",
                        " /**\n",
                        " * Set all the fields to the inital value (default is 0)\n",
                        " */\n",
                        " void clear();\n\n");

        # Special apply() method for all the PAP, CHP,... structs
        # Implement that in an auxiliary file
        if(lc($namespace) eq "word97" &&
           ($n eq "PAP" || $n eq "CHP" || $n eq "TAP" || $n eq "SEP" || $n eq "PICF")) {
            $tmp .= $emit->(" /**\n",
                            " * This method applies a grpprl with \@param count elements\n",
                            " */\n",
                            " void apply(const U8 *grpprl, U16 count, const Style* style, const StyleSheet* styleSheet, OLEStreamReader* dataStream, WordVersion version);\n\n",
                            " /**\n",
                            " * This method applies a whole " . $n . "X to the structure.\n",
                            " * The reason that we only pass a pointer to the start of the exception\n",
                            " * structure is, that we don't know the type in the FKP template :}\n",
                            " */\n");
            if($n eq "CHP") { # More than just CHP?
                $tmp .= $h . " void applyExceptions(const U8* exceptions, const Style* paragraphStyle, const StyleSheet* styleSheet, OLEStreamReader* dataStream, WordVersion version);\n\n";
            }
            else {
                $tmp .= $h . " void applyExceptions(const U8 *exceptions, const StyleSheet *styleSheet, OLEStreamReader* dataStream, WordVersion version);\n\n";
            }
            $tmp .= $emit->(" /**\n",
                            " * This method applies one single SPRM. It returns -1 if it wasn't\n",
                            " * a " . $n . " SPRM and it returns the length of the applied SPRM\n",
                            " * if it was successful.\n",
                            " */\n",
                            " S16 apply" . $n . "SPRM(const U8* ptr, const Style* style, const StyleSheet* styleSheet, OLEStreamReader* dataStream, WordVersion version);\n\n");
        }

        # Special toPRM2 method for the PRM struct, implemented in word97_helper.cpp
        # This method is necessary as we don't want to rely on a "packed" layout of
        # the structure so we can't just do evil casting ;)
        if($n eq "PRM") {
            $tmp .= $emit->(" /**\n",
                            " * This method returns a PRM2 created from the current PRM\n",
                            " */\n",
                            " PRM2 toPRM2() const;\n\n");
        }

        if(defined($structs[$index]->dumpCode)) {
            $tmp .= $emit->(" /**\n",
                            " * Dumps all fields of this structure (for debugging)\n",
                            " */\n",
                            " void dump() const;\n\n",
                            " /**\n",
                            " * Converts the data structure to a string (for debugging)\n",
                            " */\n",
                            " std::string toString() const;\n\n");
        }

        if(defined($structs[$index]->sizeOf)) {
            $tmp .= $emit->(" // Size of the structure\n",
                            " static const unsigned int sizeOf;\n\n");
        }

        $tmp .= $h . " // Data\n";
        # Side effect: marks the structure as "dynamic" if it finds an
        # array without a fixed size, so this has to run before the check.
        $tmp .= generateHeaderData($index);

        if(defined($structs[$index]->dynamic)) {
            # okay, now we already know what we need, so let's
            # add that stuff (to $string, of course ;)
            $string .= $emit->(" /**\n",
                               " * Attention: This struct allocates memory on the heap\n",
                               " */\n",
                               " $n(const $n &rhs);\n",
                               " ~" . $n . "();\n\n",
                               " " . $n . " &operator=(const $n &rhs);\n");
        }

        # insert the stuff from above
        $string .= $tmp;

        # If we have dynamic structures we have to be careful
        # with clear()! We simply define that clear() also
        # delete []s all the arrays and clearInternal() just sets
        # everything to 0
        if(defined($structs[$index]->dynamic)) {
            $string .= $emit->("private:\n",
                               " void clearInternal();\n\n");
        }
        $string .= $h . "}; // $n\n";

        # ...and add some more code "outside"
        $string .= "\n" . $h . "bool operator==(const $n &lhs, const $n &rhs);\n";
        $string .= $h . "bool operator!=(const $n &lhs, const $n &rhs);\n\n\n";
    }
    return $string;
}

# Takes one structure (by its index in @structs) and generates all the
# fields for it; returns them as one string (empty if there are no items).
# Checks the bit-fields for missing bits and tries to detect
# arrays with non-static size. We use that information all
# over the place :)
# Side effects: sets "len" on items which are dynamic arrays and "dynamic"
# on the structure containing them.
sub generateHeaderData {
    my ($index)=@_;
    my ($string, $tmp, $tmp2, $sum, $bits, $h);

    $string="";  # so that a structure without items yields "" and not undef
    $sum=0;      # no bits counted up to now :)
    $bits=0;     # make the first check work

    # write out all the data
    foreach my $field (@{$structs[$index]->items}) {
        $h=$structs[$index]->hidden;
        $string .= prepareComment($field->comment, $h);

        # Check the completeness of the bitfields...
        if($field->startNew ne "") {
            if($bits != $sum) {
                print " ERROR: Last bitfield incomplete. Current position: ";
                print $structs[$index]->name . " - " . $field->name . "\n";
            }
            # set up a new check (sloppy, only for U8, U16, and U32 bitfields)
            if($field->type =~ m/U(\d+)/ && defined($field->bits)) {
                #print "bitfield..." . $field->name . "\n";
                $bits=$1;
            }
            else {
                $bits=0;
            }
            $sum=0;
        }

        # Handle XCHAR[32] by splitting it up properly
        if($field->type =~ m/(.*)(\[.*\])/) {
            $tmp = " " . $1 . " " . $field->name . $2;
            #print "Array: '" . $tmp . "'\n";
            # Is it a fixed size array or not?
            if($tmp !~ m/.*\[\d+\]/) {
                $tmp =~ m/ (.+)\[(.*)\]/;
                $tmp2=$1;
                # get the "length" (or the C++ expression ;)
                $field->len($2);
                # "type name" -> "type *name"; the array form is kept as a comment
                $tmp2 =~ s/ / \*/;
                $tmp = " " . $tmp2 . "; //" . $tmp;
                #print " --- Result: " . $tmp . "\n";
                # okay, we found a dynamic array, so we need some additional
                # code for that struct (Copy CTOR, DTOR,...)
                $structs[$index]->dynamic(1);
            }
            $string .= $h . $tmp;
        }
        else {
            $string .= $h . " " . $field->type . " " . $field->name;
        }

        if(defined($field->bits)) {
            $string .= ":" . $field->bits;
            $sum += $field->bits;
        }
        $string .= ";\n\n";
    }
    return $string;
}

# This method gets a looong comment string. It takes the
# string and splits it at the <BR>s and creates a nice
# comment out of it (not longer than, say 90 cols, as found in
# the HTML spec)
# $comment is the text to wrap, $h the prefix put in front of every
# generated line. Returns "" for an empty (or undefined) comment.
# NOTE(review): the tag inside the split pattern was lost from this copy of
# the file; <BR> is reconstructed from the comments here and in parseItem.
sub prepareComment {
    my($comment, $h)=@_;
    my($string, @tmp);

    if(!defined($comment) || $comment eq "") {
        return "";
    }

    $string = $h . " /**\n";
    # "unfold" the <BR>'ed comments
    @tmp=split(/\<BR\>/, $comment);
    foreach (@tmp) {
        $string .= $h . " * $_\n";
    }
    $string .= $h . " */\n";
    return $string;
}

# Parse the template file
sub parseTemplate {
    my($name) = @_; # name of the template
    my($license, $includes, $before, $after, $myState);

    open(TEMPLATE, "<$name") or die "Couldn't open the template: " . $!;
    # initialize all the template vars
    $myState=0;
    $license="";
    $includes="";
    $before="";
    $after="";
    # read in the information...
    while(