1#!/usr/local/bin/perl -w
2use strict;  # we at least try to ;)
3use Class::Struct;
4
5# This file is part of the wvWare 2 project
6# Copyright (C) 2001-2003 Werner Trobin <trobin@kde.org>
7
8# This library is free software; you can redistribute it and/or
9# modify it under the terms of the GNU Library General Public
10# License version 2 as published by the Free Software Foundation.
11
12# This library is distributed in the hope that it will be useful,
13# but WITHOUT ANY WARRANTY; without even the implied warranty of
14# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15# Library General Public License for more details.
16
17# You should have received a copy of the GNU Library General Public License
18# along with this library; see the file COPYING.LIB.  If not, write to
19# the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20# Boston, MA 02111-1307, USA.
21
# A script to generate code which converts Word95 structures to Word97 ones
# as well as possible.
# If you add a convert comment to the Word 6 HTML you can "loosen" the
# restrictions a bit:
#   - convert="string(fieldname)" converts between U8[] and XCHAR[]
#             In case you want to limit the string size just write
#             "string(fieldname:XY)" where XY is the length to copy.
#   - convert="type" loosens the type restrictions and simply tries to assign
#             even if the types are not exactly the same (U32 <- U16,...)
#   - convert="(fieldname)" relates the fieldnames and loosens the type
#             restrictions
#   - convert="unused"  skips this field
34
35###############################################################################
# To discuss with Shaheed:
# - CHP::chse - I think we should map that to the Word97 CHP::fMacChs (70)
# - I disabled (unused) DOP::fReadOnlyRecommended and DOP::fWriteReservation
#   as this should normally go into the Word97 FIB, but well, I doubt we need
#   those flags.
# - I mapped the things like cpnBtePap to the "active" Word97 structures
#   instead of mapping it to the blah_W6 compatibility ones.
# - PAP::fAutoHyph -> ? Maybe it's Word97::PAP::fAutoWrap?
# - PAP::rgdxaTab, PAP::rgtbd?
# - PGD: We should create a PGD2 structure for the 2nd table and have a
#   conversion function like for PRM -> PRM2. Then we can map Word95::PGD to
#   the Word97::PGD2. For now I disabled the structure.
# - PHE: What to do with the Height/Line field? Have a union for them?
49###############################################################################
50
51# This structure holds one "variable"
52struct Item => {
53    name => '$',         # The name of this variable
54    type => '$',         # The type (e.g. U16, S32[42],...)
55    bits => '$',         # The amount of bits (e.g. 3), if any
56    comment => '$',      # The comment for this variable
57    initial => '$',      # The initial value of this field, if any
58    len => '$',          # If the item is a dynamic array we store its length
59                         # here. length can be a plain C++ expression.
60    compareSizeLHS => '$',  # If the item is a dynamic array we need to compare the
61                            # left-hand-side (lhs) and the rhs in their size. This
62                            # is a plain C++ expression returning the size of the LHS.
63    compareSizeRHS => '$',  # If the item is a dynamic array we need to compare the
64                            # left-hand-side (lhs) and the rhs in their size. This
65                            # is a plain C++ expression returning the size of the RHS.
66    startNew => '$',     # This field is used for debugging purposes. It
67                         # is set to 1 if this variable should start a new
68                         # bitfield (and close the last one). We simply
69                         # check whether we filled the last field completely here
70    matched => '$',      # This field is used to indicate that this item already was "matched"
71    convert => '$',      # The conversion options - if any
72};
73
74struct Structure => {
75    name => '$',      # The name of the structure
76    comment => '$',   # The comment for this struct
77    items => '@',     # All the data members
78    hidden => '$',    # Set to "//" if we want to comment that structure out
79    dynamic => '$',   # Do we have dynamic memory? Then we need a Copy CTOR,
80                      # DTOR, assignment op, op==,...
81};
82
83
84# This array of strings contains the whole HTML
85# documentation file. It's used twice when reading the spec in
86# All the parsing subs will read/modify that global array
87# Note: All the tags we use are already converted to
88# uppercase.
89my @document;
90
91# The current index in the document-array (used during parsing)
92my $i;
93
94# These arrays hold all the structures we want to write out
95my @structs95;
96my @structs97;
97
98# The current struct we're working on (only used during parsing)
99my $struct;
100# The current item we're working on (only used during parsing)
101my $item;
102
103# Parses all the structures
104sub parseStructures {
105    my ($doc)=@_;
106    my ($tmp);
107
108    print "Parsing $doc...\n";
109    $i=0;
110    while($i<=$#document) {
111        if($document[$i] =~ m,\</H3\>,) {
112            if($document[$i-1] =~ m/\<H3\>/) {  # Safe, as </H3> can't be in the first line
113                # looks okay
114                $struct=Structure->new();  # create a new structure element
115                $document[$i] =~ m,^(.*)\</H3\>,;
116                $struct->comment($1);
117            }
118            elsif($document[$i] =~ m/\<H3\>/) {
119                # looks okay, too
120                $struct=Structure->new();  # create a new structure element
121                $document[$i] =~ m,\<H3\>(.*)\</H3\>,;
122                $struct->comment($1);
123            }
124            else {
125                if($document[$i-1] !~ m/Algorithm/) {
126                    # huh? Shouldn't happen at all
127                    print "####### ERROR #######\n";
128                    print $document[$i-1], "\n", $document[$i], "\n";
129                }
130                $i++; # don't forget that one here :))
131                next;
132            }
133            $struct->comment =~ m,.*\((.*)\),;  # get the name of the structure
134            $tmp=$1;              # store it in a $tmp var as I'm too clueless :)
135            $tmp =~ s/\s/_/;      # replace the spaces with underscores
136            $struct->name($tmp);  # ...and set it as name
137            #print "found: name: '", $struct->name, "' comment: '", $struct->comment, "'\n";
138            $struct->hidden("");  # initialize that with a sane value
139
140            #print "Checking for a <TABLE> ";
141            while($document[$i] !~ m,\<TABLE ,) {
142                $i++;
143                #print ".";
144            }
145            #print " found\n";
146            # parse the <TABLE> we found
147            if(parseStructure()) {
148                if($doc eq "Word95") {
149                    push(@structs95, $struct); # append the new structure
150                }
151                elsif($doc eq "Word97") {
152                    push(@structs97, $struct);
153                }
154                else {
155                    print "Error: Word95 or Word97?\n";
156                }
157            }
158            else {
159                print "####### ERROR #######\n";
160                print "   name: '", $struct->name, "' comment: '", $struct->comment, "'\n";
161            }
162        }
163        $i++;
164    }
165    print "Done.\n";
166}
167
168# Parses one structure (<table>...</table>)
169sub parseStructure {
170
171    # eat the first row (headline)
172    while($document[$i] !~ m,^\<TR\>$,) {
173        $i++;
174    }
175    while($document[$i] !~ m,^\</TR\>$,) {
176        $i++;
177    }
178
179    # parse all the variables till we encounter </TABLE>
180    while($document[$i] !~ m,^\</TABLE\>$,) {
181        if(parseItem()) {
182            push(@{$struct->items}, $item);
183            $i++;
184        }
185        else {
186            print "####### ERROR #######\n";
187            print "   Error while parsing an item!\n";
188            return 0; # uh-oh :}
189        }
190    }
191    #print "count: ", $#{$struct->items}+1, "\n";
192    return 1;  # success
193}
194
195# Parses one row of the table (<tr> ... </tr>) to get one
196# data item out of it. Does some trivial error checking
197sub parseItem {
198    my ($myState, $tmp);
199
200    $myState=0;
201    while($document[$i] !~ m,^\<TR\>$,) {
202        $i++;
203    }
204    $item=Item->new();
205    while($document[$i] !~ m,^\</TR\>$,) {
206        if($document[$i] =~ m,^\<TD\>(.*)\</TD\>$,) {
207            if($myState==0) {  # this is used for debugging/sanity checking
208                $item->startNew($1);
209                #print "   startNew: ", $1, "\n";
210            }
211            # yes, I left out $myState==1 on purpose
212            elsif($myState==2) {
213                $item->name($1);
214                #print "   name: ", $1, "\n";
215            }
216            elsif($myState==3) {
217                $item->type($1);
218                #print "   type: ", $1, "\n";
219            }
220            elsif($myState==4) {
221                $tmp=$1;
222                if($tmp =~ m/^:(.*)/) {
223                    $item->bits($1);
224                    #print "   bits: ", $1, "\n";
225                }
226                else {
227                    #print "   no bits but a plain size attribute!\n";
228                }
229            }
230            # yes, I left out $myState==5 on purpose
231            elsif($myState==6) {
232                $item->comment($1);
233                #print "   (short) comment: ", $1, "\n";
234            }
235            $myState++;
236        }
237        # The comment can expand across several lines
238        elsif($document[$i] =~ m,^\<TD\>(.*)$, && $myState==6) {
239            $tmp=$1;
240            # Insert a <BR> for "newlines" (consistency)
241            if($document[$i+1] !~ m,\<BR\>,) {
242                $tmp .= "<BR>";
243            }
244            $i++;
245            while($document[$i] !~ m,(.*)\</TD\>$,) {
246                $tmp .= $document[$i];
247                # Insert a <BR> for "newlines" (consistency)
248                if($document[$i+1] !~ m,\<BR\>,) {
249                    $tmp .= "<BR>";
250                }
251                $i++;
252            }
253            $document[$i] =~ m,(.*)\</TD\>$,;
254            $tmp .= $1;
255            $item->comment($tmp);
256            #print "  (long) comment: ", $tmp, "\n";
257            $myState++;
258        }
259        elsif($document[$i] =~ m,\<\!--\s*initial=\"(.*?)\"\s*--\>,) {
260            #print "initial found: ", $document[$i], " filtered: ", $1, "\n";
261            $item->initial($1);
262        }
263        elsif($document[$i] =~ m,\<\!--\s+compareSizeLHS=\"(.*?)\"\s+compareSizeRHS=\"(.*?)\"\s+--\>,) {
264            #print "compareSize found: ", $document[$i], " filtered: ", $1, ", ", $2, "\n";
265            $item->compareSizeLHS($1);
266            $item->compareSizeRHS($2);
267        }
268        elsif($document[$i] =~ m,\<\!--\s*convert=\"(.*?)\"\s*--\>,) {
269            #print "convert found: ", $document[$i], " filtered: ", $1, "\n";
270            $item->convert($1);
271        }
272        elsif($document[$i] =~ m,^\</TABLE\>$,) {
273            print "Error: Found a table end where I didn't expect it!\n";
274            return 0;
275        }
276        $i++;
277    }
278    #print "$myState==7 ? ", $myState==7, "\n";
279    return $myState==7;
280}
281
282# Parse the template file
283sub parseTemplate {
284    my($name) = @_;  # name of the template
285    my($license, $includes, $before, $after, $myState);
286
287    open(TEMPLATE, "<$name") or die "Couldn't open the template: " . $!;
288    # initialize all the template vars
289    $myState=0;
290    $license="";
291    $includes="";
292    $before="";
293    $after="";
294    # read in the information...
295    while(<TEMPLATE>) {
296        if(m/^\#\#\#/) {  # ignore comments
297            next;
298        }
299        if(m/^\@\@license-start\@\@$/) {  # license section
300            $myState=1;
301            next;
302        }
303        if(m/^\@\@license-end\@\@$/) {  # end of license sect.
304            $myState=0;
305            next;
306        }
307        if(m/^\@\@includes-start\@\@$/) {  # includes section
308            $myState=2;
309            next;
310        }
311        if(m/^\@\@includes-end\@\@$/) {  # end of includes sect.
312            $myState=0;
313            next;
314        }
315        if(m/^\@\@namespace-start\@\@$/) {  # namespace (before)
316            $myState=3;
317            next;
318        }
319        if(m/^\@\@generated-code\@\@$/) {  # namespace (after)
320            $myState=4;
321            next;
322        }
323        if(m/^\@\@namespace-end\@\@$/) {  # end of namespace
324            $myState=0;
325            next;
326        }
327
328        if($myState==1) {
329            $license .= $_;
330        }
331        elsif($myState==2) {
332            $includes .= $_;
333        }
334        elsif($myState==3) {
335            $before .= $_;
336        }
337        elsif($myState==4) {
338            $after .= $_;
339        }
340    }
341    close(TEMPLATE) or die $!;
342    return ($license, $includes, $before, $after);
343}
344
345# Removes some structures we can't generate easily.
346# Note: We write out the struct in the header and just
347# comment it out (that you can copy it for a proper impl.).
348sub cleanStructures {
349    my($index, @clean, $done);
350
351    print "Cleaning up...\n";
352    # Feel free to add your "favorites" here
353    # The goal, however, should be to have as much as possible
354    # generated, so try to fix the HTML ;)
355    @clean=("PAPXFKP", "CHPXFKP",
356            "PAPX", "CHPX", "FLD", "PLCF", "STD", "BRC", "PGD", "SEPX",
357	    "FFN", "STSHI", "TBD");
358    foreach (@clean) {
359        $index=0;
360        $done=0;
361        while($index<=$#structs95 && $done==0) {
362            if($structs95[$index]->name eq $_) {
363                print "Removing: ", $structs95[$index]->name, "\n";
364                # Better not really remove, just comment it out by setting "hidden"
365                # That way you can copy the declaration for a real implementation
366                #splice @structs95,$index,1;
367                $structs95[$index]->hidden("//");
368                $done=1;
369            }
370            $index++;
371        }
372    }
373    print "Done.\n";
374}
375
376# Generates the conversion header. trivial code, as we just create declarations
377# like Word97::FOO toWord97(const Word95::FOO &s), where FOO is some struct
378sub generateHeader {
379    my($license, $includes, $before, $after, $myState);
380
381    print "Generating the header file...\n";
382    open(HEADER, ">convert.h") or die "Couldn't open the header for writing: " . $!;
383
384    ($license, $includes, $before, $after) = parseTemplate("template-conv.h");
385
386    # license section...
387    print HEADER $license;
388    print HEADER "\n#ifndef CONVERT_H\n#define CONVERT_H\n\n";
389    # include section...
390    print HEADER "#include <word95_generated.h>\n";
391    print HEADER "#include <word97_generated.h>\n";
392    print HEADER $includes;
393    print HEADER "\nnamespace wvWare {\n\n";
394    print HEADER "namespace Word95 {\n";
395
396    # pre
397    print HEADER $before . "\n";
398    # Fill the empty template
399    print HEADER generateDeclarations();
400    # post
401    print HEADER $after;
402
403    print HEADER "\n} // namespace Word95\n\n";
404    print HEADER "} // namespace wvWare\n\n";
405    print HEADER "#endif // CONVERT_H\n";
406    close(HEADER) or die $!;
407    print "Done.\n";
408}
409
410# This method is used to actually generate the methods with the pattern
411# Word97::FOO toWord97(const Word95::FOO &s), where FOO is some struct
412sub generateDeclarations {
413    my($index, $string, $n, $tmp);
414
415    for($index=0; $index<=$#structs95; $index++) {
416        $n=$structs95[$index]->name;
417	if($structs95[$index]->hidden ne "//") {
418	    for($tmp=0; $tmp<=$#structs97; $tmp++) {
419              if($n eq $structs97[$tmp]->name) {
420	        $string .= "Word97::$n toWord97(const Word95::$n &s);\n";
421	        last;
422              }
423	    }
424	}
425    }
426    return $string;
427}
428
429# This is the tricky part. It first adds all the template stuff and calls the
430# generator method to fill the void ;)
431sub generateImplementation {
432    my($tmp, $license, $includes, $before, $after);
433
434    print "Generating the source file...\n";
435    open(SOURCE, ">convert.cpp") or die "Couldn't open the file for writing: " . $!;
436
437    ($license, $includes, $before, $after) = parseTemplate("template-conv.cpp");
438
439    # license section...
440    print SOURCE $license . "\n";
441    # include section...
442    print SOURCE "#include <convert.h>\n";
443    print SOURCE $includes;
444    print SOURCE "\nnamespace wvWare {\n";
445    print SOURCE "\nnamespace Word95 {\n";
446
447    # pre
448    print SOURCE $before . "\n";
449    # Fill the empty template
450    print SOURCE generateFunctions();
451    # post
452    print SOURCE $after;
453
454    print SOURCE "} // namespace Word95\n";
455    print SOURCE "\n} // namespace wvWare\n";
456    close(SOURCE) or die $!;
457    print "Done.\n";
458
459}
460
461# Creates the empty template for every conversion function
462sub generateFunctions {
463    my($index95, $index97, $string, $n, $h);
464
465    for($index95=0; $index95<=$#structs95; $index95++) {
466        $n=$structs95[$index95]->name;
467        $h=$structs95[$index95]->hidden;
468        for($index97=0; $index97<=$#structs97; $index97++) {
469            if($n eq $structs97[$index97]->name) {
470                if($h eq "//") {
471                    $string .= "/* Please check...\n";
472                }
473                $string .= "Word97::$n toWord97(const Word95::$n &s) {\n\n";
474                $string .= "    Word97::$n ret;\n\n";
475                $string .= generateConversion($index95, $index97);
476                $string .= "\n    return ret;\n";
477                if($h eq "//") {
478                    $string .= "} */\n\n";
479                }
480                else {
481                    $string .= "}\n\n";
482                }
483                last;
484            }
485        }
486    }
487    return $string;
488}
489
490# This method tries to match fields inside structures, using some basic heuristics
491# and hints inside the .html files. Check the documentation at the top of that file
492# for further information about the hints and how to use them
493sub generateConversion {
494    my($index95, $index97)=@_;
495    my($i, $j, @items95, @items97, %result, $tmp1, $tmp2, $string);
496
497    print "Trying to match the fields for " . $structs95[$index95]->name . "\n";
498    if($structs95[$index95]->hidden eq "//") {
499        print "   Note: Hidden structure, implementation will be commented out\n";
500    }
501    @items95=@{$structs95[$index95]->items};
502    @items97=@{$structs97[$index97]->items};
503    # First try to find all "direct" matches (type, name, position)
504    for($i=0; $i<=$#items95 && $i<=$#items97; $i++) {
505        if($items95[$i]->name eq $items97[$i]->name &&
506           $items95[$i]->type eq $items97[$i]->type &&
507           ((defined($items95[$i]->bits) && defined($items97[$i]->bits) &&
508             $items95[$i]->bits eq $items97[$i]->bits) ||
509            (not(defined($items95[$i]->bits)) && not(defined($items97[$i]->bits))))) {
510            #print "   Direct match for " . $items95[$i]->name . "\n";
511            $items95[$i]->matched(1);
512            $items97[$i]->matched(1);
513            $result{$items95[$i]->name}=$i;
514        }
515    }
516    # Then try to check if we find the same name/type at some other position
517    for($i=0; $i<=$#items95; $i++) {
518        if(not(defined($items95[$i]->matched))) {
519            for($j=0; $j<=$#items97; $j++) {
520                if(not(defined($items97[$j]->matched)) &&
521                   $items95[$i]->name eq $items97[$j]->name &&
522                   $items95[$i]->type eq $items97[$j]->type) {
523                    #print "   Indirect match for " . $items95[$i]->name . "\n";
524                    $items95[$i]->matched(1);
525                    $items97[$j]->matched(1);
526                    $result{$items95[$i]->name}=$j;
527                }
528            }
529        }
530    }
531    # Did the "user" add some hints for us?
532    for($i=0; $i<=$#items95; $i++) {
533        if(defined($items95[$i]->convert)) {
534            if($items95[$i]->convert =~ m/^string\((.*)\)$/) {
535                #print "   Hint: string($1)\n";
536                $tmp1=$1;
537                if($tmp1 =~ m/(.*):(\d+)/) {
538                    #print "   Additional length hint: " . $2 . "\n";
539                    $tmp1=$1;
540                    $tmp2=$2;
541                    if($items95[$i]->type =~ m/(.*)\[(.*)\]/) {
542                        #print "   Old type: " . $items95[$i]->type . "\n";
543                        $items95[$i]->type($1 . "[" . $tmp2 . "]");
544                        #print "   New type: " . $items95[$i]->type . "\n";
545                    }
546                }
547                for($j=0; $j<=$#items97; $j++) {
548                    if(not(defined($items97[$j]->matched)) &&
549                       $tmp1 eq $items97[$j]->name) {
550                        #print "   Matched due to string hint: " . $items95[$i]->name . " -> " . $1 . "\n";
551                        $items95[$i]->matched(1);
552                        $items97[$j]->matched(1);
553                        $result{$items95[$i]->name}=$j;
554                    }
555                }
556            }
557            elsif($items95[$i]->convert =~ m/^type$/) {
558                #print "   Hint: type\n";
559                for($j=0; $j<=$#items97; $j++) {
560                    if(not(defined($items97[$j]->matched)) &&
561                       $items95[$i]->name eq $items97[$j]->name) {
562                        #print "   Matched due to type hint: " . $items95[$i]->name . "\n";
563                        $items95[$i]->matched(1);
564                        $items97[$j]->matched(1);
565                        $result{$items95[$i]->name}=$j;
566                    }
567                }
568            }
569            elsif($items95[$i]->convert =~ m/^\((.*)\)$/) {
570                #print "   Hint: ($1)\n";
571                for($j=0; $j<=$#items97; $j++) {
572                    if(not(defined($items97[$j]->matched)) &&
573                       $1 eq $items97[$j]->name) {
574                        #print "   Matched due to mapping hint: " . $items95[$i]->name . " -> " . $1 . "\n";
575                        $items95[$i]->matched(1);
576                        $items97[$j]->matched(1);
577                        $result{$items95[$i]->name}=$j;
578                    }
579                }
580            }
581            elsif($items95[$i]->convert =~ m/^unused$/) {
582                #print "   Hint: unused\n";
583                $items95[$i]->matched(1);
584                $result{$items95[$i]->name}=-42; # unused
585            }
586            else {
587                print "   Hint: Didn't understand this hint.\n";
588            }
589        }
590    }
591    # What's still missing? (Information)
592    foreach(@items95) {
593        if(not(defined($_->matched))) {
594            print "   -> No match for " . $_->name . "\n";
595        }
596    }
597
598    # Now that we have a complete map (hopefully ;) let's generate the code
599    $string="";
600    foreach(@items95) {
601        $i=$result{$_->name};
602        if(not(defined($i)) || $i == -42) {
603            #print "   Skipping item " . $_->name . "\n";
604            next;
605        }
606        $string .= generateMapping($_, $items97[$i]);
607    }
608    return $string;
609}
610
611# Create "one line" of the conversion function. Depending on the type
612# this method has to generate a proper assignment operation.
613sub generateMapping {
614    my($item95, $item97)=@_;
615    my($ret, $tmp);
616
617    # is it a dyn. array we know the size of?
618    if(defined($item95->len) && $item95->len ne "") {
619      $item95->type =~ m/(.*)\[.*\]/;
620      $ret .= "    ret." . $item97->name . "=new " . $1 . "[" . $item95->len . "];\n";
621      $ret .= "    memcpy(rhs." . $item97->name . ", s." . $item95->name . ", sizeof($1)*(" . $item95->len . "));\n";
622    }
623    elsif($item95->type =~ m/(.*)\[(\d+)\]/) {
624      $ret .= "    for(int i=0;i<($2);++i)\n";
625      if(knownType($1)) {
626	$ret .= "        ret." . $item97->name . "[i]=toWord97(s." . $item95->name . "[i]);\n";
627      }
628      else {
629	$ret .= "        ret." . $item97->name . "[i]=s." . $item95->name . "[i];\n";
630      }
631    }
632    elsif(knownType($item95->type)) {
633      $ret .= "    ret." . $item97->name . "=toWord97(s." . $item95->name . ");\n";
634    }
635    else {
636      # "plain" members, no problem here
637      $ret .= "    ret." . $item97->name . "=s." . $item95->name . ";\n";
638    }
639    return $ret;
640}
641
642# Helper method to detect known Word95 structs
643sub knownType {
644    my($name)=@_;
645
646    foreach (@structs95) {
647        if($_->name eq $name) {
648            return 1;
649        }
650    }
651    return 0;
652}
653
654# Read the whole .html file into an array, line by line
655sub readDocument {
656    my($name)=@_;
657    my $ignore=1;
658
659    open(INPUT, "<$name") or die $!;
660
661    while(<INPUT>) {
662        # Detection of start for Word9x
663        if(m,^Structure Definitions\</h[12]\>$,) {
664            $ignore=0;
665        }
666        # Detection of end for Word97
667        elsif(m,^Appendix A - Reading a Macintosh PICT Graphic\</h2\>$,) {
668            $ignore=1;
669        }
670        # Detection of end for Word95
671        elsif(m,^Appendix A - Changes from version 1\.x to 2\.0\</h1\>$,) {
672            $ignore=1;
673        }
674
675        if(!$ignore) {
676            chomp;
677            # convert the important tags we use to uppercase on the fly
678            s,\<tr\>,\<TR\>,;
679            s,\</tr\>,\</TR\>,;
680            s,\<td\>,\<TD\>,;
681            s,\</td\>,\</TD\>,;
682            s,\<table ,\<TABLE ,;
683            s,\</table\>,\</TABLE\>,;
684            s,\<br\>,\<BR\>,;
685            s,\<h3\>,\<H3\>,;
686            s,\</h3\>,\</H3\>,;
687            # get rid of that ugly &nbsp; thingies
688            s/&nbsp;//g;
689
690            push(@document, $_);
691        }
692    }
693    close(INPUT) or die $!;
694}
695
696# Reads the HTML files and converts the "interesting" tags
697# to uppercase. It also cuts of areas we're not interested in
698# from the begin and the end of the file.
699sub main {
700
701    readDocument($ARGV[0]);
702    parseStructures("Word95");
703    $#document=0;
704    readDocument($ARGV[1]);
705    parseStructures("Word97");
706    $#document=0;
707    cleanStructures();    # get rid of stuff we don't want to use
708
709    generateHeader();     # generate the header file
710    generateImplementation(); # generate the source
711}
712
713# We start execution here
714if($#ARGV != 1) {
715    print "Script to generate C++ code to convert Word95 to Word97 structures";
716    print "\nfrom the HTML specs.\n";
717    print "Usage: perl converter.pl spec95.html spec97.html\n";
718    exit(1);
719}
720
721main();
722