1#!/usr/local/bin/perl -w 2use strict; # we at least try to ;) 3use Class::Struct; 4 5# This file is part of the wvWare 2 project 6# Copyright (C) 2001-2003 Werner Trobin <trobin@kde.org> 7 8# This library is free software; you can redistribute it and/or 9# modify it under the terms of the GNU Library General Public 10# License version 2 as published by the Free Software Foundation. 11 12# This library is distributed in the hope that it will be useful, 13# but WITHOUT ANY WARRANTY; without even the implied warranty of 14# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15# Library General Public License for more details. 16 17# You should have received a copy of the GNU Library General Public License 18# along with this library; see the file COPYING.LIB. If not, write to 19# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, 20# Boston, MA 02111-1307, USA. 21 22# A script to generate code which converts Word95 structures to Word97 ones 23# as good as possible. 24# If you add a convert comment to the Word 6 HTML you can "losen" the 25# restrictions a bit: 26# - convert="string(fieldname)" converts between U8[] and XCHAR[] 27# In case you want to limit the string size just write 28# "string(fieldname:XY)" where XY is the length to copy. 29# - convert="type" losens the type restrictions and simply tries to assign 30# even if the types are not exactly the same (U32 <- U16,...) 31# - convert="(fieldname)" relates the fieldnames and losens the type 32# restrictions 33# - convert="unused" skips this field 34 35############################################################################### 36# To discuss with Shaheed: 37# - CHP::chse - I think we should map that to the Word97 CHP::fMacChs (70) 38# - I disabled (unused) DOP::fReadOnlyRecommended and DOP::fWriteReservation 39# as this should normally go into the Word97 FIB, but well, I doubt we need 40# that flags. 
# - I mapped the things like cpnBtePap to the "active" Word97 structures
#   instead of mapping it to the blah_W6 compatibility ones.
# - PAP::fAutoHyph -> ? Maybe it's Word97::PAP::fAutoWrap?
# - PAP::rgdxaTab, PAP::rgtbd?
# - PGD: We should create a PGD2 structure for the 2nd table and have
#   conversion function like for PRM -> PRM2. Then we can map Word95::PGD to
#   the Word97::PGD2. For now I disabled the structure.
# - PHE: What to do with the Height/Line field? Have a union for them?
###############################################################################

# This structure holds one "variable"
struct Item => {
    name           => '$',  # The name of this variable
    type           => '$',  # The type (e.g. U16, S32[42],...)
    bits           => '$',  # The amount of bits (e.g. 3), if any
    comment        => '$',  # The comment for this variable
    initial        => '$',  # The initial value of this field, if any
    len            => '$',  # If the item is a dynamic array we store its length
                            # here. length can be a plain C++ expression.
    compareSizeLHS => '$',  # If the item is a dynamic array we need to compare the
                            # left-hand-side (lhs) and the rhs in their size. This
                            # is a plain C++ expression returning the size of the LHS.
    compareSizeRHS => '$',  # If the item is a dynamic array we need to compare the
                            # left-hand-side (lhs) and the rhs in their size. This
                            # is a plain C++ expression returning the size of the RHS.
    startNew       => '$',  # This field is used for debugging purposes. It
                            # is set to 1 if this variable should start a new
                            # bitfield (and close the last one). We simply
                            # check whether we filled the last field completely here
    matched        => '$',  # This field is used to indicate that this item already was "matched"
    convert        => '$',  # The conversion options - if any
};

# This structure holds one parsed structure of the specification
struct Structure => {
    name    => '$',  # The name of the structure
    comment => '$',  # The comment for this struct
    items   => '@',  # All the data members
    hidden  => '$',  # Set to "//" if we want to comment that structure out
    dynamic => '$',  # Do we have dynamic memory? Then we need a Copy CTOR,
                     # DTOR, assignment op, op==,...
};


# This array of strings contains the whole HTML
# documentation file. It's used twice when reading the spec in
# All the parsing subs will read/modify that global array
# Note: All the tags we use are already converted to
# uppercase.
my @document;

# The current index in the document-array (used during parsing)
my $i;

# These arrays hold all the structures we want to write out
my @structs95;
my @structs97;

# The current struct we're working on (only used during parsing)
my $struct;
# The current item we're working on (only used during parsing)
my $item;

# Parses all the structures found in @document.
#
# Every line containing </H3> is treated as a structure heading. The heading
# text becomes the structure comment, the text inside the last pair of
# parentheses becomes the structure name (whitespace replaced by underscores).
# The <TABLE> following the heading is handed to parseStructure().
#
# Parameter: $doc - either "Word95" or "Word97"; selects the array
#                   (@structs95 / @structs97) the parsed structures go to.
sub parseStructures {
    my ($doc) = @_;

    print "Parsing $doc...\n";
    $i = 0;
    LINE: while ($i <= $#document) {
        my $line = $document[$i];
        if ($line !~ m,\</H3\>,) {
            $i++;
            next LINE;
        }

        # Guard the look-behind: at index 0, $document[-1] would silently
        # be the *last* line of the document.
        my $previous = $i > 0 ? $document[$i-1] : "";
        my $heading;
        if ($previous =~ m/\<H3\>/) {
            # <H3> sits on the previous line, the heading text starts this one
            ($heading) = $line =~ m,^(.*)\</H3\>,;
        }
        elsif ($line =~ m/\<H3\>/) {
            # opening and closing tag on the same line
            ($heading) = $line =~ m,\<H3\>(.*)\</H3\>,;
        }
        else {
            if ($previous !~ m/Algorithm/) {
                # huh? Shouldn't happen at all
                print "####### ERROR #######\n";
                print $previous, "\n", $line, "\n";
            }
            $i++; # don't forget that one here :))
            next LINE;
        }
        $heading = "" unless defined($heading);

        $struct = Structure->new(); # create a new structure element
        $struct->comment($heading);

        # The name of the structure is the text in the last pair of
        # parentheses. Headings without parentheses fall back to the whole
        # heading text (this is what the stale $1 used to deliver implicitly).
        my ($name) = $heading =~ m,.*\((.*)\),;
        $name = $heading unless defined($name);
        $name =~ s/\s/_/g; # replace *all* the spaces with underscores
        $struct->name($name);
        #print "found: name: '", $struct->name, "' comment: '", $struct->comment, "'\n";
        $struct->hidden(""); # initialize that with a sane value

        # Look for the <TABLE> belonging to this heading, but never run
        # past the end of the document (that used to be an endless loop).
        while ($i <= $#document && $document[$i] !~ m,\<TABLE ,) {
            $i++;
        }
        if ($i > $#document) {
            print "####### ERROR #######\n";
            print " No <TABLE> found for name: '", $struct->name, "' comment: '", $struct->comment, "'\n";
            last LINE;
        }

        # parse the <TABLE> we found
        if (parseStructure()) {
            if ($doc eq "Word95") {
                push(@structs95, $struct); # append the new structure
            }
            elsif ($doc eq "Word97") {
                push(@structs97, $struct);
            }
            else {
                print "Error: Word95 or Word97?\n";
            }
        }
        else {
            print "####### ERROR #######\n";
            print " name: '", $struct->name, "' comment: '", $struct->comment, "'\n";
        }
        $i++;
    }
    print "Done.\n";
}

# Parses one structure (<table>...</table>), starting at $document[$i].
# All rows but the first (headline) are parsed via parseItem() and appended
# to the items of the current $struct.
# Returns 1 on success and 0 if an item could not be parsed or the document
# ended before the closing </TABLE>.
sub parseStructure {

    # eat the first row (headline)
    while ($i <= $#document && $document[$i] !~ m,^\<TR\>$,) {
        $i++;
    }
    while ($i <= $#document && $document[$i] !~ m,^\</TR\>$,) {
        $i++;
    }

    # parse all the variables till we encounter </TABLE>
    while ($i <= $#document && $document[$i] !~ m,^\</TABLE\>$,) {
        if (parseItem()) {
            push(@{$struct->items}, $item);
            $i++;
        }
        else {
            print "####### ERROR #######\n";
            print " Error while parsing an item!\n";
            return 0; # uh-oh :}
        }
    }
    if ($i > $#document) {
        # We fell off the end of the document without seeing </TABLE>
        print "####### ERROR #######\n";
        print " Error: The document ended inside a table!\n";
        return 0;
    }
    #print "count: ", $#{$struct->items}+1, "\n";
    return 1; # success
}

# Parses one row of the table (<tr> ... </tr>) to get one
# data item out of it.
# Does some trivial error checking.
#
# The row is read from @document starting at index $i; the result is stored
# in the file-level $item. The <TD> cells are counted in $myState: cell 0 is
# startNew, 2 the name, 3 the type, 4 the bit count (only if it starts with
# a colon) and 6 the comment; cells 1 and 5 are skipped on purpose.
# HTML comments of the form <!-- initial="..." -->,
# <!-- compareSizeLHS="..." compareSizeRHS="..." --> and
# <!-- convert="..." --> inside the row set the respective fields.
# Returns true if exactly 7 cells were found, a false value otherwise (also
# if the document ends in the middle of the row).
sub parseItem {
    my $myState = 0;

    # True if there is no line at that index or if it doesn't contain a <BR>
    my $lacksBreak = sub {
        my ($index) = @_;
        return $index > $#document || $document[$index] !~ m,\<BR\>,;
    };

    # skip everything up to the start of the row
    while ($i <= $#document && $document[$i] !~ m,^\<TR\>$,) {
        $i++;
    }
    if ($i > $#document) {
        print "Error: Found the end of the document while looking for a row!\n";
        return 0;
    }

    $item = Item->new();
    while ($i <= $#document && $document[$i] !~ m,^\</TR\>$,) {
        my $line = $document[$i];
        if ($line =~ m,^\<TD\>(.*)\</TD\>$,) {
            my $cell = $1;
            if ($myState == 0) { # this is used for debugging/sanity checking
                $item->startNew($cell);
                #print " startNew: ", $cell, "\n";
            }
            # yes, I left out $myState==1 on purpose
            elsif ($myState == 2) {
                $item->name($cell);
                #print " name: ", $cell, "\n";
            }
            elsif ($myState == 3) {
                $item->type($cell);
                #print " type: ", $cell, "\n";
            }
            elsif ($myState == 4) {
                # only ":N" denotes a bitfield, everything else is a plain size
                if ($cell =~ m/^:(.*)/) {
                    $item->bits($1);
                    #print " bits: ", $1, "\n";
                }
            }
            # yes, I left out $myState==5 on purpose
            elsif ($myState == 6) {
                $item->comment($cell);
                #print " (short) comment: ", $cell, "\n";
            }
            $myState++;
        }
        # The comment can expand across several lines
        elsif ($line =~ m,^\<TD\>(.*)$, && $myState == 6) {
            my $text = $1;
            # Insert a <BR> for "newlines" (consistency)
            $text .= "<BR>" if $lacksBreak->($i + 1);
            $i++;
            while ($i <= $#document && $document[$i] !~ m,\</TD\>$,) {
                $text .= $document[$i];
                # Insert a <BR> for "newlines" (consistency)
                $text .= "<BR>" if $lacksBreak->($i + 1);
                $i++;
            }
            if ($i > $#document) {
                print "Error: Found the end of the document inside a comment!\n";
                return 0;
            }
            my ($tail) = $document[$i] =~ m,(.*)\</TD\>$,;
            $text .= $tail;
            $item->comment($text);
            #print " (long) comment: ", $text, "\n";
            $myState++;
        }
        elsif ($line =~ m,\<\!--\s*initial=\"(.*?)\"\s*--\>,) {
            #print "initial found: ", $line, " filtered: ", $1, "\n";
            $item->initial($1);
        }
        elsif ($line =~ m,\<\!--\s+compareSizeLHS=\"(.*?)\"\s+compareSizeRHS=\"(.*?)\"\s+--\>,) {
            #print "compareSize found: ", $line, " filtered: ", $1, ", ", $2, "\n";
            $item->compareSizeLHS($1);
            $item->compareSizeRHS($2);
        }
        elsif ($line =~ m,\<\!--\s*convert=\"(.*?)\"\s*--\>,) {
            #print "convert found: ", $line, " filtered: ", $1, "\n";
            $item->convert($1);
        }
        elsif ($line =~ m,^\</TABLE\>$,) {
            print "Error: Found a table end where I didn't expect it!\n";
            return 0;
        }
        $i++;
    }
    if ($i > $#document) {
        print "Error: Found the end of the document inside a row!\n";
        return 0;
    }
    #print "$myState==7 ? ", $myState==7, "\n";
    return $myState == 7;
}

# Parse the template file
#
# Lines starting with "###" are ignored. The remaining lines are sorted into
# four sections, delimited by marker lines:
#   @@license-start@@   ... @@license-end@@     -> license
#   @@includes-start@@  ... @@includes-end@@    -> includes
#   @@namespace-start@@ ... @@generated-code@@  -> code before the generated part
#   @@generated-code@@  ... @@namespace-end@@   -> code after the generated part
# Everything outside of these sections is dropped.
#
# Parameter: $name - the file name of the template
# Returns the list ($license, $includes, $before, $after); dies if the
# template can't be opened or closed.
sub parseTemplate {
    my ($name) = @_; # name of the template

    # Marker line => section it switches to (0 means "outside of any section")
    my @markers = (
        [ qr/^\@\@license-start\@\@$/,   1 ],  # license section
        [ qr/^\@\@license-end\@\@$/,     0 ],  # end of license sect.
        [ qr/^\@\@includes-start\@\@$/,  2 ],  # includes section
        [ qr/^\@\@includes-end\@\@$/,    0 ],  # end of includes sect.
        [ qr/^\@\@namespace-start\@\@$/, 3 ],  # namespace (before)
        [ qr/^\@\@generated-code\@\@$/,  4 ],  # namespace (after)
        [ qr/^\@\@namespace-end\@\@$/,   0 ],  # end of namespace
    );
    # index 0 is a scratch slot for the lines we drop
    my @sections = ("", "", "", "", "");
    my $myState = 0;

    # Three-argument open: the name is taken literally, no mode characters
    # or surrounding whitespace in it are interpreted.
    open(my $template, '<', $name) or die "Couldn't open the template: " . $!;
    LINE: while (my $line = <$template>) {
        next LINE if $line =~ m/^\#\#\#/; # ignore comments
        foreach my $marker (@markers) {
            if ($line =~ $marker->[0]) {
                $myState = $marker->[1];
                next LINE;
            }
        }
        $sections[$myState] .= $line if $myState != 0;
    }
    close($template) or die $!;
    return @sections[1 .. 4];
}

# Removes some structures we can't generate easily.
# Note: We write out the struct in the header and just
# comment it out (that you can copy it for a proper impl.).
sub cleanStructures {
    print "Cleaning up...\n";
    # Feel free to add your "favorites" here
    # The goal, however, should be to have as much as possible
    # generated, so try to fix the HTML ;)
    my @clean = ("PAPXFKP", "CHPXFKP",
                 "PAPX", "CHPX", "FLD", "PLCF", "STD", "BRC", "PGD", "SEPX",
                 "FFN", "STSHI", "TBD");
    foreach my $unwanted (@clean) {
        foreach my $candidate (@structs95) {
            if ($candidate->name eq $unwanted) {
                print "Removing: ", $candidate->name, "\n";
                # Better not really remove, just comment it out by setting "hidden"
                # That way you can copy the declaration for a real implementation
                $candidate->hidden("//");
                last; # only the first structure with that name is hidden
            }
        }
    }
    print "Done.\n";
}

# Generates the conversion header (convert.h). trivial code, as we just create
# declarations like Word97::FOO toWord97(const Word95::FOO &s), where FOO is
# some struct. The surrounding text comes from "template-conv.h".
# Dies if the template or the header can't be opened/closed.
sub generateHeader {
    print "Generating the header file...\n";
    # Read the template first; a missing template then doesn't leave a
    # truncated convert.h behind.
    my ($license, $includes, $before, $after) = parseTemplate("template-conv.h");

    open(my $header, '>', "convert.h") or die "Couldn't open the header for writing: " . $!;

    # license section...
    print {$header} $license;
    print {$header} "\n#ifndef CONVERT_H\n#define CONVERT_H\n\n";
    # include section...
    print {$header} "#include <word95_generated.h>\n";
    print {$header} "#include <word97_generated.h>\n";
    print {$header} $includes;
    print {$header} "\nnamespace wvWare {\n\n";
    print {$header} "namespace Word95 {\n";

    # pre
    print {$header} $before . "\n";
    # Fill the empty template
    print {$header} generateDeclarations();
    # post
    print {$header} $after;

    print {$header} "\n} // namespace Word95\n\n";
    print {$header} "} // namespace wvWare\n\n";
    print {$header} "#endif // CONVERT_H\n";
    close($header) or die $!;
    print "Done.\n";
}

# This method is used to actually generate the methods with the pattern
# Word97::FOO toWord97(const Word95::FOO &s), where FOO is some struct.
# A declaration is written for every Word95 structure which isn't hidden and
# which has a Word97 structure of the same name.
# Returns the declarations as one string (empty if there are none).
sub generateDeclarations {
    my $string = ""; # never return undef, the caller prints the result

    foreach my $struct95 (@structs95) {
        next if $struct95->hidden eq "//";
        my $n = $struct95->name;
        foreach my $struct97 (@structs97) {
            if ($n eq $struct97->name) {
                $string .= "Word97::$n toWord97(const Word95::$n &s);\n";
                last;
            }
        }
    }
    return $string;
}

# This is the tricky part. It first adds all the template stuff (taken from
# "template-conv.cpp") and calls the generator method to fill the void ;)
# The result is written to convert.cpp.
# Dies if the template or the source file can't be opened/closed.
sub generateImplementation {
    print "Generating the source file...\n";
    # Read the template first; a missing template then doesn't leave a
    # truncated convert.cpp behind.
    my ($license, $includes, $before, $after) = parseTemplate("template-conv.cpp");

    open(my $source, '>', "convert.cpp") or die "Couldn't open the file for writing: " . $!;

    # license section...
    print {$source} $license . "\n";
    # include section...
    print {$source} "#include <convert.h>\n";
    print {$source} $includes;
    print {$source} "\nnamespace wvWare {\n";
    print {$source} "\nnamespace Word95 {\n";

    # pre
    print {$source} $before . "\n";
    # Fill the empty template
    print {$source} generateFunctions();
    # post
    print {$source} $after;

    print {$source} "} // namespace Word95\n";
    print {$source} "\n} // namespace wvWare\n";
    close($source) or die $!;
    print "Done.\n";
}

# Creates the conversion function for every Word95 structure which has a
# Word97 structure of the same name. The body is created by
# generateConversion(); functions of hidden structures are wrapped in a
# C comment.
# Returns the functions as one string (empty if there are none).
sub generateFunctions {
    my $string = ""; # never return undef, the caller prints the result

    for my $index95 (0 .. $#structs95) {
        my $n = $structs95[$index95]->name;
        my $isHidden = $structs95[$index95]->hidden eq "//";
        for my $index97 (0 .. $#structs97) {
            next if $n ne $structs97[$index97]->name;

            $string .= "/* Please check...\n" if $isHidden;
            $string .= "Word97::$n toWord97(const Word95::$n &s) {\n\n";
            $string .= " Word97::$n ret;\n\n";
            $string .= generateConversion($index95, $index97);
            $string .= "\n return ret;\n";
            $string .= $isHidden ? "} */\n\n" : "}\n\n";
            last;
        }
    }
    return $string;
}

# Helper for generateConversion: true if both items have the same bit count
# or if none of them has a bit count at all.
sub _sameBits {
    my ($item95, $item97) = @_;

    if (defined($item95->bits) && defined($item97->bits)) {
        return $item95->bits eq $item97->bits;
    }
    return !defined($item95->bits) && !defined($item97->bits);
}

# Helper for generateConversion: returns the index of the first item in the
# array referenced by $items97 which isn't matched yet and has the given
# name (and type, if $type is passed). Returns undef if there is none.
sub _findUnmatched97 {
    my ($items97, $name, $type) = @_;

    for my $index (0 .. $#{$items97}) {
        my $candidate = $items97->[$index];
        next if defined($candidate->matched);
        next if $candidate->name ne $name;
        next if defined($type) && $candidate->type ne $type;
        return $index;
    }
    return;
}

# This method tries to match fields inside structures, using some basic heuristics
# and hints inside the .html files. Check the documentation at the top of that file
# for further information about the hints and how to use them
#
# The matching is done in three passes:
#  1. same name, type and bits at the same position
#  2. same name and type at some other position
#  3. the convert="..." hints of the Word95 items
# Every Word95 item is matched with at most one Word97 item (the first free
# one); the "matched" field of both items is set in this case.
#
# Parameters: $index95 - index of the structure in @structs95
#             $index97 - index of the structure in @structs97
# Returns the C++ assignments (created by generateMapping()) as one string.
sub generateConversion {
    my ($index95, $index97) = @_;
    my $UNUSED = -42; # marks items with the hint convert="unused" in %result
    my %result;       # name of the Word95 item => index of the Word97 item

    print "Trying to match the fields for " . $structs95[$index95]->name . "\n";
    if ($structs95[$index95]->hidden eq "//") {
        print " Note: Hidden structure, implementation will be commented out\n";
    }
    my @items95 = @{$structs95[$index95]->items};
    my @items97 = @{$structs97[$index97]->items};

    # Records that $item95 gets converted to the Word97 item at $pos97
    my $claim = sub {
        my ($item95, $pos97) = @_;
        $item95->matched(1);
        $items97[$pos97]->matched(1);
        $result{$item95->name} = $pos97;
    };

    # First try to find all "direct" matches (type, name, position)
    my $lastCommon = $#items95 < $#items97 ? $#items95 : $#items97;
    for my $pos (0 .. $lastCommon) {
        my ($old, $new) = ($items95[$pos], $items97[$pos]);
        if ($old->name eq $new->name &&
            $old->type eq $new->type &&
            _sameBits($old, $new)) {
            #print " Direct match for " . $old->name . "\n";
            $claim->($old, $pos);
        }
    }

    # Then try to check if we find the same name/type at some other position
    foreach my $old (@items95) {
        next if defined($old->matched);
        my $pos97 = _findUnmatched97(\@items97, $old->name, $old->type);
        if (defined($pos97)) {
            #print " Indirect match for " . $old->name . "\n";
            $claim->($old, $pos97);
        }
    }

    # Did the "user" add some hints for us?
    foreach my $old (@items95) {
        my $hint = $old->convert;
        next if not(defined($hint));

        my $target; # the name of the Word97 field we're looking for
        if ($hint =~ m/^string\((.*)\)$/) {
            #print " Hint: string($1)\n";
            $target = $1;
            if ($target =~ m/(.*):(\d+)/) {
                # Additional length hint: only copy that many characters,
                # so we replace the array size in the type of the item
                my ($field, $length) = ($1, $2);
                $target = $field;
                if ($old->type =~ m/(.*)\[(.*)\]/) {
                    $old->type($1 . "[" . $length . "]");
                }
            }
        }
        elsif ($hint =~ m/^type$/) {
            #print " Hint: type\n";
            $target = $old->name;
        }
        elsif ($hint =~ m/^\((.*)\)$/) {
            #print " Hint: ($1)\n";
            $target = $1;
        }
        elsif ($hint =~ m/^unused$/) {
            #print " Hint: unused\n";
            $old->matched(1);
            $result{$old->name} = $UNUSED;
            next;
        }
        else {
            print " Hint: Didn't understand this hint.\n";
            next;
        }

        # All the remaining hints loosen the type restrictions: the name
        # of the Word97 field is everything we check.
        my $pos97 = _findUnmatched97(\@items97, $target);
        if (defined($pos97)) {
            #print " Matched due to a hint: " . $old->name . " -> " . $target . "\n";
            $claim->($old, $pos97);
        }
    }

    # What's still missing? (Information)
    foreach my $old (@items95) {
        if (not(defined($old->matched))) {
            print " -> No match for " . $old->name . "\n";
        }
    }

    # Now that we have a complete map (hopefully ;) let's generate the code
    my $string = "";
    foreach my $old (@items95) {
        my $pos97 = $result{$old->name};
        if (not(defined($pos97)) || $pos97 == $UNUSED) {
            #print " Skipping item " . $old->name . "\n";
            next;
        }
        $string .= generateMapping($old, $items97[$pos97]);
    }
    return $string;
}

# Create "one line" of the conversion function. Depending on the type
# this method has to generate a proper assignment operation.
# Parameters: $item95 - the Word95 item (source, accessed via "s." in C++)
#             $item97 - the Word97 item (destination, accessed via "ret.")
# Returns the generated C++ statement(s) as string. Four cases are handled:
#  - dynamic arrays with a known length ("len" is set): new[] + memcpy
#  - arrays with a fixed numeric size: a for loop assigning every element,
#    using toWord97() for elements which are known Word95 structures
#  - known Word95 structures: assignment via toWord97()
#  - everything else: plain assignment
sub generateMapping {
    my ($item95, $item97) = @_;
    my $ret = "";

    # is it a dyn. array we know the size of?
    if (defined($item95->len) && $item95->len ne "") {
        # The element type is everything in front of the brackets; fall back
        # to the whole type instead of relying on a stale $1.
        my ($elementType) = $item95->type =~ m/(.*)\[.*\]/;
        $elementType = $item95->type unless defined($elementType);
        $ret .= " ret." . $item97->name . "=new " . $elementType . "[" . $item95->len . "];\n";
        # The generated function only knows "ret" (result) and "s" (source),
        # there is no "rhs" we could copy to.
        $ret .= " memcpy(ret." . $item97->name . ", s." . $item95->name . ", sizeof($elementType)*(" . $item95->len . "));\n";
    }
    elsif ($item95->type =~ m/(.*)\[(\d+)\]/) {
        my ($elementType, $count) = ($1, $2);
        $ret .= " for(int i=0;i<($count);++i)\n";
        if (knownType($elementType)) {
            $ret .= " ret." . $item97->name . "[i]=toWord97(s." . $item95->name . "[i]);\n";
        }
        else {
            $ret .= " ret." . $item97->name . "[i]=s." . $item95->name . "[i];\n";
        }
    }
    elsif (knownType($item95->type)) {
        $ret .= " ret." . $item97->name . "=toWord97(s." . $item95->name . ");\n";
    }
    else {
        # "plain" members, no problem here
        $ret .= " ret." . $item97->name . "=s." . $item95->name . ";\n";
    }
    return $ret;
}

# Helper method to detect known Word95 structs
# Returns 1 if @structs95 contains a structure called $name, 0 otherwise.
sub knownType {
    my ($name) = @_;

    foreach my $known (@structs95) {
        return 1 if $known->name eq $name;
    }
    return 0;
}

# Read the whole .html file into an array, line by line
#
# Only the lines from the "Structure Definitions" heading up to (excluding)
# the "Appendix A" heading are appended to @document. The tags the parser
# looks for are converted to uppercase (only the first occurrence of each
# tag per line) and non-breaking spaces are removed.
# Parameter: $name - the file name of the HTML spec
# Dies if the file can't be opened or closed.
sub readDocument {
    my ($name) = @_;
    my $ignore = 1;

    # Three-argument open: the name is taken literally
    open(my $input, '<', $name) or die $!;

    while (my $line = <$input>) {
        # Detection of start for Word9x
        if ($line =~ m,^Structure Definitions\</h[12]\>$,) {
            $ignore = 0;
        }
        # Detection of end for Word97
        elsif ($line =~ m,^Appendix A - Reading a Macintosh PICT Graphic\</h2\>$,) {
            $ignore = 1;
        }
        # Detection of end for Word95
        elsif ($line =~ m,^Appendix A - Changes from version 1\.x to 2\.0\</h1\>$,) {
            $ignore = 1;
        }
        next if $ignore;

        chomp($line);
        # convert the important tags we use to uppercase on the fly
        $line =~ s,\<tr\>,\<TR\>,;
        $line =~ s,\</tr\>,\</TR\>,;
        $line =~ s,\<td\>,\<TD\>,;
        $line =~ s,\</td\>,\</TD\>,;
        $line =~ s,\<table ,\<TABLE ,;
        $line =~ s,\</table\>,\</TABLE\>,;
        $line =~ s,\<br\>,\<BR\>,;
        $line =~ s,\<h3\>,\<H3\>,;
        $line =~ s,\</h3\>,\</H3\>,;
        # get rid of that ugly thingies: non-breaking spaces, either as
        # entity or as UTF-8 encoded character. Plain blanks have to stay,
        # the parser relies on them (e.g. "<TABLE ").
        # NOTE(review): assumes the "thingies" are non-breaking spaces - confirm
        $line =~ s/&nbsp;|\xC2\xA0//g;

        push(@document, $line);
    }
    close($input) or die $!;
}

# Reads both HTML specs (Word95: $ARGV[0], Word97: $ARGV[1]), parses the
# structures, hides the ones we can't convert automatically and generates
# convert.h and convert.cpp.
sub main {

    readDocument($ARGV[0]);
    parseStructures("Word95");
    # Really empty the array; "$#document=0" would keep the first line
    @document = ();
    readDocument($ARGV[1]);
    parseStructures("Word97");
    @document = ();
    cleanStructures(); # get rid of stuff we don't want to use

    generateHeader(); # generate the header file
    generateImplementation(); # generate the source
}

# We start execution here
if ($#ARGV != 1) {
    print "Script to generate C++ code to convert Word95 to Word97 structures";
    print "\nfrom the HTML specs.\n";
    print "Usage: perl converter.pl spec95.html spec97.html\n";
    exit(1);
}

main();