1############################################################################## 2# 3# This library is free software; you can redistribute it and/or 4# modify it under the terms of the GNU Library General Public 5# License as published by the Free Software Foundation; either 6# version 2 of the License, or (at your option) any later version. 7# 8# This library is distributed in the hope that it will be useful, 9# but WITHOUT ANY WARRANTY; without even the implied warranty of 10# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11# Library General Public License for more details. 12# 13# You should have received a copy of the GNU Library General Public 14# License along with this library; if not, write to the 15# Free Software Foundation, Inc., 59 Temple Place - Suite 330, 16# Boston, MA 02111-1307, USA. 17# 18# Jabber 19# Copyright (C) 1998-2004 Jabber Software Foundation http://jabber.org/ 20# 21############################################################################## 22 23package XML::Stream::Parser::DTD; 24 25=head1 NAME 26 27 XML::Stream::Parser::DTD - XML DTD Parser and Verifier 28 29=head1 SYNOPSIS 30 31 This is a work in progress. I had need for a DTD parser and verifier 32 and so am working on it here. If you are reading this then you are 33 snooping. =) 34 35=head1 DESCRIPTION 36 37 This module provides the initial code for a DTD parser and verifier. 38 39=head1 METHODS 40 41=head1 EXAMPLES 42 43=head1 AUTHOR 44 45By Ryan Eatmon in February of 2001 for http://jabber.org/ 46 47Currently maintained by Darian Anthony Patrick. 48 49=head1 COPYRIGHT 50 51Copyright (C) 1998-2004 Jabber Software Foundation http://jabber.org/ 52 53This module licensed under the LGPL, version 2.1. 54 55=cut 56 57use strict; 58use warnings; 59use vars qw( $VERSION ); 60 61$VERSION = "1.24"; 62 63sub new 64{ 65 my $self = { }; 66 67 bless($self); 68 69 my %args; 70 while($#_ >= 0) { $args{ lc pop(@_) } = pop(@_); } 71 72 $self->{URI} = $args{uri}; 73 74 $self->{PARSING} = 0; 75 $self->{DOC} = 0; 76 $self->{XML} = ""; 77 $self->{CNAME} = (); 78 $self->{CURR} = 0; 79 80 $self->{ENTITY}->{"<"} = "<"; 81 $self->{ENTITY}->{">"} = ">"; 82 $self->{ENTITY}->{"""} = "\""; 83 $self->{ENTITY}->{"'"} = "'"; 84 $self->{ENTITY}->{"&"} = "&"; 85 86use Scalar::Util qw(weaken); 87 my $weak = $self; 88 weaken $weak; 89 $self->{HANDLER}->{startDocument} = sub{ $weak->startDocument(@_); }; 90 $self->{HANDLER}->{endDocument} = sub{ $weak->endDocument(@_); }; 91 $self->{HANDLER}->{startElement} = sub{ $weak->startElement(@_); }; 92 $self->{HANDLER}->{endElement} = sub{ $weak->endElement(@_); }; 93 94 $self->{STYLE} = "debug"; 95 96 open(DTD,$args{uri}); 97 my $dtd = join("",<DTD>); 98 close(DTD); 99 100 $self->parse($dtd); 101 102 return $self; 103} 104 105 106sub parse 107{ 108 my $self = shift; 109 my $xml = shift; 110 111 while($xml =~ s/<\!--.*?-->//gs) {} 112 while($xml =~ s/\n//g) {} 113 114 $self->{XML} .= $xml; 115 116 return if ($self->{PARSING} == 1); 117 118 $self->{PARSING} = 1; 119 120 if(!$self->{DOC} == 1) 121 { 122 my $start = index($self->{XML},"<"); 123 124 if (substr($self->{XML},$start,3) =~ /^<\?x$/i) 125 { 126 my $close = index($self->{XML},"?>"); 127 if ($close == -1) 128 { 129 $self->{PARSING} = 0; 130 return; 131 } 132 $self->{XML} = substr($self->{XML},$close+2,length($self->{XML})-$close-2); 133 } 134 135 &{$self->{HANDLER}->{startDocument}}($self); 136 $self->{DOC} = 1; 137 } 138 139 while(1) 140 { 141 142 if (length($self->{XML}) == 0) 143 { 144 $self->{PARSING} = 0; 145 return; 146 } 147 148 my $estart = index($self->{XML},"<"); 149 if ($estart == -1) 150 { 151 $self->{PARSING} = 0; 152 return; 153 } 154 155 my $close = index($self->{XML},">"); 156 my $dtddata = substr($self->{XML},$estart+1,$close-1); 157 my $nextspace = index($dtddata," "); 158 my $attribs; 159 160 my $type = substr($dtddata,0,$nextspace); 161 $dtddata = substr($dtddata,$nextspace+1,length($dtddata)-$nextspace-1); 162 $nextspace = index($dtddata," "); 163 164 if ($type eq "!ENTITY") 165 { 166 $self->entity($type,$dtddata); 167 } 168 else 169 { 170 my $tag = substr($dtddata,0,$nextspace); 171 $dtddata = substr($dtddata,$nextspace+1,length($dtddata)-$nextspace-1); 172 $nextspace = index($dtddata," "); 173 174 $self->element($type,$tag,$dtddata) if ($type eq "!ELEMENT"); 175 $self->attlist($type,$tag,$dtddata) if ($type eq "!ATTLIST"); 176 } 177 178 $self->{XML} = substr($self->{XML},$close+1,length($self->{XML})-$close-1); 179 next; 180 } 181} 182 183 184sub startDocument 185{ 186 my $self = shift; 187} 188 189 190sub endDocument 191{ 192 my $self = shift; 193} 194 195 196sub entity 197{ 198 my $self = shift; 199 my ($type, $data) = @_; 200 201 foreach my $entity (keys(%{$self->{ENTITY}})) 202 { 203 $data =~ s/$entity/$self->{ENTITY}->{$entity}/g; 204 } 205 206 my ($symbol,$tag,undef,$string) = ($data =~ /^\s*(\S+)\s+(\S+)\s+(\"|\')([^\3]*)\3\s*$/); 207 $self->{ENTITY}->{"${symbol}${tag}\;"} = $string; 208} 209 210sub element 211{ 212 my $self = shift; 213 my ($type, $tag, $data) = @_; 214 215 foreach my $entity (keys(%{$self->{ENTITY}})) 216 { 217 $data =~ s/$entity/$self->{ENTITY}->{$entity}/g; 218 } 219 220 $self->{COUNTER}->{$tag} = 0 unless exists($self->{COUNTER}->{$tag}); 221 222 $self->parsegrouping($tag,\$self->{ELEMENT}->{$tag},$data); 223 $self->flattendata(\$self->{ELEMENT}->{$tag}); 224 225} 226 227 228sub flattendata 229{ 230 my $self = shift; 231 my $dstr = shift; 232 233 if ($$dstr->{type} eq "list") 234 { 235 foreach my $index (0..$#{$$dstr->{list}}) 236 { 237 $self->flattendata(\$$dstr->{list}->[$index]); 238 } 239 240 if (!exists($$dstr->{repeat}) && ($#{$$dstr->{list}} == 0)) 241 { 242 $$dstr = $$dstr->{list}->[0]; 243 } 244 } 245} 246 247sub parsegrouping 248{ 249 my $self = shift; 250 my ($tag,$dstr,$data) = @_; 251 252 $data =~ s/^\s*//; 253 $data =~ s/\s*$//; 254 255 if ($data =~ /[\*\+\?]$/) 256 { 257 ($$dstr->{repeat}) = ($data =~ /(.)$/); 258 $data =~ s/.$//; 259 } 260 261 if ($data =~ /^\(.*\)$/) 262 { 263 my ($seperator) = ($data =~ /^\(\s*\S+\s*(\,|\|)/); 264 $$dstr->{ordered} = "yes" if ($seperator eq ","); 265 $$dstr->{ordered} = "no" if ($seperator eq "|"); 266 267 my $count = 0; 268 $$dstr->{type} = "list"; 269 foreach my $grouping ($self->groupinglist($data,$seperator)) 270 { 271 $self->parsegrouping($tag,\$$dstr->{list}->[$count],$grouping); 272 $count++; 273 } 274 } 275 else 276 { 277 $$dstr->{type} = "element"; 278 $$dstr->{element} = $data; 279 $self->{COUNTER}->{$data} = 0 unless exists($self->{COUNTER}->{$data}); 280 $self->{COUNTER}->{$data}++; 281 $self->{CHILDREN}->{$tag}->{$data} = 1; 282 } 283} 284 285 286sub attlist 287{ 288 my $self = shift; 289 my ($type, $tag, $data) = @_; 290 291 foreach my $entity (keys(%{$self->{ENTITY}})) 292 { 293 $data =~ s/$entity/$self->{ENTITY}->{$entity}/g; 294 } 295 296 while($data ne "") 297 { 298 my ($att) = ($data =~ /^\s*(\S+)/); 299 $data =~ s/^\s*\S+\s*//; 300 301 my $value; 302 if ($data =~ /^\(/) 303 { 304 $value = $self->getgrouping($data); 305 $data = substr($data,length($value)+1,length($data)); 306 $data =~ s/^\s*//; 307 $self->{ATTLIST}->{$tag}->{$att}->{type} = "list"; 308 foreach my $val (split(/\s*\|\s*/,substr($value,1,length($value)-2))) { 309$self->{ATTLIST}->{$tag}->{$att}->{value}->{$val} = 1; 310 } 311 } 312 else 313 { 314 ($value) = ($data =~ /^(\S+)/); 315 $data =~ s/^\S+\s*//; 316 $self->{ATTLIST}->{$tag}->{$att}->{type} = $value; 317 } 318 319 my $default; 320 if ($data =~ /^\"|^\'/) 321 { 322 my($sq,$val) = ($data =~ /^(\"|\')([^\"\']*)\1/); 323 $default = $val; 324 $data =~ s/^$sq$val$sq\s*//; 325 } 326 else 327 { 328 ($default) = ($data =~ /^(\S+)/); 329 $data =~ s/^\S+\s*//; 330 } 331 332 $self->{ATTLIST}->{$tag}->{$att}->{default} = $default; 333 } 334} 335 336 337 338sub getgrouping 339{ 340 my $self = shift; 341 my ($data) = @_; 342 343 my $count = 0; 344 my $parens = 0; 345 foreach my $char (split("",$data)) 346 { 347 $parens++ if ($char eq "("); 348 $parens-- if ($char eq ")"); 349 $count++; 350 last if ($parens == 0); 351 } 352 return substr($data,0,$count); 353} 354 355 356sub groupinglist 357{ 358 my $self = shift; 359 my ($grouping,$seperator) = @_; 360 361 my @list; 362 my $item = ""; 363 my $parens = 0; 364 my $word = ""; 365 $grouping = substr($grouping,1,length($grouping)-2) if ($grouping =~ /^\(/); 366 foreach my $char (split("",$grouping)) 367 { 368 $parens++ if ($char eq "("); 369 $parens-- if ($char eq ")"); 370 if (($parens == 0) && ($char eq $seperator)) 371 { 372 push(@list,$word); 373 $word = ""; 374 } 375 else 376 { 377 $word .= $char; 378 } 379 } 380 push(@list,$word) unless ($word eq ""); 381 return @list; 382} 383 384 385sub root 386{ 387 my $self = shift; 388 my $tag = shift; 389 my @root; 390 foreach my $tag (keys(%{$self->{COUNTER}})) 391 { 392 push(@root,$tag) if ($self->{COUNTER}->{$tag} == 0); 393 } 394 395 print "ERROR: Too many root tags... Check the DTD...\n" 396 if ($#root > 0); 397 return $root[0]; 398} 399 400 401sub children 402{ 403 my $self = shift; 404 my ($tag,$tree) = @_; 405 406 return unless exists ($self->{CHILDREN}->{$tag}); 407 return if (exists($self->{CHILDREN}->{$tag}->{EMPTY})); 408 if (defined($tree)) 409 { 410 my @current; 411 foreach my $current (&XML::Stream::GetXMLData("tree array",$tree,"*","","")) 412 { 413 push(@current,$$current[0]); 414 } 415 return $self->allowedchildren($self->{ELEMENT}->{$tag},\@current); 416 } 417 return $self->allowedchildren($self->{ELEMENT}->{$tag}); 418} 419 420 421sub allowedchildren 422{ 423 my $self = shift; 424 my ($dstr,$current) = @_; 425 426 my @allowed; 427 428 if ($dstr->{type} eq "element") 429 { 430 my $test = (defined($current) && $#{@{$current}} > -1) ? $$current[0] : ""; 431 shift(@{$current}) if ($dstr->{element} eq $test); 432 if ($self->repeatcheck($dstr,$test) == 1) 433 { 434 return $dstr->{element}; 435 } 436 } 437 else 438 { 439 foreach my $index (0..$#{$dstr->{list}}) 440 { 441 push(@allowed,$self->allowedchildren($dstr->{list}->[$index],$current)); 442 } 443 } 444 445 return @allowed; 446} 447 448 449sub repeatcheck 450{ 451 my $self = shift; 452 my ($dstr,$tag) = @_; 453 454 $dstr = $self->{ELEMENT}->{$dstr} if exists($self->{ELEMENT}->{$dstr}); 455 456# print "repeatcheck: tag($tag)\n"; 457# print "repeatcheck: repeat($dstr->{repeat})\n" 458# if exists($dstr->{repeat}); 459 460 my $return = 0; 461 $return = ((!defined($tag) || 462 ($tag eq $dstr->{element})) ? 463 0 : 464 1) 465 if (!exists($dstr->{repeat}) || 466 ($dstr->{repeat} eq "?")); 467 $return = ((defined($tag) || 468 (exists($dstr->{ordered}) && 469 ($dstr->{ordered} eq "yes"))) ? 470 1 : 471 0) 472 if (exists($dstr->{repeat}) && 473 (($dstr->{repeat} eq "+") || 474 ($dstr->{repeat} eq "*"))); 475 476# print "repeatcheck: return($return)\n"; 477 return $return; 478} 479 480 481sub required 482{ 483 my $self = shift; 484 my ($dstr,$tag,$count) = @_; 485 486 $dstr = $self->{ELEMENT}->{$dstr} if exists($self->{ELEMENT}->{$dstr}); 487 488 if ($dstr->{type} eq "element") 489 { 490 return 0 if ($dstr->{element} ne $tag); 491 return 1 if !exists($dstr->{repeat}); 492 return 1 if (($dstr->{repeat} eq "+") && ($count == 1)) ; 493 } 494 else 495 { 496 return 0 if (($dstr->{repeat} eq "*") || ($dstr->{repeat} eq "?")); 497 my $test = 0; 498 foreach my $index (0..$#{$dstr->{list}}) 499 { 500 $test = $test | $self->required($dstr->{list}->[$index],$tag,$count); 501 } 502 return $test; 503 } 504 return 0; 505} 506 507 508sub addchild 509{ 510 my $self = shift; 511 my ($tag,$child,$tree) = @_; 512 513# print "addchild: tag($tag) child($child)\n"; 514 515 my @current; 516 if (defined($tree)) 517 { 518# &Net::Jabber::printData("\$tree",$tree); 519 520 @current = &XML::Stream::GetXMLData("index array",$tree,"*","",""); 521 522# &Net::Jabber::printData("\$current",\@current); 523 } 524 525 my @newBranch = $self->addchildrecurse($self->{ELEMENT}->{$tag},$child,\@current); 526 527 return $tree unless ("@newBranch" ne ""); 528 529# &Net::Jabber::printData("\$newBranch",\@newBranch); 530 531 my $location = shift(@newBranch); 532 533 if ($location eq "end") 534 { 535 splice(@{$$tree[1]},@{$$tree[1]},0,@newBranch); 536 } 537 else 538 { 539 splice(@{$$tree[1]},$location,0,@newBranch); 540 } 541 return $tree; 542} 543 544 545sub addcdata 546{ 547 my $self = shift; 548 my ($tag,$child,$tree) = @_; 549 550# print "addchild: tag($tag) child($child)\n"; 551 552 my @current; 553 if (defined($tree)) 554 { 555# &Net::Jabber::printData("\$tree",$tree); 556 557 @current = &XML::Stream::GetXMLData("index array",$tree,"*","",""); 558 559# &Net::Jabber::printData("\$current",\@current); 560 } 561 562 my @newBranch = $self->addchildrecurse($self->{ELEMENT}->{$tag},$child,\@current); 563 564 return $tree unless ("@newBranch" ne ""); 565 566# &Net::Jabber::printData("\$newBranch",\@newBranch); 567 568 my $location = shift(@newBranch); 569 570 if ($location eq "end") 571 { 572 splice(@{$$tree[1]},@{$$tree[1]},0,@newBranch); 573 } 574 else 575 { 576 splice(@{$$tree[1]},$location,0,@newBranch); 577 } 578 return $tree; 579} 580 581 582sub addchildrecurse 583{ 584 my $self = shift; 585 my ($dstr,$child,$current) = @_; 586 587# print "addchildrecurse: child($child) type($dstr->{type})\n"; 588 589 if ($dstr->{type} eq "element") 590 { 591# print "addchildrecurse: tag($dstr->{element})\n"; 592 my $count = 0; 593 while(($#{@{$current}} > -1) && ($dstr->{element} eq $$current[0])) 594 { 595 shift(@{$current}); 596 shift(@{$current}); 597 $count++; 598 } 599 if (($dstr->{element} eq $child) && 600 ($self->repeatcheck($dstr,(($count > 0) ? $child : "")) == 1)) 601 { 602 my @return = ( "end" , $self->newbranch($child)); 603 @return = ($$current[1], $self->newbranch($child)) 604 if ($#{@{$current}} > -1); 605# print "addchildrecurse: Found the spot! (",join(",",@return),")\n"; 606 607 return @return; 608 } 609 } 610 else 611 { 612 foreach my $index (0..$#{$dstr->{list}}) 613 { 614 my @newBranch = $self->addchildrecurse($dstr->{list}->[$index],$child,$current); 615 return @newBranch if ("@newBranch" ne ""); 616 } 617 } 618# print "Let's blow....\n"; 619 return; 620} 621 622 623sub deletechild 624{ 625 my $self = shift; 626 my ($tag,$parent,$parenttree,$tree) = @_; 627 628 return $tree unless exists($self->{ELEMENT}->{$tag}); 629 return $tree if $self->required($parent,$tag,&XML::Stream::GetXMLData("count",$parenttree,$tag)); 630 631 return []; 632} 633 634 635 636sub newbranch 637{ 638 my $self = shift; 639 my $tag = shift; 640 641 $tag = $self->root() unless defined($tag); 642 643 my @tree = (); 644 645 return ("0","") if ($tag eq "#PCDATA"); 646 647 push(@tree,$tag); 648 push(@tree,[ {} ]); 649 650 foreach my $att ($self->attribs($tag)) 651 { 652 $tree[1]->[0]->{$att} = "" 653 if (($self->{ATTLIST}->{$tag}->{$att}->{default} eq "#REQUIRED") && 654 ($self->{ATTLIST}->{$tag}->{$att}->{type} eq "CDATA")); 655 } 656 657 push(@{$tree[1]},$self->recursebranch($self->{ELEMENT}->{$tag})); 658 return @tree; 659} 660 661 662sub recursebranch 663{ 664 my $self = shift; 665 my $dstr = shift; 666 667 my @tree; 668 if (($dstr->{type} eq "element") && 669 ($dstr->{element} ne "EMPTY")) 670 { 671 @tree = $self->newbranch($dstr->{element}) 672 if (!exists($dstr->{repeat}) || 673 ($dstr->{repeat} eq "+")); 674 } 675 else 676 { 677 foreach my $index (0..$#{$dstr->{list}}) 678 { 679 push(@tree,$self->recursebranch($dstr->{list}->[$index])) 680if (!exists($dstr->{repeat}) || 681 ($dstr->{repeat} eq "+")); 682 } 683 } 684 return @tree; 685} 686 687 688sub attribs 689{ 690 my $self = shift; 691 my ($tag,$tree) = @_; 692 693 return unless exists ($self->{ATTLIST}->{$tag}); 694 695 if (defined($tree)) 696 { 697 my %current = &XML::Stream::GetXMLData("attribs",$tree,"","",""); 698 return $self->allowedattribs($tag,\%current); 699 } 700 return $self->allowedattribs($tag); 701} 702 703 704sub allowedattribs 705{ 706 my $self = shift; 707 my ($tag,$current) = @_; 708 709 my %allowed; 710 foreach my $att (keys(%{$self->{ATTLIST}->{$tag}})) 711 { 712 $allowed{$att} = 1 unless (defined($current) && 713 exists($current->{$att})); 714 } 715 return sort {$a cmp $b} keys(%allowed); 716} 717 718 719sub attribvalue 720{ 721 my $self = shift; 722 my $tag = shift; 723 my $att = shift; 724 725 return $self->{ATTLIST}->{$tag}->{$att}->{type} 726 if ($self->{ATTLIST}->{$tag}->{$att}->{type} ne "list"); 727 return sort {$a cmp $b} keys(%{$self->{ATTLIST}->{$tag}->{$att}->{value}}); 728} 729 730 731sub addattrib 732{ 733 my $self = shift; 734 my ($tag,$att,$tree) = @_; 735 736 return $tree unless exists($self->{ATTLIST}->{$tag}); 737 return $tree unless exists($self->{ATTLIST}->{$tag}->{$att}); 738 739 my $default = $self->{ATTLIST}->{$tag}->{$att}->{default}; 740 $default = "" if ($default eq "#REQUIRED"); 741 $default = "" if ($default eq "#IMPLIED"); 742 743 $$tree[1]->[0]->{$att} = $default; 744 745 return $tree; 746} 747 748 749sub attribrequired 750{ 751 my $self = shift; 752 my ($tag,$att) = @_; 753 754 return 0 unless exists($self->{ATTLIST}->{$tag}); 755 return 0 unless exists($self->{ATTLIST}->{$tag}->{$att}); 756 757 return 1 if ($self->{ATTLIST}->{$tag}->{$att}->{default} eq "#REQUIRED"); 758 return 0; 759} 760 761 762sub deleteattrib 763{ 764 my $self = shift; 765 my ($tag,$att,$tree) = @_; 766 767 return $tree unless exists($self->{ATTLIST}->{$tag}); 768 return $tree unless exists($self->{ATTLIST}->{$tag}->{$att}); 769 770 return if $self->attribrequired($tag,$att); 771 772 delete($$tree[1]->[0]->{$att}); 773 774 return $tree; 775} 776 777