1##############################################################################
2#
3#  This library is free software; you can redistribute it and/or
4#  modify it under the terms of the GNU Library General Public
5#  License as published by the Free Software Foundation; either
6#  version 2 of the License, or (at your option) any later version.
7#
8#  This library is distributed in the hope that it will be useful,
9#  but WITHOUT ANY WARRANTY; without even the implied warranty of
10#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11#  Library General Public License for more details.
12#
13#  You should have received a copy of the GNU Library General Public
14#  License along with this library; if not, write to the
15#  Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16#  Boston, MA  02111-1307, USA.
17#
18#  Jabber
19#  Copyright (C) 1998-2004 Jabber Software Foundation http://jabber.org/
20#
21##############################################################################
22
23package XML::Stream::Parser::DTD;
24
25=head1 NAME
26
27  XML::Stream::Parser::DTD - XML DTD Parser and Verifier
28
29=head1 SYNOPSIS
30
31  This is a work in progress.  I had need for a DTD parser and verifier
32  and so am working on it here.  If you are reading this then you are
33  snooping.  =)
34
35=head1 DESCRIPTION
36
37  This module provides the initial code for a DTD parser and verifier.
38
39=head1 METHODS
40
41=head1 EXAMPLES
42
43=head1 AUTHOR
44
45By Ryan Eatmon in February of 2001 for http://jabber.org/
46
47Currently maintained by Darian Anthony Patrick.
48
49=head1 COPYRIGHT
50
51Copyright (C) 1998-2004 Jabber Software Foundation http://jabber.org/
52
53This module licensed under the LGPL, version 2.1.
54
55=cut
56
57use strict;
58use warnings;
59use vars qw( $VERSION );
60
61$VERSION = "1.24";
62
63sub new
64{
65    my $self = { };
66
67    bless($self);
68
69    my %args;
70    while($#_ >= 0) { $args{ lc pop(@_) } = pop(@_); }
71
72    $self->{URI} = $args{uri};
73
74    $self->{PARSING} = 0;
75    $self->{DOC} = 0;
76    $self->{XML} = "";
77    $self->{CNAME} = ();
78    $self->{CURR} = 0;
79
80    $self->{ENTITY}->{"&lt;"} = "<";
81    $self->{ENTITY}->{"&gt;"} = ">";
82    $self->{ENTITY}->{"&quot;"} = "\"";
83    $self->{ENTITY}->{"&apos;"} = "'";
84    $self->{ENTITY}->{"&amp;"} = "&";
85
86use Scalar::Util qw(weaken);
87    my $weak = $self;
88    weaken $weak;
89    $self->{HANDLER}->{startDocument} = sub{ $weak->startDocument(@_); };
90    $self->{HANDLER}->{endDocument} = sub{ $weak->endDocument(@_); };
91    $self->{HANDLER}->{startElement} = sub{ $weak->startElement(@_); };
92    $self->{HANDLER}->{endElement} = sub{ $weak->endElement(@_); };
93
94    $self->{STYLE} = "debug";
95
96    open(DTD,$args{uri});
97    my $dtd = join("",<DTD>);
98    close(DTD);
99
100    $self->parse($dtd);
101
102    return $self;
103}
104
105
106sub parse
107{
108    my $self = shift;
109    my $xml = shift;
110
111    while($xml =~ s/<\!--.*?-->//gs) {}
112    while($xml =~ s/\n//g) {}
113
114    $self->{XML} .= $xml;
115
116    return if ($self->{PARSING} == 1);
117
118    $self->{PARSING} = 1;
119
120    if(!$self->{DOC} == 1)
121    {
122        my $start = index($self->{XML},"<");
123
124        if (substr($self->{XML},$start,3) =~ /^<\?x$/i)
125        {
126            my $close = index($self->{XML},"?>");
127            if ($close == -1)
128            {
129                $self->{PARSING} = 0;
130                return;
131            }
132            $self->{XML} = substr($self->{XML},$close+2,length($self->{XML})-$close-2);
133        }
134
135        &{$self->{HANDLER}->{startDocument}}($self);
136        $self->{DOC} = 1;
137    }
138
139    while(1)
140    {
141
142        if (length($self->{XML}) == 0)
143        {
144            $self->{PARSING} = 0;
145            return;
146        }
147
148        my $estart = index($self->{XML},"<");
149        if ($estart == -1)
150        {
151            $self->{PARSING} = 0;
152            return;
153        }
154
155        my $close = index($self->{XML},">");
156        my $dtddata = substr($self->{XML},$estart+1,$close-1);
157        my $nextspace = index($dtddata," ");
158        my $attribs;
159
160        my $type = substr($dtddata,0,$nextspace);
161        $dtddata = substr($dtddata,$nextspace+1,length($dtddata)-$nextspace-1);
162        $nextspace = index($dtddata," ");
163
164        if ($type eq "!ENTITY")
165        {
166            $self->entity($type,$dtddata);
167        }
168        else
169        {
170            my $tag = substr($dtddata,0,$nextspace);
171            $dtddata = substr($dtddata,$nextspace+1,length($dtddata)-$nextspace-1);
172            $nextspace = index($dtddata," ");
173
174            $self->element($type,$tag,$dtddata) if ($type eq "!ELEMENT");
175            $self->attlist($type,$tag,$dtddata) if ($type eq "!ATTLIST");
176        }
177
178        $self->{XML} = substr($self->{XML},$close+1,length($self->{XML})-$close-1);
179        next;
180    }
181}
182
183
184sub startDocument
185{
186    my $self = shift;
187}
188
189
190sub endDocument
191{
192    my $self = shift;
193}
194
195
196sub entity
197{
198    my $self = shift;
199    my ($type, $data) = @_;
200
201    foreach my $entity (keys(%{$self->{ENTITY}}))
202    {
203        $data =~ s/$entity/$self->{ENTITY}->{$entity}/g;
204    }
205
206    my ($symbol,$tag,undef,$string) = ($data =~ /^\s*(\S+)\s+(\S+)\s+(\"|\')([^\3]*)\3\s*$/);
207    $self->{ENTITY}->{"${symbol}${tag}\;"} = $string;
208}
209
210sub element
211{
212    my $self = shift;
213    my ($type, $tag, $data) = @_;
214
215    foreach my $entity (keys(%{$self->{ENTITY}}))
216    {
217        $data =~ s/$entity/$self->{ENTITY}->{$entity}/g;
218    }
219
220    $self->{COUNTER}->{$tag} = 0 unless exists($self->{COUNTER}->{$tag});
221
222    $self->parsegrouping($tag,\$self->{ELEMENT}->{$tag},$data);
223    $self->flattendata(\$self->{ELEMENT}->{$tag});
224
225}
226
227
228sub flattendata
229{
230    my $self = shift;
231    my $dstr = shift;
232
233    if ($$dstr->{type} eq "list")
234    {
235        foreach my $index (0..$#{$$dstr->{list}})
236        {
237            $self->flattendata(\$$dstr->{list}->[$index]);
238        }
239
240        if (!exists($$dstr->{repeat}) && ($#{$$dstr->{list}} == 0))
241        {
242            $$dstr = $$dstr->{list}->[0];
243        }
244    }
245}
246
247sub parsegrouping
248{
249    my $self = shift;
250    my ($tag,$dstr,$data) = @_;
251
252    $data =~ s/^\s*//;
253    $data =~ s/\s*$//;
254
255    if ($data =~ /[\*\+\?]$/)
256    {
257        ($$dstr->{repeat}) = ($data =~ /(.)$/);
258        $data =~ s/.$//;
259    }
260
261    if ($data =~ /^\(.*\)$/)
262    {
263        my ($seperator) = ($data =~ /^\(\s*\S+\s*(\,|\|)/);
264        $$dstr->{ordered} = "yes" if ($seperator eq ",");
265        $$dstr->{ordered} = "no" if ($seperator eq "|");
266
267        my $count = 0;
268        $$dstr->{type} = "list";
269        foreach my $grouping ($self->groupinglist($data,$seperator))
270        {
271            $self->parsegrouping($tag,\$$dstr->{list}->[$count],$grouping);
272            $count++;
273        }
274    }
275    else
276    {
277        $$dstr->{type} = "element";
278        $$dstr->{element} = $data;
279        $self->{COUNTER}->{$data} = 0 unless exists($self->{COUNTER}->{$data});
280        $self->{COUNTER}->{$data}++;
281        $self->{CHILDREN}->{$tag}->{$data} = 1;
282    }
283}
284
285
286sub attlist
287{
288    my $self = shift;
289    my ($type, $tag, $data) = @_;
290
291    foreach my $entity (keys(%{$self->{ENTITY}}))
292    {
293        $data =~ s/$entity/$self->{ENTITY}->{$entity}/g;
294    }
295
296    while($data ne "")
297    {
298        my ($att) = ($data =~ /^\s*(\S+)/);
299        $data =~ s/^\s*\S+\s*//;
300
301        my $value;
302        if ($data =~ /^\(/)
303        {
304            $value = $self->getgrouping($data);
305            $data = substr($data,length($value)+1,length($data));
306            $data =~ s/^\s*//;
307            $self->{ATTLIST}->{$tag}->{$att}->{type} = "list";
308            foreach my $val (split(/\s*\|\s*/,substr($value,1,length($value)-2))) {
309$self->{ATTLIST}->{$tag}->{$att}->{value}->{$val} = 1;
310            }
311        }
312        else
313        {
314            ($value) = ($data =~ /^(\S+)/);
315            $data =~ s/^\S+\s*//;
316            $self->{ATTLIST}->{$tag}->{$att}->{type} = $value;
317        }
318
319        my $default;
320        if ($data =~ /^\"|^\'/)
321        {
322            my($sq,$val) = ($data =~ /^(\"|\')([^\"\']*)\1/);
323            $default = $val;
324            $data =~ s/^$sq$val$sq\s*//;
325        }
326        else
327        {
328            ($default) = ($data =~ /^(\S+)/);
329            $data =~ s/^\S+\s*//;
330        }
331
332        $self->{ATTLIST}->{$tag}->{$att}->{default} = $default;
333    }
334}
335
336
337
338sub getgrouping
339{
340    my $self = shift;
341    my ($data) = @_;
342
343    my $count = 0;
344    my $parens = 0;
345    foreach my $char (split("",$data))
346    {
347        $parens++ if ($char eq "(");
348        $parens-- if ($char eq ")");
349        $count++;
350        last if ($parens == 0);
351    }
352    return substr($data,0,$count);
353}
354
355
356sub groupinglist
357{
358    my $self = shift;
359    my ($grouping,$seperator) = @_;
360
361    my @list;
362    my $item = "";
363    my $parens = 0;
364    my $word = "";
365    $grouping = substr($grouping,1,length($grouping)-2) if ($grouping =~ /^\(/);
366    foreach my $char (split("",$grouping))
367    {
368        $parens++ if ($char eq "(");
369        $parens-- if ($char eq ")");
370        if (($parens == 0) && ($char eq $seperator))
371        {
372            push(@list,$word);
373            $word = "";
374        }
375        else
376        {
377            $word .= $char;
378        }
379    }
380    push(@list,$word) unless ($word eq "");
381    return @list;
382}
383
384
385sub root
386{
387    my $self = shift;
388    my $tag = shift;
389    my @root;
390    foreach my $tag (keys(%{$self->{COUNTER}}))
391    {
392        push(@root,$tag) if ($self->{COUNTER}->{$tag} == 0);
393    }
394
395    print "ERROR:  Too many root tags... Check the DTD...\n"
396        if ($#root > 0);
397    return $root[0];
398}
399
400
401sub children
402{
403    my $self = shift;
404    my ($tag,$tree) = @_;
405
406    return unless exists ($self->{CHILDREN}->{$tag});
407    return if (exists($self->{CHILDREN}->{$tag}->{EMPTY}));
408    if (defined($tree))
409    {
410        my @current;
411        foreach my $current (&XML::Stream::GetXMLData("tree array",$tree,"*","",""))
412        {
413            push(@current,$$current[0]);
414        }
415        return $self->allowedchildren($self->{ELEMENT}->{$tag},\@current);
416    }
417    return $self->allowedchildren($self->{ELEMENT}->{$tag});
418}
419
420
421sub allowedchildren
422{
423    my $self = shift;
424    my ($dstr,$current) = @_;
425
426    my @allowed;
427
428    if ($dstr->{type} eq "element")
429    {
430        my $test = (defined($current) && $#{@{$current}} > -1) ? $$current[0] : "";
431        shift(@{$current}) if ($dstr->{element} eq $test);
432        if ($self->repeatcheck($dstr,$test) == 1)
433        {
434            return $dstr->{element};
435        }
436    }
437    else
438    {
439        foreach my $index (0..$#{$dstr->{list}})
440        {
441            push(@allowed,$self->allowedchildren($dstr->{list}->[$index],$current));
442        }
443    }
444
445    return @allowed;
446}
447
448
449sub repeatcheck
450{
451    my $self = shift;
452    my ($dstr,$tag) = @_;
453
454    $dstr = $self->{ELEMENT}->{$dstr} if exists($self->{ELEMENT}->{$dstr});
455
456#  print "repeatcheck: tag($tag)\n";
457#  print "repeatcheck: repeat($dstr->{repeat})\n"
458#    if exists($dstr->{repeat});
459
460    my $return = 0;
461    $return = ((!defined($tag) ||
462                ($tag eq $dstr->{element})) ?
463               0 :
464               1)
465        if (!exists($dstr->{repeat}) ||
466            ($dstr->{repeat} eq "?"));
467    $return = ((defined($tag) ||
468                (exists($dstr->{ordered}) &&
469                ($dstr->{ordered} eq "yes"))) ?
470               1 :
471               0)
472        if (exists($dstr->{repeat}) &&
473            (($dstr->{repeat} eq "+") ||
474             ($dstr->{repeat} eq "*")));
475
476#  print "repeatcheck: return($return)\n";
477    return $return;
478}
479
480
481sub required
482{
483    my $self = shift;
484    my ($dstr,$tag,$count) = @_;
485
486    $dstr = $self->{ELEMENT}->{$dstr} if exists($self->{ELEMENT}->{$dstr});
487
488    if ($dstr->{type} eq "element")
489    {
490        return 0 if ($dstr->{element} ne $tag);
491        return 1 if !exists($dstr->{repeat});
492        return 1 if (($dstr->{repeat} eq "+") && ($count == 1)) ;
493    }
494    else
495    {
496        return 0 if (($dstr->{repeat} eq "*") || ($dstr->{repeat} eq "?"));
497        my $test = 0;
498        foreach my $index (0..$#{$dstr->{list}})
499        {
500            $test = $test | $self->required($dstr->{list}->[$index],$tag,$count);
501        }
502        return $test;
503    }
504    return 0;
505}
506
507
508sub addchild
509{
510    my $self = shift;
511    my ($tag,$child,$tree) = @_;
512
513#  print "addchild: tag($tag) child($child)\n";
514
515    my @current;
516    if (defined($tree))
517    {
518#    &Net::Jabber::printData("\$tree",$tree);
519
520        @current = &XML::Stream::GetXMLData("index array",$tree,"*","","");
521
522#    &Net::Jabber::printData("\$current",\@current);
523    }
524
525    my @newBranch = $self->addchildrecurse($self->{ELEMENT}->{$tag},$child,\@current);
526
527    return $tree unless ("@newBranch" ne "");
528
529#  &Net::Jabber::printData("\$newBranch",\@newBranch);
530
531    my $location = shift(@newBranch);
532
533    if ($location eq "end")
534    {
535        splice(@{$$tree[1]},@{$$tree[1]},0,@newBranch);
536    }
537    else
538    {
539        splice(@{$$tree[1]},$location,0,@newBranch);
540    }
541    return $tree;
542}
543
544
545sub addcdata
546{
547    my $self = shift;
548    my ($tag,$child,$tree) = @_;
549
550#  print "addchild: tag($tag) child($child)\n";
551
552    my @current;
553    if (defined($tree))
554    {
555#    &Net::Jabber::printData("\$tree",$tree);
556
557        @current = &XML::Stream::GetXMLData("index array",$tree,"*","","");
558
559#    &Net::Jabber::printData("\$current",\@current);
560    }
561
562    my @newBranch = $self->addchildrecurse($self->{ELEMENT}->{$tag},$child,\@current);
563
564    return $tree unless ("@newBranch" ne "");
565
566#  &Net::Jabber::printData("\$newBranch",\@newBranch);
567
568    my $location = shift(@newBranch);
569
570    if ($location eq "end")
571    {
572        splice(@{$$tree[1]},@{$$tree[1]},0,@newBranch);
573    }
574    else
575    {
576        splice(@{$$tree[1]},$location,0,@newBranch);
577    }
578    return $tree;
579}
580
581
582sub addchildrecurse
583{
584    my $self = shift;
585    my ($dstr,$child,$current) = @_;
586
587#  print "addchildrecurse: child($child) type($dstr->{type})\n";
588
589    if ($dstr->{type} eq "element")
590    {
591#    print "addchildrecurse: tag($dstr->{element})\n";
592        my $count = 0;
593        while(($#{@{$current}} > -1) && ($dstr->{element} eq $$current[0]))
594        {
595            shift(@{$current});
596            shift(@{$current});
597            $count++;
598        }
599        if (($dstr->{element} eq $child) &&
600            ($self->repeatcheck($dstr,(($count > 0) ? $child : "")) == 1))
601        {
602            my @return = ( "end" , $self->newbranch($child));
603            @return = ($$current[1], $self->newbranch($child))
604                if ($#{@{$current}} > -1);
605#      print "addchildrecurse: Found the spot! (",join(",",@return),")\n";
606
607            return @return;
608        }
609    }
610    else
611    {
612        foreach my $index (0..$#{$dstr->{list}})
613        {
614            my @newBranch = $self->addchildrecurse($dstr->{list}->[$index],$child,$current);
615            return @newBranch if ("@newBranch" ne "");
616        }
617    }
618#  print "Let's blow....\n";
619    return;
620}
621
622
623sub deletechild
624{
625    my $self = shift;
626    my ($tag,$parent,$parenttree,$tree) = @_;
627
628    return $tree unless exists($self->{ELEMENT}->{$tag});
629    return $tree if $self->required($parent,$tag,&XML::Stream::GetXMLData("count",$parenttree,$tag));
630
631    return [];
632}
633
634
635
636sub newbranch
637{
638    my $self = shift;
639    my $tag = shift;
640
641    $tag = $self->root() unless defined($tag);
642
643    my @tree = ();
644
645    return ("0","") if ($tag eq "#PCDATA");
646
647    push(@tree,$tag);
648    push(@tree,[ {} ]);
649
650    foreach my $att ($self->attribs($tag))
651    {
652        $tree[1]->[0]->{$att} = ""
653            if (($self->{ATTLIST}->{$tag}->{$att}->{default} eq "#REQUIRED") &&
654                ($self->{ATTLIST}->{$tag}->{$att}->{type} eq "CDATA"));
655    }
656
657    push(@{$tree[1]},$self->recursebranch($self->{ELEMENT}->{$tag}));
658    return @tree;
659}
660
661
662sub recursebranch
663{
664    my $self = shift;
665    my $dstr = shift;
666
667    my @tree;
668    if (($dstr->{type} eq "element") &&
669        ($dstr->{element} ne "EMPTY"))
670    {
671        @tree = $self->newbranch($dstr->{element})
672            if (!exists($dstr->{repeat}) ||
673                ($dstr->{repeat} eq "+"));
674    }
675    else
676    {
677        foreach my $index (0..$#{$dstr->{list}})
678        {
679            push(@tree,$self->recursebranch($dstr->{list}->[$index]))
680if (!exists($dstr->{repeat}) ||
681        ($dstr->{repeat} eq "+"));
682        }
683    }
684    return @tree;
685}
686
687
688sub attribs
689{
690    my $self = shift;
691    my ($tag,$tree) = @_;
692
693    return unless exists ($self->{ATTLIST}->{$tag});
694
695    if (defined($tree))
696    {
697        my %current = &XML::Stream::GetXMLData("attribs",$tree,"","","");
698        return $self->allowedattribs($tag,\%current);
699    }
700    return $self->allowedattribs($tag);
701}
702
703
704sub allowedattribs
705{
706    my $self = shift;
707    my ($tag,$current) = @_;
708
709    my %allowed;
710    foreach my $att (keys(%{$self->{ATTLIST}->{$tag}}))
711    {
712        $allowed{$att} = 1 unless (defined($current) &&
713                                   exists($current->{$att}));
714    }
715    return sort {$a cmp $b} keys(%allowed);
716}
717
718
719sub attribvalue
720{
721    my $self = shift;
722    my $tag = shift;
723    my $att = shift;
724
725    return $self->{ATTLIST}->{$tag}->{$att}->{type}
726        if ($self->{ATTLIST}->{$tag}->{$att}->{type} ne "list");
727    return sort {$a cmp $b} keys(%{$self->{ATTLIST}->{$tag}->{$att}->{value}});
728}
729
730
731sub addattrib
732{
733    my $self = shift;
734    my ($tag,$att,$tree) = @_;
735
736    return $tree unless exists($self->{ATTLIST}->{$tag});
737    return $tree unless exists($self->{ATTLIST}->{$tag}->{$att});
738
739    my $default = $self->{ATTLIST}->{$tag}->{$att}->{default};
740    $default = "" if ($default eq "#REQUIRED");
741    $default = "" if ($default eq "#IMPLIED");
742
743    $$tree[1]->[0]->{$att} = $default;
744
745    return $tree;
746}
747
748
749sub attribrequired
750{
751    my $self = shift;
752    my ($tag,$att) = @_;
753
754    return 0 unless exists($self->{ATTLIST}->{$tag});
755    return 0 unless exists($self->{ATTLIST}->{$tag}->{$att});
756
757    return 1 if ($self->{ATTLIST}->{$tag}->{$att}->{default} eq "#REQUIRED");
758    return 0;
759}
760
761
762sub deleteattrib
763{
764    my $self = shift;
765    my ($tag,$att,$tree) = @_;
766
767    return $tree unless exists($self->{ATTLIST}->{$tag});
768    return $tree unless exists($self->{ATTLIST}->{$tag}->{$att});
769
770    return if $self->attribrequired($tag,$att);
771
772    delete($$tree[1]->[0]->{$att});
773
774    return $tree;
775}
776
777