1------------------------------------------------------------------------------
2--                     XML/Ada - An XML suite for Ada95                     --
3--                                                                          --
4--                     Copyright (C) 2001-2020, AdaCore                     --
5--                                                                          --
6-- This library is free software;  you can redistribute it and/or modify it --
7-- under terms of the  GNU General Public License  as published by the Free --
8-- Software  Foundation;  either version 3,  or (at your  option) any later --
9-- version. This library is distributed in the hope that it will be useful, --
10-- but WITHOUT ANY WARRANTY;  without even the implied warranty of MERCHAN- --
11-- TABILITY or FITNESS FOR A PARTICULAR PURPOSE.                            --
12--                                                                          --
13-- As a special exception under Section 7 of GPL version 3, you are granted --
14-- additional permissions described in the GCC Runtime Library Exception,   --
15-- version 3.1, as published by the Free Software Foundation.               --
16--                                                                          --
17-- You should have received a copy of the GNU General Public License and    --
18-- a copy of the GCC Runtime Library Exception along with this program;     --
19-- see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see    --
20-- <http://www.gnu.org/licenses/>.                                          --
21--                                                                          --
22------------------------------------------------------------------------------
23
24pragma Ada_05;
25
26with Ada.Exceptions;            use Ada.Exceptions;
27with Ada.Text_IO;               use Ada.Text_IO;
28with GNAT.Directory_Operations; use GNAT.Directory_Operations;
29with Input_Sources.File;        use Input_Sources.File;
30with Input_Sources.Strings;     use Input_Sources.Strings;
31with Input_Sources;             use Input_Sources;
32with Interfaces;                use Interfaces;
33with Sax.Attributes;            use Sax.Attributes;
34with Sax.Encodings;             use Sax.Encodings;
35with Sax.Exceptions;            use Sax.Exceptions;
36with Sax.Locators;              use Sax.Locators;
37with Sax.Models;                use Sax.Models;
38with Sax.Symbols;               use Sax.Symbols;
39with Unchecked_Deallocation;
40with Unicode.CES;               use Unicode.CES;
41with Unicode.CES.Basic_8bit;    use Unicode.CES.Basic_8bit;
42with Unicode.Names.Basic_Latin; use Unicode.Names.Basic_Latin;
43with Unicode;                   use Unicode;
44
45package body Sax.Readers is
46
47   use Entity_Table, Attributes_Table, Notations_Table;
48   use Symbol_Table_Pointers;
49
50   Debug_Lexical  : constant Boolean := False;
51   Debug_Input    : constant Boolean := False;
52   Debug_Internal : constant Boolean := False;
53   --  Set to True if you want to debug this package
54
55   Initial_Buffer_Length : constant := 10000;
56   --  Initial length of the internal buffer that stores CDATA, tag names,...
57
58   --------------------
59   -- Error messages --
60   --------------------
61   --  The comment indicates the section of the XML or Namespaces specification
62   --  relevant for that error
63
64   Error_Attlist_DefaultDecl       : constant String :=
65     "Invalid default declaration for the attribute";  --  3.3.2
66   Error_Attlist_Invalid_Enum      : constant String :=
67     "Invalid character ',' in ATTLIST enumeration";  --  3.3.1
68   Error_Attlist_Type              : constant String :=
69     "Invalid type for attribute"; --  WF
70   Error_Attribute_External_Entity : constant String :=
71     "Attribute values cannot reference external entities";
72   Error_Attribute_Is_Name : constant String :=
73     "Attribute must contain Names: ";  --  NS 6 and 3.3.1
74   Error_Attribute_Is_Ncname : constant String :=
75     "Attribute must contain Names with no colon: ";  --  NS 6 and 3.3.1
76   Error_Attribute_Is_Nmtoken : constant String :=
77     "Attribute must contain Nmtokens: ";  --  2.3 and 3.3.1
78   Error_Attribute_Less_Than : constant String :=
79     "'<' not authorized in attribute values";  --  2.3
80   Error_Attribute_Less_Than_Suggests : constant String :=  --  2.3
81     "'<' not authorized in attribute values. Possible end of value at ";
82   Error_Attribute_Ref_Unparsed_Entity : constant String :=
83     "Attribute must reference an existing unparsed entity: ";
84   Error_Cdata_End            : constant String :=
85     "CDATA sections must end with ']]>'"; --  2.7
86   Error_Cdata_Unterminated   : constant String :=
87     "CDATA must be followed immediately by '['";
88   Error_Charref_Toplevel     : constant String :=
89     "Character references cannot appear at top-level";  --  2.1
90   Error_Charref_Invalid_Char : constant String :=
91     "Invalid character in character reference: "; -- 4.1
92   Error_Comment_End          : constant String :=
93     "Comments must end with '-->'";  --  2.5
94   Error_Comment_Unterminated : constant String :=
95     "Unterminated comment in stream";  --  WF
96   Error_Comment_Dash_Dash    : constant String :=
97     "'--' cannot appear in comments";  --  2.5
98   Error_Conditional_Location : constant String :=  --  3.4
99     "INCLUDE and IGNORE sections only allowed in the external DTD subset";
100   Error_Conditional_Syntax : constant String :=
101     "Conditional sections need '[' after INCLUDE or IGNORE";  --  3.4
102   Error_Content_Model_Closing_Paren : constant String :=
103     "Closing parenthesis must be followed by '*' in mixed content"; --  3.2.2
104   Error_Content_Model_Empty_List : constant String :=
105     "Invalid content model: list of choices cannot be empty";
106   Error_Content_Model_Expect_Operator : constant String :=
107     "Expecting operator in content model";
108   Error_Content_Model_Invalid : constant String :=
109     "Invalid content model";
110   Error_Content_Model_Invalid_Multiplier : constant String :=
111     "Invalid location for '+', '?' or '*' operators"; --  3.2.1
112   Error_Content_Model_Invalid_Name : constant String :=
113     "Invalid name in content model: ";
114   Error_Content_Model_Invalid_Seq : constant String :=
115     "Missing content particle in sequence"; --  3.2.1
116   Error_Content_Model_Invalid_Start : constant String :=
117     "Invalid content model, cannot start with #";
118   Error_Content_Model_Mixing : constant String :=
119     "Cannot mix ',' and '|' in content model";
120   Error_Content_Model_Nested_Groups : constant String :=
121     "Nested groups and occurrence operators not allowed in mixed content";
122   --  3.3.2
123   Error_Content_Model_Pcdata : constant String :=
124     "#PCDATA can only be used with '|' connectors"; --  3.2.2
125   Error_Content_Model_Pcdata_First : constant String :=
126     "#PCDATA must be first in list";  --  3.2.2
127   Error_Content_Model_Pcdata_Occurrence : constant String :=
128     "Occurrence on #PCDATA must be '*'"; --  3.2.2
129   Error_Entity_Definition  : constant String :=
130     "Invalid definition for ENTITY";
131   Error_Entity_Definition_Unterminated  : constant String :=
132     "Expecting end of ENTITY definition";
133   Error_Entity_Name        : constant String := "Invalid entity name"; --  4.1
134   Error_Entity_Not_Standalone    : constant String :=
135     "Entity declared in external subset, but document is standalone"; --  4.1
136   Error_Entity_Self_Ref  : constant String :=
137     "Entity cannot reference itself"; --  4.1
138   Error_Entity_Toplevel  : constant String :=
139     "Entity references cannot appear at top-level"; --  2.1
140   Error_Entity_Undefined : constant String := "Undefined entity"; --  4.1
141   Error_Entityref_Unterminated : constant String :=
142     "Entity references must end with ';'." & ASCII.LF
143       & "Did you want to use &amp;?"; --  4.1
144   Error_Entity_Nested : constant String :=
145     "Replacement text for entities must be properly nested"; --  3.2.1
146   Error_Entity_Self_Contained : constant String :=
147     "Entity values must be self-contained";  --  4.5 or 4.3.2
148   Error_Expecting_Space : constant String :=
149     "Expecting a space"; --  WF or 3.3
150   Error_External_Entity_Not_Found : constant String :=
151     "External entity not found: ";
152   Error_Invalid_Char    : constant String :=
153     "Invalid character code:";  --  2.2 or 4.1
154   Error_Invalid_Declaration : constant String := "Invalid declaration";
155   Error_Invalid_Encoding    : constant String := "Invalid character encoding";
156   Error_Invalid_Content_Model : constant String := "Invalid content model";
157   Error_Invalid_Language    : constant String :=
158     "Invalid language specification";  --  2.12
159   Error_Invalid_Name : constant String :=
160     "Invalid name: "; --  3.1
161   Error_Invalid_Notation_Decl : constant String :=
162     "Invalid notation declaration";  --  WF
163   Error_Invalid_Space         : constant String :=
164     "Value of xml:space must be (default|preserve)";  --  2.10
165   Error_Is_Name  : constant String := "Expecting a Name"; --  3.3.1
166   Error_Is_Ncname  : constant String :=
167     "Expecting a Name with no colon"; --  NS 6 and 3.3.1
168   Error_Missing_Operand       : constant String :=
169     "Missing operand before this operator";
170   Error_Mixed_Contents        : constant String :=
171     "Mixed contents cannot be used in a list or a sequence"; --  3.2.1
172   Error_Ndata_ParamEntity : constant String := --  4.2
173     "NDATA annotation not allowed for parameter entities";
174   Error_Ndata_Space : constant String := --  4.2.2
175     "Expecting space before NDATA declaration";
176   Error_Ndata_String : constant String :=
177     "Expecting string after NDATA";
178   Error_ParamEntity_In_Attribute : constant String :=
179     "Parameter entities cannot occur in attribute values";
180   --  WF PE in internal subset
181   Error_Notation_Undeclared : constant String :=
182     "Notation must be declared: "; --  VC 4.2.2 or 3.3.1
183   Error_Prefix_Not_Declared   : constant String :=
184     "Prefix must be declared before its use: ";   --  WF
185   Error_Public_String : constant String :=
186     "Expecting a string after PUBLIC";
187   Error_Public_Sysid : constant String :=
188     "Expecting SystemID after PUBLIC";
189   Error_Public_Sysid_Space : constant String :=
190     "Require whitespace between public and system IDs"; --  4.2.2
191   Error_Public_Invalid : constant String :=
192     "Invalid PubID character: ";
193   Error_System_String : constant String :=
194     "Expecting a string after SYSTEM";
195   Error_System_URI : constant String :=  --  4.2.2
196     "SYSTEM identifiers may not contain URI fragments starting with #";
197   Error_Unknown_Declaration   : constant String :=
198     "Unknown declaration in DTD"; --  WF
199   Error_Unexpected_Chars1     : constant String :=
200     "Invalid characters '<!-' in stream"; --  WF
201   Error_Unexpected_Chars2     : constant String :=
202     "Unexpected characters between ']' and '>' in the DTD";  --  2.8
203   Error_Unexpected_Chars3     : constant String :=
204     "Text may not contain the litteral ']]>'"; --  2.4
205   Error_Unterminated_String   : constant String :=
206     "Unterminated string";  --  2.3
207   Error_Unterminated_String_Suggests : constant String :=
208     "Unterminated string, possible end at "; --  2.3
209
210   ------------
211   -- Tokens --
212   ------------
213
214   type Token_Type is
215     (Double_String_Delimiter, --  "
216      Single_String_Delimiter, --  '
217      Comment,                 --  <!--...--> (Data is the comment)
218      Start_Of_Tag,            --  <
219      Start_Of_End_Tag,        --  </
220      End_Of_Start_Tag,        --  />
221      Start_Of_PI,             --  <?
222      End_Of_PI,               --  ?>
223      End_Of_Tag,              --  >
224      Equal,                   --  =  (in tags)
225      Colon,                   --  :  (in tags)
226      Open_Paren,              --  (  (while parsing content model in ATTLIST)
227      Internal_DTD_Start,      --  [  (while in DTD)
228      Internal_DTD_End,        --  ]  (while in DTD)
229      Include,                 --  <![INCLUDE[
230      Ignore,                  --  <![IGNORE[
231      Start_Conditional,       --  <![
232      End_Conditional,         --  ]]>
233      Space,                   --  Any number of spaces (Data is the spaces)
234      Text,                    --  any text  (Data is the identifier)
235      Name,                    --  same as text, but contains only valid
236      --  name characters
237      Char_Ref,                --  A character reference. Data is the character
238      Cdata_Section,           --  <![CDATA
239      Doctype_Start,           --  <!DOCTYPE
240      System,                  --  SYSTEM  (while in DTD)
241      Public,                  --  PUBLIC  (while in DTD)
242      Ndata,                   --  NDATA   (while in DTD)
243      Any,                     --  ANY (while in DTD)
244      Empty,                   --  EMPTY (while in DTD)
245      Notation,                --  NOTATION (while in DTD or ATTLIST)
246      Entity_Def,              --  <!ENTITY (while in DTD)
247      Element_Def,             --  <!ELEMENT (while in DTD)
248      Attlist_Def,             --  <!ATTLIST (while in DTD)
249      Id_Type,                 --  ID (while in ATTLIST)       Data is "ID"
250      Idref,                   --  IDREF (while in ATTLIST)    Data is "IDREF"
251      Idrefs,                  --  IDREFS (while in ATTLIST)   Data is "IDREFS"
252      Cdata,                   --  CDATA (while in ATTLIST)    Data is "CDATA"
253      Entity,                  --  ENTITY (while in ATTLIST)   Data is "ENTITY"
254      Entities,                --  ENTITIES (while in ATTLIST) Data="ENTITIES"
255      Nmtoken,                 --  NMTOKEN (while in ATTLIST)  Data="NMTOKEN"
256      Nmtokens,                --  NMTOKENS (while in ATTLIST) Data="NMTOKENS"
257      Required,                --  REQUIRED (while in ATTLIST) Data="#REQUIRED"
258      Implied,                 --  IMPLIED (while in ATTLIST)  Data="#IMPLIED"
259      Fixed,                   --  FIXED (while in ATTLIST)    Data="#FIXED"
260      End_Of_Input             --  End of input was seen.
261     );
262
263   type Token is record
264      Typ         : Token_Type;
265      First, Last : Natural;  --   Indexes in the buffer
266      Location    : Sax.Locators.Location;
267      From_Entity : Boolean;  --   Whether the characters come from the
268                              --   expansion of an entity.
269   end record;
270
271   Null_Token : constant Token := (End_Of_Input, 1, 0, No_Location, False);
272
273   Default_State : constant Parser_State :=
274     (Name => "Def",
275      Ignore_Special => False,
276      Detect_End_Of_PI => False,
277      Greater_Special => False,
278      Less_Special => False,
279      Expand_Param_Entities => False,
280      Expand_Entities => True,
281      Report_Character_Ref => False,
282      Expand_Character_Ref => True,
283      In_DTD => False,
284      Recognize_External => False,
285      Handle_Strings => False,
286      In_Tag => False,
287      Report_Parenthesis => False,
288      In_Attlist => False);
289   Attr_Value_State : constant Parser_State :=
290     (Name => "Att",
291      Ignore_Special => True,
292      Detect_End_Of_PI => False,
293      Greater_Special => False,
294      Less_Special => True,
295      Expand_Param_Entities => False,
296      Expand_Entities => True,
297      Report_Character_Ref => True,
298      Expand_Character_Ref => False,
299      In_DTD => False,
300      Recognize_External => False,
301      Handle_Strings => True,
302      In_Tag => False,
303      Report_Parenthesis => False,
304      In_Attlist => False);
305   Non_Interpreted_String_State : constant Parser_State :=
306     (Name => "Str",
307      Ignore_Special => True,
308      Detect_End_Of_PI => False,
309      Greater_Special => False,
310      Less_Special => False,
311      Expand_Param_Entities => False,
312      Expand_Entities => False,
313      Report_Character_Ref => False,
314      Expand_Character_Ref => False,
315      In_DTD => False,
316      Recognize_External => False,
317      Handle_Strings => True,
318      In_Tag => False,
319      Report_Parenthesis => False,
320      In_Attlist => False);
321   DTD_State : constant Parser_State :=
322     (Name => "DTD",
323      Ignore_Special => False,
324      Detect_End_Of_PI => False,
325      Greater_Special => True,
326      Less_Special => False,
327      Expand_Param_Entities => True,
328      Expand_Entities => True,
329      Report_Character_Ref => False,
330      Expand_Character_Ref => True,
331      In_DTD => True,
332      Recognize_External => True,
333      Handle_Strings => True,
334      In_Tag => False,
335      Report_Parenthesis => False,
336      In_Attlist => False);
337   PI_State : constant Parser_State :=
338     (Name => "PI ",
339      Ignore_Special => True,
340      Detect_End_Of_PI => True,
341      Greater_Special => False,
342      Less_Special => False,
343      Expand_Param_Entities => False,
344      Expand_Entities => False,
345      Report_Character_Ref => False,
346      Expand_Character_Ref => False,
347      In_DTD => False,
348      Recognize_External => False,
349      Handle_Strings => True,
350      In_Tag => False,
351      Report_Parenthesis => False,
352      In_Attlist => False);
353   Entity_Def_State : constant Parser_State :=
354     (Name => "Ent",
355      Ignore_Special => False,
356      Detect_End_Of_PI => False,
357      Greater_Special => True,
358      Less_Special => False,
359      Expand_Param_Entities => False,
360      Expand_Entities => False,
361      Report_Character_Ref => False,
362      Expand_Character_Ref => True,
363      In_DTD => True,
364      Recognize_External => True,
365      Handle_Strings => True,
366      In_Tag => False,
367      Report_Parenthesis => False,
368      In_Attlist => False);
369   Element_Def_State : constant Parser_State :=
370     (Name => "Ele",
371      Ignore_Special => False,
372      Detect_End_Of_PI => False,
373      Greater_Special => True,
374      Less_Special => False,
375      Expand_Param_Entities => True,
376      Expand_Entities => False,
377      Report_Character_Ref => False,
378      Expand_Character_Ref => True,
379      In_DTD => True,
380      Recognize_External => True,
381      Handle_Strings => True,
382      In_Tag => True,
383      Report_Parenthesis => True,
384      In_Attlist => False);
385   Attribute_Def_State : constant Parser_State :=
386     (Name => "AtD",
387      Ignore_Special => False,
388      Detect_End_Of_PI => False,
389      Greater_Special => True,
390      Less_Special => False,
391      Expand_Param_Entities => True,
392      Expand_Entities => False,
393      Report_Character_Ref => False,
394      Expand_Character_Ref => True,
395      In_DTD => True,
396      Recognize_External => False,
397      Handle_Strings => True,
398      In_Tag => True,
399      Report_Parenthesis => True,
400      In_Attlist => True);
401   Attribute_Def_Name_State : constant Parser_State :=
402     (Name => "ADN",
403      Ignore_Special => False,
404      Detect_End_Of_PI => False,
405      Greater_Special => True,
406      Less_Special => False,
407      Expand_Param_Entities => True,
408      Expand_Entities => False,
409      Report_Character_Ref => False,
410      Expand_Character_Ref => True,
411      In_DTD => True,
412      Recognize_External => False,
413      Handle_Strings => True,
414      In_Tag => True,
415      Report_Parenthesis => True,
416      In_Attlist => False);
417   Entity_Str_Def_State : constant Parser_State :=
418     (Name => "EtS",
419      Ignore_Special => True,
420      Detect_End_Of_PI => False,
421      Greater_Special => False,
422      Less_Special => False,
423      Expand_Param_Entities => True,
424      Expand_Entities => False,
425      Report_Character_Ref => False,
426      Expand_Character_Ref => True,
427      In_DTD => True,
428      Recognize_External => False,
429      Handle_Strings => True,
430      In_Tag => False,
431      Report_Parenthesis => False,
432      In_Attlist => False);
433   Attlist_Str_Def_State : constant Parser_State :=
434     (Name => "AtS",
435      Ignore_Special => True,
436      Detect_End_Of_PI => False,
437      Greater_Special => False,
438      Less_Special => False,
439      Expand_Param_Entities => False,
440      Expand_Entities => True,
441      Report_Character_Ref => False,
442      Expand_Character_Ref => True,
443      In_DTD => True,
444      Recognize_External => False,
445      Handle_Strings => True,
446      In_Tag => False,
447      Report_Parenthesis => False,
448      In_Attlist => False);
449   Tag_State : constant Parser_State :=
450     (Name => "Tag",
451      Ignore_Special => False,
452      Greater_Special => True,
453      Less_Special => False,
454      Detect_End_Of_PI => False,
455      Expand_Param_Entities => False,
456      Expand_Entities => False,
457      Report_Character_Ref => False,
458      Expand_Character_Ref => True,
459      In_DTD => False,
460      Recognize_External => False,
461      Handle_Strings => True,
462      In_Tag => True,
463      Report_Parenthesis => False,
464      In_Attlist => False);
465
466   --------------------------
467   -- Internal subprograms --
468   --------------------------
469
470   procedure Unchecked_Free is new Unchecked_Deallocation
471     (Input_Source'Class, Input_Source_Access);
472   procedure Unchecked_Free is new Unchecked_Deallocation
473     (Hook_Data'Class, Hook_Data_Access);
474   procedure Unchecked_Free is new Ada.Unchecked_Deallocation
475     (Sax_Attribute_Array, Sax_Attribute_Array_Access);
476
477   function Debug_Encode (C : Unicode_Char) return Byte_Sequence;
478   --  Return an encoded string matching C (matching Sax.Encodins.Encoding)
479
480   procedure Test_Valid_Char
481     (Parser : in out Sax_Reader'Class; C : Unicode_Char; Loc : Token);
482   --  Raise an error if C is not valid in XML. The error is reported at
483   --  location Loc.
484
485   function Is_Pubid_Char (C : Unicode_Char) return Boolean;
486   --  Return True if C is a valid character for a Public ID (2.3 specs)
487
488   procedure Test_Valid_Lang
489     (Parser : in out Sax_Reader'Class; Lang : Byte_Sequence);
490   --  Return True if Lang matches the rules for languages
491
492   procedure Test_Valid_Space
493     (Parser : in out Sax_Reader'Class; Space : Byte_Sequence);
494   --  Return True if Space matches the rules for the xml:space attribute
495
496   procedure Next_Char
497     (Input   : in out Input_Source'Class;
498      Parser  : in out Sax_Reader'Class);
499   --  Return the next character, and increments the locators.
500   --  If there are no more characters in the input streams, Parser is setup
501   --  so that End_Of_Stream (Parser) returns True.
502
503   procedure Lookup_Char
504     (Input   : in out Input_Source'Class;
505      Parser  : in out Sax_Reader'Class;
506      Char    : out Unicode_Char);
507   --  Lookup one character, but put it back in the input so that the next call
508   --  to Next_Char will return it again. This does not change
509   --  Parser.Last_Read.
510
511   function End_Of_Stream (Parser : Sax_Reader'Class) return Boolean;
512   pragma Inline (End_Of_Stream);
513   --  Return True if there are no more characters in the parser.
514   --  Note that this indicates that no more character remains to be read, and
515   --  is different from checking Eof on the current input (since for instance
516   --  a new input is open for an entity).
517
518   function Create_Attribute_List
519     (Attrs  : Sax_Attribute_List) return Sax.Attributes.Attributes;
520   --  Create the list of attributes from Parser.Attributes.
521   --  This function has the side effect of resetting
522   --  Parser.Attributes_Count to 0, and freeing memory as appropriate
523
524   procedure Put_In_Buffer
525     (Parser : in out Sax_Reader'Class; Char : Unicode_Char);
526   pragma Inline (Put_In_Buffer);
527
528   procedure Put_In_Buffer
529     (Parser : in out Sax_Reader'Class; Str : Byte_Sequence);
530   pragma Inline (Put_In_Buffer);
531   --  Put the last character read in the internal buffer
532
533   procedure Next_Token
534     (Input  : in out Input_Sources.Input_Source'Class;
535      Parser : in out Sax_Reader'Class;
536      Id     : out Token;
537      Coalesce_Space : Boolean := False);
538   --  Return the next identifier in the input stream.
539   --  Locator is modified accordingly (line and column).
540   --  If Coalesce_Space is True, then all the Name or Text tokens preceded or
541   --  followed by Space tokens are grouped together and returned as a single
542   --  Text token.
543   --  Id.Typ is set to End_Of_Input if there are no more token to be read.
544
545   procedure Next_Token_Skip_Spaces
546     (Input  : in out Input_Sources.Input_Source'Class;
547      Parser : in out Sax_Reader'Class;
548      Id     : out Token;
549      Must_Have : Boolean := False);
550   --  Same as Next_Token, except it skips spaces. If Must_Have is True,
551   --  then the first token read must be a space, or an error is raised
552   --  Id.Typ is set to End_Of_Input if there are no more token to be read.
553
554   procedure Next_NS_Token_Skip_Spaces
555     (Input   : in out Input_Sources.Input_Source'Class;
556      Parser  : in out Sax_Reader'Class;
557      NS_Id   : out Token;
558      Name_Id : out Token);
559   --  Skip spaces, if any, then read a "ns:name" or "name" token.
560
561   function Find_Symbol (Parser : Sax_Reader'Class; T : Token) return Symbol;
562   function Find_Symbol
563     (Parser : Sax_Reader'Class; First, Last : Token) return Symbol;
564   --  Return the value of the symbol
565
566   procedure Reset_Buffer
567     (Parser : in out Sax_Reader'Class; Id : Token := Null_Token);
568   --  Clears the internal buffer in Parser.
569   --  If Id is not Null_Token, then only the characters starting from
570   --  Id.First are removed
571
572   procedure Set_State
573     (Parser : in out Sax_Reader'Class; State : Parser_State);
574   --  Set the current state for the parser
575
576   function Get_State (Parser : Sax_Reader'Class) return Parser_State;
577   --  Return the current state.
578
579   procedure Close_Namespaces
580     (Parser : in out Sax_Reader'Class; List : XML_NS);
581   --  Close all namespaces in the list, and report appropriate SAX events
582
583   procedure Check_Valid_Name_Or_NCname
584     (Parser : in out Sax_Reader'Class;
585      Name   : Token);
586   --  Check that Name is a valid Name (if namespaces are not supported) or
587   --  a NCname if namespaces are supported.
588
589   procedure Check_Attribute_Value
590     (Parser     : in out Sax_Reader'Class;
591      Local_Name : Symbol;
592      Typ        : Attribute_Type;
593      Value      : Symbol;
594      Error_Loc  : Token);
595   --  Check Validity Constraints for a single attribute. Only call this
596   --  subprogram for a validating parser
597
598   procedure Syntactic_Parse
599     (Parser : in out Sax_Reader'Class;
600      Input  : in out Input_Sources.Input_Source'Class);
601   --  Internal syntactical parser.
602
603   procedure Find_NS
604     (Parser  : in out Sax_Reader'Class;
605      Prefix  : Token;
606      NS      : out XML_NS;
607      Include_Default_NS : Boolean := True);
608   --  Internal version of Find_NS
609
610   function Qname_From_Name
611     (Parser : Sax_Reader'Class; Prefix, Local_Name : Token)
612      return Byte_Sequence;
613   function Qname_From_Name (Prefix, Local_Name : Symbol) return Byte_Sequence;
614   --  Create the qualified name from the namespace URI and the local name.
615
616   procedure Add_Namespace
617     (Parser       : in out Sax_Reader'Class;
618      Node         : Element_Access;
619      Prefix       : Symbol;
620      URI          : Symbol;
621      Report_Event : Boolean := True);
622   --  Same as above, with strings
623
624   procedure Add_Namespace_No_Event
625     (Parser : in out Sax_Reader'Class;
626      Prefix, URI : Symbol);
627   --  Create a new default namespace in the parser
628
629   procedure Free (Parser : in out Sax_Reader'Class);
630   --  Free the memory allocated for the parser, including the namespaces,
631   --  entities,...
632
633   procedure Free (Elem : in out Element_Access);
634   --  Free the memory of Elem (and its contents). Note that this doesn't free
635   --  the parent of Elem).
636   --  On Exit, Elem is set to its parent.
637
638   procedure Parse_Element_Model
639     (Input   : in out Input_Sources.Input_Source'Class;
640      Parser  : in out Sax_Reader'Class;
641      Result  : out Element_Model_Ptr;
642      Attlist : Boolean := False;
643      Open_Was_Read : Boolean);
644   --  Parse the following characters in the stream so as to create an
645   --  element or attribute contents model, ie the tree matching an
646   --  expression like "(foo|bar)+".
647   --  Nmtokens should be true if the names in the model should follow the
648   --  Nmtoken rule in XML specifications rather than the Name rule.
649   --  If Open_Was_Read, then the opening parenthesis is considered to have
650   --  been read already and is automatically inserted into the stack.
651   --  Attlist should be set to true if this is the model in <!ELEMENT>
652
653   procedure Fatal_Error
654     (Parser : in out Sax_Reader'Class;
655      Msg    : String;
656      Loc    : Sax.Locators.Location := No_Location);
657   procedure Fatal_Error
658     (Parser : in out Sax_Reader'Class;
659      Msg    : String;
660      Loc    : Token);
661   --  Raises a fatal error.
662   --  The error is reported at location Id (or the current parser location
663   --  if Id is Null_Token).
664   --  The user application should not return from this call. Thus, a
665   --  Program_Error is raised if it does return.
666
667   procedure Error
668     (Parser : in out Sax_Reader'Class;
669      Msg    : String;
670      Loc    : Sax.Locators.Location);
671   procedure Error
672     (Parser : in out Sax_Reader'Class;
673      Msg    : String;
674      Id     : Token);
675   --  Same as Fatal_Error, but reports an error instead
676
677   procedure Warning
678     (Parser : in out Sax_Reader'Class;
679      Msg    : String;
680      Loc    : Sax.Locators.Location);
681   procedure Warning
682     (Parser : in out Sax_Reader'Class;
683      Msg    : String;
684      Id     : Token := Null_Token);
685   --  Same as Fatal_Error, but reports a warning instead
686
687   function Location
688     (Parser : Sax_Reader'Class;
689      Loc    : Sax.Locators.Location) return Byte_Sequence;
690   --  Return the location of the start of Id as a string.
691
692   function Resolve_URI
693     (Parser    : Sax_Reader'Class;
694      System_Id : Symbol;
695      URI       : Symbol) return Symbol;
696   --  Return a fully resolved URI, based on the system identifier set for
697   --  Machine, and URI.
698   --  [System_Id] should be the result of [System_Id (Parser)] at the time the
699   --  URI was found.
700
701   function System_Id (Parser : Sax_Reader'Class) return Symbol;
702   function Public_Id (Parser : Sax_Reader'Class) return Symbol;
703   pragma Inline (System_Id, Public_Id);
704   --  Return the current system id that we are parsing
705
706   procedure Close_Inputs
707     (Parser : in out Sax_Reader'Class;
708      Inputs : in out Entity_Input_Source_Access);
709   --  Close the inputs that have been completely read. This should be
710   --  called every time one starts an entity, so that calls to
711   --  Start_Entity/End_Entity are properly nested, and error messages
712   --  point to the right entity.
713
714   procedure Debug_Print (Parser : Sax_Reader'Class; Id : Token);
715   --  Print the contents of Id
716
717   -----------------
718   -- Find_Symbol --
719   -----------------
720
721   function Find_Symbol
722     (Parser : Sax_Reader'Class; Str : Byte_Sequence) return Symbol is
723   begin
724      return Find (Get (Parser.Symbols), Str);
725   end Find_Symbol;
726
727   -----------------
728   -- Find_Symbol --
729   -----------------
730
731   function Find_Symbol (Parser : Sax_Reader'Class; T : Token) return Symbol is
732   begin
733      return Find (Get (Parser.Symbols), Parser.Buffer (T.First .. T.Last));
734   end Find_Symbol;
735
736   -----------------
737   -- Find_Symbol --
738   -----------------
739
740   function Find_Symbol
741     (Parser : Sax_Reader'Class; First, Last : Token) return Symbol is
742   begin
743      return Find (Get (Parser.Symbols),
744                   Parser.Buffer (First.First .. Last.Last));
745   end Find_Symbol;
746
747   -------------------
748   -- End_Of_Stream --
749   -------------------
750
751   function End_Of_Stream (Parser : Sax_Reader'Class) return Boolean is
752   begin
753      return not Parser.Last_Read_Is_Valid
754        and Parser.Last_Read = 16#FFFF#;
755   end End_Of_Stream;
756
757   ------------------
758   -- Debug_Encode --
759   ------------------
760
761   function Debug_Encode (C : Unicode_Char) return Byte_Sequence is
762      Buffer : Byte_Sequence (1 .. 20);
763      Index  : Natural := Buffer'First - 1;
764   begin
765      Encoding.Encode (C, Buffer, Index);
766      return Buffer (Buffer'First .. Index);
767   end Debug_Encode;
768
769   ---------------
770   -- System_Id --
771   ---------------
772
773   function System_Id (Parser : Sax_Reader'Class) return Symbol is
774   begin
775      if Parser.Inputs = null then
776         return Parser.System_Id;
777      else
778         return Parser.Inputs.System_Id;
779      end if;
780   end System_Id;
781
782   ---------------
783   -- Public_Id --
784   ---------------
785
786   function Public_Id (Parser : Sax_Reader'Class) return Symbol is
787   begin
788      if Parser.Inputs = null then
789         return Parser.Public_Id;
790      else
791         return Parser.Inputs.Public_Id;
792      end if;
793   end Public_Id;
794
795   ----------
796   -- Free --
797   ----------
798
799   procedure Free (Elem : in out Element_Access) is
800      procedure Free_Element is new Unchecked_Deallocation
801        (Element, Element_Access);
802      Tmp : constant Element_Access := Elem.Parent;
803   begin
804      Free (Elem.Namespaces);
805      Free_Element (Elem);
806      Elem := Tmp;
807   end Free;
808
809   ---------------------------
810   -- Create_Attribute_List --
811   ---------------------------
812
813   function Create_Attribute_List
814     (Attrs  : Sax_Attribute_List) return Sax.Attributes.Attributes
815   is
816      function Get_Or_Null (S : Symbol) return String;
817      function Get_Or_Null (S : Symbol) return String is
818      begin
819         if S = No_Symbol then
820            return "";
821         else
822            return Get (S).all;
823         end if;
824      end Get_Or_Null;
825
826      Attributes : Sax.Attributes.Attributes;
827   begin
828      for J in 1 .. Attrs.Count loop
829         Add_Attribute
830           (Attr       => Attributes,
831            URI        => Get_Or_Null (Attrs.List (J).URI),
832            Local_Name => Get (Attrs.List (J).Local_Name).all,
833            Qname      =>
834              Qname_From_Name
835                (Prefix     => Attrs.List (J).Prefix,
836                 Local_Name => Attrs.List (J).Local_Name),
837            Att_Type   => Attrs.List (J).Att_Type,
838            Content    => Unknown_Model, --  not needed anyway
839            Value      => Get (Attrs.List (J).Value).all,
840            Default_Decl => Attrs.List (J).Default_Decl);
841      end loop;
842
843      return Attributes;
844
845   exception
846      when others =>
847         Clear (Attributes);
848         raise;
849   end Create_Attribute_List;
850
851   -----------------
852   -- Resolve_URI --
853   -----------------
854
855   function Resolve_URI
856     (Parser    : Sax_Reader'Class;
857      System_Id : Symbol;
858      URI       : Symbol) return Symbol
859   is
860      C : Unicode_Char;
861      URI_Str : constant Cst_Byte_Sequence_Access := Get (URI);
862      URI_Index : Positive := URI_Str'First;
863   begin
864      pragma Assert (URI /= No_Symbol);
865
866      if URI = Empty_String then
867         return System_Id;
868      end if;
869
870      --  ??? Only resolve paths for now
871      Encoding.Read (URI_Str.all, URI_Index, C);
872      if C = Solidus then
873         return URI;
874      else
875         declare
876            System_Str : constant Cst_Byte_Sequence_Access := Get (System_Id);
877            Index : Natural := System_Str'First;
878            Basename_Start : Natural := System_Str'First;
879         begin
880            while Index <= System_Str'Last loop
881               Encoding.Read (System_Str.all, Index, C);
882               if C = Solidus or else C = Reverse_Solidus then
883                  Basename_Start := Index;
884               end if;
885            end loop;
886            return Find_Symbol
887              (Parser,
888               System_Str (System_Str'First .. Basename_Start - 1)
889               & URI_Str.all);
890         end;
891      end if;
892   end Resolve_URI;
893
894   --------------
895   -- Location --
896   --------------
897
898   function Location (Parser : Sax_Reader'Class; Loc : Sax.Locators.Location)
899      return Byte_Sequence
900   is
901      Line : constant Byte_Sequence := Natural'Image (Loc.Line);
902      Col : constant Byte_Sequence := Natural'Image (Loc.Column);
903   begin
904      if Parser.Close_Inputs = null then
905         if Use_Basename_In_Error_Messages (Parser) then
906            return Base_Name (Get (Get_Public_Id (Parser.Locator)).all) & ':'
907              & Line (Line'First + 1 .. Line'Last)
908              & ':' & Col (Col'First + 1 .. Col'Last);
909         else
910            return Get (Get_Public_Id (Parser.Locator)).all & ':'
911              & Line (Line'First + 1 .. Line'Last)
912              & ':' & Col (Col'First + 1 .. Col'Last);
913         end if;
914      else
915         if Use_Basename_In_Error_Messages (Parser) then
916            return Base_Name (Get_Public_Id (Parser.Close_Inputs.Input.all))
917              & ':' & Line (Line'First + 1 .. Line'Last)
918              & ':' & Col (Col'First + 1 .. Col'Last);
919         else
920            return Get_Public_Id (Parser.Close_Inputs.Input.all) & ':'
921              & Line (Line'First + 1 .. Line'Last)
922              & ':' & Col (Col'First + 1 .. Col'Last);
923         end if;
924      end if;
925   end Location;
926
927   -----------------
928   -- Fatal_Error --
929   -----------------
930
931   procedure Fatal_Error
932     (Parser  : in out Sax_Reader'Class;
933      Msg     : String;
934      Loc     : Sax.Locators.Location := No_Location)
935   is
936      Id2  : Sax.Locators.Location := Loc;
937   begin
938      if Id2 = No_Location then
939         Id2 := Parser.Current_Location;
940      end if;
941      Parser.Buffer_Length := 0;
942
943      --  So that when calling Close_Inputs, we do generate an End_Entity
944      Parser.State.Ignore_Special := True;
945
946      begin
947         --  Must be called before End_Document, as per the SAX standard
948         Fatal_Error
949           (Parser, Create (Location (Parser, Id2) & ": " & Msg, Id2));
950         End_Document (Parser);
951      exception
952         when E : others =>
953            begin
954               End_Document (Parser);
955            exception
956               when others => null;
957            end;
958
959            --  Priority is given to the Fatal_Error, whatever
960            --  End_Document raises
961            Reraise_Occurrence (E);
962      end;
963
964      raise Program_Error;
965   end Fatal_Error;
966
967   -----------------
968   -- Fatal_Error --
969   -----------------
970
971   procedure Fatal_Error
972     (Parser : in out Sax_Reader'Class;
973      Msg    : String;
974      Loc    : Token) is
975   begin
976      Fatal_Error (Parser, Msg, Loc.Location);
977   end Fatal_Error;
978
979   -----------
980   -- Error --
981   -----------
982
983   procedure Error
984     (Parser : in out Sax_Reader'Class;
985      Msg    : String;
986      Loc    : Sax.Locators.Location)
987   is
988      Id2  : Sax.Locators.Location := Loc;
989   begin
990      if Id2 = No_Location then
991         Id2 := Parser.Current_Location;
992      end if;
993      Error (Parser, Create (Location (Parser, Id2) & ": " & Msg, Id2));
994   end Error;
995
996   procedure Error
997     (Parser : in out Sax_Reader'Class;
998      Msg    : String;
999      Id     : Token) is
1000   begin
1001      Error (Parser, Msg, Id.Location);
1002   end Error;
1003
1004   -----------
1005   -- Error --
1006   -----------
1007
1008   procedure Error (Parser  : in out Sax_Reader'Class; Msg : String) is
1009   begin
1010      Error (Parser, Msg, No_Location);
1011   end Error;
1012
1013   -------------
1014   -- Warning --
1015   -------------
1016
1017   procedure Warning
1018     (Parser : in out Sax_Reader'Class;
1019      Msg    : String;
1020      Loc    : Sax.Locators.Location)
1021   is
1022      Id2  : Sax.Locators.Location := Loc;
1023   begin
1024      if Id2 = No_Location then
1025         Id2 := Parser.Current_Location;
1026      end if;
1027      Warning (Parser, Create (Location (Parser, Id2) & ": " & Msg, Id2));
1028   end Warning;
1029
1030   procedure Warning
1031     (Parser : in out Sax_Reader'Class;
1032      Msg    : String;
1033      Id     : Token := Null_Token)
1034   is
1035      Id2 : Sax.Locators.Location := Id.Location;
1036   begin
1037      if Id2 = No_Location then
1038         Id2 := Parser.Current_Location;
1039      end if;
1040
1041      Warning (Parser, Create (Location (Parser, Id2) & ": " & Msg, Id2));
1042   end Warning;
1043
1044   -----------------
1045   -- Lookup_Char --
1046   -----------------
1047
1048   procedure Lookup_Char
1049     (Input   : in out Input_Source'Class;
1050      Parser  : in out Sax_Reader'Class;
1051      Char    : out Unicode_Char)
1052   is
1053   begin
1054      if Parser.Inputs /= null then
1055         if Eof (Parser.Inputs.Input.all) then
1056            if Debug_Input then
1057               Put_Line ("++Input Lookup_Char: <at end of stream>");
1058            end if;
1059            Char := Unicode_Char'Last;
1060         else
1061            Input_Sources.Next_Char (Parser.Inputs.Input.all, Char);
1062         end if;
1063      else
1064         if Eof (Input) then
1065            if Debug_Input then
1066               Put_Line ("++Input Lookup_Char 2: <at end of stream>");
1067            end if;
1068            Char := Unicode_Char'Last;
1069         else
1070            Input_Sources.Next_Char (Input, Char);
1071         end if;
1072      end if;
1073
1074      if Debug_Input then
1075         Put_Line ("++Input Lookup_Char: " & Unicode_Char'Image (Char));
1076      end if;
1077
1078      Parser.Lookup_Char := Char;
1079   end Lookup_Char;
1080
1081   ---------------
1082   -- Next_Char --
1083   ---------------
1084
1085   procedure Next_Char
1086     (Input   : in out Input_Source'Class;
1087      Parser  : in out Sax_Reader'Class)
1088   is
1089      procedure Internal (Stream : in out Input_Source'Class);
1090      pragma Inline (Internal);
1091
1092      --------------
1093      -- Internal --
1094      --------------
1095
1096      procedure Internal (Stream : in out Input_Source'Class) is
1097         C : Unicode_Char;
1098      begin
1099         if Parser.Lookup_Char /= Unicode_Char'Last then
1100            C := Parser.Lookup_Char;
1101            Parser.Lookup_Char := Unicode_Char'Last;
1102         else
1103            Next_Char (Stream, C);
1104         end if;
1105
1106         --  XML specs say that #xD#xA must be converted to one single #xA.
1107         --  A single #xD must be converted to one single #xA
1108
1109         if C = Carriage_Return then
1110            Parser.Previous_Char_Was_CR := True;
1111
1112            --  When expanding an internal entity, do not normalize the
1113            --  character (which has already been normalized when creating the
1114            --  entity, and therefore comes from a &#13; character ref
1115            if Parser.Inputs = null
1116              or else Parser.Inputs.External
1117            then
1118               Parser.Last_Read := Line_Feed;
1119            else
1120               Parser.Last_Read := Carriage_Return;
1121            end if;
1122
1123         elsif C = Line_Feed and then Parser.Previous_Char_Was_CR then
1124            Parser.Previous_Char_Was_CR := False;
1125
1126            --  When expanding an internal entity, do not strip the CRLF
1127            --  sequences: since they have already been stripped when the
1128            --  entity was created, the sequences that remain were created
1129            --  through character references &#13;&#10; and should therefore
1130            --  be kept as is.
1131            if Parser.Inputs = null
1132              or else Parser.Inputs.External
1133            then
1134               Next_Char (Stream, Parser);
1135            end if;
1136
1137         else
1138            Parser.Last_Read := C;
1139
1140            if Parser.Feature_Test_Valid_Chars then
1141               Test_Valid_Char (Parser, Parser.Last_Read, Null_Token);
1142            end if;
1143         end if;
1144      end Internal;
1145
1146      Input_A : Entity_Input_Source_Access;
1147
1148   begin
1149      --  First thing is to take into account location changes due to the
1150      --  previous character.
1151      if Parser.Last_Read_Is_Valid then
1152         if Parser.Last_Read = Line_Feed
1153           and then not Parser.Previous_Char_Was_CR
1154         then
1155            Set_Column_Number (Parser.Locator, 0);
1156            Increase_Line_Number (Parser.Locator);
1157         end if;
1158
1159      elsif Parser.Inputs /= null then
1160         Set_Location (Parser.Locator, Parser.Inputs.Save_Loc);
1161
1162         if Parser.Inputs.External then
1163            Parser.In_External_Entity := False;
1164            --  ??? Should test whether we are still in an external entity.
1165            --  However, this is only used for the <?xml?> PI, and at this
1166            --  point we have already read and discarded it, so it doesn't
1167            --  really matter.
1168         end if;
1169
1170         --  Insert the closed input at the end of the Close_Input list, so
1171         --  that the next call to Next_Token properly closes the entity.
1172         --  This can not be done here, otherwise End_Entity is called too
1173         --  early, and the error messages do not point to the right entity.
1174         if Parser.Close_Inputs = null then
1175            Parser.Close_Inputs := Parser.Inputs;
1176         else
1177            Input_A := Parser.Close_Inputs;
1178            while Input_A.Next /= null loop
1179               Input_A := Input_A.Next;
1180            end loop;
1181            Input_A.Next := Parser.Inputs;
1182         end if;
1183
1184         Input_A := Parser.Inputs;
1185         Parser.Inputs := Parser.Inputs.Next;
1186         Input_A.Next := null;
1187      end if;
1188
1189      --  Read the text of the entity if there is any
1190
1191      if Parser.Inputs /= null then
1192         if Parser.Inputs.Input = null
1193           or else Eof (Parser.Inputs.Input.all)
1194         then
1195            if Debug_Input then
1196               Put_Line ("++Input END OF INPUT");
1197            end if;
1198
1199            Parser.Last_Read := Unicode_Char'Val (16#00#);
1200            Parser.Last_Read_Is_Valid := False;
1201            return;
1202         end if;
1203
1204         Parser.Last_Read_Is_Valid := True;
1205         Increase_Column_Number (Parser.Locator);
1206         Internal (Parser.Inputs.Input.all);
1207
1208      --  Else read from the initial input stream
1209      elsif Eof (Input) then
1210         if Debug_Input then
1211            Put_Line
1212              ("++Input " & To_String (Parser.Locator) & " END_OF_INPUT");
1213         end if;
1214         Parser.Last_Read := 16#FFFF#;
1215         Parser.Last_Read_Is_Valid := False;
1216
1217      else
1218         Parser.Last_Read_Is_Valid := True;
1219         Increase_Column_Number (Parser.Locator);
1220         Internal (Input);
1221      end if;
1222
1223      if Debug_Input and then Parser.Last_Read_Is_Valid then
1224         Put ("++Input " & To_String (Parser.Locator)
1225              & "(" & Unicode_Char'Image (Parser.Last_Read) & ")= ");
1226         if Parser.Last_Read /= Line_Feed then
1227            Put_Line (Debug_Encode (Parser.Last_Read));
1228         else
1229            Put_Line ("Line_Feed");
1230         end if;
1231      end if;
1232
1233   exception
1234      when Unicode.CES.Invalid_Encoding =>
1235         Fatal_Error (Parser, Error_Invalid_Encoding);
1236   end Next_Char;
1237
1238   -------------------
1239   -- Put_In_Buffer --
1240   -------------------
1241
1242   procedure Put_In_Buffer
1243     (Parser : in out Sax_Reader'Class; Char : Unicode_Char)
1244   is
1245      W : constant Natural := Encoding.Width (Char);
1246      Tmp : Byte_Sequence_Access;
1247   begin
1248      --  Loop until we have enough memory to store the string
1249      while Parser.Buffer_Length + W > Parser.Buffer'Last loop
1250         Tmp := Parser.Buffer;
1251         Parser.Buffer := new Byte_Sequence
1252           (1 .. Tmp'Length * 2);
1253         Parser.Buffer (1 .. Tmp'Length) := Tmp.all;
1254         Free (Tmp);
1255      end loop;
1256
1257      Encoding.Encode (Char, Parser.Buffer.all, Parser.Buffer_Length);
1258   end Put_In_Buffer;
1259
1260   -------------------
1261   -- Put_In_Buffer --
1262   -------------------
1263
1264   procedure Put_In_Buffer
1265     (Parser : in out Sax_Reader'Class; Str : Byte_Sequence)
1266   is
1267      Tmp : Byte_Sequence_Access;
1268   begin
1269      --  Loop until we have enough memory to store the string
1270      while Parser.Buffer_Length + Str'Length > Parser.Buffer'Last loop
1271         Tmp := Parser.Buffer;
1272         Parser.Buffer := new Byte_Sequence (1 .. Tmp'Length * 2);
1273         Parser.Buffer (1 .. Tmp'Length) := Tmp.all;
1274         Free (Tmp);
1275      end loop;
1276
1277      Parser.Buffer
1278        (Parser.Buffer_Length + 1 .. Parser.Buffer_Length + Str'Length) := Str;
1279      Parser.Buffer_Length := Parser.Buffer_Length + Str'Length;
1280   end Put_In_Buffer;
1281
1282   ---------------------
1283   -- Test_Valid_Lang --
1284   ---------------------
1285
1286   procedure Test_Valid_Lang
1287     (Parser : in out Sax_Reader'Class; Lang : Byte_Sequence) is
1288   begin
1289      --  XML Errata 41: An empty xml:lang attribute is valid
1290      if Lang /= "" and then not Is_Valid_Language_Name (Lang) then
1291         Error (Parser, Error_Invalid_Language);
1292      end if;
1293   end Test_Valid_Lang;
1294
1295   ----------------------
1296   -- Test_Valid_Space --
1297   ----------------------
1298
1299   procedure Test_Valid_Space
1300     (Parser : in out Sax_Reader'Class; Space : Byte_Sequence) is
1301   begin
1302      if Space /= Default_Sequence
1303        and then Space /= Preserve_Sequence
1304      then
1305         Error (Parser, Error_Invalid_Space);
1306      end if;
1307   end Test_Valid_Space;
1308
1309   -------------------
1310   -- Is_Pubid_Char --
1311   -------------------
1312
1313   function Is_Pubid_Char (C : Unicode_Char) return Boolean is
1314   begin
1315      return C = Unicode.Names.Basic_Latin.Space
1316        or else C = Line_Feed
1317        or else C in Latin_Small_Letter_A .. Latin_Small_Letter_Z
1318        or else C in Latin_Capital_Letter_A .. Latin_Capital_Letter_Z
1319        or else C in Digit_Zero .. Digit_Nine
1320        or else C = Hyphen_Minus
1321        or else C = Apostrophe
1322        or else C = Left_Parenthesis
1323        or else C = Right_Parenthesis
1324        or else C = Plus_Sign
1325        or else C = Comma
1326        or else C = Full_Stop
1327        or else C = Solidus
1328        or else C = Unicode.Names.Basic_Latin.Colon
1329        or else C = Equals_Sign
1330        or else C = Question_Mark
1331        or else C = Semicolon
1332        or else C = Exclamation_Mark
1333        or else C = Asterisk
1334        or else C = Number_Sign
1335        or else C = Commercial_At
1336        or else C = Dollar_Sign
1337        or else C = Low_Line
1338        or else C = Percent_Sign;
1339   end Is_Pubid_Char;
1340
1341   ---------------------
1342   -- Test_Valid_Char --
1343   ---------------------
1344
1345   procedure Test_Valid_Char
1346     (Parser : in out Sax_Reader'Class; C : Unicode_Char; Loc : Token)
1347   is
1348      Id : Sax.Locators.Location;
1349   begin
1350      if not (C = 16#9#
1351              or else C = 16#A#
1352              or else C = 16#D#
1353              or else C in Unicode.Names.Basic_Latin.Space .. 16#D7FF#
1354              or else C in 16#E000# .. 16#FFFD#
1355              or else C in 16#10000# .. 16#10FFFF#)
1356      then
1357         if Loc /= Null_Token then
1358            Id := Loc.Location;
1359         else
1360            Id := No_Location;
1361            Id.Line := Get_Line_Number (Parser.Locator);
1362            Id.Column := Get_Column_Number (Parser.Locator);
1363         end if;
1364         Fatal_Error (Parser, Error_Invalid_Char & Unicode_Char'Image (C), Id);
1365      end if;
1366   end Test_Valid_Char;
1367
1368   -------------
1369   -- Find_NS --
1370   -------------
1371
1372   procedure Find_NS
1373     (Parser  : in out Sax_Reader'Class;
1374      Prefix  : Token;
1375      NS      : out XML_NS;
1376      Include_Default_NS : Boolean := True) is
1377   begin
1378      Find_NS
1379        (Parser,
1380         Find_Symbol (Parser, Parser.Buffer (Prefix.First .. Prefix.Last)),
1381         NS, Include_Default_NS);
1382      if NS = No_XML_NS then
1383         Fatal_Error
1384           (Parser, Error_Prefix_Not_Declared &
1385            Parser.Buffer (Prefix.First .. Prefix.Last));
1386      end if;
1387   end Find_NS;
1388
1389   -------------
1390   -- Find_NS --
1391   -------------
1392
1393   procedure Find_NS
1394     (Parser             : Sax_Reader'Class;
1395      Prefix             : Sax.Symbols.Symbol;
1396      NS                 : out XML_NS;
1397      Include_Default_NS : Boolean := True)
1398   is
1399      E : Element_Access := Parser.Current_Node;
1400   begin
1401      loop
1402         if E = null then
1403            NS := Find_NS_In_List
1404              (Parser.Default_Namespaces, Prefix, Include_Default_NS, False);
1405         else
1406            NS := Find_NS_In_List
1407              (E.Namespaces, Prefix, Include_Default_NS, True);
1408         end if;
1409
1410         exit when NS /= No_XML_NS or else E = null;
1411         E := E.Parent;
1412      end loop;
1413   end Find_NS;
1414
1415   ----------------------
1416   -- Find_NS_From_URI --
1417   ----------------------
1418
1419   procedure Find_NS_From_URI
1420     (Parser             : in out Sax_Reader'Class;
1421      URI                : Symbol;
1422      NS                 : out XML_NS)
1423   is
1424      E      : Element_Access := Parser.Current_Node;
1425   begin
1426      loop
1427         --  Search in the default namespaces
1428         if E = null then
1429            NS := Find_NS_From_URI_In_List (Parser.Default_Namespaces, URI);
1430         else
1431            NS := Find_NS_From_URI_In_List (E.Namespaces, URI);
1432         end if;
1433
1434         exit when NS /= No_XML_NS or else E = null;
1435         E := E.Parent;
1436      end loop;
1437   end Find_NS_From_URI;
1438
1439   ---------------------
1440   -- Qname_From_Name --
1441   ---------------------
1442
1443   function Qname_From_Name
1444     (Parser : Sax_Reader'Class; Prefix, Local_Name : Token)
1445      return Byte_Sequence is
1446   begin
1447      if Prefix = Null_Token then
1448         return Parser.Buffer (Local_Name.First .. Local_Name.Last);
1449      else
1450         return Parser.Buffer (Prefix.First .. Prefix.Last)
1451           & Colon_Sequence
1452           & Parser.Buffer (Local_Name.First .. Local_Name.Last);
1453      end if;
1454   end Qname_From_Name;
1455
1456   ---------------------
1457   -- Qname_From_Name --
1458   ---------------------
1459
1460   function Qname_From_Name
1461     (Prefix, Local_Name : Symbol) return Byte_Sequence is
1462   begin
1463      if Prefix = No_Symbol or else Prefix = Empty_String then
1464         return Get (Local_Name).all;
1465      else
1466         return Get (Prefix).all & Colon_Sequence & Get (Local_Name).all;
1467      end if;
1468   end Qname_From_Name;
1469
1470   -----------------------
1471   -- Prefix_From_Qname --
1472   -----------------------
1473
1474   function Prefix_From_Qname (Qname : Byte_Sequence) return Byte_Sequence is
1475      Index    : Natural := Qname'First;
1476      C        : Unicode_Char;
1477      Previous : Natural;
1478   begin
1479      while Index <= Qname'Last loop
1480         Previous := Index;
1481         Encoding.Read (Qname, Index, C);
1482         if C = Unicode.Names.Basic_Latin.Colon then
1483            return Qname (Qname'First .. Previous - 1);
1484         end if;
1485      end loop;
1486      return "";
1487   end Prefix_From_Qname;
1488
1489   ----------------------------
1490   -- Add_Namespace_No_Event --
1491   ----------------------------
1492
1493   procedure Add_Namespace_No_Event
1494     (Parser : in out Sax_Reader'Class;
1495      Prefix, URI : Symbol) is
1496   begin
1497      Add_Namespace (Parser, null, Prefix, URI, Report_Event => False);
1498   end Add_Namespace_No_Event;
1499
1500   -------------------
1501   -- Add_Namespace --
1502   -------------------
1503
1504   procedure Add_Namespace
1505     (Parser       : in out Sax_Reader'Class;
1506      Node         : Element_Access;
1507      Prefix       : Symbol;
1508      URI          : Symbol;
1509      Report_Event : Boolean := True)
1510   is
1511      Same_As : XML_NS := No_XML_NS;
1512   begin
1513      --  Was there a previous definition of this namespace ?
1514      Find_NS_From_URI (Parser, URI, Same_As);
1515
1516      if Node = null then
1517         Add_NS_To_List (Parser.Default_Namespaces, Same_As, Prefix, URI);
1518      else
1519         Add_NS_To_List (Node.Namespaces, Same_As, Prefix, URI);
1520      end if;
1521
1522      if Report_Event then
1523         Start_Prefix_Mapping (Parser, Prefix => Prefix, URI => URI);
1524      end if;
1525   end Add_Namespace;
1526
1527   ------------------
1528   -- Close_Inputs --
1529   ------------------
1530
1531   procedure Close_Inputs
1532     (Parser : in out Sax_Reader'Class;
1533      Inputs : in out Entity_Input_Source_Access)
1534   is
1535      procedure Free is new Unchecked_Deallocation
1536        (Entity_Input_Source, Entity_Input_Source_Access);
1537      Input_A : Entity_Input_Source_Access;
1538   begin
1539      while Inputs /= null loop
1540         --  ??? Could use Input_Sources.Locator.Free
1541         if Inputs.Input /= null then
1542            Close (Inputs.Input.all);
1543            Unchecked_Free (Inputs.Input);
1544         end if;
1545
1546         --  not in string context
1547         if not Parser.State.Ignore_Special then
1548            End_Entity (Parser, Inputs.Name);
1549         end if;
1550
1551         Input_A := Inputs;
1552         Inputs := Inputs.Next;
1553         Free (Input_A);
1554      end loop;
1555   end Close_Inputs;
1556
1557   -----------------
1558   -- Debug_Print --
1559   -----------------
1560
1561   procedure Debug_Print (Parser : Sax_Reader'Class; Id : Token) is
1562   begin
1563      Put ("++Lex (" & Parser.State.Name & ") at "
1564           & To_String (Parser.Locator)
1565           & " (" & Token_Type'Image (Id.Typ) & ") at "
1566           & To_String (Id.Location));
1567      if Parser.State.Ignore_Special then
1568         Put (" (in string)");
1569      end if;
1570
1571      if Id.Typ = Space then
1572         declare
1573            J : Natural := Id.First;
1574            C : Unicode_Char;
1575         begin
1576            Put (" --");
1577            while J <= Id.Last loop
1578               Encoding.Read (Parser.Buffer.all, J, C);
1579               Put (Unicode_Char'Image (C));
1580            end loop;
1581            Put ("--");
1582         end;
1583
1584      elsif Id.Last >= Id.First then
1585         Put (" --" & Parser.Buffer (Id.First .. Id.Last) & "--");
1586      end if;
1587
1588      Put_Line
1589        (" buffer="
1590         & Parser.Buffer (Parser.Buffer'First .. Parser.Buffer_Length)
1591         & "--");
1592   end Debug_Print;
1593
1594   ----------------
1595   -- Next_Token --
1596   ----------------
1597
1598   procedure Next_Token
1599     (Input          : in out Input_Source'Class;
1600      Parser         : in out Sax_Reader'Class;
1601      Id             : out Token;
1602      Coalesce_Space : Boolean := False)
1603   is
1604      function Looking_At (Str : Byte_Sequence) return Boolean;
1605      --  True if the next characters read (including the current one) in the
1606      --  stream match Str. Characters read are stored in the buffer
1607
1608      procedure Handle_Comments;
1609      --  <!- has been seen in the buffer, check if this is a comment and
1610      --  handle it appropriately. The first character after '<!-' has
1611      --  already been read on calling this subprogram.
1612      --  Raise an error message when the end of the input stream is seen.
1613
1614      procedure Handle_Character_Ref;
1615      --  '&#' has been seen in the buffer, check if this is a character
1616      --  entity reference and handle it appropriately
1617
1618      procedure Handle_Less_Than_Sign;
1619      --  Handle '<', '<!', '<!--', '<![',... sequences
1620
1621      procedure Handle_Entity_Ref;
1622      --  '&' has been read (as well as the following character). Skips till
1623      --  the end of the entity, ie ';'. Saves the name of the entity in the
1624      --  buffer.
1625      --  Parser.Last_Read is left to ';', but it is not put in the buffer.
1626
1627      ----------------
1628      -- Looking_At --
1629      ----------------
1630
1631      function Looking_At (Str : Byte_Sequence) return Boolean is
1632         C : Unicode_Char;
1633         Index : Natural := Str'First;
1634      begin
1635         while Index <= Str'Last loop
1636            Encoding.Read (Str, Index, C);
1637
1638            if C /= Parser.Last_Read
1639              or else not Parser.Last_Read_Is_Valid
1640            then
1641               return False;
1642            end if;
1643            Put_In_Buffer (Parser, Parser.Last_Read);
1644            Next_Char (Input, Parser);
1645         end loop;
1646         return True;
1647      end Looking_At;
1648
1649      ---------------------
1650      -- Handle_Comments --
1651      ---------------------
1652
1653      procedure Handle_Comments is
1654      begin
1655         if not Eof (Input) then
1656            Next_Char (Input, Parser);
1657            if Parser.Last_Read = Hyphen_Minus then
1658               Id.Typ := Comment;  --  In case we reach the eof in the loop
1659               --  Note that if the file ends exactly with '<!--', we get
1660               --  an empty text. But at least we will detect the error.
1661               --  It also fails if we have a non-terminated comment and the
1662               --  last character in the file is '-'. Doesn't seem worth
1663               --  paying the cost for some extra tests to handle this.
1664               loop
1665                  Next_Char (Input, Parser);
1666                  if End_Of_Stream (Parser) then
1667                     Fatal_Error (Parser, Error_Comment_End, Id);
1668                     Id.Typ := End_Of_Input;
1669                     return;
1670
1671                  elsif Parser.Last_Read = Hyphen_Minus then
1672                     Next_Char (Input, Parser);
1673                     if End_Of_Stream (Parser) then
1674                        Fatal_Error (Parser, Error_Comment_Unterminated);
1675                        Id.Typ := End_Of_Input;
1676                        return;
1677
1678                     elsif Parser.Last_Read = Hyphen_Minus then
1679                        if Parser.Last_Read_Is_Valid then
1680                           Next_Char (Input, Parser);
1681                           if Parser.Last_Read = Greater_Than_Sign then
1682                              exit;
1683                           end if;
1684                        end if;
1685                        Parser.Buffer_Length := Id.First - 1;
1686                        Id.Location.Line := Get_Line_Number (Parser.Locator);
1687                        Id.Location.Column :=
1688                          Get_Column_Number (Parser.Locator) - 2;
1689                        --  2 = 2 * Hyphen_Minus
1690                        Fatal_Error (Parser, Error_Comment_Dash_Dash, Id);
1691                     else
1692                        Put_In_Buffer (Parser, Hyphen_Minus);
1693                        Put_In_Buffer (Parser, Parser.Last_Read);
1694                     end if;
1695                  else
1696                     Put_In_Buffer (Parser, Parser.Last_Read);
1697                  end if;
1698               end loop;
1699
1700               if Parser.Feature_Validation
1701                 and then System_Id (Parser) /= Id.Location.System_Id
1702               then
1703                  Error (Parser, Error_Entity_Self_Contained, Id);
1704               end if;
1705
1706               Next_Char (Input, Parser);
1707               return;
1708            end if;
1709         end if;
1710         Fatal_Error (Parser, Error_Unexpected_Chars1);
1711         Id.Typ := End_Of_Input;
1712      end Handle_Comments;
1713
1714      --------------------------
1715      -- Handle_Character_Ref --
1716      --------------------------
1717
1718      procedure Handle_Character_Ref is
1719         Val : Unicode_Char := 0;
1720      begin
1721         if Parser.State.Expand_Character_Ref then
1722            Id.Typ := Text;
1723         else
1724            Id.Typ := Char_Ref;
1725         end if;
1726
1727         if Parser.Current_Node = null
1728           and then Parser.State.Name = Default_State.Name
1729         then
1730            Fatal_Error (Parser, Error_Charref_Toplevel, Id);
1731         end if;
1732
1733         Next_Char (Input, Parser);
1734         if Parser.Last_Read = Latin_Small_Letter_X then
1735            Next_Char (Input, Parser);
1736
1737            while Parser.Last_Read_Is_Valid
1738              and then Parser.Last_Read /= Semicolon
1739            loop
1740               if Parser.Last_Read in Digit_Zero .. Digit_Nine then
1741                  Val := Val * 16 + Parser.Last_Read - Digit_Zero;
1742
1743               elsif Parser.Last_Read in
1744                 Latin_Capital_Letter_A .. Latin_Capital_Letter_F
1745               then
1746                  Val := Val * 16 + Parser.Last_Read - Latin_Capital_Letter_A
1747                    + 10;
1748
1749               elsif Parser.Last_Read in
1750                 Latin_Small_Letter_A .. Latin_Small_Letter_F
1751               then
1752                  Val := Val * 16 + Parser.Last_Read - Latin_Small_Letter_A
1753                    + 10;
1754
1755               else
1756                  Id.Location.Line := Get_Line_Number (Parser.Locator);
1757                  Id.Location.Column := Get_Column_Number (Parser.Locator);
1758                  Fatal_Error
1759                    (Parser, Error_Charref_Invalid_Char
1760                     & Debug_Encode (Parser.Last_Read), Id);
1761               end if;
1762               Next_Char (Input, Parser);
1763            end loop;
1764         else
1765            while Parser.Last_Read_Is_Valid
1766              and then Parser.Last_Read /= Semicolon
1767            loop
1768               if Parser.Last_Read in Digit_Zero .. Digit_Nine then
1769                  Val := Val * 10 + Parser.Last_Read - Digit_Zero;
1770               else
1771                  Id.Location.Line := Get_Line_Number (Parser.Locator);
1772                  Id.Location.Column := Get_Column_Number (Parser.Locator);
1773                  Fatal_Error
1774                    (Parser, Error_Charref_Invalid_Char
1775                     & Debug_Encode (Parser.Last_Read), Id);
1776               end if;
1777               Next_Char (Input, Parser);
1778            end loop;
1779         end if;
1780
1781         if Parser.Feature_Test_Valid_Chars then
1782            Test_Valid_Char (Parser, Val, Id);
1783         end if;
1784         Put_In_Buffer (Parser, Val);
1785         Next_Char (Input, Parser);
1786         Id.From_Entity := True;
1787      end Handle_Character_Ref;
1788
1789      ---------------------------
1790      -- Handle_Less_Than_Sign --
1791      ---------------------------
1792
1793      procedure Handle_Less_Than_Sign is
1794         Num_Closing_Bracket : Natural;
1795         Id2 : Token;
1796      begin
1797         Id.Typ := Start_Of_Tag;
1798         Next_Char (Input, Parser);
1799         case Parser.Last_Read is
1800            when Solidus =>
1801               Id.Typ := Start_Of_End_Tag;
1802               Next_Char (Input, Parser);
1803
1804            when Exclamation_Mark =>
1805               Next_Char (Input, Parser);
1806               if Parser.Last_Read = Hyphen_Minus then
1807                  Handle_Comments;
1808
1809               elsif Looking_At (Doctype_Sequence) then
1810                  Reset_Buffer (Parser, Id);
1811                  Id.Typ := Doctype_Start;
1812
1813               elsif Parser.Last_Read = Left_Square_Bracket then
1814                  Next_Char (Input, Parser);
1815
1816                  if Parser.Last_Read = Latin_Capital_Letter_C then
1817
1818                     if not Looking_At (Cdata_Sequence) then
1819                        Fatal_Error (Parser, Error_Invalid_Declaration, Id);
1820                     end if;
1821
1822                     if Parser.Last_Read /= Left_Square_Bracket then
1823                        Fatal_Error (Parser, Error_Cdata_Unterminated, Id);
1824                     end if;
1825
1826                     Reset_Buffer (Parser, Id);
1827                     Id.Typ := Cdata_Section;
1828                     Num_Closing_Bracket := 1;
1829                     loop
1830                        Next_Char (Input, Parser);
1831
1832                        if End_Of_Stream (Parser) then
1833                           Id.Typ := End_Of_Input;
1834                           Fatal_Error (Parser, Error_Cdata_End, Id);
1835                           return;
1836
1837                        elsif Parser.Last_Read_Is_Valid then
1838                           Put_In_Buffer (Parser, Parser.Last_Read);
1839
1840                           if Parser.Last_Read = Right_Square_Bracket then
1841                              Num_Closing_Bracket := Num_Closing_Bracket + 1;
1842
1843                           elsif Parser.Last_Read = Greater_Than_Sign
1844                             and then Num_Closing_Bracket >= 2
1845                           then
1846                              Parser.Buffer_Length := Parser.Buffer_Length
1847                                - 2 * Encoding.Width (Right_Square_Bracket)
1848                                - Encoding.Width (Greater_Than_Sign);
1849                              exit;
1850
1851                           else
1852                              Num_Closing_Bracket := 0;
1853                           end if;
1854                        end if;
1855                     end loop;
1856
1857                     if Id.Location.System_Id /= System_Id (Parser) then
1858                        Fatal_Error (Parser, Error_Entity_Self_Contained, Id);
1859                     end if;
1860
1861                     if not Eof (Input) then
1862                        Next_Char (Input, Parser);
1863                     else
1864                        Parser.Last_Read := 16#FFFF#;
1865                     end if;
1866
1867                  else
1868                     while Is_White_Space (Parser.Last_Read) loop
1869                        Next_Char (Input, Parser);
1870                     end loop;
1871
1872                     if Parser.Last_Read = Latin_Capital_Letter_I
1873                       or else Parser.Last_Read = Percent_Sign
1874                     then
1875                        --  Skip spaces: if we are expending a parameter
1876                        --  entity, it must start with spaces (4.4.8)
1877                        Next_Token_Skip_Spaces (Input, Parser, Id2);
1878                        if Parser.Buffer (Id2.First .. Id2.Last) =
1879                          Include_Sequence
1880                        then
1881                           Reset_Buffer (Parser, Id2);
1882                           Id.Typ := Include;
1883                        elsif Parser.Buffer (Id2.First .. Id2.Last) =
1884                          Ignore_Sequence
1885                        then
1886                           Reset_Buffer (Parser, Id2);
1887                           Id.Typ := Ignore;
1888                        else
1889                           Fatal_Error (Parser, Error_Invalid_Declaration, Id);
1890                        end if;
1891
1892                        if not Parser.State.In_DTD
1893                          or else not Parser.In_External_Entity
1894                        then
1895                           Fatal_Error
1896                             (Parser, Error_Conditional_Location, Id);
1897                        end if;
1898
1899                        Next_Token_Skip_Spaces (Input, Parser, Id2);
1900                        if Id2.Typ /= Internal_DTD_Start then
1901                           Fatal_Error (Parser, Error_Conditional_Syntax, Id2);
1902                        end if;
1903
1904                     elsif Parser.State.In_DTD then
1905                        Id.Typ := Start_Conditional;
1906                     else
1907                        Fatal_Error (Parser, Error_Unexpected_Chars1, Id);
1908                     end if;
1909                  end if;
1910
1911               elsif not Parser.State.In_DTD then
1912                  Fatal_Error (Parser, Error_Unexpected_Chars1, Id);
1913
1914               elsif Looking_At (Attlist_Sequence)
1915               --  Since parameter entities are expanded with spaces, we can
1916               --  have one following ATTLIST immediately
1917                 and then (Is_White_Space (Parser.Last_Read)
1918                           or else Parser.Last_Read = Percent_Sign)
1919               then
1920                  Reset_Buffer (Parser, Id);
1921                  Id.Typ := Attlist_Def;
1922
1923               elsif Parser.Last_Read = Latin_Capital_Letter_E then
1924                  Next_Char (Input, Parser);
1925                  if Looking_At (Ntity_Sequence) then
1926                     Reset_Buffer (Parser, Id);
1927                     Id.Typ := Entity_Def;
1928
1929                  elsif Looking_At (Element_Sequence) then
1930                     Reset_Buffer (Parser, Id);
1931                     Id.Typ := Element_Def;
1932
1933                  else
1934                     Fatal_Error (Parser, Error_Unknown_Declaration);
1935                  end if;
1936
1937               elsif Looking_At (Notation_Sequence)
1938               --  Since parameter entities are expanded with spaces, we can
1939               --  have one following NOTATION immediately
1940                 and then (Is_White_Space (Parser.Last_Read)
1941                           or else Parser.Last_Read = Percent_Sign)
1942               then
1943                  Reset_Buffer (Parser, Id);
1944                  Id.Typ := Notation;
1945
1946               else
1947                  Put_In_Buffer (Parser, Less_Than_Sign);
1948                  Put_In_Buffer (Parser, Exclamation_Mark);
1949                  Id.Typ := Text;
1950               end if;
1951
1952            when Question_Mark =>
1953               Id.Typ := Start_Of_PI;
1954               Next_Char (Input, Parser);
1955
1956            when others => null;
1957         end case;
1958      end Handle_Less_Than_Sign;
1959
1960      -----------------------
1961      -- Handle_Entity_Ref --
1962      -----------------------
1963
1964      procedure Handle_Entity_Ref is
1965      begin
1966         if not Parser.Last_Read_Is_Valid
1967           or else Is_Valid_Name_Startchar
1968             (Parser.Last_Read, Parser.XML_Version)
1969         then
1970            while Parser.Last_Read_Is_Valid
1971              and then Parser.Last_Read /= Semicolon
1972              and then Is_Valid_Name_Char
1973                (Parser.Last_Read, Parser.XML_Version)
1974            loop
1975               Put_In_Buffer (Parser, Parser.Last_Read);
1976               Next_Char (Input, Parser);
1977            end loop;
1978
1979            if not Parser.Last_Read_Is_Valid
1980              or else System_Id (Parser) /= Id.Location.System_Id
1981            then
1982               Fatal_Error (Parser, Error_Entity_Self_Contained, Id);
1983            end if;
1984
1985            if Parser.Last_Read /= Semicolon then
1986               Fatal_Error (Parser, Error_Entityref_Unterminated, Id);
1987            end if;
1988
1989            Id.From_Entity := True;
1990
1991         else
1992            Fatal_Error (Parser, Error_Entity_Name, Id);
1993         end if;
1994      end Handle_Entity_Ref;
1995
1996      type Entity_Ref is (None, Entity, Param_Entity);
1997      Is_Entity_Ref : Entity_Ref := None;
1998      Old_System_Id : Symbol;
1999   begin
2000      if not Parser.Last_Read_Is_Valid then
2001         Next_Char (Input, Parser);
2002      end if;
2003
2004      Id.First := Parser.Buffer_Length + 1;
2005      Id.Last := Parser.Buffer_Length;
2006      Id.Typ := End_Of_Input;
2007      Id.Location.System_Id := System_Id (Parser);
2008      Id.Location.Public_Id := Public_Id (Parser);
2009      Id.Location.Line      := Get_Line_Number (Parser.Locator);
2010      Id.Location.Column    := Get_Column_Number (Parser.Locator);
2011      Id.From_Entity := False;
2012
2013      Close_Inputs (Parser, Parser.Close_Inputs);
2014
2015      if Eof (Input) and then Parser.Last_Read = 16#FFFF# then
2016         Id.Location.Column := Id.Location.Column + 1;
2017         return;
2018      end if;
2019
2020      if Is_White_Space (Parser.Last_Read) then
2021         Id.Typ := Space;
2022         loop
2023            Put_In_Buffer (Parser, Parser.Last_Read);
2024            Next_Char (Input, Parser);
2025            exit when not Is_White_Space (Parser.Last_Read);
2026         end loop;
2027
2028      --  If we are ignoring special characters
2029      elsif Id.Typ = End_Of_Input
2030        and then (Parser.Ignore_State_Special
2031                  or else Parser.State.Ignore_Special)
2032        and then not Parser.State.Detect_End_Of_PI
2033      then
2034         Id.Typ := Text;
2035         Parser.Ignore_State_Special := True;
2036         while Parser.Last_Read_Is_Valid loop
2037            exit when Parser.Last_Read = Ampersand
2038              and then (Parser.State.Expand_Entities
2039                        or else Parser.State.Expand_Character_Ref);
2040            exit when Parser.Last_Read = Percent_Sign
2041              and then Parser.State.Expand_Param_Entities;
2042            exit when (Parser.Last_Read = Apostrophe
2043                       or else Parser.Last_Read = Quotation_Mark)
2044              and then Parser.State.Handle_Strings
2045              and then (Parser.Inputs = null
2046                        or else Parser.Inputs.Handle_Strings);
2047            exit when Parser.Last_Read = Less_Than_Sign
2048              and then Parser.State.Less_Special;
2049            Put_In_Buffer (Parser, Parser.Last_Read);
2050            Next_Char (Input, Parser);
2051         end loop;
2052      end if;
2053
2054      --  If we haven't found a non-empty token yet
2055      if Id.Typ = End_Of_Input
2056        or else Id.First > Parser.Buffer_Length
2057      then
2058         case Parser.Last_Read is
2059            when Less_Than_Sign =>
2060               if Parser.State.Less_Special then
2061                  Id.Typ := Start_Of_Tag;
2062                  Next_Char (Input, Parser);
2063               elsif Parser.State.Detect_End_Of_PI then
2064                  Put_In_Buffer (Parser, Parser.Last_Read);
2065                  Id.Typ := Text;
2066                  Next_Char (Input, Parser);
2067               else
2068                  Handle_Less_Than_Sign;
2069               end if;
2070
2071            when Question_Mark =>
2072               if Eof (Input) then
2073                  Put_In_Buffer (Parser, Parser.Last_Read);
2074                  Id.Typ := Text;
2075               else
2076                  Next_Char (Input, Parser);
2077                  if Parser.Last_Read = Greater_Than_Sign then
2078                     Id.Typ := End_Of_PI;
2079                     Next_Char (Input, Parser);
2080                  elsif Parser.Last_Read = Question_Mark then
2081                     Put_In_Buffer (Parser, Question_Mark);
2082                     Id.Typ := Text;
2083                  else
2084                     Put_In_Buffer (Parser, Question_Mark);
2085                     Id.Typ := Text;
2086                  end if;
2087               end if;
2088
2089            when Greater_Than_Sign =>
2090               if Parser.State.Greater_Special then
2091                  Id.Typ := End_Of_Tag;
2092               else
2093                  Put_In_Buffer (Parser, Parser.Last_Read);
2094                  Id.Typ := Text;
2095               end if;
2096               Next_Char (Input, Parser);
2097
2098            when Equals_Sign =>
2099               if Parser.State.In_Tag then
2100                  Id.Typ := Equal;
2101               else
2102                  Put_In_Buffer (Parser, Parser.Last_Read);
2103                  Id.Typ := Text;
2104               end if;
2105               Next_Char (Input, Parser);
2106
2107            when Unicode.Names.Basic_Latin.Colon =>
2108               if Parser.State.In_Tag then
2109                  if Parser.Feature_Namespace then
2110                     Id.Typ := Colon;
2111                  else
2112                     Put_In_Buffer (Parser, Parser.Last_Read);
2113                     Id.Typ := Name;
2114                  end if;
2115               else
2116                  Put_In_Buffer (Parser, Parser.Last_Read);
2117                  Id.Typ := Text;
2118               end if;
2119               Next_Char (Input, Parser);
2120
2121            when Ampersand =>
2122               Id.Typ := Text; --  So that eof would at least report an error
2123               if Eof (Input)
2124                 and then Parser.State.Expand_Entities
2125               then
2126                  Fatal_Error (Parser, Error_Entityref_Unterminated, Id);
2127               end if;
2128
2129               Next_Char (Input, Parser);
2130               if Parser.Last_Read = Number_Sign
2131                 and then (Parser.State.Expand_Character_Ref
2132                           or Parser.State.Report_Character_Ref)
2133               then
2134                  Handle_Character_Ref;
2135                  if System_Id (Parser) /= Id.Location.System_Id then
2136                     Fatal_Error (Parser, Error_Entity_Self_Contained, Id);
2137                  end if;
2138
2139               elsif Parser.Last_Read /= Number_Sign
2140                 and then Parser.State.Expand_Entities
2141               then
2142                  Handle_Entity_Ref;
2143                  Is_Entity_Ref := Entity;
2144
2145               elsif Parser.Last_Read /= Number_Sign
2146                 and then Parser.State.Ignore_Special   --  string context
2147                 and then not Parser.State.Detect_End_Of_PI  --  not in PI
2148               then
2149                  --  Inside a string (entity value), we still need to check
2150                  --  that the '&' marks the beginning of an entity reference.
2151                  Put_In_Buffer (Parser, Ampersand);
2152                  Handle_Entity_Ref;
2153                  Put_In_Buffer (Parser, Parser.Last_Read);
2154                  Next_Char (Input, Parser);
2155
2156               else
2157                  Put_In_Buffer (Parser, Ampersand);
2158               end if;
2159
2160            when Percent_Sign =>
2161               Put_In_Buffer (Parser, Parser.Last_Read);
2162               Id.Typ := Text;
2163
2164               Next_Char (Input, Parser);
2165               if Parser.State.Expand_Param_Entities then
2166                  while Parser.Last_Read /= Semicolon
2167                    and then Is_Valid_Name_Char
2168                      (Parser.Last_Read, Parser.XML_Version)
2169                  loop
2170                     Put_In_Buffer (Parser, Parser.Last_Read);
2171                     Next_Char (Input, Parser);
2172                  end loop;
2173
2174                  if Parser.Last_Read /= Semicolon then
2175                     Fatal_Error (Parser, Error_Entityref_Unterminated);
2176                  end if;
2177                  Is_Entity_Ref := Param_Entity;
2178               end if;
2179
2180            when Quotation_Mark =>
2181               if Parser.State.Handle_Strings then
2182                  Id.Typ := Double_String_Delimiter;
2183                  Next_Char (Input, Parser);
2184               else
2185                  Id.Typ := Text;
2186                  Put_In_Buffer (Parser, Parser.Last_Read);
2187                  Next_Char (Input, Parser);
2188               end if;
2189
2190            when Apostrophe =>
2191               if Parser.State.Handle_Strings then
2192                  Id.Typ := Single_String_Delimiter;
2193                  Next_Char (Input, Parser);
2194               else
2195                  Id.Typ := Text;
2196                  Put_In_Buffer (Parser, Parser.Last_Read);
2197                  Next_Char (Input, Parser);
2198               end if;
2199
2200            when Left_Square_Bracket =>
2201               if Parser.State.In_DTD then
2202                  Id.Typ := Internal_DTD_Start;
2203               else
2204                  Put_In_Buffer (Parser, Parser.Last_Read);
2205                  Id.Typ := Text;
2206               end if;
2207               Next_Char (Input, Parser);
2208
2209            when Right_Square_Bracket =>
2210               if Parser.State.In_DTD
2211                 and then not Parser.In_External_Entity
2212               then
2213                  Id.Typ := Internal_DTD_End;
2214                  loop
2215                     Next_Char (Input, Parser);
2216                     exit when Parser.Last_Read = Greater_Than_Sign;
2217
2218                     if Parser.Last_Read_Is_Valid
2219                       and then not Is_White_Space (Parser.Last_Read)
2220                     then
2221                        Fatal_Error (Parser, Error_Unexpected_Chars2, Id);
2222                     end if;
2223                  end loop;
2224                  Next_Char (Input, Parser);
2225
2226               --  In string context ?
2227               elsif Parser.State.Ignore_Special then
2228                  Id.Typ := Text;
2229                  Put_In_Buffer (Parser, Parser.Last_Read);
2230                  Next_Char (Input, Parser);
2231
2232               else
2233                  declare
2234                     Num_Bracket : Natural := 1;
2235                  begin
2236                     Id.Typ := Text;
2237
2238                     loop
2239                        Put_In_Buffer (Parser, Parser.Last_Read);
2240                        Next_Char (Input, Parser);
2241
2242                        if Parser.Last_Read = Right_Square_Bracket then
2243                           Num_Bracket := Num_Bracket + 1;
2244
2245                        elsif Num_Bracket >= 2
2246                          and Parser.Last_Read = Greater_Than_Sign
2247                        then
2248                           if Parser.State.In_DTD
2249                             and then Parser.In_External_Entity
2250                           then
2251                              Id.Typ := End_Conditional;
2252                              Reset_Buffer (Parser, Id);
2253                              Next_Char (Input, Parser);
2254                              exit;
2255                           else
2256                              Id.Location.Column :=
2257                                Id.Location.Column + Num_Bracket - 2;
2258                              Fatal_Error
2259                                (Parser, Error_Unexpected_Chars3, Id);
2260                           end if;
2261                        else
2262                           exit;
2263                        end if;
2264                     end loop;
2265                  end;
2266               end if;
2267
2268            when Solidus =>
2269               Id.Typ := Text;
2270               Next_Char (Input, Parser);
2271               if Parser.State.Greater_Special
2272                 and then Parser.Last_Read = Greater_Than_Sign
2273               then
2274                  Id.Typ := End_Of_Start_Tag;
2275                  Next_Char (Input, Parser);
2276               else
2277                  Put_In_Buffer (Parser, Solidus);
2278               end if;
2279
2280            when others =>
2281               if Parser.State.Recognize_External then
2282
2283                  if Parser.Last_Read = Latin_Capital_Letter_A then
2284                     if Looking_At (Any_Sequence) then
2285                        Reset_Buffer (Parser, Id);
2286                        Id.Typ := Any;
2287                     else
2288                        Id.Typ := Name;
2289                     end if;
2290
2291                  elsif Parser.Last_Read = Latin_Capital_Letter_E then
2292                     if Looking_At (Empty_Sequence) then
2293                        Reset_Buffer (Parser, Id);
2294                        Id.Typ := Empty;
2295                     else
2296                        Id.Typ := Name;
2297                     end if;
2298
2299                  elsif Parser.Last_Read = Latin_Capital_Letter_N then
2300                     if Looking_At (Ndata_Sequence) then
2301                        Reset_Buffer (Parser, Id);
2302                        Id.Typ := Ndata;
2303                     else
2304                        Id.Typ := Name;
2305                     end if;
2306
2307                  elsif Parser.Last_Read = Latin_Capital_Letter_P then
2308                     if Looking_At (Public_Sequence) then
2309                        Reset_Buffer (Parser, Id);
2310                        Id.Typ := Public;
2311                     else
2312                        Id.Typ := Name;
2313                     end if;
2314
2315                  elsif Parser.Last_Read = Latin_Capital_Letter_S then
2316                     if Looking_At (System_Sequence) then
2317                        Reset_Buffer (Parser, Id);
2318                        Id.Typ := System;
2319                     else
2320                        Id.Typ := Name;
2321                     end if;
2322                  end if;
2323               end if;
2324
2325               if Parser.State.Report_Parenthesis
2326                 and then Parser.Last_Read = Left_Parenthesis
2327               then
2328                  Reset_Buffer (Parser, Id);
2329                  Id.Typ := Open_Paren;
2330                  Next_Char (Input, Parser);
2331                  return;
2332               end if;
2333
2334               if Parser.State.In_Attlist then
2335                  if Parser.Last_Read = Latin_Capital_Letter_C then
2336                     if Looking_At (Cdata_Sequence) then
2337                        Id.Typ := Cdata;
2338                     else
2339                        Id.Typ := Name;
2340                     end if;
2341
2342                  elsif Parser.Last_Read = Latin_Capital_Letter_E
2343                    and then Looking_At (Entit_Sequence)
2344                  then
2345                     if Looking_At (Ies_Sequence) then
2346                        Id.Typ := Entities;
2347                     elsif Parser.Last_Read = Latin_Capital_Letter_Y then
2348                        Id.Typ := Entity;
2349                        Put_In_Buffer (Parser, Parser.Last_Read);
2350                        Next_Char (Input, Parser);
2351                     else
2352                        Fatal_Error (Parser, Error_Attlist_Type);
2353                     end if;
2354
2355                  elsif Parser.Last_Read = Latin_Capital_Letter_I
2356                    and then Looking_At (Id_Sequence)
2357                  then
2358                     if Looking_At (Ref_Sequence) then
2359                        if Parser.Last_Read = Latin_Capital_Letter_S then
2360                           Id.Typ := Idrefs;
2361                           Put_In_Buffer (Parser, Parser.Last_Read);
2362                           Next_Char (Input, Parser);
2363                        else
2364                           Id.Typ := Idref;
2365                        end if;
2366                     else
2367                        Id.Typ := Id_Type;
2368                     end if;
2369
2370                  elsif Parser.Last_Read = Latin_Capital_Letter_N then
2371                     Next_Char (Input, Parser);
2372                     if Looking_At (Mtoken_Sequence) then
2373                        if Parser.Last_Read = Latin_Capital_Letter_S then
2374                           Id.Typ := Nmtokens;
2375                           Next_Char (Input, Parser);
2376                        else
2377                           Id.Typ := Nmtoken;
2378                        end if;
2379                     elsif Looking_At (Otation_Sequence) then
2380                        Id.Typ := Notation;
2381                     else
2382                        Fatal_Error (Parser, Error_Attlist_Type);
2383                     end if;
2384
2385                  elsif Parser.Last_Read = Number_Sign then
2386                     Put_In_Buffer (Parser, Parser.Last_Read);
2387                     Next_Char (Input, Parser);
2388                     if Looking_At (Implied_Sequence) then
2389                        Id.Typ := Implied;
2390                     elsif Looking_At (Required_Sequence) then
2391                        Id.Typ := Required;
2392                     elsif Looking_At (Fixed_Sequence) then
2393                        Id.Typ := Fixed;
2394                     else
2395                        Fatal_Error (Parser, Error_Attlist_DefaultDecl);
2396                     end if;
2397                  end if;
2398               end if;
2399         end case;
2400
2401         --  try to coalesce as many things as possible into a single
2402         --  text event
2403         if Id.Typ = End_Of_Input then
2404            if Is_Valid_Name_Startchar (Parser.Last_Read, Parser.XML_Version)
2405              or else Parser.Last_Read = Low_Line
2406            then
2407               Id.Typ := Name;
2408               Put_In_Buffer (Parser, Parser.Last_Read);
2409               Next_Char (Input, Parser);
2410            else
2411               Id.Typ := Text;
2412            end if;
2413         end if;
2414
2415         if Id.Typ = Name and then not Coalesce_Space then
2416            while
2417              (Parser.Last_Read /= Unicode.Names.Basic_Latin.Colon
2418               or else not Parser.Feature_Namespace)
2419              and then
2420                Is_Valid_NCname_Char (Parser.Last_Read, Parser.XML_Version)
2421            loop
2422               Put_In_Buffer (Parser, Parser.Last_Read);
2423               Next_Char (Input, Parser);
2424            end loop;
2425
2426         elsif Is_Entity_Ref = None
2427           and then (Id.Typ = Text
2428                     or else (Coalesce_Space and then Id.Typ = Name))
2429         then
2430            if not Parser.Last_Read_Is_Valid then
2431               Next_Char (Input, Parser);
2432
2433            else
2434               loop
2435                  if Is_White_Space (Parser.Last_Read) then
2436                     exit when not Coalesce_Space;
2437
2438                  else
2439                     case Parser.Last_Read is
2440                     when Greater_Than_Sign =>
2441                        exit when Parser.State.Greater_Special;
2442
2443                     when Less_Than_Sign             --  Start of new tag
2444                        | Ampersand                  --  for Entities
2445                        | Right_Square_Bracket       --  for CData   ]]>
2446                        | Quotation_Mark             --  for attributes a="..."
2447                        | Apostrophe                 --  for attributes a='...'
2448                        | Equals_Sign =>             --  for attributes
2449                        exit;
2450
2451                     when Solidus =>                   --  For <NODE/>
2452                        declare
2453                           C : Unicode_Char;
2454                        begin
2455                           Lookup_Char (Input, Parser, C);
2456                           exit when C = Greater_Than_Sign
2457                             or else Id.Typ = Name;
2458                        end;
2459
2460                     when Percent_Sign =>
2461                        exit when Parser.State.Expand_Param_Entities;
2462
2463                     when Question_Mark =>
2464                        exit when Parser.State.Detect_End_Of_PI;
2465
2466                     when others =>
2467                        null;
2468                     end case;
2469                  end if;
2470
2471                  Put_In_Buffer (Parser, Parser.Last_Read);
2472                  Next_Char (Input, Parser);
2473                  exit when not Parser.Last_Read_Is_Valid;
2474               end loop;
2475            end if;
2476         end if;
2477
2478         Parser.Ignore_State_Special := False;
2479      end if;
2480
2481      if Coalesce_Space and then Id.Typ = Space then
2482         --  First character is necessarily not a space, so we'll change the
2483         --  type of the token to text
2484         declare
2485            Save_Length : constant Natural := Parser.Buffer_Length;
2486         begin
2487            while Parser.Last_Read_Is_Valid
2488              and then (not Parser.State.Greater_Special
2489                   or else Parser.Last_Read /= Greater_Than_Sign)
2490              and then Parser.Last_Read /= Less_Than_Sign
2491              and then Parser.Last_Read /= Ampersand
2492              and then (not Parser.State.Expand_Param_Entities
2493                        or else Parser.Last_Read /= Percent_Sign)
2494              and then Parser.Last_Read /= Equals_Sign
2495              and then Parser.Last_Read /= Quotation_Mark
2496              and then Parser.Last_Read /= Right_Square_Bracket
2497              and then Parser.Last_Read /= Apostrophe
2498              and then Parser.Last_Read /= Solidus
2499              and then (Parser.Last_Read /= Question_Mark
2500                        or else not Parser.State.Detect_End_Of_PI)
2501            loop
2502               Put_In_Buffer (Parser, Parser.Last_Read);
2503               Next_Char (Input, Parser);
2504            end loop;
2505
2506            --  Special case for ']': since the parser needs to detect whether
2507            --  this is the beginning of ']]>', this will be done in the next
2508            --  call to Next_Token. However, we shouldn't report the spaces as
2509            --  Ignorable_Whitespace in this case.
2510
2511            if Parser.Last_Read = Right_Square_Bracket
2512              or else Parser.Buffer_Length /= Save_Length
2513            then
2514               Id.Typ := Text;
2515            end if;
2516         end;
2517      end if;
2518
2519      Id.Last := Parser.Buffer_Length;
2520
2521      if Debug_Lexical then
2522         Debug_Print (Parser, Id);
2523      end if;
2524
2525      --  Internal entities should be processes inline
2526
2527      if Is_Entity_Ref /= None then
2528         declare
2529            N : constant Symbol := Find_Symbol (Parser, Id);
2530            V : constant Entity_Entry_Access := Get (Parser.Entities, N);
2531         begin
2532            Reset_Buffer (Parser, Id);
2533            if N = Parser.Lt_Sequence then
2534               Put_In_Buffer (Parser, Less_Than_Sign);
2535               Id.Typ := Text;
2536               Id.Last := Parser.Buffer_Length;
2537               Next_Char (Input, Parser);
2538
2539            elsif N = Parser.Gt_Sequence then
2540               Put_In_Buffer (Parser, Greater_Than_Sign);
2541               Id.Typ := Text;
2542               Id.Last := Parser.Buffer_Length;
2543               Next_Char (Input, Parser);
2544
2545            elsif N = Parser.Amp_Sequence then
2546               Put_In_Buffer (Parser, Ampersand);
2547               Id.Typ := Text;
2548               Id.Last := Parser.Buffer_Length;
2549               Next_Char (Input, Parser);
2550
2551            elsif N = Parser.Apos_Sequence then
2552               Put_In_Buffer (Parser, Apostrophe);
2553               Id.Typ := Text;
2554               Id.Last := Parser.Buffer_Length;
2555               Next_Char (Input, Parser);
2556
2557            elsif N = Parser.Quot_Sequence then
2558               Put_In_Buffer (Parser, Quotation_Mark);
2559               Id.Typ := Text;
2560               Id.Last := Parser.Buffer_Length;
2561               Next_Char (Input, Parser);
2562
2563            elsif V = null then
2564               declare
2565                  Sym : constant Cst_Byte_Sequence_Access := Get (N);
2566               begin
2567                  Skipped_Entity (Parser, N);
2568                  if N = Parser.Symbol_Ampersand
2569                    or else N = Parser.Symbol_Percent
2570                  then
2571                     Fatal_Error (Parser, Error_Entity_Name & " '"
2572                                  & Sym.all & "'", Id);
2573
2574                  elsif Sym (Sym'First) = '%' then
2575                     Error (Parser, Error_Entity_Undefined & " '"
2576                            & Sym.all & "'", Id);
2577
2578                  elsif not Parser.In_External_Entity then
2579                     --  WF Entity Declared
2580                     Fatal_Error
2581                       (Parser, Error_Entity_Undefined & " '"
2582                        & Sym.all & ''', Id);
2583
2584                  else
2585                     --  if Parser.Feature_Validation then
2586                     --  VC Entity Declared
2587                     Error
2588                       (Parser, Error_Entity_Undefined & " '"
2589                        & Sym.all & ''', Id);
2590                  end if;
2591               end;
2592
2593               Id.Typ := Text;
2594               Id.Last := Id.First - 1;
2595               Next_Char (Input, Parser);
2596
2597            else
2598               if Parser.Standalone_Document
2599                 and then V.External_Declaration
2600               then
2601                  --  4.1 WF Entity Declared
2602                  Fatal_Error
2603                    (Parser, Error_Entity_Not_Standalone, Id);
2604               end if;
2605
2606               if Is_Entity_Ref = Entity
2607                 and then Parser.Current_Node = null
2608                 and then not Parser.State.In_DTD
2609               then
2610                  Fatal_Error (Parser, Error_Entity_Toplevel, Id);
2611
2612               --  Else if we are in the internal subset of the DTD, and in
2613               --  a context other than a declaration
2614               elsif Is_Entity_Ref = Param_Entity
2615                 and then not Parser.In_External_Entity
2616                 and then Parser.State.Name /= DTD_State.Name
2617               then
2618                  Fatal_Error (Parser, Error_ParamEntity_In_Attribute, Id);
2619               end if;
2620
2621               Close_Inputs (Parser, Parser.Close_Inputs);
2622
2623               --  not in string context
2624               if not Parser.State.Ignore_Special then
2625                  Start_Entity (Parser, N);
2626               end if;
2627
2628               if V.Already_Read then
2629                  Fatal_Error (Parser, Error_Entity_Self_Ref, Id);
2630               end if;
2631
2632               V.Already_Read := True;
2633
2634               Parser.Element_Id := Parser.Element_Id + 1;
2635
2636               if Debug_Internal then
2637                  Put_Line ("Expanding entity " & Get (N).all & " External="
2638                            & V.External'Img
2639                            & " Value=" & Get (V.Value).all);
2640               end if;
2641
2642               Old_System_Id := Get_System_Id (Parser.Locator);
2643
2644               Parser.Inputs := new Entity_Input_Source'
2645                 (External       => V.External,
2646                  Name           => N,
2647                  Input          => null,
2648                  Save_Loc       => Get_Location (Parser.Locator),
2649                  System_Id      => Find_Symbol
2650                    (Parser, Get (System_Id (Parser)).all & '#' & Get (N).all),
2651                  Public_Id      => Find_Symbol
2652                    (Parser, Get (Public_Id (Parser)).all & '#' & Get (N).all),
2653                  Handle_Strings => not Parser.State.Ignore_Special,
2654                  Next           => Parser.Inputs);
2655
2656               if V.External then
2657                  if Parser.State.Name = Attlist_Str_Def_State.Name
2658                    or else Parser.State.Name = Attr_Value_State.Name
2659                  then
2660                     Fatal_Error (Parser, Error_Attribute_External_Entity, Id);
2661                  end if;
2662
2663                  declare
2664                     URI : constant Symbol :=
2665                       Resolve_URI (Parser, Old_System_Id, V.Value);
2666                  begin
2667                     Parser.Inputs.Input := Resolve_Entity
2668                       (Parser,
2669                        Public_Id => Get (V.Public).all,
2670                        System_Id => Get (URI).all);
2671
2672                     --  If either there is no entity resolver or if the
2673                     --  standard algorithm should be used
2674
2675                     if Parser.Inputs.Input = null then
2676                        Parser.Inputs.Input := new File_Input;
2677                        Open (Get (URI).all,
2678                              File_Input (Parser.Inputs.Input.all));
2679                        Set_Public_Id
2680                          (Parser.Inputs.Input.all, Get (V.Value).all);
2681                        Set_System_Id (Parser.Inputs.Input.all, Get (URI).all);
2682                     end if;
2683
2684                     Parser.Inputs.Name := Find_Symbol
2685                       (Parser, Get_System_Id (Parser.Inputs.Input.all));
2686
2687                     Set_System_Id (Parser.Locator, URI);
2688                     Set_Public_Id (Parser.Locator, V.Value);
2689
2690                  exception
2691                     when Name_Error =>
2692                        Error
2693                          (Parser, Error_External_Entity_Not_Found
2694                           & Get (URI).all, Id);
2695                        Unchecked_Free (Parser.Inputs.Input);
2696                     when E : Mismatching_BOM =>
2697                        Error (Parser, Exception_Message (E));
2698                        Unchecked_Free (Parser.Inputs.Input);
2699                  end;
2700
2701                  Parser.In_External_Entity := True;
2702               else
2703                  Parser.Inputs.Input := new String_Input;
2704
2705                  --  4.4.8: Expansion of parameter entities must include
2706                  --  a leading and trailing space, unless we are within an
2707                  --  entity value.
2708                  if Is_Entity_Ref = Param_Entity
2709                    and then not Parser.State.Ignore_Special
2710                  then
2711                     Open (' ' & Get (V.Value).all & ' ',
2712                           Encoding,
2713                           String_Input (Parser.Inputs.Input.all));
2714                  else
2715                     Open (Get (V.Value).all, Encoding,
2716                           String_Input (Parser.Inputs.Input.all));
2717                  end if;
2718                  Set_Public_Id
2719                    (Parser.Locator,
2720                     Find_Symbol (Parser, "entity " & Get (N).all));
2721                  Set_Public_Id
2722                    (Parser.Inputs.Input.all,
2723                     Get (Get_Public_Id (Parser.Locator)).all);
2724               end if;
2725
2726               if Parser.Inputs.Input = null then
2727                  Skipped_Entity (Parser, V.Name);
2728                  Next_Char (Input, Parser);
2729                  Next_Token (Input, Parser, Id);
2730
2731               else
2732                  Set_Line_Number (Parser.Locator, 1);
2733                  Set_Column_Number
2734                    (Parser.Locator,
2735                     Prolog_Size (Parser.Inputs.Input.all));
2736
2737                  Next_Char (Input, Parser);
2738                  Next_Token (Input, Parser, Id);
2739
2740                  V.Already_Read := False;
2741               end if;
2742            end if;
2743         end;
2744      end if;
2745   end Next_Token;
2746
2747   ----------------------------
2748   -- Next_Token_Skip_Spaces --
2749   ----------------------------
2750
2751   procedure Next_Token_Skip_Spaces
2752     (Input  : in out Input_Sources.Input_Source'Class;
2753      Parser : in out Sax_Reader'Class;
2754      Id     : out Token;
2755      Must_Have : Boolean := False) is
2756   begin
2757      Next_Token (Input, Parser, Id);
2758      if Must_Have and then Id.Typ /= Space then
2759         Fatal_Error (Parser, Error_Expecting_Space, Id);
2760      end if;
2761      while Id.Typ = Space loop
2762         Reset_Buffer (Parser, Id);
2763         Next_Token (Input, Parser, Id);
2764      end loop;
2765   end Next_Token_Skip_Spaces;
2766
2767   -------------------------------
2768   -- Next_NS_Token_Skip_Spaces --
2769   -------------------------------
2770
2771   procedure Next_NS_Token_Skip_Spaces
2772     (Input   : in out Input_Sources.Input_Source'Class;
2773      Parser  : in out Sax_Reader'Class;
2774      NS_Id   : out Token;
2775      Name_Id : out Token)
2776   is
2777      Id : Token;
2778      Saved_In_Tag : constant Boolean := Parser.State.In_Tag;
2779   begin
2780      NS_Id := Null_Token;
2781      Next_Token (Input, Parser, Id);
2782      while Id.Typ = Space loop
2783         Reset_Buffer (Parser, Id);
2784         Next_Token (Input, Parser, Id);
2785      end loop;
2786      Name_Id := Id;
2787
2788      if Name_Id.Typ = Colon then
2789         --  An empty namespace, used in the XML testsuite ?
2790         NS_Id := Null_Token;
2791         Reset_Buffer (Parser, Id);
2792         Next_Token (Input, Parser, Name_Id);
2793
2794      elsif Name_Id.Typ = Name then
2795         if Parser.Last_Read_Is_Valid
2796           and then Parser.Last_Read = Unicode.Names.Basic_Latin.Colon
2797           and then Parser.Feature_Namespace
2798         then
2799            Parser.State.In_Tag := True;  --  Get COLON on its own
2800            Next_Token (Input, Parser, Id);
2801            Parser.State.In_Tag := Saved_In_Tag;
2802
2803            NS_Id := Name_Id;
2804            Reset_Buffer (Parser, Id);
2805            Next_Token (Input, Parser, Name_Id);
2806         end if;
2807      end if;
2808   end Next_NS_Token_Skip_Spaces;
2809
2810   ------------------
2811   -- Reset_Buffer --
2812   ------------------
2813
2814   procedure Reset_Buffer
2815     (Parser : in out Sax_Reader'Class; Id : Token := Null_Token) is
2816   begin
2817      Parser.Buffer_Length := Id.First - 1;
2818   end Reset_Buffer;
2819
2820   ---------------
2821   -- Set_State --
2822   ---------------
2823
2824   procedure Set_State
2825     (Parser : in out Sax_Reader'Class; State : Parser_State) is
2826   begin
2827      Parser.State := State;
2828   end Set_State;
2829
2830   ---------------
2831   -- Get_State --
2832   ---------------
2833
2834   function Get_State (Parser : Sax_Reader'Class) return Parser_State is
2835   begin
2836      return Parser.State;
2837   end Get_State;
2838
2839   -------------------------
2840   -- Parse_Element_Model --
2841   -------------------------
2842
2843   procedure Parse_Element_Model
2844     (Input   : in out Input_Source'Class;
2845      Parser  : in out Sax_Reader'Class;
2846      Result  : out Element_Model_Ptr;
2847      Attlist : Boolean := False;
2848      Open_Was_Read : Boolean)
2849   is
2850      --  ??? Would be nice to get rid of this hard-coded limitation in stacks
2851      Stack_Size : constant Natural := 1024;
2852      Operand_Stack : Element_Model_Array (1 .. Stack_Size);
2853      Operand_Index : Natural := Operand_Stack'First;
2854      Operator_Stack : array (1 .. Stack_Size) of Unicode_Char;
2855      Operator_Index : Natural := Operator_Stack'First;
2856      Expect_Operator : Boolean := not Open_Was_Read;
2857
2858      procedure Parse_Element_Model_From_Entity (Name : Symbol);
2859      --  Parse the element model defined in the entity Name, and leave the
2860      --  contents on the stacks.
2861
2862      procedure Parse
2863        (Input         : in out Input_Source'Class;
2864         Result        : out Element_Model_Ptr;
2865         Open_Was_Read : Boolean;
2866         Is_Recursive_Call : Boolean);
2867      --  Parse the content model read in Input
2868      --  Is_Recursive_Call should be true when called from itself or from
2869      --  Parse_Element_Model_From_Entity.
2870
2871      -------------------------------------
2872      -- Parse_Element_Model_From_Entity --
2873      -------------------------------------
2874
2875      procedure Parse_Element_Model_From_Entity (Name : Symbol) is
2876         Loc : Sax.Locators.Location;
2877         Last : constant Unicode_Char := Parser.Last_Read;
2878         Input_S : String_Input;
2879         Val : constant Entity_Entry_Access := Get (Parser.Entities, Name);
2880         M : Element_Model_Ptr;
2881      begin
2882         if Val = null then
2883            Fatal_Error
2884              (Parser,
2885               Error_Entity_Undefined & ' ' & Get (Name).all);
2886
2887         elsif Val.Value = Empty_String then
2888            return;
2889
2890         else
2891            Loc := Get_Location (Parser.Locator);
2892            Set_Line_Number (Parser.Locator, 1);
2893            Set_Column_Number (Parser.Locator, 1);
2894            Set_Public_Id
2895              (Parser.Locator,
2896               Find_Symbol (Parser, "entity " & Get (Name).all));
2897
2898            Open (Get (Val.Value).all, Encoding, Input_S);
2899            Next_Char (Input_S, Parser);
2900            Parse (Input_S, M, False, True);
2901            --  Parse_Element_Model (Input_S, Parser, M, Attlist, False);
2902            Close (Input_S);
2903
2904            Set_Location (Parser.Locator, Loc);
2905            Parser.Last_Read := Last;
2906         end if;
2907      end Parse_Element_Model_From_Entity;
2908
2909      -----------
2910      -- Parse --
2911      -----------
2912
2913      procedure Parse
2914        (Input : in out Input_Source'Class;
2915         Result : out Element_Model_Ptr;
2916         Open_Was_Read : Boolean;
2917         Is_Recursive_Call : Boolean)
2918      is
2919         Num_Items : Positive;
2920         Current_Item, Current_Operand : Natural;
2921         Start_Sub : Natural := Parser.Buffer_Length + 1;
2922         M : Element_Model_Ptr;
2923         Found : Boolean;
2924         Start_Id : constant Symbol := System_Id (Parser);
2925         Start_Token : Token;
2926         Test_Multiplier : Boolean;
2927         Can_Be_Mixed : Boolean;
2928         Num_Parenthesis : Integer := 0;
2929         Already_Displayed_Self_Contained_Error : Boolean := False;
2930
2931      begin
2932         Start_Token                 := Null_Token;
2933         Start_Token.Location.Line   := Get_Line_Number (Parser.Locator);
2934         Start_Token.Location.Column := Get_Column_Number (Parser.Locator);
2935
2936         if Open_Was_Read then
2937            Start_Token.Location.Column := Start_Token.Location.Column - 1;
2938         end if;
2939
2940         while Is_White_Space (Parser.Last_Read) loop
2941            Next_Char (Input, Parser);
2942         end loop;
2943
2944         loop
2945            if End_Of_Stream (Parser) then
2946               if not Is_Recursive_Call then
2947                  for J in Operand_Stack'First .. Operand_Index - 1 loop
2948                     Free (Operand_Stack (J));
2949                  end loop;
2950
2951               elsif Num_Parenthesis /= 0 then
2952                  Fatal_Error (Parser, Error_Entity_Nested, Start_Token);
2953
2954               elsif Parser.Buffer_Length >= Start_Sub then
2955                  Operand_Stack (Operand_Index) :=
2956                    new Element_Model (Element_Ref);
2957                  Operand_Stack (Operand_Index).Name := Find_Symbol
2958                    (Parser,
2959                     Parser.Buffer (Start_Sub .. Parser.Buffer_Length));
2960                  Operand_Index := Operand_Index + 1;
2961                  Parser.Buffer_Length := Start_Sub - 1;
2962               end if;
2963
2964               exit;
2965            end if;
2966
2967            if Parser.Feature_Validation
2968              and then (not Parser.Last_Read_Is_Valid
2969                        or else System_Id (Parser) /= Start_Id)
2970              and then not Already_Displayed_Self_Contained_Error
2971            then
2972               Already_Displayed_Self_Contained_Error := True;
2973               Error (Parser, Error_Entity_Self_Contained, Start_Token);
2974            end if;
2975
2976            Test_Multiplier := False;
2977
2978            --  Process the operator
2979            case Parser.Last_Read is
2980               when Left_Parenthesis =>
2981                  Operator_Stack (Operator_Index) := Parser.Last_Read;
2982                  Operator_Index := Operator_Index + 1;
2983                  Expect_Operator := False;
2984                  Next_Char (Input, Parser);
2985                  Num_Parenthesis := Num_Parenthesis + 1;
2986
2987               when Right_Parenthesis =>
2988                  Num_Parenthesis := Num_Parenthesis - 1;
2989                  Num_Items := 1;
2990                  Current_Item := Operator_Index - 1;
2991                  Current_Operand := Operand_Index - 1;
2992                  Can_Be_Mixed :=  Current_Operand >= Operand_Stack'First
2993                    and then
2994                    (Operand_Stack (Current_Operand).Content = Character_Data
2995                     or else Operand_Stack (Current_Operand).Content
2996                     = Element_Ref);
2997
2998                  if Current_Operand >= Operand_Stack'First
2999                    and then Is_Mixed (Operand_Stack (Current_Operand))
3000                  then
3001                     Fatal_Error (Parser, Error_Mixed_Contents);
3002                  end if;
3003
3004                  while Current_Item >= Operator_Stack'First
3005                    and then
3006                      Operator_Stack (Current_Item) /= Left_Parenthesis
3007                  loop
3008                     if Operator_Stack (Current_Item) /= Comma
3009                       and then Operator_Stack (Current_Item) /= Vertical_Line
3010                     then
3011                        Fatal_Error
3012                          (Parser, Error_Invalid_Content_Model, Start_Token);
3013                     end if;
3014
3015                     if Current_Operand = 0 then
3016                        Fatal_Error
3017                          (Parser, Error_Missing_Operand, Start_Token);
3018                     end if;
3019
3020                     Current_Operand := Current_Operand - 1;
3021
3022                     if Current_Operand < Operand_Stack'First then
3023                        Fatal_Error
3024                          (Parser, Error_Invalid_Content_Model, Start_Token);
3025                     end if;
3026
3027                     if Operand_Stack (Current_Operand).Content
3028                       /= Character_Data and then
3029                       Operand_Stack (Current_Operand).Content /= Element_Ref
3030                     then
3031                        Can_Be_Mixed := False;
3032                     end if;
3033
3034                     if Is_Mixed (Operand_Stack (Current_Operand)) then
3035                        Fatal_Error (Parser, Error_Mixed_Contents);
3036                     end if;
3037
3038                     Num_Items := Num_Items + 1;
3039                     Current_Item := Current_Item - 1;
3040                  end loop;
3041
3042                  if Current_Item < Operator_Stack'First then
3043                     Fatal_Error
3044                       (Parser, Error_Invalid_Content_Model, Start_Token);
3045                  end if;
3046
3047                  if Current_Operand < Operand_Stack'First then
3048                     Fatal_Error
3049                       (Parser, Error_Content_Model_Empty_List, Start_Token);
3050                  end if;
3051
3052                  if Operator_Stack (Operator_Index - 1) = Comma then
3053                     M := new Element_Model (Sequence);
3054                  else
3055                     if not Can_Be_Mixed
3056                       and then Operand_Stack (Current_Operand).Content
3057                       = Character_Data
3058                     then
3059                        Fatal_Error
3060                          (Parser, Error_Content_Model_Nested_Groups);
3061                     end if;
3062
3063                     M := new Element_Model (Any_Of);
3064                  end if;
3065                  M.List := new Element_Model_Array (1 .. Num_Items);
3066                  for J in Current_Operand .. Operand_Index - 1 loop
3067                     M.List (J - Current_Operand + 1) := Operand_Stack (J);
3068                  end loop;
3069                  Operand_Index := Current_Operand + 1;
3070                  Operand_Stack (Current_Operand) := M;
3071                  Operator_Index := Current_Item;
3072                  Expect_Operator := False;
3073                  Test_Multiplier := True;
3074                  Next_Char (Input, Parser);
3075
3076                  if not End_Of_Stream (Parser)
3077                    and then Current_Operand >= Operand_Stack'First
3078                    and then Is_Mixed (Operand_Stack (Current_Operand))
3079                    and then Operand_Stack (Current_Operand).List'Length >= 2
3080                    and then Parser.Last_Read /= Asterisk
3081                  then
3082                     Fatal_Error
3083                       (Parser, Error_Content_Model_Closing_Paren);
3084                  end if;
3085
3086               when Comma | Vertical_Line =>
3087                  if Attlist and then Parser.Last_Read = Comma then
3088                     Fatal_Error (Parser, Error_Attlist_Invalid_Enum);
3089                  end if;
3090
3091                  if Parser.Last_Read = Comma
3092                    and then Operand_Index - 1 < Operand_Stack'First
3093                  then
3094                     Fatal_Error (Parser, Error_Content_Model_Invalid_Seq);
3095                  end if;
3096
3097                  if Parser.Last_Read = Comma
3098                    and then Operator_Stack (Operator_Index - 1)
3099                    = Left_Parenthesis
3100                    and then Operand_Stack (Operand_Index - 1).Content
3101                    = Character_Data
3102                  then
3103                     Fatal_Error (Parser, Error_Content_Model_Pcdata);
3104                  end if;
3105
3106                  if Operator_Index = Operator_Stack'First
3107                    or else
3108                    (Operator_Stack (Operator_Index - 1) /= Parser.Last_Read
3109                     and then
3110                     Operator_Stack (Operator_Index - 1) /=
3111                       Left_Parenthesis)
3112                  then
3113                     Fatal_Error (Parser, Error_Content_Model_Mixing);
3114                  end if;
3115                  Operator_Stack (Operator_Index) := Parser.Last_Read;
3116                  Operator_Index := Operator_Index + 1;
3117                  Expect_Operator := False;
3118                  Next_Char (Input, Parser);
3119
3120               when Asterisk | Question_Mark | Plus_Sign =>
3121                  Fatal_Error
3122                    (Parser, Error_Content_Model_Invalid_Multiplier,
3123                     Start_Token);
3124
3125               when Number_Sign =>
3126                  if Expect_Operator then
3127                     Fatal_Error
3128                       (Parser, Error_Content_Model_Invalid_Start,
3129                        Start_Token);
3130                  end if;
3131                  Expect_Operator := True;
3132
3133                  --  #PCDATA can only be the first element of a choice list
3134                  --  ??? Note that in that case the Choice model can only be a
3135                  --  list of names, not a parenthesis expression.
3136                  Start_Sub := Parser.Buffer_Length + 1;
3137
3138                  Next_Char (Input, Parser);
3139                  Found := (Parser.Last_Read = Latin_Capital_Letter_P);
3140                  if Found then
3141                     Next_Char (Input, Parser);
3142                     Found := (Parser.Last_Read = Latin_Capital_Letter_C);
3143                     if Found then
3144                        Next_Char (Input, Parser);
3145                        Found := (Parser.Last_Read = Latin_Capital_Letter_D);
3146                        if Found then
3147                           Next_Char (Input, Parser);
3148                           Found := Parser.Last_Read = Latin_Capital_Letter_A;
3149                           if Found then
3150                              Next_Char (Input, Parser);
3151                              Found :=
3152                                (Parser.Last_Read = Latin_Capital_Letter_T);
3153                              if Found then
3154                                 Next_Char (Input, Parser);
3155                                 Found :=
3156                                   (Parser.Last_Read = Latin_Capital_Letter_A);
3157                              end if;
3158                           end if;
3159                        end if;
3160                     end if;
3161                  end if;
3162
3163                  if not Found then
3164                     Fatal_Error
3165                       (Parser, Error_Content_Model_Invalid_Seq, Start_Token);
3166                  end if;
3167
3168                  if Operator_Stack (Operator_Index - 1)
3169                    /= Left_Parenthesis
3170                  then
3171                     Fatal_Error (Parser, Error_Content_Model_Pcdata_First);
3172                  end if;
3173
3174                  Operand_Stack (Operand_Index) :=
3175                    new Element_Model (Character_Data);
3176                  Operand_Index := Operand_Index + 1;
3177                  Parser.Buffer_Length := Start_Sub - 1;
3178                  Next_Char (Input, Parser);
3179
3180               when Percent_Sign =>
3181                  if not Parser.In_External_Entity
3182                    and then Parser.State.Name /= DTD_State.Name
3183                  then
3184                     Fatal_Error (Parser, Error_ParamEntity_In_Attribute);
3185                  end if;
3186
3187                  Start_Sub := Parser.Buffer_Length + 1;
3188
3189                  while Parser.Last_Read_Is_Valid
3190                    and then Parser.Last_Read /= Semicolon
3191                  loop
3192                     Put_In_Buffer (Parser, Parser.Last_Read);
3193                     Next_Char (Input, Parser);
3194                  end loop;
3195
3196                  Parse_Element_Model_From_Entity
3197                    (Find_Symbol
3198                       (Parser,
3199                        Parser.Buffer (Start_Sub .. Parser.Buffer_Length)));
3200                  Parser.Buffer_Length := Start_Sub - 1;
3201                  Next_Char (Input, Parser);
3202
3203               when others =>
3204                  if Parser.Last_Read_Is_Valid then
3205                     if Expect_Operator then
3206                        Fatal_Error
3207                          (Parser, Error_Content_Model_Expect_Operator);
3208                     end if;
3209                     Expect_Operator := True;
3210
3211                     --  ??? Should test Is_Nmtoken
3212                     Start_Sub := Parser.Buffer_Length + 1;
3213
3214                     while Parser.Last_Read = Unicode.Names.Basic_Latin.Colon
3215                       or else Is_Valid_Name_Char
3216                         (Parser.Last_Read, Parser.XML_Version)
3217                     loop
3218                        Put_In_Buffer (Parser, Parser.Last_Read);
3219                        Next_Char (Input, Parser);
3220                     end loop;
3221
3222                     if Start_Sub > Parser.Buffer_Length then
3223                        Error (Parser, Error_Content_Model_Invalid_Name
3224                               & Debug_Encode (Parser.Last_Read),
3225                               Start_Token);
3226                     end if;
3227
3228                     Operand_Stack (Operand_Index) :=
3229                       new Element_Model (Element_Ref);
3230                     Operand_Stack (Operand_Index).Name := Find_Symbol
3231                       (Parser,
3232                        Parser.Buffer (Start_Sub .. Parser.Buffer_Length));
3233                     Operand_Index := Operand_Index + 1;
3234                     Parser.Buffer_Length := Start_Sub - 1;
3235                     Test_Multiplier := True;
3236
3237                  else
3238                     --  Could happen with improper entity nesting
3239                     Next_Char (Input, Parser);
3240                  end if;
3241
3242            end case;
3243
3244            if Test_Multiplier then
3245               case Parser.Last_Read is
3246                  when Asterisk =>
3247                     if Operand_Index = Operand_Stack'First then
3248                        Fatal_Error
3249                          (Parser, Error_Content_Model_Invalid_Multiplier);
3250                     end if;
3251                     Operand_Stack (Operand_Index - 1) := new Element_Model'
3252                       (Repeat, 0, Positive'Last,
3253                        Operand_Stack (Operand_Index - 1));
3254                     Expect_Operator := True;
3255                     Next_Char (Input, Parser);
3256
3257                  when Plus_Sign =>
3258                     if Operand_Index = Operand_Stack'First then
3259                        Fatal_Error
3260                          (Parser, Error_Content_Model_Invalid_Multiplier);
3261                     end if;
3262                     if Is_Mixed (Operand_Stack (Operand_Index - 1)) then
3263                        Fatal_Error
3264                          (Parser, Error_Content_Model_Pcdata_Occurrence);
3265                     end if;
3266
3267                     Operand_Stack (Operand_Index - 1) := new Element_Model'
3268                       (Repeat, 1,
3269                        Positive'Last, Operand_Stack (Operand_Index - 1));
3270                     Expect_Operator := True;
3271                     Next_Char (Input, Parser);
3272
3273                  when Question_Mark =>
3274                     if Operand_Index = Operand_Stack'First then
3275                        Fatal_Error
3276                          (Parser, Error_Content_Model_Invalid_Multiplier);
3277                     end if;
3278                     if Is_Mixed (Operand_Stack (Operand_Index - 1)) then
3279                        Fatal_Error
3280                          (Parser, Error_Content_Model_Pcdata_Occurrence);
3281                     end if;
3282                     Operand_Stack (Operand_Index - 1) := new Element_Model'
3283                       (Repeat, 0, 1, Operand_Stack (Operand_Index - 1));
3284                     Expect_Operator := True;
3285                     Next_Char (Input, Parser);
3286
3287                  when others => null;
3288               end case;
3289            end if;
3290
3291            exit when Operator_Index = Operator_Stack'First
3292              and then Operand_Index = Operand_Stack'First + 1;
3293
3294            while Is_White_Space (Parser.Last_Read) loop
3295               Next_Char (Input, Parser);
3296            end loop;
3297         end loop;
3298
3299         if not Is_Recursive_Call then
3300            if Operator_Index /= Operator_Stack'First
3301              or else Operand_Index /= Operand_Stack'First + 1
3302            then
3303               Error
3304                 (Parser, Error_Content_Model_Invalid, Start_Token);
3305            end if;
3306
3307            Result := Operand_Stack (Operand_Stack'First);
3308
3309         elsif Num_Parenthesis /= 0 then
3310            Error (Parser, Error_Entity_Nested, Start_Token);
3311         end if;
3312
3313      exception
3314         when others =>
3315            if not Is_Recursive_Call then
3316               for J in Operand_Stack'First .. Operand_Index - 1 loop
3317                  Free (Operand_Stack (J));
3318               end loop;
3319            end if;
3320            raise;
3321      end Parse;
3322
3323   begin
3324      if Open_Was_Read then
3325         --  Insert the opening parenthesis into the operators stack
3326         Operator_Stack (Operator_Stack'First) := Left_Parenthesis;
3327         Operator_Index := Operator_Index + 1;
3328      end if;
3329
3330      Parse (Input, Result, Open_Was_Read, False);
3331   end Parse_Element_Model;
3332
3333   --------------------------------
3334   -- Check_Valid_Name_Or_NCname --
3335   --------------------------------
3336
3337   procedure Check_Valid_Name_Or_NCname
3338     (Parser : in out Sax_Reader'Class;
3339      Name   : Token)
3340   is
3341   begin
3342      if Parser.Feature_Namespace then
3343         if not Is_Valid_NCname
3344           (Parser.Buffer (Name.First .. Name.Last), Parser.XML_Version)
3345         then
3346            Fatal_Error (Parser, Error_Is_Ncname, Name);
3347         end if;
3348      else
3349         if not Is_Valid_Name
3350           (Parser.Buffer (Name.First .. Name.Last), Parser.XML_Version)
3351         then
3352            Fatal_Error (Parser, Error_Is_Name, Name);
3353         end if;
3354      end if;
3355   end Check_Valid_Name_Or_NCname;
3356
3357   ---------------------------
3358   -- Check_Attribute_Value --
3359   ---------------------------
3360
3361   procedure Check_Attribute_Value
3362     (Parser     : in out Sax_Reader'Class;
3363      Local_Name : Symbol;
3364      Typ        : Attribute_Type;
3365      Value      : Symbol;
3366      Error_Loc  : Token)
3367   is
3368      Ent : Entity_Entry_Access;
3369      Val : constant Cst_Byte_Sequence_Access := Get (Value);
3370   begin
3371      case Typ is
3372         when Id | Idref =>
3373            if Parser.Feature_Namespace then
3374               if not Is_Valid_NCname (Val.all, Parser.XML_Version) then
3375                  --  Always a non-fatal error, since we are dealing with
3376                  --  namespaces
3377                  Error (Parser, Error_Attribute_Is_Ncname
3378                         & Get (Local_Name).all, Error_Loc);
3379               end if;
3380            else
3381               if not Is_Valid_Name (Val.all, Parser.XML_Version) then
3382                  Error (Parser, Error_Attribute_Is_Name
3383                         & Get (Local_Name).all, Error_Loc);
3384               end if;
3385            end if;
3386
3387         when Idrefs =>
3388            if Parser.Feature_Namespace then
3389               if not Is_Valid_NCnames (Val.all, Parser.XML_Version) then
3390                  Error (Parser, Error_Attribute_Is_Ncname
3391                         & Get (Local_Name).all, Error_Loc);
3392               end if;
3393            else
3394               if not Is_Valid_Names (Val.all, Parser.XML_Version) then
3395                  Error (Parser, Error_Attribute_Is_Name
3396                         & Get (Local_Name).all, Error_Loc);
3397               end if;
3398            end if;
3399
3400         when Nmtoken =>
3401            if not Is_Valid_Nmtoken (Val.all, Parser.XML_Version) then
3402               Error (Parser, Error_Attribute_Is_Nmtoken
3403                      & Get (Local_Name).all, Error_Loc);
3404            end if;
3405
3406         when Nmtokens =>
3407            if not Is_Valid_Nmtokens (Val.all, Parser.XML_Version) then
3408               Error (Parser, Error_Attribute_Is_Nmtoken
3409                      & Get (Local_Name).all, Error_Loc);
3410            end if;
3411
3412         when Entity =>
3413            if not Is_Valid_Name (Val.all, Parser.XML_Version) then
3414               Error (Parser, Error_Attribute_Is_Name
3415                      & Get (Local_Name).all, Error_Loc);
3416            end if;
3417
3418            Ent := Get (Parser.Entities, Value);
3419            if Ent = null or else not Ent.Unparsed then
3420               Error (Parser, Error_Attribute_Ref_Unparsed_Entity
3421                      & Get (Local_Name).all, Error_Loc);
3422            end if;
3423
3424         when Entities =>
3425            declare
3426               Index : Integer := Val'First;
3427               Last, Previous  : Integer;
3428               C     : Unicode_Char;
3429            begin
3430               Last := Index;
3431               while Last <= Val'Last loop
3432                  Previous := Last;
3433                  Encoding.Read (Val.all, Last, C);
3434                  if C = Unicode.Names.Basic_Latin.Space
3435                    or else Last > Val'Last
3436                  then
3437                     if not Is_Valid_Name (Val (Index .. Previous),
3438                                           Parser.XML_Version)
3439                     then
3440                        Error (Parser, Error_Attribute_Is_Name
3441                               & Get (Local_Name).all,
3442                               Error_Loc);
3443                     end if;
3444
3445                     Ent := Get
3446                       (Parser.Entities,
3447                        Find_Symbol (Parser, Val (Index .. Previous)));
3448                     if Ent = null or else not Ent.Unparsed then
3449                        Error (Parser, Error_Attribute_Ref_Unparsed_Entity
3450                               & Get (Local_Name).all,
3451                               Error_Loc);
3452                     end if;
3453                     Index := Last;
3454                  end if;
3455               end loop;
3456            end;
3457
3458         when others =>
3459            null;
3460      end case;
3461   end Check_Attribute_Value;
3462
3463   ------------
3464   -- Append --
3465   ------------
3466
3467   procedure Append
3468      (List         : in out Sax_Attribute_List;
3469       Local_Name   : Sax.Symbols.Symbol;
3470       Prefix       : Sax.Symbols.Symbol;
3471       Att_Type     : Attribute_Type := Cdata;
3472       URI          : Sax.Symbols.Symbol := No_Symbol;
3473       Value        : Sax.Symbols.Symbol;
3474       Location     : Sax.Locators.Location;
3475       Default_Decl : Default_Declaration := Default;
3476       If_Unique    : Boolean := False)
3477   is
3478      Tmp : Sax_Attribute_Array_Access;
3479   begin
3480      if If_Unique then
3481         for A in 1 .. List.Count loop
3482            if List.List (A).Local_Name = Local_Name
3483              and then List.List (A).Prefix = Prefix
3484            then
3485               return;
3486            end if;
3487         end loop;
3488      end if;
3489
3490      if List.List = null or else List.Count = List.List'Last then
3491         Tmp := List.List;
3492         if Tmp /= null then
3493            List.List := new Sax_Attribute_Array (Tmp'First .. Tmp'Last + 1);
3494            List.List (Tmp'Range) := Tmp.all;
3495            Unchecked_Free (Tmp);
3496         else
3497            List.List  := new Sax_Attribute_Array (1 .. 1);
3498            List.Count := 0;
3499         end if;
3500      end if;
3501
3502      --  The URI cannot be resolved at this point, since it will
3503      --  depend on the contents of the document at the place where
3504      --  the attribute is used.
3505
3506      List.Count := List.Count + 1;
3507      List.List (List.Count) := Sax_Attribute'
3508        (Prefix               => Prefix,
3509         Local_Name           => Local_Name,
3510         Value                => Value,
3511         Non_Normalized_Value => Value,
3512         Att_Type             => Att_Type,
3513         URI                  => URI,
3514         Default_Decl         => Default_Decl,
3515         Location             => Location);
3516   end Append;
3517
3518   ---------------------
3519   -- Syntactic_Parse --
3520   ---------------------
3521
3522   procedure Syntactic_Parse
3523     (Parser : in out Sax_Reader'Class;
3524      Input  : in out Input_Sources.Input_Source'Class)
3525   is
3526      Id  : Token := Null_Token;
3527
3528      procedure Parse_Start_Tag;
3529      --  Process an element start and its attributes   <!name name="value"..>
3530
3531      procedure Parse_Attributes
3532        (Elem_NS_Id, Elem_Name_Id : Token; Id : in out Token);
3533      --  Process the list of attributes in a start tag, and store them in
3534      --  Parser.Attributes.
3535      --  Id should have been initialized to the first token in the attributes
3536      --  list, and will be left on the first token after it.
3537      --  Return the list of attributes for this element
3538      --  On exit, NS_Count is set to the number of references to Elem_NS_Id
3539      --  among the attributes. The count for other XML_NS that the one of the
3540      --  element is directly increment in the corresponding XML_NS, but for
3541      --  the element we want to keep it virgin until we have called the
3542      --  validation hook.
3543
3544      procedure Resolve_Attribute_Namespaces;
3545      --  For each attributes defined in Parser.Attributes, set its URI for
3546      --  the namespace
3547
3548      procedure Check_And_Define_Namespace
3549        (Prefix, URI : Symbol; Location : Sax.Locators.Location);
3550      --  An attribute defining a namespace was found. Check that the values
3551      --  are valid, and register the new namespace. If Prefix is Null_Token,
3552      --  the default namespace is defined
3553
3554      function Get_String (Str : Token) return String;
3555      function Get_String (First, Last : Token) return String;
3556      pragma Inline (Get_String);
3557      --  Return the string pointed to by the token
3558
3559      procedure Add_Default_Attributes (DTD_Attr : Sax_Attribute_Array_Access);
3560      --  Add all DEFAULT attributes declared in the DTD into the attributes of
3561      --  the current element, if they weren't overriden by the user
3562
3563      procedure Parse_End_Tag;
3564      --  Process an element end   </name>
3565
3566      procedure Parse_Doctype;
3567      --  Process the DTD declaration
3568
3569      procedure Parse_Doctype_Contents;
3570      --  Process the DTD's contents
3571
3572      procedure Parse_Entity_Def (Id : in out Token);
3573      --  Parse an <!ENTITY declaration
3574
3575      procedure Parse_Element_Def (Id : in out Token);
3576      --  Parse an <!ELEMENT declaration
3577
3578      procedure Parse_Notation_Def (Id : in out Token);
3579      --  Parse an <!NOTATION declaration
3580
3581      procedure Parse_Attlist_Def (Id : in out Token);
3582      --  Parse an <!ATTLIST declaration
3583
3584      procedure Parse_PI (Id : in out Token);
3585      --  Parse a <?...?> processing instruction
3586
3587      procedure End_Element;
3588      --  End the current element. Its namespace prefix and local_name are
3589      --  given in the parameters.
3590
3591      procedure Get_String
3592        (Id : in out Token;
3593         State : Parser_State;
3594         Str_Start, Str_End : out Token;
3595         Normalize         : Boolean := False;
3596         Collapse_Spaces   : Boolean := False);
3597      --  Get all the character till the end of the string. Id should contain
3598      --  the initial quote that starts the string.
3599      --  On exit, Str_Start is set to the first token of the string, and
3600      --  Str_End to the last token.
3601      --  If Normalize is True, then all space characters are converted to
3602      --  ' '.
3603      --  If Collapse_Spaces is True, then all duplicate spaces sequences are
3604      --  collapsed into a single space character. Leading and trailing spaces
3605      --  are also removed.
3606
3607      procedure Get_Name_NS (Id : in out Token; NS_Id, Name_Id : out Token);
3608      --  Read the next tokens so as to match either a single name or
3609      --  a "ns:name" name.
3610      --  Id should initially point to the candidate token for the name, and
3611      --  will be left on the token following that name.
3612      --  An error is raised if we can't even match a Name.
3613
3614      procedure Get_External
3615        (Id : in out Token;
3616         System_Start, System_End, Public_Start, Public_End : out Token;
3617         Allow_Publicid : Boolean := False);
3618      --  Parse a PUBLIC or SYSTEM definition and its arguments.
3619      --  Id should initially point to the keyword itself, and will be set to
3620      --  the first identifier following the full definition
3621      --  If Allow_Publicid is True, then PUBLIC might be followed by a single
3622      --  string, as in rule [83] of the XML specifications.
3623
3624      procedure Check_Standalone_Value (Id : in out Token);
3625      procedure Check_Encoding_Value (Id : in out Token);
3626      procedure Check_Version_Value (Id : in out Token);
3627      --  Check the arguments for the <?xml?> processing instruction.
3628      --  Each of this procedures gets the arguments from Next_Token, up to,
3629      --  and including, the following space or End_Of_PI character.
3630      --  They raise errors appropriately
3631
3632      procedure Check_Model;
3633      --  Check that the last element inserted matches the model. This
3634      --  procedure should not be called for the root element.
3635
3636      ----------------
3637      -- Get_String --
3638      ----------------
3639
3640      procedure Get_String
3641        (Id : in out Token;
3642         State : Parser_State;
3643         Str_Start, Str_End : out Token;
3644         Normalize : Boolean := False;
3645         Collapse_Spaces : Boolean := False)
3646      is
3647         T : constant Token := Id;
3648         Saved_State : constant Parser_State := Get_State (Parser);
3649         Possible_End : Token := Null_Token;
3650         C : Unicode_Char;
3651         Index : Natural;
3652         Last_Space : Natural := 0;
3653         Had_Space : Boolean := Collapse_Spaces; --  Avoid leading spaces
3654
3655      begin
3656         if Debug_Internal then
3657            Put_Line ("Get_String Normalize="
3658                      & Boolean'Image (Normalize)
3659                      & " Collapse_Spaces="
3660                      & Boolean'Image (Collapse_Spaces));
3661         end if;
3662         Set_State (Parser, State);
3663         Next_Token (Input, Parser, Id);
3664         Str_Start := Id;
3665         Str_End := Id;
3666
3667         while Id.Typ /= T.Typ and then Id.Typ /= End_Of_Input loop
3668            Str_End := Id;
3669            case Id.Typ is
3670               when Double_String_Delimiter =>
3671                  Str_End.First := Parser.Buffer_Length + 1;
3672                  Put_In_Buffer (Parser, Quotation_Mark);
3673                  Str_End.Last := Parser.Buffer_Length;
3674                  Possible_End := Str_End;
3675                  Had_Space := False;
3676               when Single_String_Delimiter =>
3677                  Str_End.First := Parser.Buffer_Length + 1;
3678                  Put_In_Buffer (Parser, Apostrophe);
3679                  Str_End.Last := Parser.Buffer_Length;
3680                  Possible_End := Str_End;
3681                  Had_Space := False;
3682               when Start_Of_Tag =>
3683                  if Possible_End = Null_Token then
3684                     Fatal_Error (Parser, Error_Attribute_Less_Than, Id);
3685                  else
3686                     Fatal_Error
3687                       (Parser, Error_Attribute_Less_Than_Suggests
3688                        & Location (Parser, Possible_End.Location), Id);
3689                  end if;
3690               when Char_Ref =>
3691                  --  3.3.3 item 3: character references are kept as is
3692                  if Get_String (Id) = Space_Sequence then
3693                     if Collapse_Spaces and Had_Space then
3694                        Reset_Buffer (Parser, Id);
3695                     end if;
3696                     Had_Space := True;
3697                     Last_Space := Parser.Buffer_Length;
3698                  else
3699                     Had_Space := False;
3700                  end if;
3701
3702               when others =>
3703                  if Normalize or Collapse_Spaces then
3704                     declare
3705                        Str : constant Byte_Sequence :=
3706                          Parser.Buffer (Id.First .. Id.Last);
3707                     begin
3708                        Reset_Buffer (Parser, Id);
3709                        Index := Str'First;
3710                        while Index <= Str'Last loop
3711                           Encoding.Read (Str, Index, C);
3712
3713                           --  ??? If we have a character reference, we must
3714                           --  replace the character it represents, and not do
3715                           --  entity replacement. How to do that, we have lost
3716                           --  that information
3717
3718                           --  When parsing an attribute value, we should still
3719                           --  process white spaces, therefore the test for
3720                           --  Ignore_Special
3721                           if Is_White_Space (C) then
3722                              if not Collapse_Spaces or not Had_Space then
3723                                 Put_In_Buffer
3724                                   (Parser, Unicode.Names.Basic_Latin.Space);
3725                              end if;
3726                              Had_Space := True;
3727                              Last_Space := Parser.Buffer_Length;
3728                           else
3729                              Had_Space := False;
3730                              Put_In_Buffer (Parser, C);
3731                           end if;
3732                        end loop;
3733                     end;
3734                     Str_End.Last := Parser.Buffer_Length;
3735                  end if;
3736            end case;
3737            Next_Token (Input, Parser, Id);
3738         end loop;
3739
3740         if Collapse_Spaces and then Had_Space and then Last_Space /= 0 then
3741            Str_End.Last := Last_Space - 1;
3742         end if;
3743
3744         if Id.Typ = End_Of_Input then
3745            if Possible_End = Null_Token then
3746               Fatal_Error (Parser, Error_Unterminated_String);
3747            else
3748               Fatal_Error (Parser, Error_Unterminated_String_Suggests
3749                            & Location (Parser, Possible_End.Location), T);
3750            end if;
3751         end if;
3752         Set_State (Parser, Saved_State);
3753      end Get_String;
3754
3755      ------------------
3756      -- Get_External --
3757      ------------------
3758
3759      procedure Get_External
3760        (Id : in out Token;
3761         System_Start, System_End, Public_Start, Public_End : out Token;
3762         Allow_Publicid : Boolean := False)
3763      is
3764         Had_Space : Boolean;
3765         C : Unicode_Char;
3766         Index : Natural;
3767      begin
3768         System_Start := Null_Token;
3769         System_End := Null_Token;
3770         Public_Start := Null_Token;
3771         Public_End := Null_Token;
3772
3773         --  Check the arguments for PUBLIC
3774         if Id.Typ = Public then
3775            Next_Token_Skip_Spaces (Input, Parser, Id, Must_Have => True);
3776            if Id.Typ /= Double_String_Delimiter
3777              and then Id.Typ /= Single_String_Delimiter
3778            then
3779               Fatal_Error (Parser, Error_Public_String);
3780            else
3781               Get_String
3782                 (Id, Non_Interpreted_String_State, Public_Start, Public_End);
3783
3784               Index := Public_Start.First;
3785               while Index <= Public_End.Last loop
3786                  Encoding.Read (Parser.Buffer.all, Index, C);
3787
3788                  if not Is_Pubid_Char (C) then
3789                     Fatal_Error
3790                       (Parser, Error_Public_Invalid & "'"
3791                        & Debug_Encode (C) & "'", Public_Start);
3792                  end if;
3793               end loop;
3794            end if;
3795
3796            Next_Token (Input, Parser, Id);
3797            Had_Space := (Id.Typ = Space);
3798            if Had_Space then
3799               Next_Token (Input, Parser, Id);
3800            elsif Allow_Publicid then
3801               return;
3802            end if;
3803
3804            if Id.Typ /= Double_String_Delimiter
3805              and then Id.Typ /= Single_String_Delimiter
3806            then
3807               if not Allow_Publicid then
3808                  Fatal_Error (Parser, Error_Public_Sysid);
3809               end if;
3810            else
3811               if not Had_Space then
3812                  Fatal_Error (Parser, Error_Public_Sysid_Space, Id);
3813               end if;
3814               Get_String
3815                 (Id, Non_Interpreted_String_State, System_Start, System_End);
3816               Next_Token (Input, Parser, Id);
3817            end if;
3818
3819            --  Check the arguments for SYSTEM
3820         elsif Id.Typ = System then
3821            Next_Token_Skip_Spaces (Input, Parser, Id, Must_Have => True);
3822            if Id.Typ /= Double_String_Delimiter
3823              and then Id.Typ /= Single_String_Delimiter
3824            then
3825               Fatal_Error (Parser, Error_System_String);
3826            else
3827               Get_String
3828                 (Id, Non_Interpreted_String_State, System_Start, System_End);
3829               Next_Token (Input, Parser, Id);
3830            end if;
3831         end if;
3832      end Get_External;
3833
3834      -----------------
3835      -- Get_Name_NS --
3836      -----------------
3837
3838      procedure Get_Name_NS (Id : in out Token; NS_Id, Name_Id : out Token) is
3839      begin
3840         Name_Id := Id;
3841
3842         if Id.Typ = Text then
3843            Fatal_Error
3844              (Parser, Error_Invalid_Name & "'"
3845               & Parser.Buffer (Id.First .. Id.Last) & "'", Id);
3846         --  An empty namespace ? This seems to be useful only for the XML
3847         --  conformance suite, so we only handle the case of a single ':'
3848         --  to mean both an empty prefix and empty local name.
3849         elsif Name_Id.Typ = Colon then
3850            Name_Id.Typ := Text;
3851            NS_Id := Name_Id;
3852            Next_Token (Input, Parser, Id);
3853
3854         elsif Id.Typ /= Name then
3855            Fatal_Error (Parser, Error_Is_Name, Id);
3856
3857         else
3858            Next_Token (Input, Parser, Id);
3859            if Id.Typ = Colon then
3860               NS_Id := Name_Id;
3861               Next_Token (Input, Parser, Name_Id);
3862               if Name_Id.Typ /= Name then
3863                  Fatal_Error (Parser, Error_Is_Name);
3864               end if;
3865               Next_Token (Input, Parser, Id);
3866            else
3867               NS_Id := Null_Token;
3868            end if;
3869         end if;
3870      end Get_Name_NS;
3871
3872      ----------------------
3873      -- Parse_Entity_Def --
3874      ----------------------
3875
3876      procedure Parse_Entity_Def (Id : in out Token) is
3877         Is_Parameter : Token := Null_Token;
3878         Name_Id : Token;
3879         Def_Start, Def_End : Token := Null_Token;
3880         Ndata_Id : Token := Null_Token;
3881         Public_Start, Public_End : Token := Null_Token;
3882         System_Start, System_End : Token := Null_Token;
3883         Had_Space : Boolean;
3884         Sym : Symbol;
3885      begin
3886         Set_State (Parser, Entity_Def_State);
3887         Next_Token_Skip_Spaces (Input, Parser, Name_Id, True);
3888
3889         if Debug_Internal then
3890            Put_Line ("Parsing entity definition "
3891                      & Parser.Buffer (Name_Id.First .. Name_Id.Last));
3892         end if;
3893
3894         if Name_Id.Typ = Text
3895           and then Parser.Buffer (Name_Id.First .. Name_Id.Last) =
3896           Percent_Sign_Sequence
3897         then
3898            Is_Parameter := Name_Id;
3899            Next_Token_Skip_Spaces (Input, Parser, Name_Id);
3900         end if;
3901
3902         if Name_Id.Typ /= Name then
3903            Fatal_Error (Parser, Error_Is_Name);
3904         end if;
3905
3906         Check_Valid_Name_Or_NCname (Parser, Name_Id);
3907
3908         Next_Token_Skip_Spaces (Input, Parser, Id, Must_Have => True);
3909
3910         if Id.Typ = Public or else Id.Typ = System then
3911            Get_External
3912              (Id, System_Start, System_End, Public_Start, Public_End);
3913
3914            if Contains_URI_Fragment
3915              (Parser.Buffer (System_Start.First .. System_End.Last))
3916            then
3917               Error (Parser, Error_System_URI, Id);
3918            end if;
3919
3920            Had_Space := (Id.Typ = Space);
3921            if Had_Space then
3922               Next_Token (Input, Parser, Id);
3923            end if;
3924
3925            if Id.Typ = Ndata then
3926               if not Had_Space then
3927                  Fatal_Error (Parser, Error_Ndata_Space, Id);
3928               end if;
3929
3930               if Is_Parameter /= Null_Token then
3931                  Fatal_Error (Parser, Error_Ndata_ParamEntity, Id);
3932               end if;
3933               Next_Token_Skip_Spaces (Input, Parser, Ndata_Id, True);
3934
3935               if Ndata_Id.Typ /= Text and then Ndata_Id.Typ /= Name then
3936                  Fatal_Error (Parser, Error_Ndata_String);
3937               else
3938                  Sym := Find_Symbol (Parser, Ndata_Id);
3939
3940                  if Parser.Feature_Validation
3941                    and then Get (Parser.Notations, Sym) = Null_Notation
3942                  then
3943                     --  The notation might be declared later in the same DTD
3944                     Set (Parser.Notations,
3945                       (Name             => Sym,
3946                        Declaration_Seen => False));
3947                  end if;
3948
3949                  Next_Token_Skip_Spaces (Input, Parser, Id);
3950               end if;
3951            end if;
3952
3953         elsif Id.Typ = Double_String_Delimiter
3954           or else Id.Typ = Single_String_Delimiter
3955         then
3956            Get_String (Id, Entity_Str_Def_State, Def_Start, Def_End);
3957            Next_Token_Skip_Spaces (Input, Parser, Id);
3958         else
3959            Fatal_Error (Parser, Error_Entity_Definition);
3960         end if;
3961
3962         if Id.Typ /= End_Of_Tag then
3963            Fatal_Error (Parser, Error_Entity_Definition_Unterminated);
3964         end if;
3965
3966         --  Only report the first definition
3967
3968         Sym := Find_Symbol
3969           (Parser,
3970            Parser.Buffer (Is_Parameter.First .. Is_Parameter.Last)
3971            & Parser.Buffer (Name_Id.First .. Name_Id.Last));
3972
3973         if Get (Parser.Entities, Sym) /= null then
3974            null;
3975
3976         elsif Def_End /= Null_Token then
3977            Set (Parser.Entities,
3978                 new Entity_Entry'
3979                   (Name => Find_Symbol
3980                      (Parser,
3981                       Parser.Buffer (Is_Parameter.First .. Is_Parameter.Last)
3982                       & Parser.Buffer (Name_Id.First .. Name_Id.Last)),
3983                    Value => Find_Symbol
3984                      (Parser,
3985                       Parser.Buffer (Def_Start.First .. Def_End.Last)),
3986                    Public       => No_Symbol,
3987                    Unparsed     => False,
3988                    External_Declaration => (Parser.Inputs /= null
3989                       and then Parser.Inputs.External)
3990                       or else Parser.In_External_Entity,
3991                    External     => False,
3992                 Already_Read => False));
3993            if Debug_Internal then
3994               Put_Line ("Internal_Entity_Decl: "
3995                         & Parser.Buffer (Name_Id.First .. Name_Id.Last) & "="
3996                         & Parser.Buffer (Def_Start.First .. Def_End.Last)
3997                         & " length="
3998                         & Integer'Image (Def_End.Last - Def_Start.First + 1));
3999            end if;
4000            Internal_Entity_Decl
4001              (Parser,
4002               Name => Parser.Buffer (Is_Parameter.First .. Is_Parameter.Last)
4003               & Parser.Buffer (Name_Id.First .. Name_Id.Last),
4004               Value => Parser.Buffer (Def_Start.First .. Def_End.Last));
4005
4006         elsif Ndata_Id /= Null_Token then
4007            Set (Parser.Entities,
4008                 new Entity_Entry'
4009                   (Name => Find_Symbol
4010                      (Parser,
4011                       Parser.Buffer (Is_Parameter.First .. Is_Parameter.Last)
4012                       & Parser.Buffer (Name_Id.First .. Name_Id.Last)),
4013                    Value        => No_Symbol,
4014                    Public       => No_Symbol,
4015                    Unparsed     => True,
4016                    External_Declaration => (Parser.Inputs /= null
4017                       and then Parser.Inputs.External)
4018                       or else Parser.In_External_Entity,
4019                    External     => False,
4020                 Already_Read => True));
4021            Unparsed_Entity_Decl
4022              (Parser,
4023               Name => Parser.Buffer (Is_Parameter.First .. Is_Parameter.Last)
4024               & Parser.Buffer (Name_Id.First .. Name_Id.Last),
4025               System_Id =>
4026                 Parser.Buffer (System_Start.First .. System_End.Last),
4027               Notation_Name =>
4028                 Parser.Buffer (Ndata_Id.First .. Ndata_Id.Last));
4029
4030         else
4031            Set
4032              (Parser.Entities,
4033               new Entity_Entry'
4034                 (Name => Find_Symbol
4035                    (Parser,
4036                     Parser.Buffer (Is_Parameter.First .. Is_Parameter.Last)
4037                     & Parser.Buffer (Name_Id.First .. Name_Id.Last)),
4038                  Value => Find_Symbol
4039                    (Parser,
4040                     Parser.Buffer (System_Start.First .. System_End.Last)),
4041                  Public       => Find_Symbol
4042                    (Parser,
4043                     Parser.Buffer (Public_Start.First .. Public_End.Last)),
4044                  Unparsed     => False,
4045                  External_Declaration => (Parser.Inputs /= null
4046                     and then Parser.Inputs.External)
4047                     or else Parser.In_External_Entity,
4048                  External     => True,
4049                  Already_Read => False));
4050
4051            External_Entity_Decl
4052              (Parser,
4053               Name => Parser.Buffer (Is_Parameter.First .. Is_Parameter.Last)
4054               & Parser.Buffer (Name_Id.First .. Name_Id.Last),
4055               Public_Id => Parser.Buffer
4056                 (Public_Start.First .. Public_End.Last),
4057               System_Id => Parser.Buffer
4058                 (System_Start.First .. System_End.Last));
4059         end if;
4060
4061         if Is_Parameter /= Null_Token then
4062            Reset_Buffer (Parser, Is_Parameter);
4063         else
4064            Reset_Buffer (Parser, Name_Id);
4065         end if;
4066         Set_State (Parser, DTD_State);
4067      end Parse_Entity_Def;
4068
4069      -----------------------
4070      -- Parse_Element_Def --
4071      -----------------------
4072
4073      procedure Parse_Element_Def (Id : in out Token) is
4074         Name_Id : Token;
4075         M : Element_Model_Ptr;
4076         M2 : Content_Model;
4077         NS_Id : Token;
4078      begin
4079         Set_State (Parser, Element_Def_State);
4080
4081         Next_NS_Token_Skip_Spaces (Input, Parser, NS_Id, Name_Id);
4082
4083         if Name_Id.Typ /= Name then
4084            Fatal_Error (Parser, Error_Is_Name);
4085         end if;
4086
4087         Next_Token_Skip_Spaces (Input, Parser, Id, Must_Have => True);
4088
4089         case Id.Typ is
4090            when Empty  => M := new Element_Model (Empty);
4091            when Any    => M := new Element_Model (Anything);
4092            when Open_Paren =>
4093               Parse_Element_Model
4094                 (Input, Parser, M, Attlist => False, Open_Was_Read => True);
4095            when others =>
4096               Fatal_Error (Parser, "Invalid content model: expecting"
4097                            & " '(', 'EMPTY' or 'ANY'", Id);
4098         end case;
4099         Next_Token_Skip_Spaces (Input, Parser, Id);
4100
4101         if Id.Typ /= End_Of_Tag then
4102            Free (M);
4103            Fatal_Error (Parser, "Expecting end of ELEMENT definition");
4104         end if;
4105
4106         M2 := Create_Model (M);
4107         Element_Decl
4108           (Parser, Parser.Buffer (Name_Id.First .. Name_Id.Last), M2);
4109         Unref (M2);
4110
4111         if NS_Id /= Null_Token then
4112            Reset_Buffer (Parser, NS_Id);
4113         else
4114            Reset_Buffer (Parser, Name_Id);
4115         end if;
4116
4117         Set_State (Parser, DTD_State);
4118      end Parse_Element_Def;
4119
4120      ------------------------
4121      -- Parse_Notation_Def --
4122      ------------------------
4123
4124      procedure Parse_Notation_Def (Id : in out Token) is
4125         Public_Start, Public_End : Token := Null_Token;
4126         System_Start, System_End : Token := Null_Token;
4127         Name_Id : Token;
4128         Sym     : Symbol;
4129      begin
4130         Set_State (Parser, Element_Def_State);
4131         Next_Token_Skip_Spaces (Input, Parser, Name_Id);
4132
4133         Check_Valid_Name_Or_NCname (Parser, Name_Id);
4134
4135         if Name_Id.Typ /= Name then
4136            Fatal_Error (Parser, Error_Is_Name);
4137         end if;
4138
4139         Next_Token_Skip_Spaces (Input, Parser, Id);
4140
4141         if Id.Typ = Public or else Id.Typ = System then
4142            Get_External
4143              (Id, System_Start, System_End, Public_Start, Public_End, True);
4144            if Id.Typ = Space then
4145               Next_Token (Input, Parser, Id);
4146            end if;
4147         else
4148            Fatal_Error (Parser, Error_Invalid_Notation_Decl);
4149         end if;
4150
4151         if Id.Typ /= End_Of_Tag then
4152            Fatal_Error (Parser, "Expecting end of NOTATION definition");
4153         end if;
4154
4155         if Contains_URI_Fragment
4156           (Parser.Buffer (System_Start.First .. System_End.Last))
4157         then
4158            Error (Parser, Error_System_URI);
4159         end if;
4160
4161         if Parser.Hooks.Notation_Decl /= null then
4162            Parser.Hooks.Notation_Decl
4163              (Parser'Access,
4164               Name => Parser.Buffer (Name_Id.First .. Name_Id.Last),
4165               Public_Id =>
4166                 Parser.Buffer (Public_Start.First .. Public_End.Last),
4167               System_Id =>
4168                 Parser.Buffer (System_Start.First .. System_End.Last));
4169         end if;
4170
4171         Notation_Decl
4172           (Parser,
4173            Name => Parser.Buffer (Name_Id.First .. Name_Id.Last),
4174            Public_Id => Parser.Buffer (Public_Start.First .. Public_End.Last),
4175            System_Id =>
4176              Parser.Buffer (System_Start.First .. System_End.Last));
4177
4178         if Parser.Feature_Validation then
4179            Sym := Find_Symbol (Parser, Name_Id);
4180            Remove (Parser.Notations, Sym);
4181            Set (Parser.Notations,
4182                 (Name             => Sym,
4183                  Declaration_Seen => True));
4184         end if;
4185
4186         Set_State (Parser, DTD_State);
4187         Reset_Buffer (Parser, Name_Id);
4188      end Parse_Notation_Def;
4189
4190      -----------------------
4191      -- Parse_Attlist_Def --
4192      -----------------------
4193
4194      procedure Parse_Attlist_Def (Id : in out Token) is
4195         M : Element_Model_Ptr;
4196         M2 : Content_Model;
4197         Default_Start, Default_End : Token;
4198         Ename_Id, Ename_NS_Id, Name_Id, NS_Id, Type_Id : Token;
4199         Default_Id : Token;
4200         Attr : Attributes_Table.Element_Ptr;
4201         Default_Decl : Default_Declaration;
4202         Att_Type : Attribute_Type;
4203         Ename, SName : Symbol;
4204      begin
4205         Set_State (Parser, Element_Def_State);
4206
4207         Next_NS_Token_Skip_Spaces (Input, Parser, Ename_NS_Id, Ename_Id);
4208
4209         if Ename_Id.Typ /= Name then
4210            Fatal_Error (Parser, Error_Is_Name, Ename_Id);
4211         end if;
4212
4213         Ename := Find_Symbol (Parser, Ename_Id);
4214
4215         Attr := Get_Ptr (Parser.Default_Atts, Ename);
4216         if Attr = null then
4217            declare
4218               Attr2 : constant Attributes_Entry :=
4219                 (Element_Name => Ename,
4220                  Attributes   => (0, null));
4221            begin
4222               Set (Parser.Default_Atts, Attr2);
4223               Attr := Get_Ptr (Parser.Default_Atts, Ename);
4224            end;
4225         end if;
4226
4227         if Id.Typ = Space then
4228            Next_Token_Skip_Spaces (Input, Parser, Id);
4229         end if;
4230
4231         loop
4232            --  Temporarily disable In_Attlist, so that the names like "NAME"
4233            --  are parsed as names and not as NMTOKEN.
4234            Set_State (Parser, Attribute_Def_Name_State);
4235
4236            Next_Token_Skip_Spaces (Input, Parser, Id);
4237            exit when Id.Typ = End_Of_Tag or else Id.Typ = End_Of_Input;
4238
4239            Get_Name_NS (Id, NS_Id, Name_Id);
4240            SName := Find_Symbol (Parser, Name_Id);
4241
4242            if Id.Typ /= Space then
4243               Fatal_Error (Parser, Error_Expecting_Space, Id);  --  3.3
4244            end if;
4245
4246            Set_State (Parser, Attribute_Def_State);
4247            Next_Token_Skip_Spaces (Input, Parser, Id);
4248
4249            Type_Id := Id;
4250            Default_Start := Null_Token;
4251            Default_End := Null_Token;
4252            case Type_Id.Typ is
4253               when Id_Type  => Att_Type := Sax.Attributes.Id;
4254               when Idref    => Att_Type := Sax.Attributes.Idref;
4255               when Idrefs   => Att_Type := Sax.Attributes.Idrefs;
4256               when Cdata    => Att_Type := Sax.Attributes.Cdata;
4257               when Nmtoken  => Att_Type := Sax.Attributes.Nmtoken;
4258               when Nmtokens => Att_Type := Sax.Attributes.Nmtokens;
4259               when Entity   => Att_Type := Sax.Attributes.Entity;
4260               when Entities => Att_Type := Sax.Attributes.Entities;
4261               when Notation =>
4262                  Att_Type := Notation;
4263                  Next_Token (Input, Parser, Id);
4264                  if Id.Typ /= Space then
4265                     Fatal_Error
4266                       (Parser,  --  3.3.1
4267                        "Space is required between NOTATION keyword"
4268                        & " and list of enumerated", Id);
4269                  end if;
4270                  Parse_Element_Model (Input, Parser, M, True, False);
4271
4272                  if Parser.Feature_Validation then
4273                     for J in M.List'Range loop
4274                        if Get (Parser.Notations, M.List (J).Name) /=
4275                          Null_Notation
4276                        then
4277                           Error
4278                             (Parser, Error_Notation_Undeclared
4279                              & Get (M.List (J).Name).all, Id);
4280                        end if;
4281                     end loop;
4282                  end if;
4283
4284               when Open_Paren =>
4285                  Att_Type := Enumeration;
4286                  Parse_Element_Model (Input, Parser, M, True, True);
4287
4288               when others =>
4289                  Fatal_Error (Parser, Error_Attlist_Type);
4290            end case;
4291
4292            declare
4293               QName : constant Byte_Sequence :=
4294                 Qname_From_Name (Parser, NS_Id, Name_Id);
4295               Default_Val : Symbol;
4296            begin
4297               Next_Token_Skip_Spaces (Input, Parser, Default_Id, True);
4298               if Default_Id.Typ = Implied then
4299                  Default_Decl := Sax.Attributes.Implied;
4300               elsif Default_Id.Typ = Required then
4301                  Default_Decl := Sax.Attributes.Required;
4302               else
4303                  Id := Default_Id;
4304                  if Default_Id.Typ = Fixed then
4305                     Next_Token_Skip_Spaces (Input, Parser, Id, True);
4306                     Default_Decl := Sax.Attributes.Fixed;
4307                  else
4308                     Default_Decl := Sax.Attributes.Default;
4309                  end if;
4310
4311                  if Id.Typ = Double_String_Delimiter
4312                    or else Id.Typ = Single_String_Delimiter
4313                  then
4314                     Get_String
4315                       (Id, Attlist_Str_Def_State, Default_Start, Default_End,
4316                        Normalize => True, Collapse_Spaces => True);
4317
4318                     --  Errata 9 on XML 1.0 specs: the default value must be
4319                     --  syntactically correct. Validity will only be checked
4320                     --  if the attribute is used.
4321
4322                     Default_Val := Find_Symbol
4323                       (Parser, Default_Start, Default_End);
4324
4325                     if Parser.Feature_Validation then
4326                        Check_Attribute_Value
4327                          (Parser,
4328                           Local_Name => SName,
4329                           Typ       => Att_Type,
4330                           Value     => Default_Val,
4331                           Error_Loc => Default_Start);
4332                     end if;
4333                  else
4334                     Fatal_Error
4335                       (Parser, "Invalid default value for attribute");
4336                  end if;
4337               end if;
4338
4339               if Parser.Feature_Validation
4340                 and then Att_Type = Sax.Attributes.Id
4341                 and then Default_Decl /= Sax.Attributes.Implied
4342                 and then Default_Decl /= Sax.Attributes.Required
4343               then
4344                  Error
4345                    (Parser,
4346                     "Default value for an ID attribute must be"
4347                     & " IMPLIED or REQUIRED",
4348                     Default_Id);
4349               end if;
4350
4351               --  Always report the attribute, even when we know the value
4352               --  won't be used. We can't do it coherently otherwise, in case
4353               --  an attribute is seen in the external subset, and then
4354               --  overriden in the internal subset.
4355               M2 := Create_Model (M);
4356               Attribute_Decl
4357                 (Parser,
4358                  Ename => Parser.Buffer (Ename_Id.First .. Ename_Id.Last),
4359                  Aname => QName,
4360                  Typ   => Att_Type,
4361                  Content => M2,
4362                  Value_Default => Default_Decl,
4363                  Value => Parser.Buffer
4364                    (Default_Start.First .. Default_End.Last));
4365               Unref (M2);
4366
4367               Append
4368                 (List         => Attr.Attributes,
4369                  If_Unique    => True,
4370                  Location     => Name_Id.Location,
4371                  Local_Name   => SName,
4372                  Prefix       => Find_Symbol (Parser, NS_Id),
4373                  Value        => Default_Val,
4374                  Att_Type     => Att_Type,
4375                  Default_Decl => Default_Decl);
4376            end;
4377
4378            --  M will be freed automatically when the Default_Atts field is
4379            --  freed. However, we need to reset it for the next attribute
4380            --  in the list.
4381            M := null;
4382
4383            if NS_Id /= Null_Token then
4384               Reset_Buffer (Parser, NS_Id);
4385            else
4386               Reset_Buffer (Parser, Name_Id);
4387            end if;
4388            Set_State (Parser, Element_Def_State);
4389         end loop;
4390
4391         if Id.Typ /= End_Of_Tag then
4392            Fatal_Error (Parser, "Expecting end of ATTLIST definition");
4393         end if;
4394
4395         Set_State (Parser, DTD_State);
4396
4397         if Ename_NS_Id /= Null_Token then
4398            Reset_Buffer (Parser, Ename_NS_Id);
4399         else
4400            Reset_Buffer (Parser, Ename_Id);
4401         end if;
4402
4403      exception
4404         when others =>
4405            Free (M);
4406            raise;
4407      end Parse_Attlist_Def;
4408
4409      -----------------
4410      -- Check_Model --
4411      -----------------
4412
4413      procedure Check_Model is
4414      begin
4415         null;
4416      end Check_Model;
4417
4418      ----------------
4419      -- Get_String --
4420      ----------------
4421
4422      function Get_String (Str : Token) return String is
4423      begin
4424         return Parser.Buffer (Str.First .. Str.Last);
4425      end Get_String;
4426
4427      ----------------
4428      -- Get_String --
4429      ----------------
4430
4431      function Get_String (First, Last : Token) return String is
4432      begin
4433         return Parser.Buffer (First.First .. Last.Last);
4434      end Get_String;
4435
4436      --------------------------------
4437      -- Check_And_Define_Namespace --
4438      --------------------------------
4439
4440      procedure Check_And_Define_Namespace
4441        (Prefix, URI : Symbol; Location : Sax.Locators.Location) is
4442      begin
4443         if Prefix = Empty_String then
4444            --  [2] Empty value is legal for the default namespace, and
4445            --  provides unbinding
4446            null;
4447
4448         else
4449            if Prefix = Parser.Xmlns_Sequence then
4450               Fatal_Error  --  NS 3
4451                 (Parser, "Cannot redefine the xmlns prefix", Location);
4452
4453            elsif URI = Empty_String then
4454               Fatal_Error
4455                 (Parser,  --  NS 2.2
4456                  "Cannot use an empty URI for namespaces", Location);
4457
4458            elsif Prefix = Parser.Xml_Sequence then
4459               if URI /= Parser.Namespaces_URI_Sequence then
4460                  Fatal_Error  --  NS 3
4461                    (Parser, "Cannot redefine the xml prefix", Location);
4462               end if;
4463
4464            elsif URI = Parser.Namespaces_URI_Sequence then
4465               Fatal_Error
4466                 (Parser,  --  NS 3
4467                  "Cannot bind the namespace URI to a prefix other"
4468                  & " than xml", Location);
4469            end if;
4470         end if;
4471
4472         if URI /= Empty_String
4473           and then not Is_Valid_IRI
4474             (Get (URI).all, Version => Parser.XML_Version)
4475         then
4476            if Parser.Feature_Allow_Relative_IRI then
4477               Warning
4478                 (Parser,
4479                  "Invalid absolute IRI (Internationalized Resource"
4480                  & " Identifier) for namespace: """ & Get (URI).all & """",
4481                  Location);
4482            else
4483               Error
4484                 (Parser,
4485                  "Invalid absolute IRI (Internationalized Resource"
4486                  & " Identifier) for namespace: """ & Get (URI).all & """",
4487                  Location);
4488               --  NS 2
4489            end if;
4490         end if;
4491
4492         Add_Namespace (Parser, Parser.Current_Node, Prefix, URI);
4493      end Check_And_Define_Namespace;
4494
4495      ----------------------------
4496      -- Add_Default_Attributes --
4497      ----------------------------
4498
4499      procedure Add_Default_Attributes
4500        (DTD_Attr : Sax_Attribute_Array_Access)
4501      is
4502         Found    : Boolean;
4503         Is_Xmlns : Boolean;
4504      begin
4505         --  Add all the default attributes to the element.
4506         --  We shouldn't add an attribute if it was overriden by the user
4507
4508         if DTD_Attr /= null then
4509            for J in DTD_Attr'Range loop
4510               --  We must compare Qnames, since namespaces haven't been
4511               --  resolved in the default attributes.
4512               if DTD_Attr (J).Default_Decl = Default
4513                 or else DTD_Attr (J).Default_Decl = Fixed
4514               then
4515                  Found := False;
4516
4517                  for A in 1 .. Parser.Attributes.Count loop
4518                     if Parser.Attributes.List (A).Local_Name =
4519                         DTD_Attr (J).Local_Name
4520                       and then Parser.Attributes.List (A).Prefix =
4521                         DTD_Attr (J).Prefix
4522                     then
4523                        Found := True;
4524                        exit;
4525                     end if;
4526                  end loop;
4527
4528                  if not Found then
4529                     Is_Xmlns := DTD_Attr (J).Prefix = Parser.Xmlns_Sequence;
4530
4531                     if Parser.Feature_Namespace_Prefixes
4532                       or else not Is_Xmlns
4533                     then
4534                        Append
4535                          (List         => Parser.Attributes,
4536                           If_Unique    => True,
4537                           Location     => No_Location,
4538                           Local_Name   => DTD_Attr (J).Local_Name,
4539                           Prefix       => DTD_Attr (J).Prefix,
4540                           Value        => DTD_Attr (J).Value,
4541                           Att_Type     => DTD_Attr (J).Att_Type,
4542                           Default_Decl => DTD_Attr (J).Default_Decl);
4543                     end if;
4544
4545                     --  Is this a namespace declaration ?
4546                     if Is_Xmlns then
4547                        --  Following warning is because for parser that don't
4548                        --  read external DTDs, the behavior would be different
4549                        --  for the same document.
4550                        Warning
4551                          (Parser,
4552                           "namespace-declaring attribute inserted via "
4553                           & "DTD defaulting mechanisms are not good style");
4554                        Add_Namespace
4555                          (Parser, Parser.Current_Node,
4556                           Prefix => DTD_Attr (J).Local_Name,
4557                           URI    => DTD_Attr (J).Value);
4558                     end if;
4559                  end if;
4560               end if;
4561            end loop;
4562         end if;
4563      end Add_Default_Attributes;
4564
4565      ----------------------------------
4566      -- Resolve_Attribute_Namespaces --
4567      ----------------------------------
4568
4569      procedure Resolve_Attribute_Namespaces is
4570         NS         : XML_NS;
4571      begin
4572         if Parser.Feature_Namespace then
4573            for J in 1 .. Parser.Attributes.Count loop
4574               Find_NS (Parser, Parser.Attributes.List (J).Prefix, NS,
4575                        Include_Default_NS => False);
4576               if NS = No_XML_NS then
4577                  Fatal_Error
4578                    (Parser, Error_Prefix_Not_Declared
4579                     & Get (Parser.Attributes.List (J).Prefix).all);
4580               end if;
4581
4582               for A in 1 .. J - 1 loop
4583                  if Parser.Attributes.List (A).URI = Get_URI (NS)
4584                    and then Parser.Attributes.List (A).Local_Name =
4585                      Parser.Attributes.List (J).Local_Name
4586                  then
4587                     Fatal_Error --  3.1
4588                       (Parser, "Attributes may appear only once: "
4589                        & To_QName
4590                          (Get_URI (NS),
4591                           Parser.Attributes.List (J).Local_Name),
4592                        Parser.Attributes.List (J).Location);
4593                  end if;
4594               end loop;
4595
4596               Parser.Attributes.List (J).URI := Get_URI (NS);
4597            end loop;
4598         end if;
4599      end Resolve_Attribute_Namespaces;
4600
4601      ----------------------
4602      -- Parse_Attributes --
4603      ----------------------
4604
4605      procedure Parse_Attributes
4606        (Elem_NS_Id, Elem_Name_Id : Token; Id : in out Token)
4607      is
4608         Elem : constant Symbol := Find_Symbol
4609           (Parser, Qname_From_Name (Parser, Elem_NS_Id, Elem_Name_Id));
4610         Attr : constant Sax_Attribute_List := Get
4611           (Parser.Default_Atts, Elem).Attributes;
4612         --  The attributes as defined in the DTD
4613
4614         Attr_NS_Id   : Token;
4615         Attr_Name_Id : Token;
4616         Value_Start  : Token;
4617         Value_End    : Token;
4618         Add_Attr     : Boolean;
4619         A            : Integer;
4620         Attr_Name, Attr_Prefix, Attr_Value : Symbol;
4621         Attr_Type    : Attribute_Type;
4622
4623         function Find_Declaration return Integer;
4624         --  Return the position of the declaration for Attr_Prefix:Attr_Name
4625         --  in Attr, or -1 if no declaration exists
4626
4627         procedure Check_Required_Attributes;
4628         --  Check whether all required attributes have been defined
4629
4630         ----------------------
4631         -- Find_Declaration --
4632         ----------------------
4633
4634         function Find_Declaration return Integer is
4635         begin
4636            if Attr.List /= null then
4637               --  First test: same prefix and local name. We will test later
4638               --  for a same URI
4639
4640               for A in Attr.List'First .. Attr.Count loop
4641                  if Attr.List (A).Local_Name = Attr_Name
4642                    and then Attr.List (A).Prefix = Attr_Prefix
4643                  then
4644                     return A;
4645                  end if;
4646               end loop;
4647            end if;
4648            return -1;
4649         end Find_Declaration;
4650
4651         -------------------------------
4652         -- Check_Required_Attributes --
4653         -------------------------------
4654
4655         procedure Check_Required_Attributes is
4656            Found : Boolean;
4657         begin
4658            if Parser.Feature_Validation and then Attr.List /= null then
4659               for A in Attr.List'First .. Attr.Count loop
4660                  if Attr.List (A).Default_Decl = Required then
4661                     Found := False;
4662
4663                     for T in 1 .. Parser.Attributes.Count loop
4664                        if Parser.Attributes.List (T).Local_Name =
4665                            Attr.List (A).Local_Name
4666                          and then Parser.Attributes.List (T).Prefix =
4667                            Attr.List (A).Prefix
4668                        then
4669                           Found := True;
4670                           exit;
4671                        end if;
4672                     end loop;
4673
4674                     if not Found then
4675                        Error
4676                          (Parser, "[VC 3.3.2] Required attribute '"
4677                           & To_QName (Attr.List (A).Prefix,
4678                                       Attr.List (A).Local_Name)
4679                           & "' must be defined");
4680                     end if;
4681                  end if;
4682               end loop;
4683            end if;
4684         end Check_Required_Attributes;
4685
4686      begin
4687         Parser.Attributes.Count := 0;
4688
4689         while Id.Typ /= End_Of_Tag
4690           and then Id.Typ /= End_Of_Input
4691           and then Id.Typ /= End_Of_Start_Tag
4692         loop
4693            Get_Name_NS (Id, Attr_NS_Id, Attr_Name_Id);
4694            if Id.Typ = Space then
4695               Next_Token (Input, Parser, Id);
4696            end if;
4697
4698            if Id.Typ /= Equal then
4699               Fatal_Error  --  3.1
4700                 (Parser, "Attributes must have an explicit value", Id);
4701            end if;
4702
4703            Attr_Name   := Find_Symbol (Parser, Attr_Name_Id);
4704            Attr_Prefix := Find_Symbol (Parser, Attr_NS_Id);
4705
4706            A := Find_Declaration;
4707
4708            Next_Token_Skip_Spaces (Input, Parser, Id);
4709            if Id.Typ /= Double_String_Delimiter
4710              and then Id.Typ /= Single_String_Delimiter
4711            then
4712               Fatal_Error  --  3.1
4713                 (Parser, "Attribute values must be quoted", Id);
4714            end if;
4715
4716            --  3.3.3: If the attribute's type is not CDATA, we must
4717            --  normalize it, ie collapse sequence of spaces.
4718            --  ??? What if the information comes from an XML Schema instead
4719            --  of a DTD
4720            --  ??? That should be done only after we have processed the
4721            --  namespaces, otherwise we do not know what attribute we are
4722            --  dealing with
4723            --  In XML Schema 1.1 Part 1, Section 3.1.4, it is indicated that
4724            --  we should always normalize attribute values according to the
4725            --  whitespace property of their type. As a result, we do not
4726            --  normalize here by default if the attribute was registered, and
4727            --  it will be done by the schema parser if we are using one
4728            --  (see Hook_Start_Element).
4729
4730            Get_String
4731              (Id, Attr_Value_State, Value_Start, Value_End,
4732               Normalize       => True,
4733               Collapse_Spaces => A /= -1
4734                  and then Attr.List (A).Att_Type /= Cdata);
4735
4736            Attr_Value := Find_Symbol (Parser, Value_Start, Value_End);
4737            Add_Attr   := True;
4738
4739            --  Is this a namespace declaration ?
4740
4741            if Parser.Feature_Namespace
4742              and then Attr_Prefix = Parser.Xmlns_Sequence
4743            then
4744               Check_And_Define_Namespace
4745                 (Prefix   => Attr_Name,
4746                  URI      => Attr_Value,
4747                  Location => Attr_Name_Id.Location);
4748               Add_Attr := Parser.Feature_Namespace_Prefixes;
4749
4750            --  Is this the declaration of the default namespace (xmlns="uri")
4751
4752            elsif Parser.Feature_Namespace
4753              and then Attr_NS_Id = Null_Token
4754              and then Attr_Name = Parser.Xmlns_Sequence
4755            then
4756               if Get (Attr_Value).all = Xmlns_URI_Sequence
4757                 or else Get (Attr_Value).all = Namespaces_URI_Sequence
4758               then
4759                  Fatal_Error
4760                    (Parser,
4761                     "The xml namespace cannot be declared as the default"
4762                     & " namespace");
4763               end if;
4764
4765               --  We might have a FIXED declaration for this attribute in the
4766               --  DTD, as per the XML Conformance testsuite
4767               if Parser.Feature_Validation
4768                 and then A /= -1
4769               then
4770                  if Attr.List (A).Default_Decl = Fixed
4771                    and then Attr.List (A).Value /= Attr_Value
4772                  then
4773                     Error
4774                       (Parser,
4775                        "[VC 3.3.2] xmlns attribute doesn't match FIXED value",
4776                        Value_Start);
4777                  end if;
4778               end if;
4779
4780               Check_And_Define_Namespace
4781                 (Prefix   => Empty_String,
4782                  URI      => Attr_Value,
4783                  Location => Attr_Name_Id.Location);
4784               Add_Attr := Parser.Feature_Namespace_Prefixes;
4785
4786            else
4787               --  All attributes must be defined (including xml:lang, that
4788               --  requires additional testing afterwards)
4789               if Parser.Feature_Validation then
4790                  if Attr.List = null then
4791                     Error
4792                       (Parser, "[VC] No attribute allowed for element "
4793                        & Get (Parser.Current_Node.Name).all,
4794                        Attr_Name_Id);
4795                  elsif A = -1 then
4796                     Error
4797                       (Parser, "[VC] Attribute not declared in DTD: "
4798                        & To_QName (Attr_Prefix, Attr_Name),
4799                        Attr_Name_Id);
4800                  end if;
4801               end if;
4802
4803               if Get_String (Attr_NS_Id) = Xml_Sequence then
4804                  if Get_String (Attr_Name_Id) = Lang_Sequence then
4805                     Test_Valid_Lang
4806                       (Parser, Get_String (Value_Start, Value_End));
4807
4808                  elsif Get_String (Attr_Name_Id) = Space_Word_Sequence then
4809                     Test_Valid_Space
4810                       (Parser, Get_String (Value_Start, Value_End));
4811                  end if;
4812               end if;
4813            end if;
4814
4815            --  Register the attribute in the temporary list, until we can
4816            --  properly resolve namespaces
4817
4818            if Add_Attr then
4819               if Debug_Internal then
4820                  Put_Line
4821                    ("Register attribute: "
4822                     & Qname_From_Name (Parser, Attr_NS_Id, Attr_Name_Id)
4823                     & " value=" & Get_String (Value_Start, Value_End));
4824               end if;
4825
4826               if A /= -1 then
4827                  if Attr.List (A).Default_Decl = Fixed
4828                    and then Attr.List (A).Value /= Attr_Value
4829                  then
4830                     Error
4831                       (Parser, "[VC 3.3.2] Fixed attribute '"
4832                        & To_QName (Attr_Prefix, Attr_Name)
4833                        & "' must have the defined value",
4834                        Attr_Name_Id.Location);
4835                  end if;
4836
4837                  Attr_Type := Attr.List (A).Att_Type;
4838               else
4839                  Attr_Type := Cdata;
4840               end if;
4841
4842               Append
4843                 (List       => Parser.Attributes,
4844                  If_Unique  => False,
4845                  Location   => Attr_Name_Id.Location,
4846                  Local_Name => Attr_Name,
4847                  Prefix     => Attr_Prefix,
4848                  Att_Type   => Attr_Type,
4849                  Value      => Attr_Value);
4850            end if;
4851
4852            if Attr_NS_Id /= Null_Token then
4853               Reset_Buffer (Parser, Attr_NS_Id);
4854            else
4855               Reset_Buffer (Parser, Attr_Name_Id);
4856            end if;
4857
4858            Next_Token (Input, Parser, Id);
4859            if Id.Typ = Space then
4860               Next_Token (Input, Parser, Id);
4861            elsif Id.Typ /= End_Of_Tag and then Id.Typ /= End_Of_Start_Tag then
4862               Fatal_Error (Parser, Error_Expecting_Space, Id);
4863            end if;
4864         end loop;
4865
4866         Check_Required_Attributes;
4867
4868         Add_Default_Attributes (Attr.List);
4869
4870         --  Check attribute values. We must do that after adding the default
4871         --  attributes, so that they are properly checked as well. It would be
4872         --  nice to be able to check them only once, but that can't be done
4873         --  when they are declared (since they might be referencing entities
4874         --  declared after them in the DTD)
4875
4876         if Parser.Feature_Validation then
4877            for Att in 1 .. Parser.Attributes.Count loop
4878               Check_Attribute_Value
4879                 (Parser,
4880                  Local_Name => Parser.Attributes.List (Att).Local_Name,
4881                  Typ        => Parser.Attributes.List (Att).Att_Type,
4882                  Value      => Parser.Attributes.List (Att).Value,
4883                  Error_Loc  => Elem_Name_Id);
4884            end loop;
4885         end if;
4886      end Parse_Attributes;
4887
4888      ---------------------
4889      -- Parse_Start_Tag --
4890      ---------------------
4891
4892      procedure Parse_Start_Tag is
4893         Open_Id : constant Token := Id;
4894         Elem_Name_Id, Elem_NS_Id : Token;
4895         NS : XML_NS;
4896
4897      begin
4898         Set_State (Parser, Tag_State);
4899
4900         Parser.Current_Node := new Element'
4901           (NS             => No_XML_NS,
4902            Name           => No_Symbol,
4903            Namespaces     => No_XML_NS,
4904            Start          => Id.Location,
4905            Start_Tag_End  => Id.Location,
4906            Parent         => Parser.Current_Node);
4907
4908         Next_Token (Input, Parser, Id);
4909         Get_Name_NS (Id, Elem_NS_Id, Elem_Name_Id);
4910
4911         Parser.Current_Node.Name := Find_Symbol (Parser, Elem_Name_Id);
4912
4913         if Parser.Current_Node.Parent = null then
4914            Parser.Num_Toplevel_Elements := Parser.Num_Toplevel_Elements + 1;
4915            if Parser.Num_Toplevel_Elements > 1 then
4916               Fatal_Error   --  2.1
4917                 (Parser, "Too many children for top-level node,"
4918                  & " when adding <"
4919                  & Qname_From_Name (Parser, Elem_NS_Id, Elem_Name_Id)
4920                  & ">", Open_Id);
4921            end if;
4922
4923            if Parser.Feature_Validation then
4924               if Parser.DTD_Name = No_Symbol then
4925                  Error  --  VC 2.8
4926                    (Parser, "No DTD defined for this document", Id);
4927
4928               elsif Parser.DTD_Name /= Parser.Current_Node.Name then
4929                  Error
4930                    (Parser, "[VC 2.8] Name of root element doesn't match name"
4931                     & " of DTD ('"
4932                     & Get (Parser.DTD_Name).all & "')", Id);
4933               end if;
4934            end if;
4935
4936         elsif Parser.Feature_Validation then
4937            Check_Model;
4938         end if;
4939
4940         if Elem_NS_Id /= Null_Token
4941           and then Get_String (Elem_NS_Id) = Xmlns_Sequence
4942         then
4943            Fatal_Error (Parser, "Elements must not have the prefix xmlns");
4944         end if;
4945
4946         --  Call the hook before checking the attributes. This might mean we
4947         --  are passing incorrect attributes (or missing ones), but the hook
4948         --  is used for validation (otherwise standard users should use
4949         --  Start_Element itself).
4950         --  We want the count of elements in the NS to not include the current
4951         --  context.
4952
4953         if Debug_Internal then
4954            Put_Line
4955              ("Start_Element "
4956               & Qname_From_Name (Parser, Elem_NS_Id, Elem_Name_Id));
4957         end if;
4958
4959         --  We need to process the attributes first, because they might define
4960         --  the namespace for the element
4961
4962         if Id.Typ = Space then
4963            Next_Token (Input, Parser, Id);
4964            Parse_Attributes (Elem_NS_Id, Elem_Name_Id, Id);
4965
4966         elsif Id.Typ /= End_Of_Tag
4967           and then Id.Typ /= End_Of_Start_Tag
4968         then
4969            Fatal_Error (Parser, Error_Expecting_Space, Id);
4970
4971         else
4972            --  We still need to check the attributes, in case we have none but
4973            --  some where required
4974            Parse_Attributes (Elem_NS_Id, Elem_Name_Id, Id);
4975         end if;
4976
4977         Resolve_Attribute_Namespaces;
4978
4979         --  And report the elements to the callbacks
4980
4981         Set_State (Parser, Default_State);
4982         Find_NS (Parser, Elem_NS_Id, NS);
4983
4984         Parser.Current_Node.NS := NS;
4985
4986         if Parser.Hooks.Start_Element /= null then
4987            Parser.Hooks.Start_Element
4988              (Parser'Unchecked_Access, Parser.Current_Node,
4989               Parser.Attributes'Access);
4990         end if;
4991
4992         --  This does not take into account the use of the namespace by the
4993         --  attributes.
4994         --  ??? That would be costly to again do a Find_NS for each of the
4995         --  attributes. ??? We don't do a Find_NS anymore, so that would be
4996         --  doable in fact.
4997         Increment_Count (NS);
4998
4999         Parser.Current_Node.Start_Tag_End := Get_Location (Parser.Locator);
5000
5001         pragma Warnings (Off, "overlaps with actual");
5002         Start_Element
5003           (Parser,
5004            NS         => NS,
5005            Local_Name => Parser.Current_Node.Name,
5006            Atts       => Parser.Attributes);
5007         pragma Warnings (On, "overlaps with actual");
5008
5009         if Id.Typ = End_Of_Start_Tag then
5010            End_Element;
5011         end if;
5012
5013         if Elem_NS_Id /= Null_Token then
5014            Reset_Buffer (Parser, Elem_NS_Id);
5015         else
5016            Reset_Buffer (Parser, Elem_Name_Id);
5017         end if;
5018
5019         if Id.Typ = End_Of_Input then
5020            Fatal_Error (Parser, "Unexpected end of stream");
5021         end if;
5022      end Parse_Start_Tag;
5023
5024      ----------------------------
5025      -- Parse_Doctype_Contents --
5026      ----------------------------
5027
5028      procedure Parse_Doctype_Contents is
5029         Start_Id : Symbol;
5030
5031         Num_Include : Natural := 0;
5032         --  Number of <![INCLUDE[ sections at the top of the external
5033         --  subset.
5034
5035         Num_Ignore : Natural := 0;
5036         --  Number of <![IGNORE[ and <![INCLUDE[ sections, starting at the
5037         --  first ignore section.
5038      begin
5039         loop
5040            Next_Token_Skip_Spaces (Input, Parser, Id);
5041            Start_Id := Id.Location.System_Id;
5042
5043            if Id.Typ = Ignore then
5044               Num_Ignore := Num_Ignore + 1;
5045
5046            elsif Id.Typ = Include or else Id.Typ = Start_Conditional then
5047               if Num_Ignore > 0 then
5048                  Num_Ignore := Num_Ignore + 1;
5049               else
5050                  Num_Include := Num_Include + 1;
5051               end if;
5052
5053            elsif Id.Typ = End_Conditional then
5054               if Num_Include + Num_Ignore = 0 then
5055                  Fatal_Error (Parser, Error_Unexpected_Chars3, Id);
5056               elsif Num_Ignore > 0 then
5057                  Num_Ignore := Num_Ignore - 1;
5058               else
5059                  Num_Include := Num_Include  - 1;
5060               end if;
5061
5062            elsif Id.Typ = End_Of_Input then
5063               exit;
5064
5065            elsif Num_Ignore = 0 then
5066               case Id.Typ is
5067                  when End_Of_Tag | Internal_DTD_End =>
5068                     exit;
5069                  when Entity_Def => Parse_Entity_Def (Id);
5070                  when Element_Def => Parse_Element_Def (Id);
5071                  when Notation => Parse_Notation_Def (Id);
5072                  when Attlist_Def => Parse_Attlist_Def (Id);
5073                  when Text | Name =>
5074                     if Id.First < Id.Last then
5075                        Fatal_Error
5076                          (Parser,  "Unexpected character in the DTD");
5077                     else
5078                        Reset_Buffer (Parser, Id);
5079                     end if;
5080                  when Comment =>
5081                     Comment (Parser, Parser.Buffer (Id.First .. Id.Last));
5082                     Reset_Buffer (Parser, Id);
5083                  when Start_Of_PI =>
5084                     Parse_PI (Id);
5085                  when others =>
5086                     Fatal_Error  --  2.8
5087                       (Parser, "Element not allowed in the DTD", Id);
5088               end case;
5089
5090            else
5091               Reset_Buffer (Parser, Id);
5092            end if;
5093
5094            --  XML 1.0 Errata 14 or XML 1.1 section 4.3.2: nesting of entities
5095            --  doesn't apply for well-formedness in the DTD
5096            if Parser.Feature_Validation then
5097               if Start_Id /= Id.Location.System_Id then
5098                  Error (Parser, Error_Entity_Self_Contained, Id);
5099               end if;
5100            end if;
5101         end loop;
5102
5103         if Num_Ignore + Num_Include /= 0 then
5104            Fatal_Error  --  3.4
5105              (Parser, "Conditional section must be properly terminated",
5106               Id);
5107         end if;
5108      end Parse_Doctype_Contents;
5109
5110      -------------------
5111      -- Parse_Doctype --
5112      -------------------
5113
5114      procedure Parse_Doctype is
5115         Public_Start, Public_End : Token := Null_Token;
5116         System_Start, System_End : Token := Null_Token;
5117         Name_Id : Token;
5118         NS_Id : Token;
5119      begin
5120         Set_State (Parser, DTD_State);
5121
5122         Next_NS_Token_Skip_Spaces (Input, Parser, NS_Id, Name_Id);
5123
5124         if Name_Id.Typ /= Name then
5125            Fatal_Error (Parser, "Expecting name after <!DOCTYPE");
5126         end if;
5127
5128         Next_Token_Skip_Spaces (Input, Parser, Id);
5129
5130         Get_External (Id, System_Start, System_End, Public_Start, Public_End);
5131         if Id.Typ = Space then
5132            Next_Token (Input, Parser, Id);
5133         end if;
5134         Start_DTD
5135           (Parser,
5136            Name => Parser.Buffer (Name_Id.First .. Name_Id.Last),
5137            Public_Id => Parser.Buffer (Public_Start.First .. Public_End.Last),
5138            System_Id =>
5139              Parser.Buffer (System_Start.First .. System_End.Last));
5140
5141         if Parser.Feature_Validation then
5142            Parser.DTD_Name := Find_Symbol (Parser, Name_Id);
5143         end if;
5144
5145         if Id.Typ = Internal_DTD_Start then
5146            Parse_Doctype_Contents;
5147            if Id.Typ /= Internal_DTD_End then
5148               Fatal_Error  --  2.8
5149                 (Parser, "Expecting end of internal subset ']>'", Id);
5150            end if;
5151         elsif Id.Typ /= End_Of_Tag then
5152            Fatal_Error (Parser, "Expecting end of DTD");
5153         end if;
5154
5155         --  Read the external subset if required. This needs to be read
5156         --  after the internal subset only, so that the latter gets
5157         --  priority (XML specifications 2.8)
5158         if System_End.Last >= System_Start.First then
5159            declare
5160               Loc : constant Sax.Locators.Location :=
5161                 Get_Location (Parser.Locator);
5162               System : constant Symbol :=
5163                 Find_Symbol
5164                   (Parser,
5165                    Parser.Buffer (System_Start.First .. System_End.Last));
5166               URI : constant Symbol :=
5167                 Resolve_URI (Parser, System_Id (Parser), System);
5168               In_External : constant Boolean := Parser.In_External_Entity;
5169               Input_F : File_Input;
5170               Saved_Last_Read : constant Unicode_Char := Parser.Last_Read;
5171            begin
5172               Open (Get (URI).all, Input_F);
5173
5174               --  Protect against the case where the last character read was
5175               --  a LineFeed.
5176               Parser.Last_Read := Unicode_Char'Val (16#00#);
5177               Parser.Last_Read_Is_Valid := False;
5178
5179               Set_Line_Number (Parser.Locator, 1);
5180               Set_Column_Number (Parser.Locator, Prolog_Size (Input_F));
5181               Set_System_Id (Parser.Locator, URI);
5182               Set_Public_Id (Parser.Locator, System);
5183
5184               if NS_Id /= Null_Token then
5185                  Reset_Buffer (Parser, NS_Id);
5186               else
5187                  Reset_Buffer (Parser, Name_Id);
5188               end if;
5189
5190               Parser.In_External_Entity := True;
5191
5192               Syntactic_Parse (Parser, Input_F);
5193               Close (Input_F);
5194               Parser.In_External_Entity := In_External;
5195
5196               Set_Location (Parser.Locator, Loc);
5197               Parser.Last_Read := Saved_Last_Read;
5198               Parser.Last_Read_Is_Valid := True;
5199            exception
5200               when Name_Error =>
5201                  Close (Input_F);
5202                  Error
5203                    (Parser,
5204                     "External subset not found: "
5205                     & Parser.Buffer (System_Start.First .. System_End.Last),
5206                     Id);
5207
5208                  if NS_Id /= Null_Token then
5209                     Reset_Buffer (Parser, NS_Id);
5210                  else
5211                     Reset_Buffer (Parser, Name_Id);
5212                  end if;
5213
5214               when others =>
5215                  Close (Input_F);
5216                  raise;
5217            end;
5218
5219         else
5220            if NS_Id /= Null_Token then
5221               Reset_Buffer (Parser, NS_Id);
5222            else
5223               Reset_Buffer (Parser, Name_Id);
5224            end if;
5225         end if;
5226
5227         --  Check that all declarations are fully declared
5228         if Parser.Feature_Validation then
5229            declare
5230               Iter : Notations_Table.Iterator := First (Parser.Notations);
5231            begin
5232               while Iter /= Notations_Table.No_Iterator loop
5233                  if not Current (Iter).Declaration_Seen then
5234                     Error (Parser, Error_Notation_Undeclared
5235                            & Get (Current (Iter).Name).all);
5236                  end if;
5237                  Next (Parser.Notations, Iter);
5238               end loop;
5239            end;
5240         end if;
5241
5242         Parser.In_External_Entity := False;
5243         End_DTD (Parser);
5244         Set_State (Parser, Default_State);
5245      end Parse_Doctype;
5246
5247      -----------------
5248      -- End_Element --
5249      -----------------
5250
5251      procedure End_Element is
5252      begin
5253         if Parser.Hooks.End_Element /= null then
5254            Parser.Hooks.End_Element
5255              (Parser'Unchecked_Access, Parser.Current_Node);
5256         end if;
5257
5258         End_Element
5259           (Parser, NS => Parser.Current_Node.NS,
5260            Local_Name => Parser.Current_Node.Name);
5261
5262         --  Tag must end in the same entity
5263         if Parser.Feature_Validation
5264           and then
5265             Id.Location.System_Id /= Parser.Current_Node.Start.System_Id
5266         then
5267            Error (Parser, Error_Entity_Self_Contained, Id);
5268         end if;
5269
5270         Close_Namespaces (Parser, Parser.Current_Node.Namespaces);
5271
5272         --  Move back to the parent node (after freeing the current node)
5273         Free (Parser.Current_Node);
5274      end End_Element;
5275
5276      -------------------
5277      -- Parse_End_Tag --
5278      -------------------
5279
5280      procedure Parse_End_Tag is
5281         Open_Id : constant Token := Id;
5282         NS_Id, Name_Id : Token := Null_Token;
5283      begin
5284         Set_State (Parser, Tag_State);
5285
5286         Next_Token (Input, Parser, Id);
5287         Get_Name_NS (Id, NS_Id, Name_Id);
5288         if Id.Typ = Space then
5289            Next_Token (Input, Parser, Id);
5290         end if;
5291
5292         if Id.Typ /= End_Of_Tag then
5293            Fatal_Error (Parser, "Tags must end with a '>' symbol", Id);
5294            --  3.1
5295         end if;
5296
5297         if Parser.Current_Node = null then
5298            Fatal_Error --  3
5299              (Parser, "No start tag found for this end tag", Id);
5300         end if;
5301
5302         --  Tag must end in the same entity
5303         if Parser.Feature_Validation
5304           and then Id.Location.System_Id /=
5305             Parser.Current_Node.Start.System_Id
5306         then
5307            Error (Parser, Error_Entity_Self_Contained, Id);
5308         end if;
5309
5310         if Parser.Current_Node = null then
5311            Fatal_Error
5312              (Parser,  --  WF element type match
5313               "Unexpected closing tag", Open_Id);
5314
5315         elsif Parser.Buffer (NS_Id.First .. NS_Id.Last) /=
5316           Get (Get_Prefix (Parser.Current_Node.NS)).all
5317           or else Parser.Buffer (Name_Id.First .. Name_Id.Last) /=
5318           Get (Parser.Current_Node.Name).all
5319         then
5320            --  Well-Formedness Constraint: Element Type Match
5321            if Get_Prefix (Parser.Current_Node.NS) /= Empty_String then
5322               Fatal_Error
5323                 (Parser,  --  WF element type match
5324                  "Name differ for closing tag (expecting "
5325                  & Get (Get_Prefix (Parser.Current_Node.NS)).all
5326                  & ':' & Get (Parser.Current_Node.Name).all
5327                  & ", opened line"
5328                  & Integer'Image (Parser.Current_Node.Start.Line)
5329                  & ')',
5330                  Open_Id);
5331            else
5332               Fatal_Error
5333                 (Parser, --  WF element type match
5334                  "Name differ for closing tag ("
5335                  & "expecting " & Get (Parser.Current_Node.Name).all
5336                  & ", opened line"
5337                  & Integer'Image (Parser.Current_Node.Start.Line)
5338                  & ')',
5339                  Open_Id);
5340            end if;
5341         end if;
5342
5343         End_Element;
5344
5345         Set_State (Parser, Default_State);
5346         if NS_Id /= Null_Token then
5347            Reset_Buffer (Parser, NS_Id);
5348         else
5349            Reset_Buffer (Parser, Name_Id);
5350         end if;
5351      end Parse_End_Tag;
5352
5353      -------------------------
5354      -- Check_Version_Value --
5355      -------------------------
5356
5357      procedure Check_Version_Value (Id : in out Token) is
5358         C : Unicode_Char;
5359         J : Natural;
5360         Value_Start, Value_End : Token;
5361         Tmp_Version : XML_Versions;
5362      begin
5363         Next_Token_Skip_Spaces (Input, Parser, Id);
5364         if Id.Typ /= Equal then
5365            Fatal_Error (Parser, "Expecting '=' sign", Id);
5366         end if;
5367
5368         Next_Token_Skip_Spaces (Input, Parser, Id);
5369         if Id.Typ /= Double_String_Delimiter
5370           and then Id.Typ /= Single_String_Delimiter
5371         then
5372            Fatal_Error (Parser, "Expecting version value", Id);
5373         end if;
5374         Get_String (Id, Attr_Value_State, Value_Start, Value_End);
5375
5376         J := Value_Start.First;
5377         while J <= Value_End.Last loop
5378            Encoding.Read (Parser.Buffer.all, J, C);
5379            if not (C in Latin_Small_Letter_A .. Latin_Small_Letter_Z)
5380              and then
5381                 not (C in Latin_Capital_Letter_A .. Latin_Capital_Letter_Z)
5382              and then not (C in Digit_Zero .. Digit_Nine)
5383              and then C /= Low_Line
5384              and then C /= Full_Stop
5385              and then C /= Unicode.Names.Basic_Latin.Colon
5386              and then C /= Hyphen_Minus
5387            then
5388               Fatal_Error  --  2.8
5389                 (Parser, "Illegal version number in <?xml?> processing"
5390                  & " instruction", Value_Start);
5391            end if;
5392         end loop;
5393
5394         if Parser.Buffer (Value_Start.First .. Value_End.Last) = "1.1" then
5395            Tmp_Version := XML_1_1;
5396
5397         elsif Parser.Buffer (Value_Start.First .. Value_End.Last) = "1.0" then
5398            Tmp_Version := XML_1_0;
5399
5400         else
5401            case Parser.XML_Version is
5402               when XML_1_0_Third_Edition
5403                  | XML_1_0_Fourth_Edition =>
5404                  Error
5405                    (Parser, "Unsupported version of XML: "
5406                     & Parser.Buffer (Value_Start.First .. Value_End.Last));
5407
5408               when XML_1_0_Fifth_Edition
5409                  | XML_1_0
5410                  | XML_1_1 =>
5411                  null;
5412            end case;
5413         end if;
5414
5415         if Parser.In_External_Entity
5416           and then
5417             ((Tmp_Version = XML_1_1
5418               and then Parser.XML_Version /= XML_1_1)
5419              or else
5420                (Tmp_Version /= XML_1_1
5421                 and then Parser.XML_Version = XML_1_1))
5422         then
5423            Fatal_Error
5424              (Parser,
5425               "External entity doesn't have the same"
5426               & " XML version as document");
5427         end if;
5428
5429         --  Override the version in the parser, but only if the one set
5430         --  doesn't match yet. In particular, this allows users to set their
5431         --  preferred edition of XML 1.0
5432
5433         if Tmp_Version = XML_1_1
5434           and then Parser.XML_Version /= XML_1_1
5435         then
5436            Parser.XML_Version := XML_1_1;
5437         elsif Tmp_Version = XML_1_0
5438           and then Parser.XML_Version = XML_1_1
5439         then
5440            Parser.XML_Version := XML_1_0;
5441         end if;
5442
5443         Next_Token (Input, Parser, Id);
5444         if Id.Typ = Space then
5445            Next_Token (Input, Parser, Id);
5446         elsif Id.Typ /= End_Of_PI then
5447            Fatal_Error (Parser, "values must be separated by spaces", Id);
5448         end if;
5449      end Check_Version_Value;
5450
5451      --------------------------
5452      -- Check_Encoding_Value --
5453      --------------------------
5454
5455      procedure Check_Encoding_Value (Id : in out Token) is
5456         Inp : Input_Source_Access := Input'Unchecked_Access;
5457         C : Unicode_Char;
5458         J : Natural;
5459         Value_Start, Value_End : Token;
5460         Tmp : Positive;
5461      begin
5462         --  If we are parsing an external entity, everything applies to it.
5463         --  See test xmltest/valid/ext-sa/008.xml
5464         if Parser.Inputs /= null then
5465            Inp := Parser.Inputs.Input;
5466         end if;
5467
5468         Next_Token_Skip_Spaces (Inp.all, Parser, Id);
5469         if Id.Typ /= Equal then
5470            Fatal_Error (Parser, "Expecting '=' sign");
5471         end if;
5472
5473         Next_Token_Skip_Spaces (Inp.all, Parser, Id);
5474         if Id.Typ /= Double_String_Delimiter
5475           and then Id.Typ /= Single_String_Delimiter
5476         then
5477            Fatal_Error (Parser, "Expecting encoding value");
5478         end if;
5479         Get_String (Id, Attr_Value_State, Value_Start, Value_End);
5480
5481         if Value_End.Last < Value_Start.First then
5482            Fatal_Error   --  4.3.3
5483              (Parser, "Empty value for encoding not allowed");
5484         else
5485            Tmp := Value_Start.First;
5486            Encoding.Read (Parser.Buffer.all, Tmp, C);
5487            if not (C in Latin_Small_Letter_A .. Latin_Small_Letter_Z)
5488              and then not
5489                (C in Latin_Capital_Letter_A .. Latin_Capital_Letter_Z)
5490            then
5491               Fatal_Error   --  4.3.3
5492                 (Parser, "Illegal character '"
5493                  & Debug_Encode (C) & "' in encoding value", Value_Start);
5494            end if;
5495
5496            J := Value_Start.First + Encoding.Width (C);
5497            while J <= Value_End.Last loop
5498               Encoding.Read (Parser.Buffer.all, J, C);
5499               if not (C in Latin_Small_Letter_A .. Latin_Small_Letter_Z)
5500                 and then not
5501                   (C in Latin_Capital_Letter_A .. Latin_Capital_Letter_Z)
5502                 and then not (C in Digit_Zero .. Digit_Nine)
5503                 and then C /= Full_Stop
5504                 and then C /= Low_Line
5505                 and then C /= Hyphen_Minus
5506               then
5507                  Fatal_Error  --  4.3.3
5508                    (Parser, "Illegal character '"
5509                     & Debug_Encode (C) & "' in encoding value",
5510                     Value_Start);
5511               end if;
5512            end loop;
5513         end if;
5514
5515         --  Check we indeed have a following space
5516
5517         Next_Token (Inp.all, Parser, Id);
5518         if Id.Typ = Space then
5519            Next_Token (Inp.all, Parser, Id);
5520         elsif Id.Typ /= End_Of_PI then
5521            Fatal_Error (Parser, "values must be separated by spaces", Id);
5522         end if;
5523
5524         --  Change the encoding for the streams, if needed
5525         Set_Stream_Encoding
5526           (Inp.all, Parser.Buffer (Value_Start.First .. Value_End.Last));
5527      end Check_Encoding_Value;
5528
5529      ----------------------------
5530      -- Check_Standalone_Value --
5531      ----------------------------
5532
5533      procedure Check_Standalone_Value (Id : in out Token) is
5534         Value_Start, Value_End : Token;
5535      begin
5536         Next_Token_Skip_Spaces (Input, Parser, Id);
5537         if Id.Typ /= Equal then
5538            Fatal_Error (Parser, "Expecting '=' sign");
5539         end if;
5540
5541         Next_Token_Skip_Spaces (Input, Parser, Id);
5542         if Id.Typ /= Double_String_Delimiter
5543           and then Id.Typ /= Single_String_Delimiter
5544         then
5545            Fatal_Error
5546              (Parser, "Parameter to 'standalone' must be quoted", Id);
5547         end if;
5548         Get_String (Id, Attr_Value_State, Value_Start, Value_End);
5549
5550         if Parser.Buffer (Value_Start.First .. Value_End.Last) /= Yes_Sequence
5551           and then Parser.Buffer (Value_Start.First .. Value_End.Last) /=
5552             No_Sequence
5553         then
5554            Fatal_Error
5555              (Parser,   --  2.9 [32]
5556               "Invalid value for standalone parameter in <?xml?>",
5557               Value_Start);
5558         end if;
5559
5560         Parser.Standalone_Document :=
5561           Parser.Buffer (Value_Start.First .. Value_End.Last) =
5562           Yes_Sequence;
5563
5564         Next_Token (Input, Parser, Id);
5565         if Id.Typ = Space then
5566            Next_Token (Input, Parser, Id);
5567         elsif Id.Typ /= End_Of_PI then
5568            Fatal_Error (Parser, "values must be separated by spaces", Id);
5569         end if;
5570      end Check_Standalone_Value;
5571
5572      --------------
5573      -- Parse_PI --
5574      --------------
5575
5576      procedure Parse_PI (Id : in out Token) is
5577         State : constant Parser_State := Get_State (Parser);
5578         Open_Id : constant Token := Id;
5579         Name_Id, Data_Start : Token;
5580         Data_End : Token := Null_Token;
5581      begin
5582         Set_State (Parser, PI_State);
5583
5584         Next_Token (Input, Parser, Name_Id);
5585         if Name_Id.Typ /= Name then
5586            Fatal_Error
5587              (Parser,  --  2.6
5588               "Processing Instruction must specify a target name",
5589               Name_Id);
5590         end if;
5591
5592         Check_Valid_Name_Or_NCname (Parser, Name_Id);
5593
5594         Next_Token (Input, Parser, Id);
5595         if Id.Typ /= Space and then Id.Typ /= End_Of_PI then
5596            Fatal_Error (Parser, "Must have space between target and data");
5597         elsif Id.Typ = Space then
5598            Next_Token (Input, Parser, Id);
5599         end if;
5600
5601         --  Special handling for <?xml?>
5602         if Parser.Buffer (Name_Id.First .. Name_Id.Last) = Xml_Sequence then
5603
5604            if Open_Id.Location.Line /= 1
5605              or else
5606                (Parser.Inputs = null
5607                 and then Open_Id.Location.Column /= 1 + Prolog_Size (Input))
5608              or else
5609                (Parser.Inputs /= null
5610                 and then Open_Id.Location.Column /=
5611                   1 + Prolog_Size (Parser.Inputs.Input.all))
5612              or else (Parser.Inputs /= null
5613                       and then not Parser.Inputs.External)
5614            then
5615               Fatal_Error
5616                 (Parser,   --  2.8
5617                  "<?xml?> instruction must be first in document",
5618                  Open_Id);
5619            end if;
5620
5621            --  ??? No true for text declaratinos 4.3.1 (external parsed
5622            --  entities)
5623            Set_State (Parser, Tag_State);
5624
5625            if Parser.Buffer (Id.First .. Id.Last) = Version_Sequence then
5626               Check_Version_Value (Id);
5627            elsif not Parser.In_External_Entity then
5628               Fatal_Error
5629                 (Parser, "'version' must be the first argument to <?xml?>",
5630                  Id);
5631            end if;
5632
5633            if Id.Typ = Name
5634              and then Parser.Buffer (Id.First .. Id.Last) = Encoding_Sequence
5635            then
5636               Check_Encoding_Value (Id);
5637            elsif Parser.In_External_Entity then
5638               Fatal_Error
5639                 (Parser, "'encoding' must be specified for <?xml?> in"
5640                  & " external entities", Id);
5641            end if;
5642
5643            if not Parser.In_External_Entity
5644              and then Id.Typ = Name
5645              and then Parser.Buffer (Id.First .. Id.Last) =
5646                Standalone_Sequence
5647            then
5648               Check_Standalone_Value (Id);
5649            end if;
5650
5651            if Id.Typ /= End_Of_PI then
5652               if Parser.In_External_Entity then
5653                  Fatal_Error
5654                    (Parser,
5655                     "Text declarations <?xml?> in external entity cannot"
5656                     & " specify parameters other than 'version' and"
5657                     & " 'encoding'", Id);
5658               else
5659                  Fatal_Error
5660                    (Parser,
5661                     "<?xml..?> arguments can only be 'version', 'encoding' or"
5662                     & " 'standalone', in that order", Id);
5663               end if;
5664            end if;
5665
5666         else
5667            --  (2.6)[17]: Name can not be 'xml' (case insensitive)
5668            declare
5669               C : Unicode_Char;
5670               J : Natural := Name_Id.First;
5671            begin
5672               Encoding.Read (Parser.Buffer.all, J, C);
5673
5674               if C = Latin_Small_Letter_X
5675                 or else C = Latin_Capital_Letter_X
5676               then
5677                  Encoding.Read (Parser.Buffer.all, J, C);
5678
5679                  if C = Latin_Capital_Letter_M
5680                    or else C = Latin_Small_Letter_M
5681                  then
5682                     Encoding.Read (Parser.Buffer.all, J, C);
5683
5684                     if (C = Latin_Capital_Letter_L
5685                         or else C = Latin_Small_Letter_L)
5686                       and then J = Name_Id.Last + 1
5687                     then
5688                        Fatal_Error
5689                          (Parser,   --  2.6
5690                           "'"
5691                           & Parser.Buffer (Name_Id.First .. Name_Id.Last)
5692                           & "' is not a valid processing instruction target",
5693                           Name_Id);
5694                     end if;
5695                  end if;
5696               end if;
5697            end;
5698
5699            Data_Start := Id;
5700
5701            while Id.Typ /= End_Of_PI and then Id.Typ /= End_Of_Input loop
5702               Data_End := Id;
5703
5704               if Id.Typ = Double_String_Delimiter then
5705                  Put_In_Buffer (Parser, """");
5706                  Data_End.Last := Data_End.Last + 1;
5707               elsif Id.Typ = Single_String_Delimiter then
5708                  Put_In_Buffer (Parser, "'");
5709                  Data_End.Last := Data_End.Last + 1;
5710               end if;
5711
5712               Next_Token (Input, Parser, Id);
5713            end loop;
5714
5715            if Id.Typ = End_Of_Input then
5716               Fatal_Error  --  2.6
5717                 (Parser, "Processing instruction must end with '?>'",
5718                  Open_Id);
5719            end if;
5720
5721            Processing_Instruction
5722              (Parser,
5723               Target => Parser.Buffer (Name_Id.First .. Name_Id.Last),
5724               Data   => Parser.Buffer (Data_Start.First .. Data_End.Last));
5725         end if;
5726
5727         Set_State (Parser, State);
5728         Reset_Buffer (Parser, Name_Id);
5729      end Parse_PI;
5730
5731   begin
5732      --  Initialize the parser with the first character of the stream.
5733      if Eof (Input) then
5734         return;
5735      end if;
5736      Next_Char (Input, Parser);
5737
5738      if Parser.State.In_DTD then
5739         Parse_Doctype_Contents;
5740      end if;
5741
5742      loop
5743         --  Unless in string, buffer should be empty at this point. Strings
5744         --  are special-cased just in case we are currently substituting
5745         --  entities while in a string.
5746         pragma Assert (Parser.State.Ignore_Special
5747                        or else Parser.Buffer_Length = 0);
5748
5749         Next_Token (Input, Parser, Id,
5750                     Coalesce_Space => Parser.Current_Node /= null);
5751         exit when Id.Typ = End_Of_Input;
5752
5753         case Id.Typ is
5754            when Start_Of_PI =>
5755               Parse_PI (Id);
5756
5757            when Cdata_Section =>
5758               if Parser.Current_Node = null then
5759                  Fatal_Error  --  2.1
5760                    (Parser, "Non-white space found at top level", Id);
5761               end if;
5762               Start_Cdata (Parser);
5763
5764               if Parser.Hooks.Characters /= null then
5765                  Parser.Hooks.Characters
5766                    (Parser'Unchecked_Access,
5767                     Parser.Buffer (Id.First .. Id.Last));
5768               end if;
5769
5770               Characters (Parser, Parser.Buffer (Id.First .. Id.Last));
5771               End_Cdata (Parser);
5772               Reset_Buffer (Parser, Id);
5773
5774            when Text | Name =>
5775               if Parser.Current_Node = null then
5776                  Fatal_Error  --  2.1
5777                    (Parser, "Non-white space found at top level", Id);
5778               end if;
5779
5780               if Parser.Hooks.Characters /= null then
5781                  Parser.Hooks.Characters
5782                    (Parser'Unchecked_Access,
5783                     Parser.Buffer (Id.First .. Id.Last));
5784               end if;
5785
5786               Characters (Parser, Parser.Buffer (Id.First .. Id.Last));
5787               Reset_Buffer (Parser, Id);
5788
5789            when Sax.Readers.Space =>
5790               --   If "xml:space" attribute is preserve
5791               --   then same as Text
5792
5793               if Parser.Hooks.Whitespace /= null then
5794                  Parser.Hooks.Whitespace
5795                    (Parser'Unchecked_Access,
5796                     Parser.Buffer (Id.First .. Id.Last));
5797               end if;
5798
5799               Ignorable_Whitespace
5800                 (Parser, Parser.Buffer (Id.First .. Id.Last));
5801               Reset_Buffer (Parser, Id);
5802
5803            when Comment =>
5804               Comment (Parser, Parser.Buffer (Id.First .. Id.Last));
5805               Reset_Buffer (Parser, Id);
5806
5807            when Start_Of_Tag =>
5808               Parse_Start_Tag;
5809
5810            when Start_Of_End_Tag =>
5811               Parse_End_Tag;
5812
5813            when Doctype_Start =>
5814               Parse_Doctype;
5815
5816            when others =>
5817               Fatal_Error (Parser, "Currently ignored: "
5818                            & Token_Type'Image (Id.Typ));
5819         end case;
5820      end loop;
5821   end Syntactic_Parse;
5822
5823   ----------
5824   -- Free --
5825   ----------
5826
5827   procedure Free (Parser : in out Sax_Reader'Class) is
5828      Tmp, Tmp2 : Element_Access;
5829   begin
5830      Close_Inputs (Parser, Parser.Inputs);
5831      Close_Inputs (Parser, Parser.Close_Inputs);
5832
5833      Free (Parser.Default_Namespaces);
5834      Free (Parser.Buffer);
5835      Parser.Buffer_Length := 0;
5836
5837      Parser.Attributes.Count := 0;
5838      Unchecked_Free (Parser.Attributes.List);
5839
5840      --  Free the nodes, in case there are still some open
5841      Tmp := Parser.Current_Node;
5842      while Tmp /= null loop
5843         Tmp2 := Tmp.Parent;
5844         Free (Tmp);
5845         Tmp := Tmp2;
5846      end loop;
5847
5848      --  Free the content model for the default attributes
5849      --  is done automatically when the attributes are reset
5850
5851      if Parser.Hooks.Data /= null then
5852         Free (Parser.Hooks.Data.all);
5853         Unchecked_Free (Parser.Hooks.Data);
5854      end if;
5855
5856      --  Free the internal tables
5857      Reset (Parser.Entities);
5858      Reset (Parser.Default_Atts);
5859      Reset (Parser.Notations);
5860
5861      Free (Parser.Locator);
5862   end Free;
5863
5864   ---------------
5865   -- Set_Hooks --
5866   ---------------
5867
5868   procedure Set_Hooks
5869     (Handler        : in out Sax_Reader;
5870      Data           : Hook_Data_Access  := null;
5871      Start_Element  : Start_Element_Hook := null;
5872      End_Element    : End_Element_Hook   := null;
5873      Characters     : Characters_Hook    := null;
5874      Whitespace     : Whitespace_Hook    := null;
5875      Doc_Locator    : Set_Doc_Locator_Hook := null;
5876      Notation_Decl  : Notation_Decl_Hook := null) is
5877   begin
5878      if Handler.Hooks.Data /= null then
5879         Free (Handler.Hooks.Data.all);
5880         Unchecked_Free (Handler.Hooks.Data);
5881      end if;
5882
5883      Handler.Hooks :=
5884        (Data           => Data,
5885         Start_Element  => Start_Element,
5886         End_Element    => End_Element,
5887         Characters     => Characters,
5888         Whitespace     => Whitespace,
5889         Doc_Locator    => Doc_Locator,
5890         Notation_Decl  => Notation_Decl);
5891   end Set_Hooks;
5892
5893   ------------------------
5894   -- Initialize_Symbols --
5895   ------------------------
5896
5897   procedure Initialize_Symbols (Parser : in out Sax_Reader) is
5898   begin
5899      if Parser.Lt_Sequence = No_Symbol then
5900         if Get (Parser.Symbols) = null then
5901            if Debug_Internal then
5902               Put_Line ("Initialize_Symbols: creating new table");
5903            end if;
5904            Parser.Symbols := Sax.Utils.Allocate;
5905         end if;
5906
5907         Parser.Lt_Sequence    := Find_Symbol (Parser, Lt_Sequence);
5908         Parser.Gt_Sequence    := Find_Symbol (Parser, Gt_Sequence);
5909         Parser.Amp_Sequence   := Find_Symbol (Parser, Amp_Sequence);
5910         Parser.Apos_Sequence  := Find_Symbol (Parser, Apos_Sequence);
5911         Parser.Quot_Sequence  := Find_Symbol (Parser, Quot_Sequence);
5912         Parser.Xmlns_Sequence := Find_Symbol (Parser, Xmlns_Sequence);
5913         Parser.Xml_Sequence   := Find_Symbol (Parser, Xml_Sequence);
5914         Parser.Symbol_Percent := Find_Symbol (Parser, "%");
5915         Parser.Symbol_Ampersand := Find_Symbol (Parser, "&");
5916         Parser.Namespaces_URI_Sequence :=
5917           Find_Symbol (Parser, Namespaces_URI_Sequence);
5918      end if;
5919   end Initialize_Symbols;
5920
5921   ----------------------
5922   -- Close_Namespaces --
5923   ----------------------
5924
5925   procedure Close_Namespaces
5926     (Parser : in out Sax_Reader'Class; List : XML_NS)
5927   is
5928      NS : XML_NS := List;
5929   begin
5930      while NS /= No_XML_NS loop
5931         if Get_Prefix (NS) /= Empty_String
5932           and then Get_Prefix (NS) /= Parser.Xmlns_Sequence
5933         then
5934            End_Prefix_Mapping (Parser, Get_Prefix (NS));
5935         end if;
5936         NS := Next_In_List (NS);
5937      end loop;
5938   end Close_Namespaces;
5939
5940   -----------
5941   -- Parse --
5942   -----------
5943
5944   procedure Parse
5945     (Parser : in out Sax_Reader;
5946      Input  : in out Input_Sources.Input_Source'Class) is
5947   begin
5948      Initialize_Symbols (Parser);
5949
5950      Parser.Locator := Sax.Locators.Create;
5951      Parser.Public_Id := Find_Symbol (Parser, Get_Public_Id (Input));
5952      Set_Public_Id (Parser.Locator, Parser.Public_Id);
5953      Parser.System_Id := Find_Symbol (Parser, Get_System_Id (Input));
5954      Set_System_Id (Parser.Locator, Parser.System_Id);
5955      Set_Column_Number (Parser.Locator, Prolog_Size (Input));
5956      Set_Line_Number (Parser.Locator, 1);
5957      Parser.Lookup_Char := Unicode.Unicode_Char'Last;
5958      Parser.Current_Node := null;
5959      Parser.Num_Toplevel_Elements := 0;
5960      Parser.Previous_Char_Was_CR := False;
5961      Parser.Ignore_State_Special := False;
5962      Parser.In_External_Entity := False;
5963      Parser.Last_Read_Is_Valid := False;
5964      Parser.Buffer := new Byte_Sequence (1 .. Initial_Buffer_Length);
5965      Set_State (Parser, Default_State);
5966
5967      pragma Warnings (Off, "overlaps with actual");
5968      Add_Namespace_No_Event
5969        (Parser,
5970         Prefix => Parser.Xml_Sequence,
5971         URI    => Find_Symbol
5972           (Parser,
5973            Encodings.From_Utf32
5974              (Basic_8bit.To_Utf32 ("http://www.w3.org/XML/1998/namespace"))));
5975      Add_Namespace_No_Event
5976        (Parser, Parser.Xmlns_Sequence, Parser.Xmlns_Sequence);
5977      Add_Namespace_No_Event (Parser, Empty_String, Empty_String);
5978
5979      if Parser.Hooks.Doc_Locator /= null then
5980         Parser.Hooks.Doc_Locator (Parser, Parser.Locator);
5981      end if;
5982
5983      Set_Document_Locator (Sax_Reader'Class (Parser), Parser.Locator);
5984
5985      Start_Document (Sax_Reader'Class (Parser));
5986      Syntactic_Parse (Sax_Reader'Class (Parser), Input);
5987      Close_Namespaces (Parser, Parser.Default_Namespaces);
5988      pragma Warnings (On, "overlaps with actual");
5989
5990      --  All the nodes must have been closed at the end of the document
5991      if Parser.Current_Node /= null then
5992         Fatal_Error   --  2.1
5993           (Parser, "Node <" & Get (Parser.Current_Node.Name).all
5994            & "> is not closed");
5995      end if;
5996
5997      if Parser.Num_Toplevel_Elements = 0 then
5998         Fatal_Error (Parser, "No root element specified"); --  2.1
5999      end if;
6000
6001      End_Document (Sax_Reader'Class (Parser));
6002
6003      Free (Parser);
6004
6005   exception
6006      when others =>
6007         Free (Parser);
6008         raise;
6009   end Parse;
6010
6011   ----------
6012   -- Hash --
6013   ----------
6014
6015   function Hash (Str : String) return Unsigned_32 is
6016      Result : Unsigned_32 := Str'Length;
6017   begin
6018      for J in Str'Range loop
6019         Result := Rotate_Left (Result, 1) +
6020           Unsigned_32 (Character'Pos (Str (J)));
6021      end loop;
6022
6023      return Result;
6024   end Hash;
6025
6026   -------------
6027   -- Get_Key --
6028   -------------
6029
6030   function Get_Key (Entity : Entity_Entry_Access) return Symbol is
6031   begin
6032      return Entity.Name;
6033   end Get_Key;
6034
6035   ----------
6036   -- Free --
6037   ----------
6038
6039   procedure Free (Att : in out Attributes_Entry) is
6040   begin
6041      Unchecked_Free (Att.Attributes.List);
6042      Att.Attributes.Count := 0;
6043   end Free;
6044
6045   -------------
6046   -- Get_Key --
6047   -------------
6048
6049   function Get_Key (Att : Attributes_Entry) return Symbol is
6050   begin
6051      return Att.Element_Name;
6052   end Get_Key;
6053
6054   ----------
6055   -- Free --
6056   ----------
6057
6058   procedure Free (Notation : in out Notation_Entry) is
6059      pragma Unreferenced (Notation);
6060   begin
6061      null;
6062   end Free;
6063
6064   -------------
6065   -- Get_Key --
6066   -------------
6067
6068   function Get_Key (Notation : Notation_Entry) return Symbol is
6069   begin
6070      return Notation.Name;
6071   end Get_Key;
6072
6073   -----------------
6074   -- Get_Feature --
6075   -----------------
6076
6077   function Get_Feature (Parser : Sax_Reader; Name : String) return Boolean is
6078   begin
6079      if Name = Namespace_Feature then
6080         return Parser.Feature_Namespace;
6081
6082      elsif Name = Namespace_Prefixes_Feature then
6083         return Parser.Feature_Namespace_Prefixes;
6084
6085      elsif Name = External_General_Entities_Feature then
6086         return Parser.Feature_External_General_Entities;
6087
6088      elsif Name = External_Parameter_Entities_Feature then
6089         return Parser.Feature_External_Parameter_Entities;
6090
6091      elsif Name = Validation_Feature then
6092         return Parser.Feature_Validation;
6093
6094      elsif Name = Parameter_Entities_Feature then
6095         return False;  --  ??? Unsupported for now
6096
6097      elsif Name = Test_Valid_Chars_Feature then
6098         return Parser.Feature_Test_Valid_Chars;
6099
6100      elsif Name = Allow_Relative_IRI_Feature then
6101         return Parser.Feature_Allow_Relative_IRI;
6102
6103      elsif Name = Schema_Validation_Feature then
6104         return Parser.Feature_Schema_Validation;
6105      end if;
6106
6107      return False;
6108   end Get_Feature;
6109
6110   -----------------
6111   -- Set_Feature --
6112   -----------------
6113
6114   procedure Set_Feature
6115     (Parser : in out Sax_Reader; Name : String; Value : Boolean) is
6116   begin
6117      if Name = Namespace_Feature then
6118         Parser.Feature_Namespace := Value;
6119
6120      elsif Name = Namespace_Prefixes_Feature then
6121         Parser.Feature_Namespace_Prefixes := Value;
6122
6123      elsif Name = External_General_Entities_Feature then
6124         Parser.Feature_External_General_Entities := Value;
6125
6126      elsif Name = External_Parameter_Entities_Feature then
6127         Parser.Feature_External_Parameter_Entities := Value;
6128
6129      elsif Name = Validation_Feature then
6130         Parser.Feature_Validation := Value;
6131
6132      elsif Name = Test_Valid_Chars_Feature then
6133         Parser.Feature_Test_Valid_Chars := Value;
6134
6135      elsif Name = Schema_Validation_Feature then
6136         Parser.Feature_Schema_Validation := Value;
6137
6138      elsif Name = Allow_Relative_IRI_Feature then
6139         Parser.Feature_Allow_Relative_IRI := Value;
6140      end if;
6141   end Set_Feature;
6142
6143   -----------------
6144   -- Fatal_Error --
6145   -----------------
6146
6147   procedure Fatal_Error
6148     (Handler : in out Sax_Reader; Except : Sax_Parse_Exception'Class)
6149   is
6150      pragma Warnings (Off, Handler);
6151   begin
6152      Raise_Exception
6153        (XML_Fatal_Error'Identity,
6154         Get_Message (Except));
6155   end Fatal_Error;
6156
6157   --------------------------
6158   -- Start_Prefix_Mapping --
6159   --------------------------
6160
6161   procedure Start_Prefix_Mapping
6162     (Handler : in out Reader;
6163      Prefix  : Sax.Symbols.Symbol;
6164      URI     : Sax.Symbols.Symbol)
6165   is
6166   begin
6167      Start_Prefix_Mapping
6168        (Reader'Class (Handler), Get (Prefix).all, Get (URI).all);
6169   end Start_Prefix_Mapping;
6170
6171   ------------------------
6172   -- End_Prefix_Mapping --
6173   ------------------------
6174
6175   procedure End_Prefix_Mapping (Handler : in out Reader; Prefix : Symbol) is
6176   begin
6177      End_Prefix_Mapping
6178        (Reader'Class (Handler), Get (Prefix).all);
6179   end End_Prefix_Mapping;
6180
6181   -------------------
6182   -- Start_Element --
6183   -------------------
6184
6185   procedure Start_Element
6186     (Handler       : in out Reader;
6187      NS            : Sax.Utils.XML_NS;
6188      Local_Name    : Sax.Symbols.Symbol;
6189      Atts          : Sax_Attribute_List)
6190   is
6191      Attributes : Sax.Attributes.Attributes := Create_Attribute_List (Atts);
6192   begin
6193      Start_Element
6194        (Reader'Class (Handler),
6195         Namespace_URI => Get (Get_URI (NS)).all,
6196         Local_Name    => Get (Local_Name).all,
6197         Qname         => Qname_From_Name (Get_Prefix (NS), Local_Name),
6198         Atts          => Attributes);
6199      Clear (Attributes);
6200
6201   exception
6202      when others =>
6203         Clear (Attributes);
6204         raise;
6205   end Start_Element;
6206
6207   -----------------
6208   -- End_Element --
6209   -----------------
6210
6211   procedure End_Element
6212     (Handler       : in out Reader;
6213      NS            : Sax.Utils.XML_NS;
6214      Local_Name    : Sax.Symbols.Symbol) is
6215   begin
6216      End_Element
6217        (Reader'Class (Handler),
6218         Namespace_URI => Get (Get_URI (NS)).all,
6219         Local_Name    => Get (Local_Name).all,
6220         Qname         => Qname_From_Name (Get_Prefix (NS), Local_Name));
6221   end End_Element;
6222
6223   --------------------
6224   -- Skipped_Entity --
6225   --------------------
6226
6227   procedure Skipped_Entity
6228     (Handler : in out Reader;
6229      Name    : Sax.Symbols.Symbol) is
6230   begin
6231      Skipped_Entity (Reader'Class (Handler), Get (Name).all);
6232   end Skipped_Entity;
6233
6234   ------------------
6235   -- Start_Entity --
6236   ------------------
6237
6238   procedure Start_Entity
6239     (Handler : in out Reader;
6240      Name    : Sax.Symbols.Symbol) is
6241   begin
6242      Start_Entity (Reader'Class (Handler), Get (Name).all);
6243   end Start_Entity;
6244
6245   ----------------
6246   -- End_Entity --
6247   ----------------
6248
6249   procedure End_Entity
6250     (Handler : in out Reader;
6251      Name    : Sax.Symbols.Symbol) is
6252   begin
6253      End_Entity (Reader'Class (Handler), Get (Name).all);
6254   end End_Entity;
6255
6256   --------------------
6257   -- Resolve_Entity --
6258   --------------------
6259
6260   function Resolve_Entity
6261     (Handler   : Sax_Reader;
6262      Public_Id : Unicode.CES.Byte_Sequence;
6263      System_Id : Unicode.CES.Byte_Sequence)
6264      return Input_Sources.Input_Source_Access
6265   is
6266      pragma Warnings (Off, Handler);
6267      pragma Warnings (Off, Public_Id);
6268      pragma Warnings (Off, System_Id);
6269   begin
6270      return null;
6271   end Resolve_Entity;
6272
6273   --------------------
6274   -- Get_Hooks_Data --
6275   --------------------
6276
6277   function Get_Hooks_Data (Handler : Sax_Reader) return Hook_Data_Access is
6278   begin
6279      return Handler.Hooks.Data;
6280   end Get_Hooks_Data;
6281
6282   ------------------------------------
6283   -- Use_Basename_In_Error_Messages --
6284   ------------------------------------
6285
6286   procedure Use_Basename_In_Error_Messages
6287     (Parser       : in out Sax_Reader;
6288      Use_Basename : Boolean := True)
6289   is
6290   begin
6291      Parser.Basename_In_Messages := Use_Basename;
6292   end Use_Basename_In_Error_Messages;
6293
6294   ------------------------------------
6295   -- Use_Basename_In_Error_Messages --
6296   ------------------------------------
6297
6298   function Use_Basename_In_Error_Messages
6299     (Parser : Sax_Reader) return Boolean is
6300   begin
6301      return Parser.Basename_In_Messages;
6302   end Use_Basename_In_Error_Messages;
6303
6304   ------------
6305   -- Get_NS --
6306   ------------
6307
6308   function Get_NS (Elem : Element_Access) return XML_NS is
6309   begin
6310      return Elem.NS;
6311   end Get_NS;
6312
6313   --------------------
6314   -- Get_Local_Name --
6315   --------------------
6316
6317   function Get_Local_Name (Elem : Element_Access) return Symbol is
6318   begin
6319      return Elem.Name;
6320   end Get_Local_Name;
6321
6322   --------------
6323   -- To_QName --
6324   --------------
6325
6326   function To_QName
6327     (Namespace_URI, Local_Name : Sax.Symbols.Symbol)
6328      return Unicode.CES.Byte_Sequence is
6329   begin
6330      if Namespace_URI = Empty_String then
6331         return Get (Local_Name).all;
6332      else
6333         return '{' & Get (Namespace_URI).all & '}' & Get (Local_Name).all;
6334      end if;
6335   end To_QName;
6336
6337   --------------
6338   -- To_QName --
6339   --------------
6340
6341   function To_QName
6342     (Elem : Element_Access) return Unicode.CES.Byte_Sequence is
6343   begin
6344      return To_QName (Get_URI (Elem.NS), Elem.Name);
6345   end To_QName;
6346
6347   ----------------------
6348   -- Set_Symbol_Table --
6349   ----------------------
6350
6351   procedure Set_Symbol_Table
6352     (Parser  : in out Sax_Reader;
6353      Symbols : Symbol_Table) is
6354   begin
6355      Parser.Lt_Sequence := No_Symbol;
6356      Parser.Symbols := Symbols;
6357   end Set_Symbol_Table;
6358
6359   ----------------------
6360   -- Get_Symbol_Table --
6361   ----------------------
6362
6363   function Get_Symbol_Table (Parser : Sax_Reader'Class) return Symbol_Table is
6364   begin
6365      return Parser.Symbols;
6366   end Get_Symbol_Table;
6367
6368   ---------------
6369   -- Get_Index --
6370   ---------------
6371
6372   function Get_Index
6373     (List : Sax_Attribute_List;
6374      URI  : Sax.Symbols.Symbol;
6375      Local_Name : Sax.Symbols.Symbol) return Integer is
6376   begin
6377      for A in 1 .. List.Count loop
6378         if List.List (A).URI = URI
6379           and then List.List (A).Local_Name = Local_Name
6380         then
6381            return A;
6382         end if;
6383      end loop;
6384      return -1;
6385   end Get_Index;
6386
6387   ---------------
6388   -- Get_Index --
6389   ---------------
6390
6391   function Get_Index
6392     (Handler    : Sax_Reader'Class;
6393      List       : Sax_Attribute_List;
6394      URI        : Unicode.CES.Byte_Sequence;
6395      Local_Name : Unicode.CES.Byte_Sequence) return Integer is
6396   begin
6397      return Get_Index
6398        (List,
6399         URI        => Find_Symbol (Handler, URI),
6400         Local_Name => Find_Symbol (Handler, Local_Name));
6401   end Get_Index;
6402
6403   ---------------
6404   -- Get_Value --
6405   ---------------
6406
6407   function Get_Value
6408     (List : Sax_Attribute_List; Index : Integer) return Sax.Symbols.Symbol is
6409   begin
6410      if Index < 0 then
6411         return No_Symbol;
6412      else
6413         return List.List (Index).Value;
6414      end if;
6415   end Get_Value;
6416
6417   ---------------
6418   -- Set_Value --
6419   ---------------
6420
6421   procedure Set_Value
6422     (List  : Sax_Attribute_List;
6423      Index : Integer;
6424      Val   : Sax.Symbols.Symbol) is
6425   begin
6426      List.List (Index).Value := Val;
6427   end Set_Value;
6428
6429   ------------------
6430   -- Get_Location --
6431   ------------------
6432
6433   function Get_Location
6434     (List : Sax_Attribute_List; Index : Integer) return Sax.Locators.Location
6435   is
6436   begin
6437      if Index < 0 then
6438         return No_Location;
6439      else
6440         return List.List (Index).Location;
6441      end if;
6442   end Get_Location;
6443
6444   ------------------------
6445   -- Start_Tag_Location --
6446   ------------------------
6447
6448   function Start_Tag_Location
6449     (Elem : Element_Access) return Sax.Locators.Location is
6450   begin
6451      return Elem.Start;
6452   end Start_Tag_Location;
6453
6454   ----------------------------
6455   -- Start_Tag_End_Location --
6456   ----------------------------
6457
6458   function Start_Tag_End_Location
6459     (Elem : Element_Access) return Sax.Locators.Location is
6460   begin
6461      return Elem.Start_Tag_End;
6462   end Start_Tag_End_Location;
6463
6464   ------------------------------
6465   -- Get_Non_Normalized_Value --
6466   ------------------------------
6467
6468   function Get_Non_Normalized_Value
6469     (List : Sax_Attribute_List; Index : Integer) return Sax.Symbols.Symbol is
6470   begin
6471      return List.List (Index).Non_Normalized_Value;
6472   end Get_Non_Normalized_Value;
6473
6474   --------------------------
6475   -- Get_Value_As_Boolean --
6476   --------------------------
6477
6478   function Get_Value_As_Boolean
6479     (List : Sax_Attribute_List; Index : Integer; Default : Boolean := False)
6480      return Boolean
6481   is
6482      Val : Symbol;
6483   begin
6484      if Index < 0 then
6485         return Default;
6486      else
6487         Val := Get_Value (List, Index);
6488         return Get (Val).all = "true" or else Get (Val).all = "1";
6489      end if;
6490   end Get_Value_As_Boolean;
6491
6492   --------------------------
6493   -- Set_Normalized_Value --
6494   --------------------------
6495
6496   procedure Set_Normalized_Value
6497     (List : Sax_Attribute_List; Index : Integer; Value : Sax.Symbols.Symbol)
6498   is
6499   begin
6500      List.List (Index).Value := Value;
6501   end Set_Normalized_Value;
6502
6503   --------------
6504   -- Get_Type --
6505   --------------
6506
6507   function Get_Type
6508     (List : Sax_Attribute_List; Index : Integer)
6509      return Sax.Attributes.Attribute_Type is
6510   begin
6511      return List.List (Index).Att_Type;
6512   end Get_Type;
6513
6514   --------------
6515   -- Set_Type --
6516   --------------
6517
6518   procedure Set_Type
6519     (List : Sax_Attribute_List; Index : Integer;
6520      Typ  : Sax.Attributes.Attribute_Type) is
6521   begin
6522      List.List (Index).Att_Type := Typ;
6523   end Set_Type;
6524
6525   ----------------
6526   -- Get_Length --
6527   ----------------
6528
6529   function Get_Length (List : Sax_Attribute_List) return Natural is
6530   begin
6531      return List.Count;
6532   end Get_Length;
6533
6534   ----------------
6535   -- Get_Prefix --
6536   ----------------
6537
6538   function Get_Prefix
6539     (List : Sax_Attribute_List; Index : Integer) return Sax.Symbols.Symbol is
6540   begin
6541      return List.List (Index).Prefix;
6542   end Get_Prefix;
6543
6544   --------------
6545   -- Get_Name --
6546   --------------
6547
6548   function Get_Name
6549     (List : Sax_Attribute_List; Index : Integer) return Qualified_Name is
6550   begin
6551      return (NS    => List.List (Index).URI,
6552              Local => List.List (Index).Local_Name);
6553   end Get_Name;
6554
6555   ---------------
6556   -- Get_Qname --
6557   ---------------
6558
6559   function Get_Qname
6560     (List : Sax_Attribute_List; Index : Integer)
6561      return Unicode.CES.Byte_Sequence
6562   is
6563   begin
6564      return Qname_From_Name (List.List (Index).Prefix,
6565                              List.List (Index).Local_Name);
6566   end Get_Qname;
6567
6568   ----------------------
6569   -- Current_Location --
6570   ----------------------
6571
6572   function Current_Location
6573     (Handler : Sax_Reader) return Sax.Locators.Location is
6574   begin
6575      return Get_Location (Handler.Locator);
6576   end Current_Location;
6577
6578   ---------------------
6579   -- Set_XML_Version --
6580   ---------------------
6581
6582   procedure Set_XML_Version
6583     (Parser : in out Sax_Reader; XML : XML_Versions := XML_1_0_Fifth_Edition)
6584   is
6585   begin
6586      if XML = XML_1_0 then
6587         Parser.XML_Version := XML_1_0_Fifth_Edition;
6588      else
6589         Parser.XML_Version := XML;
6590      end if;
6591   end Set_XML_Version;
6592
6593   ---------------------
6594   -- Get_XML_Version --
6595   ---------------------
6596
6597   function Get_XML_Version (Parser : Sax_Reader) return XML_Versions is
6598   begin
6599      return Parser.XML_Version;
6600   end Get_XML_Version;
6601
6602end Sax.Readers;
6603