1------------------------------------------------------------------------------
2--                                                                          --
3--                            Matreshka Project                             --
4--                                                                          --
5--                               XML Processor                              --
6--                                                                          --
7--                        Runtime Library Component                         --
8--                                                                          --
9------------------------------------------------------------------------------
10--                                                                          --
11-- Copyright © 2010-2014, Vadim Godunko <vgodunko@gmail.com>                --
12-- All rights reserved.                                                     --
13--                                                                          --
14-- Redistribution and use in source and binary forms, with or without       --
15-- modification, are permitted provided that the following conditions       --
16-- are met:                                                                 --
17--                                                                          --
18--  * Redistributions of source code must retain the above copyright        --
19--    notice, this list of conditions and the following disclaimer.         --
20--                                                                          --
21--  * Redistributions in binary form must reproduce the above copyright     --
22--    notice, this list of conditions and the following disclaimer in the   --
23--    documentation and/or other materials provided with the distribution.  --
24--                                                                          --
25--  * Neither the name of the Vadim Godunko, IE nor the names of its        --
26--    contributors may be used to endorse or promote products derived from  --
27--    this software without specific prior written permission.              --
28--                                                                          --
29-- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS      --
30-- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT        --
31-- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR    --
32-- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT     --
33-- HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,   --
34-- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED --
35-- TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR   --
36-- PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF   --
37-- LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING     --
38-- NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS       --
39-- SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.             --
40--                                                                          --
41------------------------------------------------------------------------------
42--  $Revision: 4777 $ $Date: 2014-03-29 11:52:24 +0400 (Sat, 29 Mar 2014) $
43------------------------------------------------------------------------------
44with Matreshka.Internals.Strings.Configuration;
45with Matreshka.Internals.Strings.Operations;
46with Matreshka.Internals.Unicode.Characters.Latin;
47with XML.SAX.Simple_Readers.Callbacks;
48with XML.SAX.Simple_Readers.Scanner.Tables;
49
50package body XML.SAX.Simple_Readers.Scanner.Actions is
51
52   use Matreshka.Internals.Strings.Configuration;
53   use Matreshka.Internals.Unicode;
54   use Matreshka.Internals.Unicode.Characters.Latin;
55   use Matreshka.Internals.Utf16;
56   use Matreshka.Internals.XML;
57   use Matreshka.Internals.XML.Entity_Tables;
58   use Matreshka.Internals.XML.Symbol_Tables;
59
60   procedure Resolve_Symbol
61    (Self            : in out Simple_Reader'Class;
62     Trim_Left       : Natural;
63     Trim_Right      : Natural;
64     Trim_Whitespace : Boolean;
65     Can_Be_Qname    : Boolean;
66     Not_Qname       : Boolean;
67     Error           : out Boolean;
68     Symbol          : out Matreshka.Internals.XML.Symbol_Identifier);
69   --  Converts name to symbol. Trim_Left, Trim_Right, Trim_Whitespace can be
70   --  used to trim several characters from head of tail of matched substring,
71   --  and to trim leading whitespaces. Not_Qname specify that resolved name
72   --  is not a qualified name at all (it is enumeration element of attribute
73   --  of non-NOTATION type). Can_Be_Qname specify that resolved name is
74   --  qualified name when namespace processing is enabled. Subprogram sets
75   --  Error when error is detected and Symbol when symbol is resolved.
76
77   procedure Character_Reference_To_Code_Point
78    (Self  : in out Simple_Reader'Class;
79     Hex   : Boolean;
80     Code  : out Code_Point;
81     Valid : out Boolean);
82   --  Converts scanned character reference to code point. Reports errors to
83   --  application is any and sets Valid to False. Otherwise sets Code to
84   --  referenced code point and sets Valid to True.
85
86   ---------------------------------------
87   -- Character_Reference_To_Code_Point --
88   ---------------------------------------
89
90   procedure Character_Reference_To_Code_Point
91    (Self  : in out Simple_Reader'Class;
92     Hex   : Boolean;
93     Code  : out Code_Point;
94     Valid : out Boolean)
95   is
96      Zero_Fixup  : constant := Digit_Zero;
97      Upper_Fixup : constant := Latin_Capital_Letter_A - 16#0A#;
98      Lower_Fixup : constant := Latin_Small_Letter_A - 16#0A#;
99
100      FP  : Utf16_String_Index := Self.Scanner_State.YY_Base_Position;
101      LP  : Utf16_String_Index := Self.Scanner_State.YY_Current_Position;
102      Aux : Code_Unit_32       := 0;
103      D   : Code_Point;
104
105   begin
106      --  NOTE: Sequences of leading and trailing character always fixed:
107      --  "&#" for decimal representation and "&#x" for hexadecimal
108      --  representation for the leading sequence of characters and ";" for
109      --  trailing; thus we can just add/subtract required number of code point
110      --  positions instead of doing more expensive iteration with analysis of
111      --  UTF-16 code units.
112      --
113      --  Actual value has limited character set ([0-9] or [0-9a-fA-F]), all
114      --  of characters is on BMP also, thus expensive decoding can be omitted
115      --  also.
116
117      if Hex then
118         --  Trim three leading characters and trailing character.
119
120         FP := FP + 3;
121         LP := LP - 1;
122
123         while FP < LP loop
124            D := Code_Point (Self.Scanner_State.Data.Value (FP));
125            FP := FP + 1;
126
127            if D in Latin_Capital_Letter_A .. Latin_Capital_Letter_F then
128               Aux := (Aux * 16) + D - Upper_Fixup;
129
130            elsif D in Latin_Small_Letter_A .. Latin_Small_Letter_F then
131               Aux := (Aux * 16) + D - Lower_Fixup;
132
133            else
134               Aux := (Aux * 16) + D - Zero_Fixup;
135            end if;
136
137            --  Check whether collected code is inside range of Unicode code
138            --  points. Then it is outside reset to maximum value and exit
139            --  the loop. Error will be reported later in this subprogram.
140
141            if Aux not in Code_Point then
142               Aux := Code_Unit_32'Last;
143
144               exit;
145            end if;
146         end loop;
147
148      else
149         --  Trim two leading characters and trailing character.
150
151         FP := FP + 2;
152         LP := LP - 1;
153
154         while FP < LP loop
155            D := Code_Point (Self.Scanner_State.Data.Value (FP));
156            FP := FP + 1;
157
158            Aux := (Aux * 10) + D - Zero_Fixup;
159
160            --  Check whether collected code is inside range of Unicode code
161            --  points. Then it is outside reset to maximum value and exit
162            --  the loop. Error will be reported later in this subprogram.
163
164            if Aux not in Code_Point then
165               Aux := Code_Unit_32'Last;
166
167               exit;
168            end if;
169         end loop;
170      end if;
171
172      --  [XML1.0/1.1 4.1 WFC: Legal Character]
173      --
174      --  "Characters referred to using character references MUST match the
175      --  production for Char."
176      --
177      --  Check whether character reference is resolved into valid character.
178
179      case Self.Version is
180         when XML_1_0 =>
181            --  [XML1.0 2.2 Characters]
182            --
183            --  [2] Char ::=
184            --        #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
185            --          | [#x10000-#x10FFFF]
186
187            Valid :=
188              Aux = 16#0009#
189                or Aux = 16#000A#
190                or Aux = 16#000D#
191                or Aux in 16#0020# .. 16#D7FF#
192                or Aux in 16#E000# .. 16#FFFD#
193                or Aux in 16#1_0000# .. 16#10_FFFF#;
194
195         when XML_1_1 =>
196            --  [XML1.1 2.2 Characters]
197            --
198            --  [2] Char ::=
199            --        [#x1-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
200
201            Valid :=
202              Aux in 16#0001# .. 16#D7FF#
203                or Aux in 16#E000# .. 16#FFFD#
204                or Aux in 16#1_0000# .. 16#10_FFFF#;
205      end case;
206
207      if not Valid then
208         Callbacks.Call_Fatal_Error
209          (Self,
210           League.Strings.To_Universal_String
211            ("[XML 4.1 WFC: Legal Character] character reference refers to"
212               & " invalid character"));
213         Self.Error_Reported := True;
214
215      else
216         Code := Aux;
217      end if;
218   end Character_Reference_To_Code_Point;
219
220   ----------------------------------------
221   -- On_Asterisk_In_Content_Declaration --
222   ----------------------------------------
223
224   function On_Asterisk_In_Content_Declaration
225    (Self : in out Simple_Reader'Class) return Token is
226   begin
227      if Self.Whitespace_Matched then
228         Callbacks.Call_Fatal_Error
229          (Self,
230           League.Strings.To_Universal_String
231            ("[XML [47], [48], [51]] illegal whitespace before asterisk"));
232
233         return Error;
234
235      else
236         return Token_Asterisk;
237      end if;
238   end On_Asterisk_In_Content_Declaration;
239
240   -----------------------------------------------------
241   -- On_Attribute_Name_In_Attribute_List_Declaration --
242   -----------------------------------------------------
243
244   function On_Attribute_Name_In_Attribute_List_Declaration
245    (Self : in out Simple_Reader'Class) return Token
246   is
247      Qname_Error : Boolean;
248
249   begin
250      --  [53] AttDef ::= S Name S AttType S DefaultDecl
251      --
252      --  Checks whitespace before the attribute name is present.
253
254      if not Self.Whitespace_Matched then
255         Callbacks.Call_Fatal_Error
256          (Self,
257           League.Strings.To_Universal_String
258            ("[XML [53] AttDef]"
259               & " no whitespace before attribute name"));
260
261         return Error;
262      end if;
263
264      Self.Whitespace_Matched := False;
265
266      Resolve_Symbol
267       (Self, 0, 0, False, True, False, Qname_Error, Self.YYLVal.Symbol);
268
269      if Qname_Error then
270         return Error;
271
272      else
273         Enter_Start_Condition (Self, Tables.ATTLIST_TYPE);
274
275         return Token_Name;
276      end if;
277   end On_Attribute_Name_In_Attribute_List_Declaration;
278
279   -----------------------
280   -- On_Attribute_Type --
281   -----------------------
282
283   function On_Attribute_Type
284    (Self       : in out Simple_Reader'Class;
285     Type_Token : Token) return Token is
286   begin
287      --  Checks ithat whitespace before attribute type keyword is detected
288      --  and report error when check fail.
289
290      if not Self.Whitespace_Matched then
291         --  XXX This is recoverable error.
292
293         Callbacks.Call_Fatal_Error
294          (Self,
295           League.Strings.To_Universal_String
296            ("whitespace required before attribute type"));
297
298         return Error;
299      end if;
300
301      Self.Whitespace_Matched := False;
302
303      return Type_Token;
304   end On_Attribute_Type;
305
306   ---------------------------------------
307   -- On_Attribute_Value_Character_Data --
308   ---------------------------------------
309
310   procedure On_Attribute_Value_Character_Data
311    (Self : in out Simple_Reader'Class)
312   is
313      Next : Utf16_String_Index := Self.Scanner_State.YY_Base_Position;
314      Code : Code_Point;
315
316   begin
317      --  Allocates buffer of necessary size to avoid memory reallocation. It
318      --  can be larger when needed if attribute value normalization is
319      --  activated, but usually not too large.
320
321      Matreshka.Internals.Strings.Mutate
322       (Self.Character_Data,
323        Self.Character_Data.Unused
324          + Self.Scanner_State.YY_Current_Position
325          - Self.Scanner_State.YY_Base_Position
326          + 1);
327
328      --  Two mostly equivalent paths are separated, because they are on the
329      --  performance critical path.
330
331      if Self.Normalize_Value then
332         --  Normalization is required for attribute's value.
333
334         while Next /= Self.Scanner_State.YY_Current_Position loop
335            Unchecked_Next (Self.Scanner_State.Data.Value, Next, Code);
336
337            --  It can be reasonable to implement this step of normalization
338            --  on SIMD.
339
340            if Code = Character_Tabulation
341              or Code = Line_Feed
342              or Code = Carriage_Return
343            then
344               Code := Space;
345            end if;
346
347            if Code = Space then
348               if not Self.Space_Before then
349                  Unchecked_Store
350                   (Self.Character_Data.Value,
351                    Self.Character_Data.Unused,
352                    Code);
353                  Self.Character_Data.Length := Self.Character_Data.Length + 1;
354                  Self.Space_Before := True;
355               end if;
356
357            else
358               Unchecked_Store
359                (Self.Character_Data.Value,
360                 Self.Character_Data.Unused,
361                 Code);
362               Self.Character_Data.Length := Self.Character_Data.Length + 1;
363               Self.Space_Before := False;
364            end if;
365         end loop;
366
367      else
368         --  XXX Can be optimized by adding special operation Append_Slice.
369
370         while Next /= Self.Scanner_State.YY_Current_Position loop
371            Unchecked_Next (Self.Scanner_State.Data.Value, Next, Code);
372
373            --  It can be reasonable to implement this step of normalization
374            --  on SIMD.
375
376            if Code = Character_Tabulation
377              or Code = Line_Feed
378              or Code = Carriage_Return
379            then
380               Code := Space;
381            end if;
382
383            Unchecked_Store
384             (Self.Character_Data.Value,
385              Self.Character_Data.Unused,
386              Code);
387            Self.Character_Data.Length := Self.Character_Data.Length + 1;
388         end loop;
389      end if;
390   end On_Attribute_Value_Character_Data;
391
392   ----------------------------------------
393   -- On_Attribute_Value_Close_Delimiter --
394   ----------------------------------------
395
396   function On_Attribute_Value_Close_Delimiter
397    (Self  : in out Simple_Reader'Class) return Boolean
398   is
399      --  NOTE: Attribute value delimiter can be ' or " and both are
400      --  represented as single UTF-16 code unit, thus expensive UTF-16
401      --  decoding can be avoided.
402
403      Delimiter : constant Matreshka.Internals.Unicode.Code_Point
404        := Code_Point
405            (Self.Scanner_State.Data.Value
406              (Self.Scanner_State.YY_Base_Position));
407
408   begin
409      if Self.Scanner_State.Delimiter /= Delimiter then
410         Matreshka.Internals.Strings.Operations.Unterminated_Append
411          (Self.Character_Data, Delimiter);
412
413         return False;
414
415      else
416         if Self.Normalize_Value and then Self.Space_Before then
417            --  One space character is at the end of the prepared string, it
418            --  must be removed from the result.
419
420            Self.Character_Data.Length := Self.Character_Data.Length - 1;
421            Self.Character_Data.Unused := Self.Character_Data.Unused - 1;
422         end if;
423
424         String_Handler.Fill_Null_Terminator (Self.Character_Data);
425         Matreshka.Internals.Strings.Reference (Self.Character_Data);
426         Set_String_Internal
427          (Item          => Self.YYLVal,
428           String        => Self.Character_Data,
429           Is_Whitespace => False);
430         Reset_Whitespace_Matched (Self);
431         Pop_Start_Condition (Self);
432
433         return True;
434      end if;
435   end On_Attribute_Value_Close_Delimiter;
436
437   -------------------------------------------
438   -- On_Attribute_Value_In_XML_Declaration --
439   -------------------------------------------
440
441   function On_Attribute_Value_In_XML_Declaration
442    (Self : in out Simple_Reader'Class) return Token is
443   begin
444      Set_String_Internal
445       (Item          => Self.YYLVal,
446        String        => YY_Text (Self, 1, 1),
447        Is_Whitespace => False);
448      Reset_Whitespace_Matched (Self);
449
450      return Token_String_Segment;
451   end On_Attribute_Value_In_XML_Declaration;
452
453   ---------------------------------------
454   -- On_Attribute_Value_Open_Delimiter --
455   ---------------------------------------
456
457   function On_Attribute_Value_Open_Delimiter
458    (Self  : in out Simple_Reader'Class;
459     State : Interfaces.Unsigned_32) return Boolean is
460   begin
461      if not Self.Whitespace_Matched then
462         --  XXX This is recoverable error.
463
464         Callbacks.Call_Fatal_Error
465          (Self,
466           League.Strings.To_Universal_String
467            ("whitespace required before default value"));
468
469         return False;
470      end if;
471
472      On_Attribute_Value_Open_Delimiter (Self, State);
473
474      return True;
475   end On_Attribute_Value_Open_Delimiter;
476
477   ---------------------------------------
478   -- On_Attribute_Value_Open_Delimiter --
479   ---------------------------------------
480
481   procedure On_Attribute_Value_Open_Delimiter
482    (Self  : in out Simple_Reader'Class;
483     State : Interfaces.Unsigned_32) is
484   begin
485      --  NOTE: Attribute value delimiter can be ' or " and both are
486      --  represented as single UTF-16 code unit, thus expensive UTF-16
487      --  decoding can be avoided.
488
489      Self.Scanner_State.Delimiter :=
490        Code_Point
491         (Self.Scanner_State.Data.Value (Self.Scanner_State.YY_Base_Position));
492      Matreshka.Internals.Strings.Operations.Reset (Self.Character_Data);
493
494      case Self.Version is
495         when XML_1_0 =>
496            Push_And_Enter_Start_Condition
497             (Self, State, Tables.ATTRIBUTE_VALUE_10);
498
499         when XML_1_1 =>
500            Push_And_Enter_Start_Condition
501             (Self, State, Tables.ATTRIBUTE_VALUE_11);
502      end case;
503   end On_Attribute_Value_Open_Delimiter;
504
505   --------------
506   -- On_CDATA --
507   --------------
508
509   function On_CDATA (Self : in out Simple_Reader'Class) return Token is
510   begin
511      --  Segment of CDATA section (production [20]) optionally terminated by
512      --  end of CDATA section mark (production [21]).
513
514      if Self.Scanner_State.YY_Current_Position
515           - Self.Scanner_State.YY_Base_Position >= 3
516        and then (Code_Point
517                   (Self.Scanner_State.Data.Value
518                     (Self.Scanner_State.YY_Current_Position - 1))
519                        = Greater_Than_Sign
520                    and Code_Point
521                         (Self.Scanner_State.Data.Value
522                           (Self.Scanner_State.YY_Current_Position - 2))
523                              = Right_Square_Bracket
524                    and Code_Point
525                         (Self.Scanner_State.Data.Value
526                           (Self.Scanner_State.YY_Current_Position - 3))
527                              = Right_Square_Bracket)
528      then
529         --  Character data ends with ']]>', move backward before end of CDATA
530         --  section literal. End of CDATA section literal will be processed
531         --  on next cycle.
532
533         YY_Move_Backward (Self);
534         YY_Move_Backward (Self);
535         YY_Move_Backward (Self);
536      end if;
537
538      Matreshka.Internals.Strings.Operations.Copy_Slice
539       (Self.Character_Data,
540        Self.Scanner_State.Data,
541        Self.Scanner_State.YY_Base_Position,
542        Self.Scanner_State.YY_Current_Position
543          - Self.Scanner_State.YY_Base_Position,
544        Self.Scanner_State.YY_Current_Index
545          - Self.Scanner_State.YY_Base_Index);
546
547      Matreshka.Internals.Strings.Reference (Self.Character_Data);
548      Set_String_Internal
549       (Item          => Self.YYLVal,
550        String        => Self.Character_Data,
551        Is_Whitespace => False);
552
553      return Token_String_Segment;
554   end On_CDATA;
555
556   -----------------------
557   -- On_Character_Data --
558   -----------------------
559
560   function On_Character_Data
561    (Self : in out Simple_Reader'Class) return Token
562   is
563      C : constant Code_Point
564        := Code_Point
565            (Self.Scanner_State.Data.Value
566              (Self.Scanner_State.YY_Current_Position - 1));
567
568   begin
569      if Self.Element_Names.Is_Empty then
570         --  Document content not entered.
571
572         Callbacks.Call_Fatal_Error
573          (Self,
574           League.Strings.To_Universal_String
575            ("Text may not appear after the root element"));
576         Self.Error_Reported := True;
577
578         return Error;
579      end if;
580
581      if C = Less_Than_Sign or C = Ampersand then
582         --  Matched string end with '<' or '&' which is start character of
583         --  tag or reference accordingly.
584
585         YY_Move_Backward (Self);
586
587      elsif C = Greater_Than_Sign
588        and then Self.Scanner_State.YY_Current_Position
589                   - Self.Scanner_State.YY_Base_Position >= 3
590        and then (Code_Point
591                   (Self.Scanner_State.Data.Value
592                     (Self.Scanner_State.YY_Current_Position - 2))
593                        = Right_Square_Bracket
594                    and Code_Point
595                         (Self.Scanner_State.Data.Value
596                           (Self.Scanner_State.YY_Current_Position - 3))
597                              = Right_Square_Bracket)
598      then
599         --  Matched string ends with literal ']]>' which is invalid in
600         --  character data.
601
602         if Self.Scanner_State.YY_Current_Position
603              - Self.Scanner_State.YY_Base_Position = 3
604         then
605            --  Exactly ']]>' found.
606
607            Callbacks.Call_Fatal_Error
608             (Self,
609              League.Strings.To_Universal_String
610               ("[[14] CharData]"
611                  & " Text may not contain a literal ']]>' sequence"));
612            Self.Error_Reported := True;
613
614            return Error;
615
616         else
617            --  String ends with ']]>', move backward to report character data
618            --  in this cycle and report error in next cycle.
619
620            YY_Move_Backward (Self);
621            YY_Move_Backward (Self);
622            YY_Move_Backward (Self);
623         end if;
624      end if;
625
626      Matreshka.Internals.Strings.Operations.Copy_Slice
627       (Self.Character_Data,
628        Self.Scanner_State.Data,
629        Self.Scanner_State.YY_Base_Position,
630        Self.Scanner_State.YY_Current_Position
631          - Self.Scanner_State.YY_Base_Position,
632        Self.Scanner_State.YY_Current_Index
633          - Self.Scanner_State.YY_Base_Index);
634
635      Matreshka.Internals.Strings.Reference (Self.Character_Data);
636      Set_String_Internal
637       (Item          => Self.YYLVal,
638        String        => Self.Character_Data,
639        Is_Whitespace => False);
640
641      return Token_String_Segment;
642   end On_Character_Data;
643
644   ----------------------------
645   -- On_Character_Reference --
646   ----------------------------
647
648   function On_Character_Reference
649    (Self : in out Simple_Reader'Class;
650     Hex  : Boolean) return Token
651   is
652      Code  : Code_Point;
653      Valid : Boolean;
654
655   begin
656      Character_Reference_To_Code_Point (Self, Hex, Code, Valid);
657
658      if not Valid then
659         return Error;
660      end if;
661
662      --  XXX Whitespace must be detected and reported in token.
663
664      if not Matreshka.Internals.Strings.Can_Be_Reused
665              (Self.Character_Buffer, 2)
666      then
667         --  Preallocated buffer can't be reused for some reason (most
668         --  probably because application made copy of the previous character
669         --  reference), so new buffer need to be preallocated. Requested
670         --  size of the buffer is maximum number of UTF-16 code unit to
671         --  store one Unicode code point.
672
673         Matreshka.Internals.Strings.Dereference (Self.Character_Buffer);
674         Self.Character_Buffer := Matreshka.Internals.Strings.Allocate (2);
675      end if;
676
677      Self.Character_Buffer.Unused := 0;
678      Self.Character_Buffer.Length := 1;
679      Matreshka.Internals.Utf16.Unchecked_Store
680       (Self.Character_Buffer.Value,
681        Self.Character_Buffer.Unused,
682        Code);
683      Matreshka.Internals.Strings.Reference (Self.Character_Buffer);
684      Set_String_Internal
685       (Item          => Self.YYLVal,
686        String        => Self.Character_Buffer,
687        Is_Whitespace => False);
688
689      return Token_String_Segment;
690   end On_Character_Reference;
691
692   -----------------------------------------------
693   -- On_Character_Reference_In_Attribute_Value --
694   -----------------------------------------------
695
696   function On_Character_Reference_In_Attribute_Value
697    (Self : in out Simple_Reader'Class;
698     Hex  : Boolean) return Boolean
699   is
700      Code  : Code_Point;
701      Valid : Boolean;
702
703   begin
704      Character_Reference_To_Code_Point (Self, Hex, Code, Valid);
705
706      if not Valid then
707         return False;
708      end if;
709
710      if Self.Normalize_Value then
711         if Code = Space then
712            if not Self.Space_Before then
713               Matreshka.Internals.Strings.Operations.Unterminated_Append
714                (Self.Character_Data, Code);
715               Self.Space_Before := True;
716            end if;
717
718         else
719            Matreshka.Internals.Strings.Operations.Unterminated_Append
720             (Self.Character_Data, Code);
721            Self.Space_Before := False;
722         end if;
723
724      else
725         Matreshka.Internals.Strings.Operations.Unterminated_Append
726          (Self.Character_Data, Code);
727      end if;
728
729      return True;
730   end On_Character_Reference_In_Attribute_Value;
731
732   -----------------------
733   -- On_Close_Of_CDATA --
734   -----------------------
735
736   function On_Close_Of_CDATA
737    (Self : in out Simple_Reader'Class) return Token is
738   begin
739      Pop_Start_Condition (Self);
740
741      return Token_CData_Close;
742   end On_Close_Of_CDATA;
743
744   -------------------------------------
745   -- On_Close_Of_Conditional_Section --
746   -------------------------------------
747
748   function On_Close_Of_Conditional_Section
749    (Self : in out Simple_Reader'Class) return Token is
750   begin
751      if Self.Conditional_Depth = 0 then
752         Callbacks.Call_Fatal_Error
753          (Self,
754           League.Strings.To_Universal_String
755            ("']]>' doesn't close conditional section"));
756
757         return Error;
758      end if;
759
760      Self.Conditional_Depth := Self.Conditional_Depth - 1;
761
762      if Self.Ignore_Depth /= 0 then
763         Self.Ignore_Depth := Self.Ignore_Depth - 1;
764      end if;
765
766      if Self.Ignore_Depth /= 0 then
767         case Self.Version is
768            when XML_1_0 =>
769               Enter_Start_Condition (Self, Tables.CONDITIONAL_IGNORE_10);
770
771            when XML_1_1 =>
772               Enter_Start_Condition (Self, Tables.CONDITIONAL_IGNORE_11);
773         end case;
774
775      else
776         case Self.Version is
777            when XML_1_0 =>
778               Enter_Start_Condition (Self, Tables.DOCTYPE_INTSUBSET_10);
779
780            when XML_1_1 =>
781               Enter_Start_Condition (Self, Tables.DOCTYPE_INTSUBSET_11);
782         end case;
783      end if;
784
785      return Token_Conditional_Close;
786   end On_Close_Of_Conditional_Section;
787
788   -----------------------------
789   -- On_Close_Of_Declaration --
790   -----------------------------
791
792   function On_Close_Of_Declaration
793    (Self : in out Simple_Reader'Class) return Token is
794   begin
795      case Self.Version is
796         when XML_1_0 =>
797            Enter_Start_Condition (Self, Tables.DOCTYPE_INTSUBSET_10);
798
799         when XML_1_1 =>
800            Enter_Start_Condition (Self, Tables.DOCTYPE_INTSUBSET_11);
801      end case;
802
803      return Token_Close;
804   end On_Close_Of_Declaration;
805
806   -------------------------------------------
807   -- On_Close_Of_Document_Type_Declaration --
808   -------------------------------------------
809
810   function On_Close_Of_Document_Type_Declaration
811    (Self : in out Simple_Reader'Class) return Boolean
812   is
813      Success : Boolean;
814      pragma Unreferenced (Success);
815
816   begin
817      if Self.External_Subset_Entity /= No_Entity
818        and Self.Validation.Load_DTD
819        and not Self.External_Subset_Done
820      then
821         --  External subset is declared, need to be loaded and not processed,
822         --  push it into the scanner stack to process before reporting of
823         --  close of document type declaration.
824
825         Self.External_Subset_Done := True;
826         YY_Move_Backward (Self);
827
828         Success :=
829           Scanner.Push_Entity
830            (Self             => Self,
831             Entity           => Self.External_Subset_Entity,
832             In_Document_Type => True,
833             In_Literal       => False);
834
835         --  XXX Error processing is not implemented.
836
837         return False;
838
839      else
840         case Self.Version is
841            when XML_1_0 =>
842               Enter_Start_Condition (Self, Tables.DOCUMENT_10);
843
844            when XML_1_1 =>
845               Enter_Start_Condition (Self, Tables.DOCUMENT_11);
846         end case;
847
848         return True;
849      end if;
850   end On_Close_Of_Document_Type_Declaration;
851
852   -----------------------------------
853   -- On_Close_Of_Empty_Element_Tag --
854   -----------------------------------
855
856   function On_Close_Of_Empty_Element_Tag
857    (Self : in out Simple_Reader'Class) return Token is
858   begin
859      case Self.Version is
860         when XML_1_0 =>
861            Enter_Start_Condition (Self, Tables.DOCUMENT_10);
862
863         when XML_1_1 =>
864            if Is_Internal_General_Entity
865                (Self.Entities, Self.Scanner_State.Entity)
866            then
867               --  Character references are resolved when replacement text of
868               --  internal general entity is constructed. In XML 1.1 character
869               --  references can refer to restricted characters which is not
870               --  valid in text, but valid in replacement text.
871
872               Enter_Start_Condition (Self, Tables.DOCUMENT_U11);
873
874            else
875               Enter_Start_Condition (Self, Tables.DOCUMENT_11);
876            end if;
877      end case;
878
879      return Token_Empty_Close;
880   end On_Close_Of_Empty_Element_Tag;
881
882   ----------------------------------------
883   -- On_Close_Of_Processing_Instruction --
884   ----------------------------------------
885
886   function On_Close_Of_Processing_Instruction
887    (Self     : in out Simple_Reader'Class;
888     Is_Empty : Boolean) return Token is
889   begin
890      if Is_Empty then
891         Set_String_Internal
892          (Item          => Self.YYLVal,
893           String        => Matreshka.Internals.Strings.Shared_Empty'Access,
894           Is_Whitespace => False);
895
896      else
897         if not Self.Whitespace_Matched then
898            raise Program_Error
899              with "no whitespace before processing instruction data";
900            --  XXX This is recoverable error.
901         end if;
902
903         Set_String_Internal
904          (Item          => Self.YYLVal,
905           String        => YY_Text (Self, 0, 2),
906           Is_Whitespace => False);
907      end if;
908
909      Pop_Start_Condition (Self);
910
911      return Token_Pi_Close;
912   end On_Close_Of_Processing_Instruction;
913
914   ---------------------
915   -- On_Close_Of_Tag --
916   ---------------------
917
918   function On_Close_Of_Tag
919    (Self : in out Simple_Reader'Class) return Token is
920   begin
921      case Self.Version is
922         when XML_1_0 =>
923            Enter_Start_Condition (Self, Tables.DOCUMENT_10);
924
925         when XML_1_1 =>
926            if Is_Internal_General_Entity
927                (Self.Entities, Self.Scanner_State.Entity)
928            then
929               --  Character references are resolved when replacement text of
930               --  internal general entity is constructed. In XML 1.1 character
931               --  references can refer to restricted characters which is not
932               --  valid in text, but valid in replacement text.
933
934               Enter_Start_Condition (Self, Tables.DOCUMENT_U11);
935
936            else
937               Enter_Start_Condition (Self, Tables.DOCUMENT_11);
938            end if;
939      end case;
940
941      return Token_Close;
942   end On_Close_Of_Tag;
943
944   -----------------------------------------
945   -- On_Close_Of_XML_Or_Text_Declaration --
946   -----------------------------------------
947
948   function On_Close_Of_XML_Or_Text_Declaration
949    (Self : in out Simple_Reader'Class) return Token is
950   begin
951      Set_String_Internal
952       (Item          => Self.YYLVal,
953        String        => Matreshka.Internals.Strings.Shared_Empty'Access,
954        Is_Whitespace => False);
955
956      if Self.Scanner_State.Entity /= No_Entity then
957         --  End of text declaration of the external entity is reached,
958         --  save current position to start from it next time entity is
959         --  referenced.
960
961         Set_First_Position
962          (Self.Entities,
963           Self.Scanner_State.Entity,
964           Self.Scanner_State.YY_Current_Position);
965      end if;
966
967      Pop_Start_Condition (Self);
968
969      return Token_Pi_Close;
970   end On_Close_Of_XML_Or_Text_Declaration;
971
972   -------------------------------------------------
973   -- On_Close_Parenthesis_In_Content_Declaration --
974   -------------------------------------------------
975
976   function On_Close_Parenthesis_In_Content_Declaration
977    (Self : in out Simple_Reader'Class) return Token is
978   begin
979      --  Whitespace can't be present between close parenthesis and
980      --  multiplicity indicator if any, so reset whitespace matching flag.
981
982      Self.Whitespace_Matched := False;
983
984      return Token_Close_Parenthesis;
985   end On_Close_Parenthesis_In_Content_Declaration;
986
987   ------------------------------------------------
988   -- On_Close_Parenthesis_In_Notation_Attribute --
989   ------------------------------------------------
990
991   function On_Close_Parenthesis_In_Notation_Attribute
992    (Self : in out Simple_Reader'Class) return Token is
993   begin
994      --  Resets whitespace matching flag.
995
996      Self.Whitespace_Matched := False;
997
998      return Token_Close_Parenthesis;
999   end On_Close_Parenthesis_In_Notation_Attribute;
1000
1001   --------------------------------------
1002   -- On_Conditional_Section_Directive --
1003   --------------------------------------
1004
1005   procedure On_Conditional_Section_Directive
1006    (Self    : in out Simple_Reader'Class;
1007     Include : Boolean) is
1008   begin
1009      --  XXX Syntax check must be added!
1010
1011      Self.Conditional_Directive := True;
1012      Self.Conditional_Depth := Self.Conditional_Depth + 1;
1013
1014      if Self.Ignore_Depth /= 0 or not Include then
1015         Self.Ignore_Depth := Self.Ignore_Depth + 1;
1016      end if;
1017   end On_Conditional_Section_Directive;
1018
1019   ----------------------------------------------
1020   -- On_Content_Of_Ignore_Conditional_Section --
1021   ----------------------------------------------
1022
1023   procedure On_Content_Of_Ignore_Conditional_Section
1024    (Self : in out Simple_Reader'Class) is
1025   begin
1026      YY_Move_Backward (Self);
1027      YY_Move_Backward (Self);
1028      YY_Move_Backward (Self);
1029   end On_Content_Of_Ignore_Conditional_Section;
1030
1031   ----------------------------
1032   -- On_Default_Declaration --
1033   ----------------------------
1034
1035   function On_Default_Declaration
1036    (Self          : in out Simple_Reader'Class;
1037     State         : Interfaces.Unsigned_32;
1038     Default_Token : Token) return Token is
1039   begin
1040      --  Checks ithat whitespace before attribute type keyword is detected
1041      --  and report error when check fail.
1042
1043      if not Self.Whitespace_Matched then
1044         --  XXX This is recoverable error.
1045
1046         Callbacks.Call_Fatal_Error
1047          (Self,
1048           League.Strings.To_Universal_String
1049            ("whitespace required before default declaration"));
1050
1051         return Error;
1052      end if;
1053
1054      Self.Whitespace_Matched := False;
1055      Enter_Start_Condition (Self, State);
1056
1057      return Default_Token;
1058   end On_Default_Declaration;
1059
1060   ---------------------------------------------------
1061   -- On_Element_Name_In_Attribute_List_Declaration --
1062   ---------------------------------------------------
1063
1064   function On_Element_Name_In_Attribute_List_Declaration
1065    (Self : in out Simple_Reader'Class) return Token
1066   is
1067      Qname_Error : Boolean;
1068
1069   begin
1070      --  [XML [52]] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
1071      --
1072      --  Checks whitespace before the element name is present.
1073
1074      if not Self.Whitespace_Matched then
1075         Callbacks.Call_Fatal_Error
1076          (Self,
1077           League.Strings.To_Universal_String
1078            ("[XML [52] AttlistDecl]"
1079               & " no whitespace before element's name"));
1080
1081         return Error;
1082      end if;
1083
1084      Self.Whitespace_Matched := False;
1085
1086      Resolve_Symbol
1087       (Self, 0, 0, False, True, False, Qname_Error, Self.YYLVal.Symbol);
1088
1089      if Qname_Error then
1090         return Error;
1091
1092      else
1093         Enter_Start_Condition (Self, Tables.ATTLIST_DECL);
1094
1095         return Token_Name;
1096      end if;
1097   end On_Element_Name_In_Attribute_List_Declaration;
1098
1099   -------------------------
1100   -- On_Encoding_Keyword --
1101   -------------------------
1102
1103   function On_Encoding_Keyword
1104    (Self : in out Simple_Reader'Class) return Token is
1105   begin
1106      if not Self.Whitespace_Matched then
1107         Callbacks.Call_Fatal_Error
1108          (Self,
1109           League.Strings.To_Universal_String
1110            ("no whitespace before 'encoding'"));
1111         Self.Error_Reported := True;
1112         --  XXX This is recoverable error.
1113
1114         return Error;
1115
1116      else
1117         return Token_Encoding;
1118      end if;
1119   end On_Encoding_Keyword;
1120
1121   -------------------------------------
1122   -- On_Entity_Value_Close_Delimiter --
1123   -------------------------------------
1124
1125   function On_Entity_Value_Close_Delimiter
1126    (Self : in out Simple_Reader'Class) return Token
1127   is
1128      --  NOTE: Entity value delimiter can be ' or " and both are
1129      --  represented as single UTF-16 code unit, thus expensive UTF-16
1130      --  decoding can be avoided.
1131
1132      Delimiter : constant Matreshka.Internals.Unicode.Code_Point
1133        := Code_Point
1134            (Self.Scanner_State.Data.Value
1135              (Self.Scanner_State.YY_Base_Position));
1136
1137   begin
1138      if Self.Scanner_State.In_Literal
1139        or else Self.Scanner_State.Delimiter /= Delimiter
1140      then
1141         Set_String_Internal
1142          (Item          => Self.YYLVal,
1143           String        => YY_Text (Self),
1144           Is_Whitespace => False);
1145
1146         return Token_String_Segment;
1147
1148      else
1149         Enter_Start_Condition (Self, Tables.ENTITY_DEF);
1150
1151         return Token_Value_Close;
1152      end if;
1153   end On_Entity_Value_Close_Delimiter;
1154
1155   ------------------------------------
1156   -- On_Entity_Value_Open_Delimiter --
1157   ------------------------------------
1158
1159   function On_Entity_Value_Open_Delimiter
1160    (Self : in out Simple_Reader'Class) return Token is
1161   begin
1162      --  NOTE: Entity value delimiter can be ' or " and both are
1163      --  represented as single UTF-16 code unit, thus expensive UTF-16
1164      --  decoding can be avoided.
1165
1166      Self.Scanner_State.Delimiter :=
1167        Code_Point
1168         (Self.Scanner_State.Data.Value (Self.Scanner_State.YY_Base_Position));
1169
1170      if not Self.Whitespace_Matched then
1171         Callbacks.Call_Fatal_Error
1172          (Self,
1173           League.Strings.To_Universal_String
1174            ("[[71] GEDecl, [72] PEDecl]"
1175               & " no whitespace before entity value"));
1176
1177         return Error;
1178      end if;
1179
1180      Self.Whitespace_Matched := False;
1181
1182      case Self.Version is
1183         when XML_1_0 =>
1184            Enter_Start_Condition (Self, Tables.ENTITY_VALUE_10);
1185
1186         when XML_1_1 =>
1187            Enter_Start_Condition (Self, Tables.ENTITY_VALUE_11);
1188      end case;
1189
1190      return Token_Value_Open;
1191   end On_Entity_Value_Open_Delimiter;
1192
1193   ----------------------------------------------------
1194   -- On_General_Entity_Reference_In_Attribute_Value --
1195   ----------------------------------------------------
1196
1197   function On_General_Entity_Reference_In_Attribute_Value
1198    (Self : in out Simple_Reader'Class) return Boolean
1199   is
1200      Qualified_Name : Symbol_Identifier;
1201      Qname_Error    : Boolean;
1202      Entity         : Entity_Identifier;
1203      State          : Scanner_State_Information;
1204
1205   begin
1206      Resolve_Symbol
1207       (Self, 1, 1, False, False, False, Qname_Error, Qualified_Name);
1208
1209      if Qname_Error then
1210         return False;
1211      end if;
1212
1213      Entity := General_Entity (Self.Symbols, Qualified_Name);
1214
1215      --  [XML1.1 4.1 WFC: Entity Declared]
1216      --
1217      --  "In a document without any DTD, a document with only an internal
1218      --  DTD subset which contains no parameter entity references, or a
1219      --  document with "standalone='yes'", for an entity reference that
1220      --  does not occur within the external subset or a parameter entity,
1221      --  the Name given in the entity reference MUST match that in an
1222      --  entity declaration that does not occur within the external subset
1223      --  or a parameter entity, except that well-formed documents need not
1224      --  declare any of the following entities: amp, lt, gt, apos, quot.
1225      --  The declaration of a general entity MUST precede any reference
1226      --  to it which appears in a default value in an attribute-list
1227      --  declaration.
1228      --
1229      --  Note that non-validating processors are not obligated to to read
1230      --  and process entity declarations occurring in parameter entities
1231      --  or in the external subset; for such documents, the rule that an
1232      --  entity must be declared is a well-formedness constraint only if
1233      --  standalone='yes'."
1234      --
1235      --  Check whether entity is declared.
1236      --
1237      --  XXX This is probably too strong check, need to be arranged with
1238      --  standalone documents and validation.
1239
1240      if Entity = No_Entity then
1241         Callbacks.Call_Fatal_Error
1242          (Self,
1243           League.Strings.To_Universal_String
1244            ("[XML1.1 4.1 WFC: Entity Declared]"
1245               & " general entity must be declared"));
1246
1247         return False;
1248
1249      elsif Enclosing_Entity (Self.Entities, Entity) = No_Entity then
1250         --  All predefined entities doesn't have enclosing entity.
1251
1252         null;
1253
1254      elsif Self.Is_Standalone
1255        and not Is_Parameter_Entity (Self.Entities, Self.Scanner_State.Entity)
1256        and not Is_External_Subset (Self.Entities, Self.Scanner_State.Entity)
1257        and not Is_Document_Entity
1258                 (Self.Entities, Enclosing_Entity (Self.Entities, Entity))
1259      then
1260         Callbacks.Call_Fatal_Error
1261          (Self,
1262           League.Strings.To_Universal_String
1263            ("[XML1.1 4.1 WFC: Entity Declared]"
1264               & " general entity must not be declared externally"));
1265
1266         return False;
1267      end if;
1268
1269      --  [XML1.1 4.1 WFC: Parsed Entity]
1270      --
1271      --  "An entity reference MUST NOT contain the name of an unparsed
1272      --  entity. Unparsed entities may be referred to only in attribute
1273      --  values declared to be of type ENTITY or ENTITIES."
1274      --
1275      --  Check whether referenced entity is not unparsed external general
1276      --  entity. XXX Attribute's value type must be checked also.
1277
1278      if Is_External_Unparsed_General_Entity (Self.Entities, Entity) then
1279         Callbacks.Call_Fatal_Error
1280          (Self,
1281           League.Strings.To_Universal_String
1282            ("[XML1.1 4.1 WFC: Parsed Entity]"
1283               & " an entity reference must not contain the name of an"
1284               & " unparsed entity"));
1285
1286         return False;
1287      end if;
1288
1289      --  [XML1.1 3.1 WFC: No External Entity References]
1290      --
1291      --  "Attribute values MUST NOT contain direct or indirect entity
1292      --  references to external entities."
1293      --
1294      --  Check whether referenced entity is not parsed external general
1295      --  entity.
1296
1297      if Is_External_Parsed_General_Entity (Self.Entities, Entity) then
1298         Callbacks.Call_Fatal_Error
1299          (Self,
1300           League.Strings.To_Universal_String
1301            ("[XML1.1 3.1 WFC: No External Entity References]"
1302               & " attribute value must not contain entity reference to"
1303               & " external entity"));
1304
1305         return False;
1306      end if;
1307
1308      --  [XML1.1 4.1 WFC: No Recursion]
1309      --
1310      --  "A parsed entity MUST NOT contain a recursive reference to itself,
1311      --  either directly or indirectly."
1312      --
1313      --  Check whether there is no replacement text of the same entity in the
1314      --  scanner stack.
1315
1316      if Self.Scanner_State.Entity = Entity then
1317         Callbacks.Call_Fatal_Error
1318          (Self,
1319           League.Strings.To_Universal_String
1320            ("[XML1.1 4.1 WFC: No Recursion]"
1321               & " parsed entity must not containt a direct recursive"
1322               & " reference to itself"));
1323
1324         return False;
1325      end if;
1326
1327      for J in 1 .. Integer (Self.Scanner_Stack.Length) loop
1328         State := Self.Scanner_Stack.Element (J);
1329
1330         if State.Entity = Entity then
1331            Callbacks.Call_Fatal_Error
1332             (Self,
1333              League.Strings.To_Universal_String
1334               ("[XML1.1 4.1 WFC: No Recursion]"
1335                  & " parsed entity must not containt a indirect recursive"
1336                  & " reference to itself"));
1337
1338            return False;
1339         end if;
1340      end loop;
1341
1342      return
1343        Push_Entity
1344         (Self             => Self,
1345          Entity           => Entity,
1346          In_Document_Type => False,
1347          In_Literal       => True);
1348   end On_General_Entity_Reference_In_Attribute_Value;
1349
1350   -----------------------------------------------------
1351   -- On_General_Entity_Reference_In_Document_Content --
1352   -----------------------------------------------------
1353
1354   function On_General_Entity_Reference_In_Document_Content
1355    (Self : in out Simple_Reader'Class) return Token
1356   is
1357      Qualified_Name : Symbol_Identifier;
1358      Qname_Error    : Boolean;
1359      Entity         : Entity_Identifier;
1360      State          : Scanner_State_Information;
1361      Deep           : Natural;
1362
1363   begin
1364      Resolve_Symbol
1365       (Self, 1, 1, False, False, False, Qname_Error, Qualified_Name);
1366
1367      if Qname_Error then
1368         return Error;
1369      end if;
1370
1371      --  [1] document ::=
1372      --       ( prolog element Misc* ) - ( Char* RestrictedChar Char* )
1373      --
1374      --  [39] element ::= EmptyElemTag | STag content ETag
1375      --
1376      --  [43] content ::=
1377      --         CharData?
1378      --         ((element | Reference | CDSect | PI | Comment) CharData?)*
1379      --
1380      --  Check that entity is referenced inside element content.
1381
1382      if Self.Element_Names.Is_Empty then
1383         Callbacks.Call_Fatal_Error
1384          (Self,
1385           League.Strings.To_Universal_String
1386            ("entity reference must be in content of element"));
1387
1388         return Error;
1389      end if;
1390
1391      Entity := General_Entity (Self.Symbols, Qualified_Name);
1392
1393      --  [XML1.1 4.1 WFC: Entity Declared]
1394      --
1395      --  "In a document without any DTD, a document with only an internal
1396      --  DTD subset which contains no parameter entity references, or a
1397      --  document with "standalone='yes'", for an entity reference that
1398      --  does not occur within the external subset or a parameter entity,
1399      --  the Name given in the entity reference MUST  match that in an
1400      --  entity declaration that does not occur within the external subset
1401      --  or a parameter entity, except that well-formed documents need not
1402      --  declare any of the following entities: amp, lt, gt, apos, quot.
1403      --  The declaration of a general entity MUST precede any reference
1404      --  to it which appears in a default value in an attribute-list
1405      --  declaration.
1406      --
1407      --  Note that non-validating processors are not obligated to to read
1408      --  and process entity declarations occurring in parameter entities
1409      --  or in the external subset; for such documents, the rule that an
1410      --  entity must be declared is a well-formedness constraint only if
1411      --  standalone='yes'."
1412      --
1413      --  Check whether entity is declared.
1414      --
1415      --  XXX This is probably too strong check, need to be arranged with
1416      --  standalone documents and validation.
1417
1418      if Entity = No_Entity then
1419         Callbacks.Call_Fatal_Error
1420          (Self,
1421           League.Strings.To_Universal_String
1422            ("[XML1.1 4.1 WFC: Entity Declared]"
1423               & " general entity must be declared"));
1424
1425         return Error;
1426
1427      elsif Enclosing_Entity (Self.Entities, Entity) = No_Entity then
1428         --  All predefined entities doesn't have enclosing entity.
1429
1430         null;
1431
1432      elsif Self.Is_Standalone
1433        and not Is_Parameter_Entity (Self.Entities, Self.Scanner_State.Entity)
1434        and not Is_External_Subset (Self.Entities, Self.Scanner_State.Entity)
1435        and not Is_Document_Entity
1436                 (Self.Entities, Enclosing_Entity (Self.Entities, Entity))
1437      then
1438         Callbacks.Call_Fatal_Error
1439          (Self,
1440           League.Strings.To_Universal_String
1441            ("[XML1.1 4.1 WFC: Entity Declared]"
1442               & " general entity must not be declared externally"));
1443
1444         return Error;
1445      end if;
1446
1447      --  [XML1.1 4.1 WFC: Parsed Entity]
1448      --
1449      --  "An entity reference MUST NOT contain the name of an unparsed
1450      --  entity. Unparsed entities may be referred to only in attribute
1451      --  values declared to be of type ENTITY or ENTITIES."
1452      --
1453      --  Check whether referenced entity is not unparsed external general
1454      --  entity.
1455
1456      if Is_External_Unparsed_General_Entity (Self.Entities, Entity) then
1457         Callbacks.Call_Fatal_Error
1458          (Self,
1459           League.Strings.To_Universal_String
1460            ("[XML1.1 4.1 WFC: Parsed Entity]"
1461               & " an entity reference must not contain the name of an"
1462               & " unparsed entity"));
1463
1464         return Error;
1465      end if;
1466
1467      --  [XML1.1 4.1 WFC: No Recursion]
1468      --
1469      --  "A parsed entity MUST NOT contain a recursive reference to itself,
1470      --  either directly or indirectly."
1471      --
1472      --  Check whether there is no replacement text of the same entity in the
1473      --  scanner stack.
1474
1475      if Self.Scanner_State.Entity = Entity then
1476         Callbacks.Call_Fatal_Error
1477          (Self,
1478           League.Strings.To_Universal_String
1479            ("[XML1.1 4.1 WFC: No Recursion]"
1480               & " parsed entity must not containt a direct recursive"
1481               & " reference to itself"));
1482
1483         return Error;
1484      end if;
1485
1486      for J in 1 .. Integer (Self.Scanner_Stack.Length) loop
1487         State := Self.Scanner_Stack.Element (J);
1488
1489         if State.Entity = Entity then
1490            Callbacks.Call_Fatal_Error
1491             (Self,
1492              League.Strings.To_Universal_String
1493               ("[XML1.1 4.1 WFC: No Recursion]"
1494                  & " parsed entity must not containt a indirect recursive"
1495                  & " reference to itself"));
1496
1497            return Error;
1498         end if;
1499      end loop;
1500
1501      Deep := Integer (Self.Scanner_Stack.Length);
1502
1503      if not Push_Entity
1504              (Self             => Self,
1505               Entity           => Entity,
1506               In_Document_Type => False,
1507               In_Literal       => False)
1508      then
1509         return Error;
1510
1511      elsif Deep = Integer (Self.Scanner_Stack.Length) then
1512         --  Entity doesn't pushed in stack because its replacement text
1513         --  is empty.
1514
1515         return End_Of_Input;
1516
1517      else
1518         Self.Scanner_State.Start_Issued := True;
1519
1520         return Token_Entity_Start;
1521      end if;
1522   end On_General_Entity_Reference_In_Document_Content;
1523
1524   -------------------------------------------------
1525   -- On_General_Entity_Reference_In_Entity_Value --
1526   -------------------------------------------------
1527
1528   function On_General_Entity_Reference_In_Entity_Value
1529    (Self : in out Simple_Reader'Class) return Token
1530   is
1531      Qualified_Name : Symbol_Identifier;
1532      Qname_Error    : Boolean;
1533
1534   begin
1535      Resolve_Symbol
1536       (Self, 1, 1, False, False, False, Qname_Error, Qualified_Name);
1537
1538      if Qname_Error then
1539         return Error;
1540
1541      else
1542         Set_String_Internal
1543          (Item          => Self.YYLVal,
1544           String        => YY_Text (Self),
1545           Is_Whitespace => False);
1546
1547         return Token_String_Segment;
1548      end if;
1549   end On_General_Entity_Reference_In_Entity_Value;
1550
1551   ------------------------------------------
1552   -- On_Less_Than_Sign_In_Attribute_Value --
1553   ------------------------------------------
1554
1555   function On_Less_Than_Sign_In_Attribute_Value
1556    (Self : in out Simple_Reader'Class) return Token is
1557   begin
1558      --  [3.1 WFC: No < in Attribute Values]
1559      --
1560      --  "The replacement text of any entity referred to directly or
1561      --  indirectly in an attribute value MUST NOT contain a <."
1562
1563      Callbacks.Call_Fatal_Error
1564       (Self,
1565        League.Strings.To_Universal_String
1566         ("[3.1 WFC: No < in Attribute Values]"
1567            & " '<' can't be used in attribute value"));
1568      Self.Error_Reported := True;
1569
1570      return Error;
1571   end On_Less_Than_Sign_In_Attribute_Value;
1572
1573   ----------------------------------------------------
1574   -- On_Name_In_Attribute_List_Declaration_Notation --
1575   ----------------------------------------------------
1576
1577   function On_Name_In_Attribute_List_Declaration_Notation
1578    (Self : in out Simple_Reader'Class) return Token
1579   is
1580      Qname_Error : Boolean;
1581
1582   begin
1583      --  [XMLNS 7]
1584      --
1585      --  "It follows that in a namespace-well-formed document:
1586      --
1587      --    - All element and attribute names contain either zero or one colon;
1588      --
1589      --    - No entity names, processing instruction targets, or notation
1590      --      names contain any colons."
1591      --
1592      --  This code is used to handle names in both NOTATION and enumeration
1593      --  attribute declarations, thus it must distinguish colon handling.
1594
1595      Resolve_Symbol
1596       (Self,
1597        0,
1598        0,
1599        False,
1600        True,
1601        not Self.Notation_Attribute,
1602        Qname_Error,
1603        Self.YYLVal.Symbol);
1604
1605      if Qname_Error then
1606         return Error;
1607
1608      else
1609         return Token_Name;
1610      end if;
1611   end On_Name_In_Attribute_List_Declaration_Notation;
1612
1613   ------------------------------------
1614   -- On_Name_In_Element_Declaration --
1615   ------------------------------------
1616
1617   function On_Name_In_Element_Declaration
1618    (Self : in out Simple_Reader'Class) return Token
1619   is
1620      Qname_Error : Boolean;
1621
1622   begin
1623      --  Production [45] requires whitespace after name and before content
1624      --  specification, so whitespace indicator is reset here.
1625
1626      Self.Whitespace_Matched := False;
1627
1628      Resolve_Symbol
1629       (Self, 0, 0, False, True, False, Qname_Error, Self.YYLVal.Symbol);
1630
1631      if Qname_Error then
1632         return Error;
1633
1634      else
1635         Enter_Start_Condition (Self, Tables.ELEMENT_DECL);
1636
1637         return Token_Name;
1638      end if;
1639   end On_Name_In_Element_Declaration;
1640
1641   ---------------------------------------------
1642   -- On_Name_In_Element_Declaration_Children --
1643   ---------------------------------------------
1644
1645   function On_Name_In_Element_Declaration_Children
1646    (Self : in out Simple_Reader'Class) return Token
1647   is
1648      Qname_Error : Boolean;
1649
1650   begin
1651      --  Production [48] checks that no whitespace separates Name from
1652      --  following multiplicity indicator, so whitespace indicator must be
1653      --  reset here.
1654
1655      Self.Whitespace_Matched := False;
1656
1657      Resolve_Symbol
1658       (Self, 0, 0, False, True, False, Qname_Error, Self.YYLVal.Symbol);
1659
1660      if Qname_Error then
1661         return Error;
1662
1663      else
1664         return Token_Name;
1665      end if;
1666   end On_Name_In_Element_Declaration_Children;
1667
1668   ----------------------------------
1669   -- On_Name_In_Element_Start_Tag --
1670   ----------------------------------
1671
1672   function On_Name_In_Element_Start_Tag
1673    (Self : in out Simple_Reader'Class) return Token
1674   is
1675      Qname_Error : Boolean;
1676
1677   begin
1678      if not Self.Whitespace_Matched then
1679         Callbacks.Call_Fatal_Error
1680          (Self,
1681           League.Strings.To_Universal_String
1682            ("whitespace is missing before attribute name"));
1683         --  XXX It is recoverable error.
1684
1685         return Error;
1686      end if;
1687
1688      Resolve_Symbol
1689       (Self, 0, 0, False, True, False, Qname_Error, Self.YYLVal.Symbol);
1690
1691      if Qname_Error then
1692         return Error;
1693
1694      else
1695         return Token_Name;
1696      end if;
1697   end On_Name_In_Element_Start_Tag;
1698
1699   -----------------------------------
1700   -- On_Name_In_Entity_Declaration --
1701   -----------------------------------
1702
1703   function On_Name_In_Entity_Declaration
1704    (Self : in out Simple_Reader'Class) return Token
1705   is
1706      Qname_Error : Boolean;
1707
1708   begin
1709      --  [XML1.1 4.2]
1710      --
1711      --  [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
1712      --  [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
1713      --
1714      --  Check whether whitespace is present before the name.
1715
1716      if not Self.Whitespace_Matched then
1717         Callbacks.Call_Fatal_Error
1718          (Self,
1719           League.Strings.To_Universal_String
1720            ("[XML1.1 4.2 productions [71], [72]]"
1721               & " whitespace must be present before the name"));
1722         --  XXX This is recoverable error.
1723
1724         return Error;
1725      end if;
1726
1727      Resolve_Symbol
1728       (Self, 0, 0, False, False, False, Qname_Error, Self.YYLVal.Symbol);
1729
1730      if Qname_Error then
1731         return Error;
1732
1733      else
1734         Self.Whitespace_Matched := False;
1735         Enter_Start_Condition (Self, Tables.ENTITY_DEF);
1736
1737         return Token_Name;
1738      end if;
1739   end On_Name_In_Entity_Declaration;
1740
1741   --------------------------------------------
1742   -- On_Name_In_Entity_Declaration_Notation --
1743   --------------------------------------------
1744
1745   function On_Name_In_Entity_Declaration_Notation
1746    (Self : in out Simple_Reader'Class) return Token
1747   is
1748      Qname_Error : Boolean;
1749
1750   begin
1751      --  [XML1.1 4.2.2]
1752      --
1753      --  [76] NDataDecl ::= S 'NDATA' S Name
1754      --
1755      --  Check whether whitespace is present before the name.
1756
1757      if not Self.Whitespace_Matched then
1758         Callbacks.Call_Fatal_Error
1759          (Self,
1760           League.Strings.To_Universal_String
1761            ("[XML1.1 4.2 production [76]]"
1762               & " whitespace must be present before the name of notation"));
1763         --  XXX This is recoverable error.
1764
1765         return Error;
1766      end if;
1767
1768      Resolve_Symbol
1769       (Self, 0, 0, False, False, False, Qname_Error, Self.YYLVal.Symbol);
1770
1771      if Qname_Error then
1772         return Error;
1773
1774      else
1775         Enter_Start_Condition (Self, Tables.ENTITY_DEF);
1776
1777         return Token_Name;
1778      end if;
1779   end On_Name_In_Entity_Declaration_Notation;
1780
1781   --------------
1782   -- On_NDATA --
1783   --------------
1784
1785   function On_NDATA (Self : in out Simple_Reader'Class) return Token is
1786   begin
1787      if not Self.Whitespace_Matched then
1788         --  XXX This is recoverable error.
1789
1790         Callbacks.Call_Fatal_Error
1791          (Self,
1792           League.Strings.To_Universal_String
1793            ("whitespace required before NDATA"));
1794         Self.Error_Reported := True;
1795
1796         return Error;
1797
1798      else
1799         Self.Whitespace_Matched := False;
1800         Enter_Start_Condition (Self, Tables.ENTITY_NDATA);
1801
1802         return Token_Ndata;
1803      end if;
1804   end On_NDATA;
1805
1806   ---------------------------
1807   -- On_No_XML_Declaration --
1808   ---------------------------
1809
1810   procedure On_No_XML_Declaration (Self : in out Simple_Reader'Class) is
1811   begin
1812      --  Move scanner's position back to the start of the document or external
1813      --  parsed entity. Entity's XML version and encoding are set up
1814      --  automatically.
1815
1816      YY_Move_Backward (Self);
1817      Pop_Start_Condition (Self);
1818   end On_No_XML_Declaration;
1819
1820   -------------------------------------------
1821   -- On_Open_Of_Attribute_List_Declaration --
1822   -------------------------------------------
1823
1824   function On_Open_Of_Attribute_List_Declaration
1825    (Self : in out Simple_Reader'Class) return Token is
1826   begin
1827      Enter_Start_Condition (Self, Tables.ATTLIST_NAME);
1828      Self.Whitespace_Matched := False;
1829
1830      return Token_Attlist_Decl_Open;
1831   end On_Open_Of_Attribute_List_Declaration;
1832
1833   ----------------------
1834   -- On_Open_Of_CDATA --
1835   ----------------------
1836
1837   function On_Open_Of_CDATA
1838    (Self : in out Simple_Reader'Class) return Token
1839   is
1840      Condition : Interfaces.Unsigned_32;
1841
1842   begin
1843      case Start_Condition (Self) is
1844         when Tables.DOCUMENT_10 =>
1845            Condition := Tables.CDATA_10;
1846
1847         when Tables.DOCUMENT_11 =>
1848            Condition := Tables.CDATA_11;
1849
1850         when Tables.DOCUMENT_U11 =>
1851            Condition := Tables.CDATA_U11;
1852
1853         when others =>
1854            raise Program_Error;
1855      end case;
1856
1857      Push_Current_And_Enter_Start_Condition (Self, Condition);
1858
1859      return Token_CData_Open;
1860   end On_Open_Of_CDATA;
1861
1862   ------------------------------------
1863   -- On_Open_Of_Conditional_Section --
1864   ------------------------------------
1865
1866   function On_Open_Of_Conditional_Section
1867    (Self : in out Simple_Reader'Class) return Token is
1868   begin
1869      --  [XML [28b], [31]] Conditional section can be present only in external
1870      --  subset of DTD.
1871
1872      if Is_Document_Entity (Self.Entities, Self.Scanner_State.Entity) then
1873         Callbacks.Call_Fatal_Error
1874          (Self,
1875           League.Strings.To_Universal_String
1876            ("[XML [28b], [31]]"
1877               & " conditional sections may only appear in the external"
1878               & " DTD subset"));
1879
1880         return Error;
1881      end if;
1882
1883      if Self.Ignore_Depth = 0 then
1884         Enter_Start_Condition (Self, Tables.CONDITIONAL_DIRECTIVE);
1885         Self.Conditional_Directive := False;
1886
1887      else
1888         Self.Conditional_Depth := Self.Conditional_Depth + 1;
1889         Self.Ignore_Depth := Self.Ignore_Depth + 1;
1890         Self.Conditional_Directive := True;
1891      end if;
1892
1893      return Token_Conditional_Open;
1894   end On_Open_Of_Conditional_Section;
1895
1896   --------------------------------------------
1897   -- On_Open_Of_Conditional_Section_Content --
1898   --------------------------------------------
1899
1900   function  On_Open_Of_Conditional_Section_Content
1901    (Self : in out Simple_Reader'Class) return Boolean is
1902   begin
1903      --  XXX Syntax rules must be checked!
1904
1905      if not Self.Conditional_Directive then
1906         Callbacks.Call_Fatal_Error
1907          (Self,
1908           League.Strings.To_Universal_String
1909            ("conditional directive is missing"));
1910
1911         return False;
1912      end if;
1913
1914      if Self.Ignore_Depth /= 0 then
1915         case Self.Version is
1916            when XML_1_0 =>
1917               Enter_Start_Condition (Self, Tables.CONDITIONAL_IGNORE_10);
1918
1919            when XML_1_1 =>
1920               Enter_Start_Condition (Self, Tables.CONDITIONAL_IGNORE_11);
1921         end case;
1922
1923      else
1924         case Self.Version is
1925            when XML_1_0 =>
1926               Enter_Start_Condition (Self, Tables.DOCTYPE_INTSUBSET_10);
1927
1928            when XML_1_1 =>
1929               Enter_Start_Condition (Self, Tables.DOCTYPE_INTSUBSET_11);
1930         end case;
1931      end if;
1932
1933      return True;
1934   end On_Open_Of_Conditional_Section_Content;
1935
1936   ------------------------------------------
1937   -- On_Open_Of_Document_Type_Declaration --
1938   ------------------------------------------
1939
1940   function On_Open_Of_Document_Type_Declaration
1941    (Self : in out Simple_Reader'Class) return Token
1942   is
1943      Qname_Error : Boolean;
1944
1945   begin
1946      Resolve_Symbol
1947       (Self, 10, 0, True, True, False, Qname_Error, Self.YYLVal.Symbol);
1948
1949      if Qname_Error then
1950         return Error;
1951
1952      else
1953         Enter_Start_Condition (Self, Tables.DOCTYPE_EXTINT);
1954
1955         return Token_Doctype_Decl_Open;
1956      end if;
1957   end On_Open_Of_Document_Type_Declaration;
1958
1959   ------------------------------------
1960   -- On_Open_Of_Element_Declaration --
1961   ------------------------------------
1962
1963   function On_Open_Of_Element_Declaration
1964    (Self : in out Simple_Reader'Class) return Token is
1965   begin
1966      Enter_Start_Condition (Self, Tables.ELEMENT_NAME);
1967
1968      return Token_Element_Decl_Open;
1969   end On_Open_Of_Element_Declaration;
1970
1971   ------------------------
1972   -- On_Open_Of_End_Tag --
1973   ------------------------
1974
1975   function On_Open_Of_End_Tag
1976    (Self : in out Simple_Reader'Class) return Token
1977   is
1978      Qname_Error : Boolean;
1979
1980   begin
1981      Resolve_Symbol
1982       (Self, 2, 0, False, True, False, Qname_Error, Self.YYLVal.Symbol);
1983
1984      if Qname_Error then
1985         return Error;
1986
1987      else
1988         Enter_Start_Condition (Self, Tables.ELEMENT_START);
1989
1990         return Token_End_Open;
1991      end if;
1992   end On_Open_Of_End_Tag;
1993
1994   --------------------------------
1995   -- On_Open_Of_Internal_Subset --
1996   --------------------------------
1997
1998   function On_Open_Of_Internal_Subset
1999    (Self : in out Simple_Reader'Class) return Token is
2000   begin
2001      case Self.Version is
2002         when XML_1_0 =>
2003            Enter_Start_Condition (Self, Tables.DOCTYPE_INTSUBSET_10);
2004
2005         when XML_1_1 =>
2006            Enter_Start_Condition (Self, Tables.DOCTYPE_INTSUBSET_11);
2007      end case;
2008
2009      return Token_Internal_Subset_Open;
2010   end On_Open_Of_Internal_Subset;
2011
2012   -------------------------------------
2013   -- On_Open_Of_Notation_Declaration --
2014   -------------------------------------
2015
2016   function On_Open_Of_Notation_Declaration
2017    (Self : in out Simple_Reader'Class) return Token
2018   is
2019      Qname_Error : Boolean;
2020
2021   begin
2022      Resolve_Symbol
2023       (Self, 10, 0, True, False, False, Qname_Error, Self.YYLVal.Symbol);
2024
2025      if Qname_Error then
2026         return Error;
2027
2028      else
2029         Push_Current_And_Enter_Start_Condition (Self, Tables.NOTATION_DECL);
2030
2031         return Token_Notation_Decl_Open;
2032      end if;
2033   end On_Open_Of_Notation_Declaration;
2034
2035   ---------------------------------------
2036   -- On_Open_Of_Processing_Instruction --
2037   ---------------------------------------
2038
2039   function On_Open_Of_Processing_Instruction
2040    (Self : in out Simple_Reader'Class) return Token
2041   is
2042      Qname_Error : Boolean;
2043
2044   begin
2045      Resolve_Symbol
2046       (Self, 2, 0, False, False, False, Qname_Error, Self.YYLVal.Symbol);
2047
2048      if Qname_Error then
2049         return Error;
2050
2051      else
2052         Push_And_Enter_Start_Condition
2053          (Self, Start_Condition (Self), Tables.PI);
2054         Reset_Whitespace_Matched (Self);
2055
2056         return Token_Pi_Open;
2057      end if;
2058   end On_Open_Of_Processing_Instruction;
2059
2060   --------------------------
2061   -- On_Open_Of_Start_Tag --
2062   --------------------------
2063
2064   function On_Open_Of_Start_Tag
2065    (Self : in out Simple_Reader'Class) return Token
2066   is
2067      Qname_Error : Boolean;
2068
2069   begin
2070      Resolve_Symbol
2071       (Self, 1, 0, False, True, False, Qname_Error, Self.YYLVal.Symbol);
2072
2073      if Qname_Error then
2074         return Error;
2075
2076      else
2077         Enter_Start_Condition (Self, Tables.ELEMENT_START);
2078
2079         return Token_Element_Open;
2080      end if;
2081   end On_Open_Of_Start_Tag;
2082
2083   ----------------------------------------
2084   -- On_Open_Of_XML_Or_Text_Declaration --
2085   ----------------------------------------
2086
2087   function On_Open_Of_XML_Or_Text_Declaration
2088    (Self : in out Simple_Reader'Class) return Token is
2089   begin
2090      Self.Whitespace_Matched := False;
2091
2092      Push_And_Enter_Start_Condition
2093       (Self, Start_Condition (Self), Tables.XML_DECL);
2094
2095      return Token_Xml_Decl_Open;
2096   end On_Open_Of_XML_Or_Text_Declaration;
2097
2098   ------------------------------------------------
2099   -- On_Open_Parenthesis_In_Content_Declaration --
2100   ------------------------------------------------
2101
2102   function On_Open_Parenthesis_In_Content_Declaration
2103    (Self : in out Simple_Reader'Class) return Token
2104   is
2105      use type Interfaces.Unsigned_32;
2106
2107   begin
2108      if Start_Condition (Self) = Tables.ELEMENT_DECL then
2109         --  Check whitespace from rule [45] elementdecl. This subprogram
2110         --  changes scanner's start condition, so handing of nested
2111         --  declarations skip check below.
2112
2113         if not Self.Whitespace_Matched then
2114            Callbacks.Call_Fatal_Error
2115             (Self,
2116              League.Strings.To_Universal_String
2117               ("[XML [45]] no whitespace after name"));
2118
2119            return Error;
2120         end if;
2121
2122         Enter_Start_Condition (Self, Tables.ELEMENT_CHILDREN);
2123      end if;
2124
2125      return Token_Open_Parenthesis;
2126   end On_Open_Parenthesis_In_Content_Declaration;
2127
2128   -----------------------------------------------
2129   -- On_Open_Parenthesis_In_Notation_Attribute --
2130   -----------------------------------------------
2131
2132   function On_Open_Parenthesis_In_Notation_Attribute
2133    (Self : in out Simple_Reader'Class) return Token is
2134   begin
2135      --  Checks ithat whitespace before open parenthesis is detected
2136      --  and report error when check fail.
2137
2138      if not Self.Whitespace_Matched then
2139         --  XXX This is recoverable error.
2140
2141         Callbacks.Call_Fatal_Error
2142          (Self,
2143           League.Strings.To_Universal_String
2144            ("whitespace required before open parenthesis"));
2145
2146         return Error;
2147      end if;
2148
2149      return Token_Open_Parenthesis;
2150   end On_Open_Parenthesis_In_Notation_Attribute;
2151
2152   -----------------------------------------------------------
2153   -- On_Parameter_Entity_Reference_In_Document_Declaration --
2154   -----------------------------------------------------------
2155
2156   function On_Parameter_Entity_Reference_In_Document_Declaration
2157    (Self : in out Simple_Reader'Class) return Token
2158   is
2159      Qualified_Name : Symbol_Identifier;
2160      Qname_Error    : Boolean;
2161      Entity         : Entity_Identifier;
2162      Deep           : Natural;
2163
2164   begin
2165      Resolve_Symbol
2166       (Self, 1, 1, False, False, False, Qname_Error, Qualified_Name);
2167
2168      if Qname_Error then
2169         return Error;
2170
2171      end if;
2172
2173      Entity := Parameter_Entity (Self.Symbols, Qualified_Name);
2174
2175      if Entity = No_Entity then
2176         Callbacks.Call_Fatal_Error
2177          (Self,
2178           League.Strings.To_Universal_String
2179            ("parameter entity must be declared"));
2180
2181         return Error;
2182      end if;
2183
2184      Deep := Integer (Self.Scanner_Stack.Length);
2185
2186      if not Push_Entity
2187              (Self             => Self,
2188               Entity           => Entity,
2189               In_Document_Type => False,
2190               In_Literal       => False)
2191      then
2192         return Error;
2193
2194      elsif Deep = Integer (Self.Scanner_Stack.Length) then
2195         --  Entity doesn't pushed in stack because its replacement text
2196         --  is empty.
2197
2198         return End_Of_Input;
2199
2200      else
2201         Self.Scanner_State.Start_Issued := True;
2202
2203         return Token_Entity_Start;
2204      end if;
2205   end On_Parameter_Entity_Reference_In_Document_Declaration;
2206
2207   ---------------------------------------------------
2208   -- On_Parameter_Entity_Reference_In_Entity_Value --
2209   ---------------------------------------------------
2210
2211   function On_Parameter_Entity_Reference_In_Entity_Value
2212    (Self : in out Simple_Reader'Class) return Boolean
2213   is
2214      Qualified_Name : Symbol_Identifier;
2215      Qname_Error    : Boolean;
2216      Entity         : Entity_Identifier;
2217
2218   begin
2219      Resolve_Symbol
2220       (Self, 1, 1, False, False, False, Qname_Error, Qualified_Name);
2221
2222      if Qname_Error then
2223         return False;
2224
2225      else
2226         Entity := Parameter_Entity (Self.Symbols, Qualified_Name);
2227
2228         --  XML WFC: PEs in Internal Subset
2229         --
2230         --  "In the internal DTD subset, parameter-entity references MUST NOT
2231         --  occur within markup declarations; they may occur where markup
2232         --  declarations can occur. (This does not apply to references that
2233         --  occur in external parameter entities or to the external subset.)"
2234         --
2235         --  Check whether parameter entity reference doesn't occure in the
2236         --  entity value of the entity declared in internal subset.
2237
2238         if Is_Document_Entity (Self.Entities, Self.Scanner_State.Entity) then
2239            Callbacks.Call_Fatal_Error
2240             (Self,
2241              League.Strings.To_Universal_String
2242               ("[XML 2.8 WFC: PEs in Internal Subset]"
2243                  & " parameter-entity reference in internal subset must not"
2244                  & " occur within markup declaration"));
2245
2246            return False;
2247         end if;
2248
2249         --  XML VC: Entity Declared
2250         --
2251         --  "In a document with an external subset or parameter entity
2252         --  references with "standalone='no'", the Name given in the entity
2253         --  reference MUST match that in an entity declaration. For
2254         --  interoperability, valid documents SHOULD declare the entities amp,
2255         --  lt, gt, apos, quot, in the form specified in 4.6 Predefined
2256         --  Entities. The declaration of a parameter entity MUST precede any
2257         --  reference to it. Similarly, the declaration of a general entity
2258         --  MUST precede any attribute-list declaration containing a default
2259         --  value with a direct or indirect reference to that general entity."
2260         --
2261         --  XXX Parameter entity must not be declared at the point of
2262         --  reference, except in some conditions in validating mode; so,
2263         --  check below must be improved, as well as behavior in
2264         --  non-validating mode must be checked.
2265
2266         if Entity = No_Entity then
2267            Callbacks.Call_Fatal_Error
2268             (Self,
2269              League.Strings.To_Universal_String
2270               ("parameter entity must be declared"));
2271
2272            return False;
2273         end if;
2274
2275         return
2276           Push_Entity
2277            (Self             => Self,
2278             Entity           => Entity,
2279             In_Document_Type => False,
2280             In_Literal       => True);
2281      end if;
2282   end On_Parameter_Entity_Reference_In_Entity_Value;
2283
2284   ---------------------------------------------------------
2285   -- On_Parameter_Entity_Reference_In_Markup_Declaration --
2286   ---------------------------------------------------------
2287
2288   function On_Parameter_Entity_Reference_In_Markup_Declaration
2289    (Self : in out Simple_Reader'Class) return Boolean
2290   is
2291      Qualified_Name : Symbol_Identifier;
2292      Qname_Error    : Boolean;
2293      Entity         : Entity_Identifier;
2294
2295   begin
2296      Resolve_Symbol
2297       (Self, 1, 1, False, False, False, Qname_Error, Qualified_Name);
2298
2299      if Qname_Error then
2300         return False;
2301
2302      else
2303         --  [XML 2.8] WFC: PEs in Internal Subset
2304         --
2305         --  "In the internal DTD subset, parameter-entity references MUST NOT
2306         --  occur within markup declarations; they may occur where markup
2307         --  declarations can occur. (This does not apply to references that
2308         --  occur in external parameter entities or to the external subset.)"
2309         --
2310         --  Check whether external subset is processed.
2311
2312         if not Self.External_Subset_Done then
2313            Callbacks.Call_Fatal_Error
2314             (Self,
2315              League.Strings.To_Universal_String
2316               ("[XML 2.8 WFC: PEs in Internal Subset]"
2317                  & " parameter-entity reference in internal subset must not"
2318                  & " occur within markup declaration"));
2319
2320            return False;
2321         end if;
2322
2323         Entity := Parameter_Entity (Self.Symbols, Qualified_Name);
2324
2325         if Entity = No_Entity then
2326            Callbacks.Call_Fatal_Error
2327             (Self,
2328              League.Strings.To_Universal_String
2329               ("parameter entity must be declared"));
2330
2331            return False;
2332         end if;
2333
2334         return
2335           Push_Entity
2336            (Self             => Self,
2337             Entity           => Entity,
2338             In_Document_Type => False,
2339             In_Literal       => True);
2340      end if;
2341   end On_Parameter_Entity_Reference_In_Markup_Declaration;
2342
2343   ---------------------
2344   -- On_Percent_Sign --
2345   ---------------------
2346
2347   function On_Percent_Sign
2348    (Self : in out Simple_Reader'Class) return Token is
2349   begin
2350      if not Self.Whitespace_Matched then
2351         Callbacks.Call_Fatal_Error
2352          (Self,
2353           League.Strings.To_Universal_String
2354            ("no whitespace before percent"));
2355         Self.Error_Reported := True;
2356         --  XXX This is recoverable error.
2357
2358         return Error;
2359
2360      else
2361         Self.Whitespace_Matched := False;
2362
2363         return Token_Percent;
2364      end if;
2365   end On_Percent_Sign;
2366
2367   ------------------------------------
2368   -- On_Plus_In_Content_Declaration --
2369   ------------------------------------
2370
2371   function On_Plus_In_Content_Declaration
2372    (Self : in out Simple_Reader'Class) return Token is
2373   begin
2374      if Self.Whitespace_Matched then
2375         Callbacks.Call_Fatal_Error
2376          (Self,
2377           League.Strings.To_Universal_String
2378            ("[XML [47], [48]] illegal whitespace before plus"));
2379
2380         return Error;
2381
2382      else
2383         return Token_Plus;
2384      end if;
2385   end On_Plus_In_Content_Declaration;
2386
2387   -----------------------
2388   -- On_Public_Literal --
2389   -----------------------
2390
2391   function On_Public_Literal
2392    (Self : in out Simple_Reader'Class) return Token
2393   is
2394      Next         : Utf16_String_Index
2395        := Self.Scanner_State.YY_Base_Position + 1;
2396      --  Skip literal open delimiter.
2397      Code         : Code_Point;
2398      Space_Before : Boolean := True;
2399
2400   begin
2401      if not Self.Whitespace_Matched then
2402         Callbacks.Call_Fatal_Error
2403          (Self,
2404           League.Strings.To_Universal_String
2405            ("[[75] ExternalID, [83] PublicID]"
2406               & " whitespace is required before pubid literal"));
2407
2408         return Error;
2409      end if;
2410
2411      Self.Whitespace_Matched := False;
2412      Enter_Start_Condition (Self, Tables.EXTERNAL_ID_SYS);
2413
2414      --  [XML 4.2.2] External Entities
2415      --
2416      --  "[Definition: In addition to a system identifier, an external
2417      --  identifier may include a public identifier.] An XML processor
2418      --  attempting to retrieve the entity's content may use any combination
2419      --  of the public and system identifiers as well as additional
2420      --  information outside the scope of this specification to try to
2421      --  generate an alternative URI reference. If the processor is unable to
2422      --  do so, it MUST use the URI reference specified in the system literal.
2423      --  Before a match is attempted, all strings of white space in the public
2424      --  identifier MUST be normalized to single space characters (#x20), and
2425      --  leading and trailing white space MUST be removed."
2426      --
2427      --  Normalize public identifier.
2428
2429      Matreshka.Internals.Strings.Operations.Reset (Self.Character_Data);
2430
2431      while Next /= Self.Scanner_State.YY_Current_Position - 1 loop
2432         --  Exclude literal close delimiter.
2433
2434         Unchecked_Next (Self.Scanner_State.Data.Value, Next, Code);
2435
2436         --  It can be reasonable to implement this step of normalization on
2437         --  SIMD.
2438
2439         if Code = Character_Tabulation
2440           or Code = Line_Feed
2441           or Code = Carriage_Return
2442         then
2443            Code := Space;
2444         end if;
2445
2446         if Code = Space then
2447            if not Space_Before then
2448               Matreshka.Internals.Strings.Operations.Unterminated_Append
2449                (Self.Character_Data, Code);
2450               Space_Before := True;
2451            end if;
2452
2453         else
2454            Matreshka.Internals.Strings.Operations.Unterminated_Append
2455             (Self.Character_Data, Code);
2456            Space_Before := False;
2457         end if;
2458      end loop;
2459
2460      if Space_Before and Self.Character_Data.Unused /= 0 then
2461         --  Remove traling space.
2462
2463         Self.Character_Data.Length := Self.Character_Data.Length - 1;
2464         Self.Character_Data.Unused := Self.Character_Data.Unused - 1;
2465      end if;
2466
2467      String_Handler.Fill_Null_Terminator (Self.Character_Data);
2468      Matreshka.Internals.Strings.Reference (Self.Character_Data);
2469      Set_String_Internal
2470       (Item          => Self.YYLVal,
2471        String        => Self.Character_Data,
2472        Is_Whitespace => False);
2473
2474      return Token_Public_Literal;
2475   end On_Public_Literal;
2476
2477   ---------------------------------------------
2478   -- On_Question_Mark_In_Content_Declaration --
2479   ---------------------------------------------
2480
2481   function On_Question_Mark_In_Content_Declaration
2482    (Self : in out Simple_Reader'Class) return Token is
2483   begin
2484      if Self.Whitespace_Matched then
2485         Callbacks.Call_Fatal_Error
2486          (Self,
2487           League.Strings.To_Universal_String
2488            ("[XML [47], [48]] illegal whitespace before question mark"));
2489
2490         return Error;
2491
2492      else
2493         return Token_Question;
2494      end if;
2495   end On_Question_Mark_In_Content_Declaration;
2496
2497   ---------------------------
2498   -- On_Standalone_Keyword --
2499   ---------------------------
2500
2501   function On_Standalone_Keyword
2502    (Self : in out Simple_Reader'Class) return Token is
2503   begin
2504      if not Self.Whitespace_Matched then
2505         Callbacks.Call_Fatal_Error
2506          (Self,
2507           League.Strings.To_Universal_String
2508            ("no whitespace before 'standalone'"));
2509         Self.Error_Reported := True;
2510         --  XXX This is recoverable error.
2511
2512         return Error;
2513
2514      else
2515         return Token_Standalone;
2516      end if;
2517   end On_Standalone_Keyword;
2518
2519   ----------------------------------------
2520   -- On_System_Keyword_In_Document_Type --
2521   ----------------------------------------
2522
2523   function On_System_Keyword_In_Document_Type
2524    (Self : in out Simple_Reader'Class) return Token is
2525   begin
2526      Reset_Whitespace_Matched (Self);
2527      Push_And_Enter_Start_Condition
2528       (Self, Tables.DOCTYPE_INT, Tables.EXTERNAL_ID_SYS);
2529
2530      return Token_System;
2531   end On_System_Keyword_In_Document_Type;
2532
2533   ---------------------------------------------
2534   -- On_System_Keyword_In_Entity_Or_Notation --
2535   ---------------------------------------------
2536
2537   function On_System_Keyword_In_Entity_Or_Notation
2538    (Self : in out Simple_Reader'Class) return Token is
2539   begin
2540      Reset_Whitespace_Matched (Self);
2541      Push_Current_And_Enter_Start_Condition (Self, Tables.EXTERNAL_ID_SYS);
2542
2543      return Token_System;
2544   end On_System_Keyword_In_Entity_Or_Notation;
2545
2546   -----------------------
2547   -- On_System_Literal --
2548   -----------------------
2549
2550   function On_System_Literal
2551    (Self : in out Simple_Reader'Class) return Token is
2552   begin
2553      if not Self.Whitespace_Matched then
2554         Callbacks.Call_Fatal_Error
2555          (Self,
2556           League.Strings.To_Universal_String
2557            ("[[75] ExternalID]"
2558               & " whitespace is required before system literal"));
2559
2560         return Error;
2561      end if;
2562
2563      Self.Whitespace_Matched := False;
2564      Pop_Start_Condition (Self);
2565      Set_String_Internal
2566       (Item          => Self.YYLVal,
2567        String        => YY_Text (Self, 1, 1),
2568        Is_Whitespace => False);
2569
2570      return Token_System_Literal;
2571   end On_System_Literal;
2572
2573   -----------------------------
2574   -- On_Unexpected_Character --
2575   -----------------------------
2576
2577   function On_Unexpected_Character
2578    (Self : in out Simple_Reader'Class) return Token is
2579   begin
2580      Callbacks.Call_Fatal_Error
2581       (Self,
2582        League.Strings.To_Universal_String ("unexpected character"));
2583
2584      return Error;
2585   end On_Unexpected_Character;
2586
2587   ------------------------
2588   -- On_Version_Keyword --
2589   ------------------------
2590
2591   function On_Version_Keyword
2592    (Self : in out Simple_Reader'Class) return Token is
2593   begin
2594      if not Self.Whitespace_Matched then
2595         Callbacks.Call_Fatal_Error
2596          (Self,
2597           League.Strings.To_Universal_String
2598            ("no whitespace before 'version'"));
2599         Self.Error_Reported := True;
2600         --  XXX This is recoverable error.
2601
2602         return Error;
2603
2604      else
2605         return Token_Version;
2606      end if;
2607   end On_Version_Keyword;
2608
2609   -------------------------------
2610   -- On_Whitespace_In_Document --
2611   -------------------------------
2612
2613   function On_Whitespace_In_Document
2614    (Self : in out Simple_Reader'Class) return Boolean
2615   is
2616      C : constant Code_Point
2617        := Code_Point
2618            (Self.Scanner_State.Data.Value
2619              (Self.Scanner_State.YY_Current_Position - 1));
2620
2621   begin
2622      if C = Less_Than_Sign or C = Ampersand then
2623         --  Move back when trailing context is available.
2624
2625         YY_Move_Backward (Self);
2626      end if;
2627
2628      if Self.Element_Names.Is_Empty then
2629         --  Document content not entered.
2630
2631         return False;
2632
2633      else
2634         Matreshka.Internals.Strings.Operations.Copy_Slice
2635          (Self.Character_Data,
2636           Self.Scanner_State.Data,
2637           Self.Scanner_State.YY_Base_Position,
2638           Self.Scanner_State.YY_Current_Position
2639             - Self.Scanner_State.YY_Base_Position,
2640           Self.Scanner_State.YY_Current_Index
2641             - Self.Scanner_State.YY_Base_Index);
2642
2643         Matreshka.Internals.Strings.Reference (Self.Character_Data);
2644         Set_String_Internal
2645          (Item          => Self.YYLVal,
2646           String        => Self.Character_Data,
2647           Is_Whitespace => True);
2648
2649         return True;
2650      end if;
2651   end On_Whitespace_In_Document;
2652
2653   ---------------------------------------------
2654   -- On_Whitespace_In_Processing_Instruction --
2655   ---------------------------------------------
2656
2657   procedure On_Whitespace_In_Processing_Instruction
2658    (Self : in out Simple_Reader'Class) is
2659   begin
2660      --  Whitespace between processing instruction's target and data are
2661      --  required, so set flag which indicates their presence.
2662
2663      Self.Whitespace_Matched := True;
2664
2665      case Self.Version is
2666         when XML_1_0 =>
2667            Enter_Start_Condition (Self, Tables.PI_DATA_10);
2668
2669         when XML_1_1 =>
2670            Enter_Start_Condition (Self, Tables.PI_DATA_11);
2671      end case;
2672   end On_Whitespace_In_Processing_Instruction;
2673
2674   --------------------
2675   -- Resolve_Symbol --
2676   --------------------
2677
2678   procedure Resolve_Symbol
2679    (Self            : in out Simple_Reader'Class;
2680     Trim_Left       : Natural;
2681     Trim_Right      : Natural;
2682     Trim_Whitespace : Boolean;
2683     Can_Be_Qname    : Boolean;
2684     Not_Qname       : Boolean;
2685     Error           : out Boolean;
2686     Symbol          : out Matreshka.Internals.XML.Symbol_Identifier)
2687   is
2688      --  Trailing and leading character as well as whitespace characters
2689      --  belongs to BMP and don't require expensive UTF-16 decoding.
2690
2691      FP : Utf16_String_Index
2692        := Self.Scanner_State.YY_Base_Position
2693             + Utf16_String_Index (Trim_Left);
2694      FI : Positive
2695        := Self.Scanner_State.YY_Base_Index + Trim_Left;
2696      LP : constant Utf16_String_Index
2697        := Self.Scanner_State.YY_Current_Position
2698             - Utf16_String_Index (Trim_Right);
2699      LI : constant Positive
2700        := Self.Scanner_State.YY_Current_Index - Trim_Right;
2701      C  : Code_Point;
2702      E  : Matreshka.Internals.XML.Symbol_Tables.Qualified_Name_Errors;
2703
2704   begin
2705      if Trim_Whitespace then
2706         loop
2707            C := Code_Point (Self.Scanner_State.Data.Value (FP));
2708
2709            exit when
2710              C /= Space
2711                and then C /= Character_Tabulation
2712                and then C /= Carriage_Return
2713                and then C /= Line_Feed;
2714
2715            FP := FP + 1;
2716            FI := FI + 1;
2717         end loop;
2718      end if;
2719
2720      Matreshka.Internals.XML.Symbol_Tables.Insert
2721       (Self.Symbols,
2722        Self.Scanner_State.Data,
2723        FP,
2724        LP - FP,
2725        LI - FI,
2726        Self.Namespaces.Enabled,
2727        E,
2728        Symbol);
2729
2730      Error := False;
2731
2732      if Self.Namespaces.Enabled and not Not_Qname then
2733         case E is
2734            when Valid =>
2735               if not Can_Be_Qname
2736                 and Local_Name (Self.Symbols, Symbol) /= Symbol
2737               then
2738                  Error := True;
2739                  Symbol := No_Symbol;
2740                  Callbacks.Call_Fatal_Error
2741                   (Self,
2742                    League.Strings.To_Universal_String
2743                     ("[NSXML1.1] qualified name must not be used here"));
2744               end if;
2745
2746            when Colon_At_Start =>
2747               Error := True;
2748               Symbol := No_Symbol;
2749               Callbacks.Call_Fatal_Error
2750                (Self,
2751                 League.Strings.To_Universal_String
2752                  ("[NSXML1.1]"
2753                     & " qualified name must not start with colon character"));
2754
2755            when Colon_At_End =>
2756               Error := True;
2757               Symbol := No_Symbol;
2758               Callbacks.Call_Fatal_Error
2759                (Self,
2760                 League.Strings.To_Universal_String
2761                  ("[NSXML1.1]"
2762                     & " qualified name must not end with colon character"));
2763
2764            when Multiple_Colons =>
2765               Error := True;
2766               Symbol := No_Symbol;
2767               Callbacks.Call_Fatal_Error
2768                (Self,
2769                 League.Strings.To_Universal_String
2770                  ("[NSXML1.1]"
2771                     & " qualified name must not contain more than one colon"
2772                     & " character"));
2773
2774            when First_Character_Is_Not_NS_Name_Start_Char =>
2775               Error := True;
2776               Symbol := No_Symbol;
2777               Callbacks.Call_Fatal_Error
2778                (Self,
2779                 League.Strings.To_Universal_String
2780                  ("[NSXML1.1] first character of local name is invalid"));
2781         end case;
2782      end if;
2783   end Resolve_Symbol;
2784
2785end XML.SAX.Simple_Readers.Scanner.Actions;
2786