1------------------------------------------------------------------------------
2--                                                                          --
3--                         GNAT RUN-TIME COMPONENTS                         --
4--                                                                          --
5--                    G N A T . D E C O D E _ S T R I N G                   --
6--                                                                          --
7--                                 S p e c                                  --
8--                                                                          --
9--                     Copyright (C) 2007-2010, AdaCore                     --
10--                                                                          --
11-- GNAT is free software;  you can  redistribute it  and/or modify it under --
12-- terms of the  GNU General Public License as published  by the Free Soft- --
13-- ware  Foundation;  either version 3,  or (at your option) any later ver- --
14-- sion.  GNAT is distributed in the hope that it will be useful, but WITH- --
15-- OUT ANY WARRANTY;  without even the  implied warranty of MERCHANTABILITY --
16-- or FITNESS FOR A PARTICULAR PURPOSE.                                     --
17--                                                                          --
18-- As a special exception under Section 7 of GPL version 3, you are granted --
19-- additional permissions described in the GCC Runtime Library Exception,   --
20-- version 3.1, as published by the Free Software Foundation.               --
21--                                                                          --
22-- You should have received a copy of the GNU General Public License and    --
23-- a copy of the GCC Runtime Library Exception along with this program;     --
24-- see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see    --
25-- <http://www.gnu.org/licenses/>.                                          --
26--                                                                          --
27-- GNAT was originally developed  by the GNAT team at  New York University. --
28-- Extensive contributions were provided by Ada Core Technologies Inc.      --
29--                                                                          --
30------------------------------------------------------------------------------
31
32--  This package provides a utility routine for converting from an encoded
33--  string to a corresponding Wide_String or Wide_Wide_String value.
34
35with Interfaces; use Interfaces;
36
37with System.WCh_Cnv; use System.WCh_Cnv;
38with System.WCh_Con; use System.WCh_Con;
39
40package body GNAT.Decode_String is
41
42   -----------------------
43   -- Local Subprograms --
44   -----------------------
45
46   procedure Bad;
47   pragma No_Return (Bad);
48   --  Raise error for bad encoding
49
50   procedure Past_End;
51   pragma No_Return (Past_End);
52   --  Raise error for off end of string
53
54   ---------
55   -- Bad --
56   ---------
57
58   procedure Bad is
59   begin
60      raise Constraint_Error with
61        "bad encoding or character out of range";
62   end Bad;
63
64   ---------------------------
65   -- Decode_Wide_Character --
66   ---------------------------
67
68   procedure Decode_Wide_Character
69     (Input  : String;
70      Ptr    : in out Natural;
71      Result : out Wide_Character)
72   is
73      Char : Wide_Wide_Character;
74   begin
75      Decode_Wide_Wide_Character (Input, Ptr, Char);
76
77      if Wide_Wide_Character'Pos (Char) > 16#FFFF# then
78         Bad;
79      else
80         Result := Wide_Character'Val (Wide_Wide_Character'Pos (Char));
81      end if;
82   end Decode_Wide_Character;
83
84   ------------------------
85   -- Decode_Wide_String --
86   ------------------------
87
88   function Decode_Wide_String (S : String) return Wide_String is
89      Result : Wide_String (1 .. S'Length);
90      Length : Natural;
91   begin
92      Decode_Wide_String (S, Result, Length);
93      return Result (1 .. Length);
94   end Decode_Wide_String;
95
96   procedure Decode_Wide_String
97     (S      : String;
98      Result : out Wide_String;
99      Length : out Natural)
100   is
101      Ptr : Natural;
102
103   begin
104      Ptr := S'First;
105      Length := 0;
106      while Ptr <= S'Last loop
107         if Length >= Result'Last then
108            Past_End;
109         end if;
110
111         Length := Length + 1;
112         Decode_Wide_Character (S, Ptr, Result (Length));
113      end loop;
114   end Decode_Wide_String;
115
116   --------------------------------
117   -- Decode_Wide_Wide_Character --
118   --------------------------------
119
120   procedure Decode_Wide_Wide_Character
121     (Input  : String;
122      Ptr    : in out Natural;
123      Result : out Wide_Wide_Character)
124   is
125      C : Character;
126
127      function In_Char return Character;
128      pragma Inline (In_Char);
129      --  Function to get one input character
130
131      -------------
132      -- In_Char --
133      -------------
134
135      function In_Char return Character is
136      begin
137         if Ptr <= Input'Last then
138            Ptr := Ptr + 1;
139            return Input (Ptr - 1);
140         else
141            Past_End;
142         end if;
143      end In_Char;
144
145   --  Start of processing for Decode_Wide_Wide_Character
146
147   begin
148      C := In_Char;
149
150      --  Special fast processing for UTF-8 case
151
152      if Encoding_Method = WCEM_UTF8 then
153         UTF8 : declare
154            U : Unsigned_32;
155            W : Unsigned_32;
156
157            procedure Get_UTF_Byte;
158            pragma Inline (Get_UTF_Byte);
159            --  Used to interpret 2#10xxxxxx# continuation byte in UTF-8 mode.
160            --  Reads a byte, and raises CE if the first two bits are not 10.
161            --  Otherwise shifts W 6 bits left and or's in the 6 xxxxxx bits.
162
163            ------------------
164            -- Get_UTF_Byte --
165            ------------------
166
167            procedure Get_UTF_Byte is
168            begin
169               U := Unsigned_32 (Character'Pos (In_Char));
170
171               if (U and 2#11000000#) /= 2#10_000000# then
172                  Bad;
173               end if;
174
175               W := Shift_Left (W, 6) or (U and 2#00111111#);
176            end Get_UTF_Byte;
177
178         --  Start of processing for UTF8 case
179
180         begin
181            --  Note: for details of UTF8 encoding see RFC 3629
182
183            U := Unsigned_32 (Character'Pos (C));
184
185            --  16#00_0000#-16#00_007F#: 0xxxxxxx
186
187            if (U and 2#10000000#) = 2#00000000# then
188               Result := Wide_Wide_Character'Val (Character'Pos (C));
189
190            --  16#00_0080#-16#00_07FF#: 110xxxxx 10xxxxxx
191
192            elsif (U and 2#11100000#) = 2#110_00000# then
193               W := U and 2#00011111#;
194               Get_UTF_Byte;
195               Result := Wide_Wide_Character'Val (W);
196
197            --  16#00_0800#-16#00_ffff#: 1110xxxx 10xxxxxx 10xxxxxx
198
199            elsif (U and 2#11110000#) = 2#1110_0000# then
200               W := U and 2#00001111#;
201               Get_UTF_Byte;
202               Get_UTF_Byte;
203               Result := Wide_Wide_Character'Val (W);
204
205            --  16#01_0000#-16#10_FFFF#: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
206
207            elsif (U and 2#11111000#) = 2#11110_000# then
208               W := U and 2#00000111#;
209
210               for K in 1 .. 3 loop
211                  Get_UTF_Byte;
212               end loop;
213
214               Result := Wide_Wide_Character'Val (W);
215
216            --  16#0020_0000#-16#03FF_FFFF#: 111110xx 10xxxxxx 10xxxxxx
217            --                               10xxxxxx 10xxxxxx
218
219            elsif (U and 2#11111100#) = 2#111110_00# then
220               W := U and 2#00000011#;
221
222               for K in 1 .. 4 loop
223                  Get_UTF_Byte;
224               end loop;
225
226               Result := Wide_Wide_Character'Val (W);
227
228            --  All other cases are invalid, note that this includes:
229
230            --  16#0400_0000#-16#7FFF_FFFF#: 1111110x 10xxxxxx 10xxxxxx
231            --                               10xxxxxx 10xxxxxx 10xxxxxx
232
233            --  since Wide_Wide_Character does not include code values
234            --  greater than 16#03FF_FFFF#.
235
236            else
237               Bad;
238            end if;
239         end UTF8;
240
241      --  All encoding functions other than UTF-8
242
243      else
244         Non_UTF8 : declare
245            function Char_Sequence_To_UTF is
246              new Char_Sequence_To_UTF_32 (In_Char);
247
248         begin
249            --  For brackets, must test for specific case of [ not followed by
250            --  quotation, where we must not call Char_Sequence_To_UTF, but
251            --  instead just return the bracket unchanged.
252
253            if Encoding_Method = WCEM_Brackets
254              and then C = '['
255              and then (Ptr > Input'Last or else Input (Ptr) /= '"')
256            then
257               Result := '[';
258
259            --  All other cases including [" with Brackets
260
261            else
262               Result :=
263                 Wide_Wide_Character'Val
264                   (Char_Sequence_To_UTF (C, Encoding_Method));
265            end if;
266         end Non_UTF8;
267      end if;
268   end Decode_Wide_Wide_Character;
269
270   -----------------------------
271   -- Decode_Wide_Wide_String --
272   -----------------------------
273
274   function Decode_Wide_Wide_String (S : String) return Wide_Wide_String is
275      Result : Wide_Wide_String (1 .. S'Length);
276      Length : Natural;
277   begin
278      Decode_Wide_Wide_String (S, Result, Length);
279      return Result (1 .. Length);
280   end Decode_Wide_Wide_String;
281
282   procedure Decode_Wide_Wide_String
283     (S      : String;
284      Result : out Wide_Wide_String;
285      Length : out Natural)
286   is
287      Ptr : Natural;
288
289   begin
290      Ptr := S'First;
291      Length := 0;
292      while Ptr <= S'Last loop
293         if Length >= Result'Last then
294            Past_End;
295         end if;
296
297         Length := Length + 1;
298         Decode_Wide_Wide_Character (S, Ptr, Result (Length));
299      end loop;
300   end Decode_Wide_Wide_String;
301
302   -------------------------
303   -- Next_Wide_Character --
304   -------------------------
305
306   procedure Next_Wide_Character (Input : String; Ptr : in out Natural) is
307   begin
308      if Ptr < Input'First then
309         Past_End;
310      end if;
311
312      --  Special efficient encoding for UTF-8 case
313
314      if Encoding_Method = WCEM_UTF8 then
315         UTF8 : declare
316            U : Unsigned_32;
317
318            procedure Getc;
319            pragma Inline (Getc);
320            --  Gets the character at Input (Ptr) and returns code in U as
321            --  Unsigned_32 value. On return Ptr is bumped past the character.
322
323            procedure Skip_UTF_Byte;
324            pragma Inline (Skip_UTF_Byte);
325            --  Skips past one encoded byte which must be 2#10xxxxxx#
326
327            ----------
328            -- Getc --
329            ----------
330
331            procedure Getc is
332            begin
333               if Ptr > Input'Last then
334                  Past_End;
335               else
336                  U := Unsigned_32 (Character'Pos (Input (Ptr)));
337                  Ptr := Ptr + 1;
338               end if;
339            end Getc;
340
341            -------------------
342            -- Skip_UTF_Byte --
343            -------------------
344
345            procedure Skip_UTF_Byte is
346            begin
347               Getc;
348
349               if (U and 2#11000000#) /= 2#10_000000# then
350                  Bad;
351               end if;
352            end Skip_UTF_Byte;
353
354         --  Start of processing for UTF-8 case
355
356         begin
357            --  16#00_0000#-16#00_007F#: 0xxxxxxx
358
359            Getc;
360
361            if (U and 2#10000000#) = 2#00000000# then
362               return;
363
364            --  16#00_0080#-16#00_07FF#: 110xxxxx 10xxxxxx
365
366            elsif (U and 2#11100000#) = 2#110_00000# then
367               Skip_UTF_Byte;
368
369            --  16#00_0800#-16#00_ffff#: 1110xxxx 10xxxxxx 10xxxxxx
370
371            elsif (U and 2#11110000#) = 2#1110_0000# then
372               Skip_UTF_Byte;
373               Skip_UTF_Byte;
374
375            --  Any other code is invalid, note that this includes:
376
377            --  16#01_0000#-16#10_FFFF#: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
378
379            --  16#0020_0000#-16#03FF_FFFF#: 111110xx 10xxxxxx 10xxxxxx
380            --                               10xxxxxx 10xxxxxx
381
382            --  16#0400_0000#-16#7FFF_FFFF#: 1111110x 10xxxxxx 10xxxxxx
383            --                               10xxxxxx 10xxxxxx 10xxxxxx
384
385            --  since Wide_Character does not allow codes > 16#FFFF#
386
387            else
388               Bad;
389            end if;
390         end UTF8;
391
392      --  Non-UTF-8 case
393
394      else
395         declare
396            Discard : Wide_Character;
397         begin
398            Decode_Wide_Character (Input, Ptr, Discard);
399         end;
400      end if;
401   end Next_Wide_Character;
402
403   ------------------------------
404   -- Next_Wide_Wide_Character --
405   ------------------------------
406
407   procedure Next_Wide_Wide_Character (Input : String; Ptr : in out Natural) is
408   begin
409      --  Special efficient encoding for UTF-8 case
410
411      if Encoding_Method = WCEM_UTF8 then
412         UTF8 : declare
413            U : Unsigned_32;
414
415            procedure Getc;
416            pragma Inline (Getc);
417            --  Gets the character at Input (Ptr) and returns code in U as
418            --  Unsigned_32 value. On return Ptr is bumped past the character.
419
420            procedure Skip_UTF_Byte;
421            pragma Inline (Skip_UTF_Byte);
422            --  Skips past one encoded byte which must be 2#10xxxxxx#
423
424            ----------
425            -- Getc --
426            ----------
427
428            procedure Getc is
429            begin
430               if Ptr > Input'Last then
431                  Past_End;
432               else
433                  U := Unsigned_32 (Character'Pos (Input (Ptr)));
434                  Ptr := Ptr + 1;
435               end if;
436            end Getc;
437
438            -------------------
439            -- Skip_UTF_Byte --
440            -------------------
441
442            procedure Skip_UTF_Byte is
443            begin
444               Getc;
445
446               if (U and 2#11000000#) /= 2#10_000000# then
447                  Bad;
448               end if;
449            end Skip_UTF_Byte;
450
451         --  Start of processing for UTF-8 case
452
453         begin
454            if Ptr < Input'First then
455               Past_End;
456            end if;
457
458            --  16#00_0000#-16#00_007F#: 0xxxxxxx
459
460            Getc;
461
462            if (U and 2#10000000#) = 2#00000000# then
463               null;
464
465            --  16#00_0080#-16#00_07FF#: 110xxxxx 10xxxxxx
466
467            elsif (U and 2#11100000#) = 2#110_00000# then
468               Skip_UTF_Byte;
469
470            --  16#00_0800#-16#00_ffff#: 1110xxxx 10xxxxxx 10xxxxxx
471
472            elsif (U and 2#11110000#) = 2#1110_0000# then
473               Skip_UTF_Byte;
474               Skip_UTF_Byte;
475
476            --  16#01_0000#-16#10_FFFF#: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
477
478            elsif (U and 2#11111000#) = 2#11110_000# then
479               for K in 1 .. 3 loop
480                  Skip_UTF_Byte;
481               end loop;
482
483            --  16#0020_0000#-16#03FF_FFFF#: 111110xx 10xxxxxx 10xxxxxx
484            --                               10xxxxxx 10xxxxxx
485
486            elsif (U and 2#11111100#) = 2#111110_00# then
487               for K in 1 .. 4 loop
488                  Skip_UTF_Byte;
489               end loop;
490
491            --  Any other code is invalid, note that this includes:
492
493            --  16#0400_0000#-16#7FFF_FFFF#: 1111110x 10xxxxxx 10xxxxxx
494            --                               10xxxxxx 10xxxxxx 10xxxxxx
495
496            --  since Wide_Wide_Character does not allow codes > 16#03FF_FFFF#
497
498            else
499               Bad;
500            end if;
501         end UTF8;
502
503      --  Non-UTF-8 case
504
505      else
506         declare
507            Discard : Wide_Wide_Character;
508         begin
509            Decode_Wide_Wide_Character (Input, Ptr, Discard);
510         end;
511      end if;
512   end Next_Wide_Wide_Character;
513
514   --------------
515   -- Past_End --
516   --------------
517
518   procedure Past_End is
519   begin
520      raise Constraint_Error with "past end of string";
521   end Past_End;
522
523   -------------------------
524   -- Prev_Wide_Character --
525   -------------------------
526
527   procedure Prev_Wide_Character (Input : String; Ptr : in out Natural) is
528   begin
529      if Ptr > Input'Last + 1 then
530         Past_End;
531      end if;
532
533      --  Special efficient encoding for UTF-8 case
534
535      if Encoding_Method = WCEM_UTF8 then
536         UTF8 : declare
537            U : Unsigned_32;
538
539            procedure Getc;
540            pragma Inline (Getc);
541            --  Gets the character at Input (Ptr - 1) and returns code in U as
542            --  Unsigned_32 value. On return Ptr is decremented by one.
543
544            procedure Skip_UTF_Byte;
545            pragma Inline (Skip_UTF_Byte);
546            --  Checks that U is 2#10xxxxxx# and then calls Get
547
548            ----------
549            -- Getc --
550            ----------
551
552            procedure Getc is
553            begin
554               if Ptr <= Input'First then
555                  Past_End;
556               else
557                  Ptr := Ptr - 1;
558                  U := Unsigned_32 (Character'Pos (Input (Ptr)));
559               end if;
560            end Getc;
561
562            -------------------
563            -- Skip_UTF_Byte --
564            -------------------
565
566            procedure Skip_UTF_Byte is
567            begin
568               if (U and 2#11000000#) = 2#10_000000# then
569                  Getc;
570               else
571                  Bad;
572               end if;
573            end Skip_UTF_Byte;
574
575         --  Start of processing for UTF-8 case
576
577         begin
578            --  16#00_0000#-16#00_007F#: 0xxxxxxx
579
580            Getc;
581
582            if (U and 2#10000000#) = 2#00000000# then
583               return;
584
585            --  16#00_0080#-16#00_07FF#: 110xxxxx 10xxxxxx
586
587            else
588               Skip_UTF_Byte;
589
590               if (U and 2#11100000#) = 2#110_00000# then
591                  return;
592
593               --  16#00_0800#-16#00_ffff#: 1110xxxx 10xxxxxx 10xxxxxx
594
595               else
596                  Skip_UTF_Byte;
597
598                  if (U and 2#11110000#) = 2#1110_0000# then
599                     return;
600
601                     --  Any other code is invalid, note that this includes:
602
603                     --  16#01_0000#-16#10_FFFF#: 11110xxx 10xxxxxx 10xxxxxx
604                     --                           10xxxxxx
605
606                     --  16#0020_0000#-16#03FF_FFFF#: 111110xx 10xxxxxx
607                     --                               10xxxxxx 10xxxxxx
608                     --                               10xxxxxx
609
610                     --  16#0400_0000#-16#7FFF_FFFF#: 1111110x 10xxxxxx
611                     --                               10xxxxxx 10xxxxxx
612                     --                               10xxxxxx 10xxxxxx
613
614                     --  since Wide_Character does not allow codes > 16#FFFF#
615
616                  else
617                     Bad;
618                  end if;
619               end if;
620            end if;
621         end UTF8;
622
623      --  Special efficient encoding for brackets case
624
625      elsif Encoding_Method = WCEM_Brackets then
626         Brackets : declare
627            P : Natural;
628            S : Natural;
629
630         begin
631            --  See if we have "] at end positions
632
633            if Ptr > Input'First + 1
634              and then Input (Ptr - 1) = ']'
635              and then Input (Ptr - 2) = '"'
636            then
637               P := Ptr - 2;
638
639               --  Loop back looking for [" at start
640
641               while P >= Ptr - 10 loop
642                  if P <= Input'First + 1 then
643                     Bad;
644
645                  elsif Input (P - 1) = '"'
646                    and then Input (P - 2) = '['
647                  then
648                     --  Found ["..."], scan forward to check it
649
650                     S := P - 2;
651                     P := S;
652                     Next_Wide_Character (Input, P);
653
654                     --  OK if at original pointer, else error
655
656                     if P = Ptr then
657                        Ptr := S;
658                        return;
659                     else
660                        Bad;
661                     end if;
662                  end if;
663
664                  P := P - 1;
665               end loop;
666
667               --  Falling through loop means more than 8 chars between the
668               --  enclosing brackets (or simply a missing left bracket)
669
670               Bad;
671
672            --  Here if no bracket sequence present
673
674            else
675               if Ptr = Input'First then
676                  Past_End;
677               else
678                  Ptr := Ptr - 1;
679               end if;
680            end if;
681         end Brackets;
682
683      --  Non-UTF-8/Brackets. These are the inefficient cases where we have to
684      --  go to the start of the string and skip forwards till Ptr matches.
685
686      else
687         Non_UTF_Brackets : declare
688            Discard : Wide_Character;
689            PtrS    : Natural;
690            PtrP    : Natural;
691
692         begin
693            PtrS := Input'First;
694
695            if Ptr <= PtrS then
696               Past_End;
697            end if;
698
699            loop
700               PtrP := PtrS;
701               Decode_Wide_Character (Input, PtrS, Discard);
702
703               if PtrS = Ptr then
704                  Ptr := PtrP;
705                  return;
706
707               elsif PtrS > Ptr then
708                  Bad;
709               end if;
710            end loop;
711
712         exception
713            when Constraint_Error =>
714               Bad;
715         end Non_UTF_Brackets;
716      end if;
717   end Prev_Wide_Character;
718
719   ------------------------------
720   -- Prev_Wide_Wide_Character --
721   ------------------------------
722
723   procedure Prev_Wide_Wide_Character (Input : String; Ptr : in out Natural) is
724   begin
725      if Ptr > Input'Last + 1 then
726         Past_End;
727      end if;
728
729      --  Special efficient encoding for UTF-8 case
730
731      if Encoding_Method = WCEM_UTF8 then
732         UTF8 : declare
733            U : Unsigned_32;
734
735            procedure Getc;
736            pragma Inline (Getc);
737            --  Gets the character at Input (Ptr - 1) and returns code in U as
738            --  Unsigned_32 value. On return Ptr is decremented by one.
739
740            procedure Skip_UTF_Byte;
741            pragma Inline (Skip_UTF_Byte);
742            --  Checks that U is 2#10xxxxxx# and then calls Get
743
744            ----------
745            -- Getc --
746            ----------
747
748            procedure Getc is
749            begin
750               if Ptr <= Input'First then
751                  Past_End;
752               else
753                  Ptr := Ptr - 1;
754                  U := Unsigned_32 (Character'Pos (Input (Ptr)));
755               end if;
756            end Getc;
757
758            -------------------
759            -- Skip_UTF_Byte --
760            -------------------
761
762            procedure Skip_UTF_Byte is
763            begin
764               if (U and 2#11000000#) = 2#10_000000# then
765                  Getc;
766               else
767                  Bad;
768               end if;
769            end Skip_UTF_Byte;
770
771         --  Start of processing for UTF-8 case
772
773         begin
774            --  16#00_0000#-16#00_007F#: 0xxxxxxx
775
776            Getc;
777
778            if (U and 2#10000000#) = 2#00000000# then
779               return;
780
781            --  16#00_0080#-16#00_07FF#: 110xxxxx 10xxxxxx
782
783            else
784               Skip_UTF_Byte;
785
786               if (U and 2#11100000#) = 2#110_00000# then
787                  return;
788
789               --  16#00_0800#-16#00_ffff#: 1110xxxx 10xxxxxx 10xxxxxx
790
791               else
792                  Skip_UTF_Byte;
793
794                  if (U and 2#11110000#) = 2#1110_0000# then
795                     return;
796
797                  --  16#01_0000#-16#10_FFFF#: 11110xxx 10xxxxxx 10xxxxxx
798                  --                           10xxxxxx
799
800                  else
801                     Skip_UTF_Byte;
802
803                     if (U and 2#11111000#) = 2#11110_000# then
804                        return;
805
806                     --  16#0020_0000#-16#03FF_FFFF#: 111110xx 10xxxxxx
807                     --                               10xxxxxx 10xxxxxx
808                     --                               10xxxxxx
809
810                     else
811                        Skip_UTF_Byte;
812
813                        if (U and 2#11111100#) = 2#111110_00# then
814                           return;
815
816                        --  Any other code is invalid, note that this includes:
817
818                        --  16#0400_0000#-16#7FFF_FFFF#: 1111110x 10xxxxxx
819                        --                               10xxxxxx 10xxxxxx
820                        --                               10xxxxxx 10xxxxxx
821
822                        --  since Wide_Wide_Character does not allow codes
823                        --  greater than 16#03FF_FFFF#
824
825                        else
826                           Bad;
827                        end if;
828                     end if;
829                  end if;
830               end if;
831            end if;
832         end UTF8;
833
834      --  Special efficient encoding for brackets case
835
836      elsif Encoding_Method = WCEM_Brackets then
837         Brackets : declare
838            P : Natural;
839            S : Natural;
840
841         begin
842            --  See if we have "] at end positions
843
844            if Ptr > Input'First + 1
845              and then Input (Ptr - 1) = ']'
846              and then Input (Ptr - 2) = '"'
847            then
848               P := Ptr - 2;
849
850               --  Loop back looking for [" at start
851
852               while P >= Ptr - 10 loop
853                  if P <= Input'First + 1 then
854                     Bad;
855
856                  elsif Input (P - 1) = '"'
857                    and then Input (P - 2) = '['
858                  then
859                     --  Found ["..."], scan forward to check it
860
861                     S := P - 2;
862                     P := S;
863                     Next_Wide_Wide_Character (Input, P);
864
865                     --  OK if at original pointer, else error
866
867                     if P = Ptr then
868                        Ptr := S;
869                        return;
870                     else
871                        Bad;
872                     end if;
873                  end if;
874
875                  P := P - 1;
876               end loop;
877
878               --  Falling through loop means more than 8 chars between the
879               --  enclosing brackets (or simply a missing left bracket)
880
881               Bad;
882
883            --  Here if no bracket sequence present
884
885            else
886               if Ptr = Input'First then
887                  Past_End;
888               else
889                  Ptr := Ptr - 1;
890               end if;
891            end if;
892         end Brackets;
893
894      --  Non-UTF-8/Brackets. These are the inefficient cases where we have to
895      --  go to the start of the string and skip forwards till Ptr matches.
896
897      else
898         Non_UTF8_Brackets : declare
899            Discard : Wide_Wide_Character;
900            PtrS    : Natural;
901            PtrP    : Natural;
902
903         begin
904            PtrS := Input'First;
905
906            if Ptr <= PtrS then
907               Past_End;
908            end if;
909
910            loop
911               PtrP := PtrS;
912               Decode_Wide_Wide_Character (Input, PtrS, Discard);
913
914               if PtrS = Ptr then
915                  Ptr := PtrP;
916                  return;
917
918               elsif PtrS > Ptr then
919                  Bad;
920               end if;
921            end loop;
922
923         exception
924            when Constraint_Error =>
925               Bad;
926         end Non_UTF8_Brackets;
927      end if;
928   end Prev_Wide_Wide_Character;
929
930   --------------------------
931   -- Validate_Wide_String --
932   --------------------------
933
934   function Validate_Wide_String (S : String) return Boolean is
935      Ptr : Natural;
936
937   begin
938      Ptr := S'First;
939      while Ptr <= S'Last loop
940         Next_Wide_Character (S, Ptr);
941      end loop;
942
943      return True;
944
945   exception
946      when Constraint_Error =>
947         return False;
948   end Validate_Wide_String;
949
950   -------------------------------
951   -- Validate_Wide_Wide_String --
952   -------------------------------
953
954   function Validate_Wide_Wide_String (S : String) return Boolean is
955      Ptr : Natural;
956
957   begin
958      Ptr := S'First;
959      while Ptr <= S'Last loop
960         Next_Wide_Wide_Character (S, Ptr);
961      end loop;
962
963      return True;
964
965   exception
966      when Constraint_Error =>
967         return False;
968   end Validate_Wide_Wide_String;
969
970end GNAT.Decode_String;
971