1------------------------------------------------------------------------------
2--                                                                          --
3--                         GNAT RUN-TIME COMPONENTS                         --
4--                                                                          --
5--                    G N A T . D E C O D E _ S T R I N G                   --
6--                                                                          --
7--                                 S p e c                                  --
8--                                                                          --
9--                       Copyright (C) 2007-2010, AdaCore                   --
10--                                                                          --
11-- GNAT is free software;  you can  redistribute it  and/or modify it under --
12-- terms of the  GNU General Public License as published  by the Free Soft- --
13-- ware  Foundation;  either version 3,  or (at your option) any later ver- --
14-- sion.  GNAT is distributed in the hope that it will be useful, but WITH- --
15-- OUT ANY WARRANTY;  without even the  implied warranty of MERCHANTABILITY --
16-- or FITNESS FOR A PARTICULAR PURPOSE.                                     --
17--                                                                          --
18-- As a special exception under Section 7 of GPL version 3, you are granted --
19-- additional permissions described in the GCC Runtime Library Exception,   --
20-- version 3.1, as published by the Free Software Foundation.               --
21--                                                                          --
22-- You should have received a copy of the GNU General Public License and    --
23-- a copy of the GCC Runtime Library Exception along with this program;     --
24-- see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see    --
25-- <http://www.gnu.org/licenses/>.                                          --
26--                                                                          --
27-- GNAT was originally developed  by the GNAT team at  New York University. --
28-- Extensive contributions were provided by Ada Core Technologies Inc.      --
29--                                                                          --
30------------------------------------------------------------------------------
31
32--  This generic package provides utility routines for converting from an
33--  encoded string to a corresponding Wide_String or Wide_Wide_String value
34--  using a specified encoding convention, which is supplied as the generic
35--  parameter. UTF-8 is handled especially efficiently, and if the encoding
36--  method is known at compile time to be WCEM_UTF8, then the instantiation
37--  is specialized to handle only the UTF-8 case and exclude code for the
38--  other encoding methods. The package also provides positioning routines
39--  for skipping encoded characters in either direction, and for validating
40--  strings for correct encodings.
41
42--  Note: this package is only about decoding sequences of 8-bit characters
43--  into corresponding 16-bit Wide_String or 32-bit Wide_Wide_String values.
44--  It knows nothing at all about the character encodings being used for the
45--  resulting Wide_Character and Wide_Wide_Character values. Most often this
46--  will be Unicode/ISO-10646 as specified by the Ada RM, but this package
47--  does not make any assumptions about the character coding. See also the
48--  packages Ada.Wide_[Wide_]Characters.Unicode for Unicode-specific functions.
49
50--  Note on the use of brackets encoding (WCEM_Brackets). The brackets encoding
51--  method is ambiguous in the context of this package, since there is no way
52--  to tell if ["1234"] is eight unencoded characters or one encoded character.
53--  In the context of Ada sources, any sequence starting [" must be the start
54--  of an encoding (since that sequence is not valid in Ada source otherwise).
55--  The routines in this package use the same approach. If the input string
56--  contains the sequence [" then this is assumed to be the start of a brackets
57--  encoding sequence, and if it does not match the syntax, an error is raised.
58--  In the case of the Prev functions, a sequence ending with "] is assumed to
59--  be a valid brackets sequence, and an error is raised if it is not.
60
61with System.WCh_Con;
62
63generic
64   Encoding_Method : System.WCh_Con.WC_Encoding_Method;
65
66package GNAT.Decode_String is
67   pragma Pure;
68
69   function Decode_Wide_String (S : String) return Wide_String;
70   pragma Inline (Decode_Wide_String);
71   --  Decode the given String, which is encoded using the indicated coding
72   --  method, returning the corresponding decoded Wide_String value. If S
73   --  contains a character code that cannot be represented with the given
74   --  encoding, then Constraint_Error is raised.
75
76   procedure Decode_Wide_String
77     (S      : String;
78      Result : out Wide_String;
79      Length : out Natural);
80   --  Similar to the above function except that the result is stored in the
81   --  given Wide_String variable Result, starting at Result (Result'First). On
82   --  return, Length is set to the number of characters stored in Result. The
83   --  caller must ensure that Result is long enough (an easy choice is to set
84   --  the length equal to S'Length, since decoding can never increase the
85   --  string length). If the length of Result is insufficient, then
86   --  Constraint_Error will be raised.
87
88   function Decode_Wide_Wide_String (S : String) return Wide_Wide_String;
89   pragma Inline (Decode_Wide_Wide_String);
90   --  Same as function Decode_Wide_String, but for Wide_Wide_String output
91
92   procedure Decode_Wide_Wide_String
93     (S      : String;
94      Result : out Wide_Wide_String;
95      Length : out Natural);
96   --  Same as procedure Decode_Wide_String, but for Wide_Wide_String output
97
98   function Validate_Wide_String (S : String) return Boolean;
99   --  This function inspects the string S to determine if it contains only
100   --  valid encodings corresponding to Wide_Character values using the
101   --  given encoding. If a call to Decode_Wide_String (S) would return
102   --  without raising Constraint_Error, then Validate_Wide_String will
103   --  return True. If the call would have raised Constraint_Error, then
104   --  Validate_Wide_String will return False.
105
106   function Validate_Wide_Wide_String (S : String) return Boolean;
107   --  Similar to Validate_Wide_String, except that it succeeds if the string
108   --  contains only encodings corresponding to Wide_Wide_Character values.
109
110   procedure Decode_Wide_Character
111     (Input  : String;
112      Ptr    : in out Natural;
113      Result : out Wide_Character);
114   pragma Inline (Decode_Wide_Character);
115   --  This is a lower-level procedure that decodes a single character using
116   --  the given encoding method. The encoded character is stored in Input,
117   --  starting at Input (Ptr). The resulting output character is stored in
118   --  Result, and on return Ptr is updated past the input character or
119   --  encoding sequence. Constraint_Error will be raised if the input
120   --  has a character that cannot be represented using the given encoding,
121   --  or if Ptr is outside the bounds of the Input string.
122
123   procedure Decode_Wide_Wide_Character
124     (Input  : String;
125      Ptr    : in out Natural;
126      Result : out Wide_Wide_Character);
127   --  Same as above procedure, but with Wide_Wide_Character output
128
129   procedure Next_Wide_Character (Input : String; Ptr : in out Natural);
130   --  This procedure examines the input string starting at Input (Ptr), and
131   --  advances Ptr past one character in the encoded string, so that on return
132   --  Ptr points to the next encoded character. Constraint_Error is raised if
133   --  an invalid encoding is encountered, or the end of the string is reached,
134   --  or if Ptr is less than Input'First on entry, or if the character
135   --  skipped is not a valid Wide_Character code. This call may be more
136   --  efficient than calling Decode_Wide_Character and discarding the result.
137
138   procedure Prev_Wide_Character (Input : String; Ptr : in out Natural);
139   --  This procedure is similar to Next_Wide_Character except that it moves
140   --  backwards in the string, so that on return, Ptr is set to point to the
141   --  previous encoded character. Constraint_Error is raised if the start of
142   --  the string is encountered. It is valid for Ptr to be one past the end
143   --  of the string for this call (in which case on return it will point to
144   --  the last encoded character).
145   --
146   --  Note: it is not generally possible to do this function efficiently with
147   --  all encodings; the current implementation is only efficient for the case
148   --  of UTF-8 (Encoding_Method = WCEM_UTF8) and Brackets (Encoding_Method =
149   --  WCEM_Brackets). For all other encodings, we work by starting at the
150   --  beginning of the string and moving forward till Ptr is reached, which
151   --  is correct but slow.
152
153   procedure Next_Wide_Wide_Character (Input : String; Ptr : in out Natural);
154   --  Similar to Next_Wide_Character except that codes skipped must be valid
155   --  Wide_Wide_Character codes.
156
157   procedure Prev_Wide_Wide_Character (Input : String; Ptr : in out Natural);
158   --  Similar to Prev_Wide_Character except that codes skipped must be valid
159   --  Wide_Wide_Character codes.
160
161end GNAT.Decode_String;
162