1------------------------------------------------------------------------------
2--                                                                          --
3--                         GNAT COMPILER COMPONENTS                         --
4--                                                                          --
5--                             W I D E C H A R                              --
6--                                                                          --
7--                                 B o d y                                  --
8--                                                                          --
9--          Copyright (C) 1992-2009 Free Software Foundation, Inc.          --
10--                                                                          --
11-- GNAT is free software;  you can  redistribute it  and/or modify it under --
12-- terms of the  GNU General Public License as published  by the Free Soft- --
13-- ware  Foundation;  either version 3,  or (at your option) any later ver- --
14-- sion.  GNAT is distributed in the hope that it will be useful, but WITH- --
15-- OUT ANY WARRANTY;  without even the  implied warranty of MERCHANTABILITY --
16-- or FITNESS FOR A PARTICULAR PURPOSE.                                     --
17--                                                                          --
18-- As a special exception under Section 7 of GPL version 3, you are granted --
19-- additional permissions described in the GCC Runtime Library Exception,   --
20-- version 3.1, as published by the Free Software Foundation.               --
21--                                                                          --
22-- You should have received a copy of the GNU General Public License and    --
23-- a copy of the GCC Runtime Library Exception along with this program;     --
24-- see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see    --
25-- <http://www.gnu.org/licenses/>.                                          --
26--                                                                          --
27-- GNAT was originally developed  by the GNAT team at  New York University. --
28-- Extensive contributions were provided by Ada Core Technologies Inc.      --
29--                                                                          --
30------------------------------------------------------------------------------
31
32--  Note: this package uses the generic subprograms in System.Wch_Cnv, which
33--  completely encapsulate the set of wide character encoding methods, so no
34--  modifications are required when adding new encoding methods.
35
36with Opt; use Opt;
37
38with System.WCh_Cnv; use System.WCh_Cnv;
39with System.WCh_Con; use System.WCh_Con;
40
41package body Widechar is
42
43   ---------------------------
44   -- Is_Start_Of_Wide_Char --
45   ---------------------------
46
47   function Is_Start_Of_Wide_Char
48     (S : Source_Buffer_Ptr;
49      P : Source_Ptr) return Boolean
50   is
51   begin
52      case Wide_Character_Encoding_Method is
53
54         --  For Hex mode, just test for an ESC character. The ESC character
55         --  cannot appear in any other context in a legal Ada program.
56
57         when WCEM_Hex =>
58            return S (P) = ASCII.ESC;
59
60         --  For brackets, just test ["x where x is a hex character. This is
61         --  sufficient test, since this sequence cannot otherwise appear in a
62         --  legal Ada program.
63
64         when WCEM_Brackets =>
65            return P <= S'Last - 2
66              and then S (P) = '['
67              and then S (P + 1) = '"'
68              and then (S (P + 2) in '0' .. '9'
69                            or else
70                           S (P + 2) in 'a' .. 'f'
71                            or else
72                        S (P + 2) in 'A' .. 'F');
73
74         --  All other encoding methods use the upper bit set in the first
75         --  character to uniquely represent a wide character.
76
77         when WCEM_Upper     |
78              WCEM_Shift_JIS |
79              WCEM_EUC       |
80              WCEM_UTF8      =>
81            return S (P) >= Character'Val (16#80#);
82      end case;
83   end Is_Start_Of_Wide_Char;
84
85   -----------------
86   -- Length_Wide --
87   -----------------
88
89   function Length_Wide return Nat is
90   begin
91      return WC_Longest_Sequence;
92   end Length_Wide;
93
94   ---------------
95   -- Scan_Wide --
96   ---------------
97
98   procedure Scan_Wide
99     (S   : Source_Buffer_Ptr;
100      P   : in out Source_Ptr;
101      C   : out Char_Code;
102      Err : out Boolean)
103   is
104      P_Init : constant Source_Ptr := P;
105      Chr    : Character;
106
107      function In_Char return Character;
108      --  Function to obtain characters of wide character escape sequence
109
110      -------------
111      -- In_Char --
112      -------------
113
114      function In_Char return Character is
115      begin
116         P := P + 1;
117         return S (P - 1);
118      end In_Char;
119
120      function WC_In is new Char_Sequence_To_UTF_32 (In_Char);
121
122   --  Start of processing for Scan_Wide
123
124   begin
125      Chr := In_Char;
126
127      --  Scan out the wide character. If the first character is a bracket,
128      --  we allow brackets encoding regardless of the standard encoding
129      --  method being used, but otherwise we use this standard method.
130
131      if Chr = '[' then
132         C := Char_Code (WC_In (Chr, WCEM_Brackets));
133      else
134         C := Char_Code (WC_In (Chr, Wide_Character_Encoding_Method));
135      end if;
136
137      Err := False;
138      Wide_Char_Byte_Count := Wide_Char_Byte_Count + Nat (P - P_Init - 1);
139
140   exception
141      when Constraint_Error =>
142         C := Char_Code (0);
143         P := P - 1;
144         Err := True;
145   end Scan_Wide;
146
147   --------------
148   -- Set_Wide --
149   --------------
150
151   procedure Set_Wide
152     (C : Char_Code;
153      S : in out String;
154      P : in out Natural)
155   is
156      procedure Out_Char (C : Character);
157      --  Procedure to store one character of wide character sequence
158
159      --------------
160      -- Out_Char --
161      --------------
162
163      procedure Out_Char (C : Character) is
164      begin
165         P := P + 1;
166         S (P) := C;
167      end Out_Char;
168
169      procedure WC_Out is new UTF_32_To_Char_Sequence (Out_Char);
170
171   --  Start of processing for Set_Wide
172
173   begin
174      WC_Out (UTF_32_Code (C), Wide_Character_Encoding_Method);
175   end Set_Wide;
176
177   ---------------
178   -- Skip_Wide --
179   ---------------
180
181   procedure Skip_Wide (S : String; P : in out Natural) is
182      P_Init : constant Natural := P;
183
184      function Skip_Char return Character;
185      --  Function to skip one character of wide character escape sequence
186
187      ---------------
188      -- Skip_Char --
189      ---------------
190
191      function Skip_Char return Character is
192      begin
193         P := P + 1;
194         return S (P - 1);
195      end Skip_Char;
196
197      function WC_Skip is new Char_Sequence_To_UTF_32 (Skip_Char);
198
199      Discard : UTF_32_Code;
200      pragma Warnings (Off, Discard);
201
202   --  Start of processing for Skip_Wide
203
204   begin
205      Discard := WC_Skip (Skip_Char, Wide_Character_Encoding_Method);
206      Wide_Char_Byte_Count := Wide_Char_Byte_Count + Nat (P - P_Init - 1);
207   end Skip_Wide;
208
209   ---------------
210   -- Skip_Wide --
211   ---------------
212
213   procedure Skip_Wide (S : Source_Buffer_Ptr; P : in out Source_Ptr) is
214      P_Init : constant Source_Ptr := P;
215
216      function Skip_Char return Character;
217      --  Function to skip one character of wide character escape sequence
218
219      ---------------
220      -- Skip_Char --
221      ---------------
222
223      function Skip_Char return Character is
224      begin
225         P := P + 1;
226         return S (P - 1);
227      end Skip_Char;
228
229      function WC_Skip is new Char_Sequence_To_UTF_32 (Skip_Char);
230
231      Discard : UTF_32_Code;
232      pragma Warnings (Off, Discard);
233
234   --  Start of processing for Skip_Wide
235
236   begin
237      Discard := WC_Skip (Skip_Char, Wide_Character_Encoding_Method);
238      Wide_Char_Byte_Count := Wide_Char_Byte_Count + Nat (P - P_Init - 1);
239   end Skip_Wide;
240
241end Widechar;
242