1------------------------------------------------------------------------------
2--                                                                          --
3--                         GNAT RUNTIME COMPONENTS                          --
4--                                                                          --
5--                       S Y S T E M . W C H _ C N V                        --
6--                                                                          --
7--                                 B o d y                                  --
8--                                                                          --
9--          Copyright (C) 1992-2001 Free Software Foundation, Inc.          --
10--                                                                          --
11-- GNAT is free software;  you can  redistribute it  and/or modify it under --
12-- terms of the  GNU General Public License as published  by the Free Soft- --
13-- ware  Foundation;  either version 2,  or (at your option) any later ver- --
14-- sion.  GNAT is distributed in the hope that it will be useful, but WITH- --
15-- OUT ANY WARRANTY;  without even the  implied warranty of MERCHANTABILITY --
16-- or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License --
17-- for  more details.  You should have  received  a copy of the GNU General --
18-- Public License  distributed with GNAT;  see file COPYING.  If not, write --
19-- to  the Free Software Foundation,  59 Temple Place - Suite 330,  Boston, --
20-- MA 02111-1307, USA.                                                      --
21--                                                                          --
22-- As a special exception,  if other files  instantiate  generics from this --
23-- unit, or you link  this unit with other files  to produce an executable, --
24-- this  unit  does not  by itself cause  the resulting  executable  to  be --
25-- covered  by the  GNU  General  Public  License.  This exception does not --
26-- however invalidate  any other reasons why  the executable file  might be --
27-- covered by the  GNU Public License.                                      --
28--                                                                          --
29-- GNAT was originally developed  by the GNAT team at  New York University. --
30-- Extensive contributions were provided by Ada Core Technologies Inc.      --
31--                                                                          --
32------------------------------------------------------------------------------
33
34--  This package contains generic subprograms used for converting between
35--  sequences of Character and Wide_Character. All access to wide character
36--  sequences is isolated in this unit.
37
38with Interfaces;     use Interfaces;
39with System.WCh_Con; use System.WCh_Con;
40with System.WCh_JIS; use System.WCh_JIS;
41
42package body System.WCh_Cnv is
43
44   --------------------------------
45   -- Char_Sequence_To_Wide_Char --
46   --------------------------------
47
48   function Char_Sequence_To_Wide_Char
49     (C    : Character;
50      EM   : WC_Encoding_Method)
51      return Wide_Character
52   is
53      B1 : Integer;
54      C1 : Character;
55      U  : Unsigned_16;
56      W  : Unsigned_16;
57
58      procedure Get_Hex (N : Character);
59      --  If N is a hex character, then set B1 to 16 * B1 + character N.
60      --  Raise Constraint_Error if character N is not a hex character.
61
62      -------------
63      -- Get_Hex --
64      -------------
65
66      procedure Get_Hex (N : Character) is
67         B2 : constant Integer := Character'Pos (N);
68
69      begin
70         if B2 in Character'Pos ('0') .. Character'Pos ('9') then
71            B1 := B1 * 16 + B2 - Character'Pos ('0');
72
73         elsif B2 in Character'Pos ('A') .. Character'Pos ('F') then
74            B1 := B1 * 16 + B2 - (Character'Pos ('A') - 10);
75
76         elsif B2 in Character'Pos ('a') .. Character'Pos ('f') then
77            B1 := B1 * 16 + B2 - (Character'Pos ('a') - 10);
78
79         else
80            raise Constraint_Error;
81         end if;
82      end Get_Hex;
83
84   --  Start of processing for Char_Sequence_To_Wide_Char
85
86   begin
87      case EM is
88
89         when WCEM_Hex =>
90            if C /= ASCII.ESC then
91               return Wide_Character'Val (Character'Pos (C));
92
93            else
94               B1 := 0;
95               Get_Hex (In_Char);
96               Get_Hex (In_Char);
97               Get_Hex (In_Char);
98               Get_Hex (In_Char);
99
100               return Wide_Character'Val (B1);
101            end if;
102
103         when WCEM_Upper =>
104            if C > ASCII.DEL then
105               return
106                 Wide_Character'Val
107                   (Integer (256 * Character'Pos (C)) +
108                    Character'Pos (In_Char));
109            else
110               return Wide_Character'Val (Character'Pos (C));
111            end if;
112
113         when WCEM_Shift_JIS =>
114            if C > ASCII.DEL then
115               return Shift_JIS_To_JIS (C, In_Char);
116            else
117               return Wide_Character'Val (Character'Pos (C));
118            end if;
119
120         when WCEM_EUC =>
121            if C > ASCII.DEL then
122               return EUC_To_JIS (C, In_Char);
123            else
124               return Wide_Character'Val (Character'Pos (C));
125            end if;
126
127         when WCEM_UTF8 =>
128            if C > ASCII.DEL then
129
130               --  16#0080#-16#07ff#: 2#110xxxxx# 2#10xxxxxx#
131               --  16#0800#-16#ffff#: 2#1110xxxx# 2#10xxxxxx# 2#10xxxxxx#
132
133               U := Unsigned_16 (Character'Pos (C));
134
135               if (U and 2#11100000#) = 2#11000000# then
136                  W := Shift_Left (U and 2#00011111#, 6);
137                  U := Unsigned_16 (Character'Pos (In_Char));
138
139                  if (U and 2#11000000#) /= 2#10000000# then
140                     raise Constraint_Error;
141                  end if;
142
143                  W := W or (U and 2#00111111#);
144
145               elsif (U and 2#11110000#) = 2#11100000# then
146                  W := Shift_Left (U and 2#00001111#, 12);
147                  U := Unsigned_16 (Character'Pos (In_Char));
148
149                  if (U and 2#11000000#) /= 2#10000000# then
150                     raise Constraint_Error;
151                  end if;
152
153                  W := W or Shift_Left (U and 2#00111111#, 6);
154                  U := Unsigned_16 (Character'Pos (In_Char));
155
156                  if (U and 2#11000000#) /= 2#10000000# then
157                     raise Constraint_Error;
158                  end if;
159
160                  W := W or (U and 2#00111111#);
161
162               else
163                  raise Constraint_Error;
164               end if;
165
166               return Wide_Character'Val (W);
167
168            else
169               return Wide_Character'Val (Character'Pos (C));
170            end if;
171
172         when WCEM_Brackets =>
173
174            if C /= '[' then
175               return Wide_Character'Val (Character'Pos (C));
176            end if;
177
178            if In_Char /= '"' then
179               raise Constraint_Error;
180            end if;
181
182            B1 := 0;
183            Get_Hex (In_Char);
184            Get_Hex (In_Char);
185            C1 := In_Char;
186
187            if C1 /= '"' then
188               Get_Hex (C1);
189               Get_Hex (In_Char);
190               C1 := In_Char;
191
192               if C1 /= '"' then
193                  raise Constraint_Error;
194               end if;
195            end if;
196
197            if In_Char /= ']' then
198               raise Constraint_Error;
199            end if;
200
201            return Wide_Character'Val (B1);
202
203      end case;
204   end Char_Sequence_To_Wide_Char;
205
206   --------------------------------
207   -- Wide_Char_To_Char_Sequence --
208   --------------------------------
209
210   procedure Wide_Char_To_Char_Sequence
211     (WC : Wide_Character;
212      EM : WC_Encoding_Method)
213   is
214      Val    : constant Natural := Wide_Character'Pos (WC);
215      Hexc   : constant array (0 .. 15) of Character := "0123456789ABCDEF";
216      C1, C2 : Character;
217      U      : Unsigned_16;
218
219   begin
220      case EM is
221
222         when WCEM_Hex =>
223            if Val < 256 then
224               Out_Char (Character'Val (Val));
225
226            else
227               Out_Char (ASCII.ESC);
228               Out_Char (Hexc (Val / (16**3)));
229               Out_Char (Hexc ((Val / (16**2)) mod 16));
230               Out_Char (Hexc ((Val / 16) mod 16));
231               Out_Char (Hexc (Val mod 16));
232            end if;
233
234         when WCEM_Upper =>
235            if Val < 128 then
236               Out_Char (Character'Val (Val));
237
238            elsif Val < 16#8000# then
239               raise Constraint_Error;
240
241            else
242               Out_Char (Character'Val (Val / 256));
243               Out_Char (Character'Val (Val mod 256));
244            end if;
245
246         when WCEM_Shift_JIS =>
247            if Val < 128 then
248               Out_Char (Character'Val (Val));
249            else
250               JIS_To_Shift_JIS (WC, C1, C2);
251               Out_Char (C1);
252               Out_Char (C2);
253            end if;
254
255         when WCEM_EUC =>
256            if Val < 128 then
257               Out_Char (Character'Val (Val));
258            else
259               JIS_To_EUC (WC, C1, C2);
260               Out_Char (C1);
261               Out_Char (C2);
262            end if;
263
264         when WCEM_UTF8 =>
265            U := Unsigned_16 (Val);
266
267            --  16#0000#-16#007f#: 2#0xxxxxxx#
268            --  16#0080#-16#07ff#: 2#110xxxxx# 2#10xxxxxx#
269            --  16#0800#-16#ffff#: 2#1110xxxx# 2#10xxxxxx# 2#10xxxxxx#
270
271            if U < 16#80# then
272               Out_Char (Character'Val (U));
273
274            elsif U < 16#0800# then
275               Out_Char (Character'Val (2#11000000# or Shift_Right (U, 6)));
276               Out_Char (Character'Val (2#10000000# or (U and 2#00111111#)));
277
278            else
279               Out_Char (Character'Val (2#11100000# or Shift_Right (U, 12)));
280               Out_Char (Character'Val (2#10000000# or (Shift_Right (U, 6)
281                                                         and 2#00111111#)));
282               Out_Char (Character'Val (2#10000000# or (U and 2#00111111#)));
283            end if;
284
285         when WCEM_Brackets =>
286
287            if Val < 256 then
288               Out_Char (Character'Val (Val));
289
290            else
291               Out_Char ('[');
292               Out_Char ('"');
293               Out_Char (Hexc (Val / (16**3)));
294               Out_Char (Hexc ((Val / (16**2)) mod 16));
295               Out_Char (Hexc ((Val / 16) mod 16));
296               Out_Char (Hexc (Val mod 16));
297               Out_Char ('"');
298               Out_Char (']');
299            end if;
300      end case;
301   end Wide_Char_To_Char_Sequence;
302
303end System.WCh_Cnv;
304