1------------------------------------------------------------------------------ 2-- -- 3-- GNAT RUNTIME COMPONENTS -- 4-- -- 5-- S Y S T E M . W C H _ C N V -- 6-- -- 7-- B o d y -- 8-- -- 9-- Copyright (C) 1992-2001 Free Software Foundation, Inc. -- 10-- -- 11-- GNAT is free software; you can redistribute it and/or modify it under -- 12-- terms of the GNU General Public License as published by the Free Soft- -- 13-- ware Foundation; either version 2, or (at your option) any later ver- -- 14-- sion. GNAT is distributed in the hope that it will be useful, but WITH- -- 15-- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -- 16-- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -- 17-- for more details. You should have received a copy of the GNU General -- 18-- Public License distributed with GNAT; see file COPYING. If not, write -- 19-- to the Free Software Foundation, 59 Temple Place - Suite 330, Boston, -- 20-- MA 02111-1307, USA. -- 21-- -- 22-- As a special exception, if other files instantiate generics from this -- 23-- unit, or you link this unit with other files to produce an executable, -- 24-- this unit does not by itself cause the resulting executable to be -- 25-- covered by the GNU General Public License. This exception does not -- 26-- however invalidate any other reasons why the executable file might be -- 27-- covered by the GNU Public License. -- 28-- -- 29-- GNAT was originally developed by the GNAT team at New York University. -- 30-- Extensive contributions were provided by Ada Core Technologies Inc. -- 31-- -- 32------------------------------------------------------------------------------ 33 34-- This package contains generic subprograms used for converting between 35-- sequences of Character and Wide_Character. All access to wide character 36-- sequences is isolated in this unit. 37 38with Interfaces; use Interfaces; 39with System.WCh_Con; use System.WCh_Con; 40with System.WCh_JIS; use System.WCh_JIS; 41 42package body System.WCh_Cnv is 43 44 -------------------------------- 45 -- Char_Sequence_To_Wide_Char -- 46 -------------------------------- 47 48 function Char_Sequence_To_Wide_Char 49 (C : Character; 50 EM : WC_Encoding_Method) 51 return Wide_Character 52 is 53 B1 : Integer; 54 C1 : Character; 55 U : Unsigned_16; 56 W : Unsigned_16; 57 58 procedure Get_Hex (N : Character); 59 -- If N is a hex character, then set B1 to 16 * B1 + character N. 60 -- Raise Constraint_Error if character N is not a hex character. 61 62 ------------- 63 -- Get_Hex -- 64 ------------- 65 66 procedure Get_Hex (N : Character) is 67 B2 : constant Integer := Character'Pos (N); 68 69 begin 70 if B2 in Character'Pos ('0') .. Character'Pos ('9') then 71 B1 := B1 * 16 + B2 - Character'Pos ('0'); 72 73 elsif B2 in Character'Pos ('A') .. Character'Pos ('F') then 74 B1 := B1 * 16 + B2 - (Character'Pos ('A') - 10); 75 76 elsif B2 in Character'Pos ('a') .. Character'Pos ('f') then 77 B1 := B1 * 16 + B2 - (Character'Pos ('a') - 10); 78 79 else 80 raise Constraint_Error; 81 end if; 82 end Get_Hex; 83 84 -- Start of processing for Char_Sequence_To_Wide_Char 85 86 begin 87 case EM is 88 89 when WCEM_Hex => 90 if C /= ASCII.ESC then 91 return Wide_Character'Val (Character'Pos (C)); 92 93 else 94 B1 := 0; 95 Get_Hex (In_Char); 96 Get_Hex (In_Char); 97 Get_Hex (In_Char); 98 Get_Hex (In_Char); 99 100 return Wide_Character'Val (B1); 101 end if; 102 103 when WCEM_Upper => 104 if C > ASCII.DEL then 105 return 106 Wide_Character'Val 107 (Integer (256 * Character'Pos (C)) + 108 Character'Pos (In_Char)); 109 else 110 return Wide_Character'Val (Character'Pos (C)); 111 end if; 112 113 when WCEM_Shift_JIS => 114 if C > ASCII.DEL then 115 return Shift_JIS_To_JIS (C, In_Char); 116 else 117 return Wide_Character'Val (Character'Pos (C)); 118 end if; 119 120 when WCEM_EUC => 121 if C > ASCII.DEL then 122 return EUC_To_JIS (C, In_Char); 123 else 124 return Wide_Character'Val (Character'Pos (C)); 125 end if; 126 127 when WCEM_UTF8 => 128 if C > ASCII.DEL then 129 130 -- 16#0080#-16#07ff#: 2#110xxxxx# 2#10xxxxxx# 131 -- 16#0800#-16#ffff#: 2#1110xxxx# 2#10xxxxxx# 2#10xxxxxx# 132 133 U := Unsigned_16 (Character'Pos (C)); 134 135 if (U and 2#11100000#) = 2#11000000# then 136 W := Shift_Left (U and 2#00011111#, 6); 137 U := Unsigned_16 (Character'Pos (In_Char)); 138 139 if (U and 2#11000000#) /= 2#10000000# then 140 raise Constraint_Error; 141 end if; 142 143 W := W or (U and 2#00111111#); 144 145 elsif (U and 2#11110000#) = 2#11100000# then 146 W := Shift_Left (U and 2#00001111#, 12); 147 U := Unsigned_16 (Character'Pos (In_Char)); 148 149 if (U and 2#11000000#) /= 2#10000000# then 150 raise Constraint_Error; 151 end if; 152 153 W := W or Shift_Left (U and 2#00111111#, 6); 154 U := Unsigned_16 (Character'Pos (In_Char)); 155 156 if (U and 2#11000000#) /= 2#10000000# then 157 raise Constraint_Error; 158 end if; 159 160 W := W or (U and 2#00111111#); 161 162 else 163 raise Constraint_Error; 164 end if; 165 166 return Wide_Character'Val (W); 167 168 else 169 return Wide_Character'Val (Character'Pos (C)); 170 end if; 171 172 when WCEM_Brackets => 173 174 if C /= '[' then 175 return Wide_Character'Val (Character'Pos (C)); 176 end if; 177 178 if In_Char /= '"' then 179 raise Constraint_Error; 180 end if; 181 182 B1 := 0; 183 Get_Hex (In_Char); 184 Get_Hex (In_Char); 185 C1 := In_Char; 186 187 if C1 /= '"' then 188 Get_Hex (C1); 189 Get_Hex (In_Char); 190 C1 := In_Char; 191 192 if C1 /= '"' then 193 raise Constraint_Error; 194 end if; 195 end if; 196 197 if In_Char /= ']' then 198 raise Constraint_Error; 199 end if; 200 201 return Wide_Character'Val (B1); 202 203 end case; 204 end Char_Sequence_To_Wide_Char; 205 206 -------------------------------- 207 -- Wide_Char_To_Char_Sequence -- 208 -------------------------------- 209 210 procedure Wide_Char_To_Char_Sequence 211 (WC : Wide_Character; 212 EM : WC_Encoding_Method) 213 is 214 Val : constant Natural := Wide_Character'Pos (WC); 215 Hexc : constant array (0 .. 15) of Character := "0123456789ABCDEF"; 216 C1, C2 : Character; 217 U : Unsigned_16; 218 219 begin 220 case EM is 221 222 when WCEM_Hex => 223 if Val < 256 then 224 Out_Char (Character'Val (Val)); 225 226 else 227 Out_Char (ASCII.ESC); 228 Out_Char (Hexc (Val / (16**3))); 229 Out_Char (Hexc ((Val / (16**2)) mod 16)); 230 Out_Char (Hexc ((Val / 16) mod 16)); 231 Out_Char (Hexc (Val mod 16)); 232 end if; 233 234 when WCEM_Upper => 235 if Val < 128 then 236 Out_Char (Character'Val (Val)); 237 238 elsif Val < 16#8000# then 239 raise Constraint_Error; 240 241 else 242 Out_Char (Character'Val (Val / 256)); 243 Out_Char (Character'Val (Val mod 256)); 244 end if; 245 246 when WCEM_Shift_JIS => 247 if Val < 128 then 248 Out_Char (Character'Val (Val)); 249 else 250 JIS_To_Shift_JIS (WC, C1, C2); 251 Out_Char (C1); 252 Out_Char (C2); 253 end if; 254 255 when WCEM_EUC => 256 if Val < 128 then 257 Out_Char (Character'Val (Val)); 258 else 259 JIS_To_EUC (WC, C1, C2); 260 Out_Char (C1); 261 Out_Char (C2); 262 end if; 263 264 when WCEM_UTF8 => 265 U := Unsigned_16 (Val); 266 267 -- 16#0000#-16#007f#: 2#0xxxxxxx# 268 -- 16#0080#-16#07ff#: 2#110xxxxx# 2#10xxxxxx# 269 -- 16#0800#-16#ffff#: 2#1110xxxx# 2#10xxxxxx# 2#10xxxxxx# 270 271 if U < 16#80# then 272 Out_Char (Character'Val (U)); 273 274 elsif U < 16#0800# then 275 Out_Char (Character'Val (2#11000000# or Shift_Right (U, 6))); 276 Out_Char (Character'Val (2#10000000# or (U and 2#00111111#))); 277 278 else 279 Out_Char (Character'Val (2#11100000# or Shift_Right (U, 12))); 280 Out_Char (Character'Val (2#10000000# or (Shift_Right (U, 6) 281 and 2#00111111#))); 282 Out_Char (Character'Val (2#10000000# or (U and 2#00111111#))); 283 end if; 284 285 when WCEM_Brackets => 286 287 if Val < 256 then 288 Out_Char (Character'Val (Val)); 289 290 else 291 Out_Char ('['); 292 Out_Char ('"'); 293 Out_Char (Hexc (Val / (16**3))); 294 Out_Char (Hexc ((Val / (16**2)) mod 16)); 295 Out_Char (Hexc ((Val / 16) mod 16)); 296 Out_Char (Hexc (Val mod 16)); 297 Out_Char ('"'); 298 Out_Char (']'); 299 end if; 300 end case; 301 end Wide_Char_To_Char_Sequence; 302 303end System.WCh_Cnv; 304