1------------------------------------------------------------------------------ 2-- -- 3-- GNAT COMPILER COMPONENTS -- 4-- -- 5-- W I D E C H A R -- 6-- -- 7-- B o d y -- 8-- -- 9-- Copyright (C) 1992-2009 Free Software Foundation, Inc. -- 10-- -- 11-- GNAT is free software; you can redistribute it and/or modify it under -- 12-- terms of the GNU General Public License as published by the Free Soft- -- 13-- ware Foundation; either version 3, or (at your option) any later ver- -- 14-- sion. GNAT is distributed in the hope that it will be useful, but WITH- -- 15-- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -- 16-- or FITNESS FOR A PARTICULAR PURPOSE. -- 17-- -- 18-- As a special exception under Section 7 of GPL version 3, you are granted -- 19-- additional permissions described in the GCC Runtime Library Exception, -- 20-- version 3.1, as published by the Free Software Foundation. -- 21-- -- 22-- You should have received a copy of the GNU General Public License and -- 23-- a copy of the GCC Runtime Library Exception along with this program; -- 24-- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -- 25-- <http://www.gnu.org/licenses/>. -- 26-- -- 27-- GNAT was originally developed by the GNAT team at New York University. -- 28-- Extensive contributions were provided by Ada Core Technologies Inc. -- 29-- -- 30------------------------------------------------------------------------------ 31 32-- Note: this package uses the generic subprograms in System.Wch_Cnv, which 33-- completely encapsulate the set of wide character encoding methods, so no 34-- modifications are required when adding new encoding methods. 35 36with Opt; use Opt; 37 38with System.WCh_Cnv; use System.WCh_Cnv; 39with System.WCh_Con; use System.WCh_Con; 40 41package body Widechar is 42 43 --------------------------- 44 -- Is_Start_Of_Wide_Char -- 45 --------------------------- 46 47 function Is_Start_Of_Wide_Char 48 (S : Source_Buffer_Ptr; 49 P : Source_Ptr) return Boolean 50 is 51 begin 52 case Wide_Character_Encoding_Method is 53 54 -- For Hex mode, just test for an ESC character. The ESC character 55 -- cannot appear in any other context in a legal Ada program. 56 57 when WCEM_Hex => 58 return S (P) = ASCII.ESC; 59 60 -- For brackets, just test ["x where x is a hex character. This is 61 -- sufficient test, since this sequence cannot otherwise appear in a 62 -- legal Ada program. 63 64 when WCEM_Brackets => 65 return P <= S'Last - 2 66 and then S (P) = '[' 67 and then S (P + 1) = '"' 68 and then (S (P + 2) in '0' .. '9' 69 or else 70 S (P + 2) in 'a' .. 'f' 71 or else 72 S (P + 2) in 'A' .. 'F'); 73 74 -- All other encoding methods use the upper bit set in the first 75 -- character to uniquely represent a wide character. 76 77 when WCEM_Upper | 78 WCEM_Shift_JIS | 79 WCEM_EUC | 80 WCEM_UTF8 => 81 return S (P) >= Character'Val (16#80#); 82 end case; 83 end Is_Start_Of_Wide_Char; 84 85 ----------------- 86 -- Length_Wide -- 87 ----------------- 88 89 function Length_Wide return Nat is 90 begin 91 return WC_Longest_Sequence; 92 end Length_Wide; 93 94 --------------- 95 -- Scan_Wide -- 96 --------------- 97 98 procedure Scan_Wide 99 (S : Source_Buffer_Ptr; 100 P : in out Source_Ptr; 101 C : out Char_Code; 102 Err : out Boolean) 103 is 104 P_Init : constant Source_Ptr := P; 105 Chr : Character; 106 107 function In_Char return Character; 108 -- Function to obtain characters of wide character escape sequence 109 110 ------------- 111 -- In_Char -- 112 ------------- 113 114 function In_Char return Character is 115 begin 116 P := P + 1; 117 return S (P - 1); 118 end In_Char; 119 120 function WC_In is new Char_Sequence_To_UTF_32 (In_Char); 121 122 -- Start of processing for Scan_Wide 123 124 begin 125 Chr := In_Char; 126 127 -- Scan out the wide character. If the first character is a bracket, 128 -- we allow brackets encoding regardless of the standard encoding 129 -- method being used, but otherwise we use this standard method. 130 131 if Chr = '[' then 132 C := Char_Code (WC_In (Chr, WCEM_Brackets)); 133 else 134 C := Char_Code (WC_In (Chr, Wide_Character_Encoding_Method)); 135 end if; 136 137 Err := False; 138 Wide_Char_Byte_Count := Wide_Char_Byte_Count + Nat (P - P_Init - 1); 139 140 exception 141 when Constraint_Error => 142 C := Char_Code (0); 143 P := P - 1; 144 Err := True; 145 end Scan_Wide; 146 147 -------------- 148 -- Set_Wide -- 149 -------------- 150 151 procedure Set_Wide 152 (C : Char_Code; 153 S : in out String; 154 P : in out Natural) 155 is 156 procedure Out_Char (C : Character); 157 -- Procedure to store one character of wide character sequence 158 159 -------------- 160 -- Out_Char -- 161 -------------- 162 163 procedure Out_Char (C : Character) is 164 begin 165 P := P + 1; 166 S (P) := C; 167 end Out_Char; 168 169 procedure WC_Out is new UTF_32_To_Char_Sequence (Out_Char); 170 171 -- Start of processing for Set_Wide 172 173 begin 174 WC_Out (UTF_32_Code (C), Wide_Character_Encoding_Method); 175 end Set_Wide; 176 177 --------------- 178 -- Skip_Wide -- 179 --------------- 180 181 procedure Skip_Wide (S : String; P : in out Natural) is 182 P_Init : constant Natural := P; 183 184 function Skip_Char return Character; 185 -- Function to skip one character of wide character escape sequence 186 187 --------------- 188 -- Skip_Char -- 189 --------------- 190 191 function Skip_Char return Character is 192 begin 193 P := P + 1; 194 return S (P - 1); 195 end Skip_Char; 196 197 function WC_Skip is new Char_Sequence_To_UTF_32 (Skip_Char); 198 199 Discard : UTF_32_Code; 200 pragma Warnings (Off, Discard); 201 202 -- Start of processing for Skip_Wide 203 204 begin 205 Discard := WC_Skip (Skip_Char, Wide_Character_Encoding_Method); 206 Wide_Char_Byte_Count := Wide_Char_Byte_Count + Nat (P - P_Init - 1); 207 end Skip_Wide; 208 209 --------------- 210 -- Skip_Wide -- 211 --------------- 212 213 procedure Skip_Wide (S : Source_Buffer_Ptr; P : in out Source_Ptr) is 214 P_Init : constant Source_Ptr := P; 215 216 function Skip_Char return Character; 217 -- Function to skip one character of wide character escape sequence 218 219 --------------- 220 -- Skip_Char -- 221 --------------- 222 223 function Skip_Char return Character is 224 begin 225 P := P + 1; 226 return S (P - 1); 227 end Skip_Char; 228 229 function WC_Skip is new Char_Sequence_To_UTF_32 (Skip_Char); 230 231 Discard : UTF_32_Code; 232 pragma Warnings (Off, Discard); 233 234 -- Start of processing for Skip_Wide 235 236 begin 237 Discard := WC_Skip (Skip_Char, Wide_Character_Encoding_Method); 238 Wide_Char_Byte_Count := Wide_Char_Byte_Count + Nat (P - P_Init - 1); 239 end Skip_Wide; 240 241end Widechar; 242