1------------------------------------------------------------------------------ 2-- -- 3-- GNAT RUN-TIME COMPONENTS -- 4-- -- 5-- G N A T . E N C O D E _ S T R I N G -- 6-- -- 7-- B o d y -- 8-- -- 9-- Copyright (C) 2007-2019, Free Software Foundation, Inc. -- 10-- -- 11-- GNAT is free software; you can redistribute it and/or modify it under -- 12-- terms of the GNU General Public License as published by the Free Soft- -- 13-- ware Foundation; either version 3, or (at your option) any later ver- -- 14-- sion. GNAT is distributed in the hope that it will be useful, but WITH- -- 15-- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -- 16-- or FITNESS FOR A PARTICULAR PURPOSE. -- 17-- -- 18-- As a special exception under Section 7 of GPL version 3, you are granted -- 19-- additional permissions described in the GCC Runtime Library Exception, -- 20-- version 3.1, as published by the Free Software Foundation. -- 21-- -- 22-- You should have received a copy of the GNU General Public License and -- 23-- a copy of the GCC Runtime Library Exception along with this program; -- 24-- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -- 25-- <http://www.gnu.org/licenses/>. -- 26-- -- 27-- GNAT was originally developed by the GNAT team at New York University. -- 28-- Extensive contributions were provided by Ada Core Technologies Inc. -- 29-- -- 30------------------------------------------------------------------------------ 31 32with Interfaces; use Interfaces; 33 34with System.WCh_Con; use System.WCh_Con; 35with System.WCh_Cnv; use System.WCh_Cnv; 36 37package body GNAT.Encode_String is 38 39 ----------------------- 40 -- Local Subprograms -- 41 ----------------------- 42 43 procedure Bad; 44 pragma No_Return (Bad); 45 -- Raise error for bad character code 46 47 procedure Past_End; 48 pragma No_Return (Past_End); 49 -- Raise error for off end of string 50 51 --------- 52 -- Bad -- 53 --------- 54 55 procedure Bad is 56 begin 57 raise Constraint_Error with 58 "character cannot be encoded with given Encoding_Method"; 59 end Bad; 60 61 ------------------------ 62 -- Encode_Wide_String -- 63 ------------------------ 64 65 function Encode_Wide_String (S : Wide_String) return String is 66 Long : constant Natural := WC_Longest_Sequences (Encoding_Method); 67 Result : String (1 .. S'Length * Long); 68 Length : Natural; 69 begin 70 Encode_Wide_String (S, Result, Length); 71 return Result (1 .. Length); 72 end Encode_Wide_String; 73 74 procedure Encode_Wide_String 75 (S : Wide_String; 76 Result : out String; 77 Length : out Natural) 78 is 79 Ptr : Natural; 80 81 begin 82 Ptr := Result'First; 83 for J in S'Range loop 84 Encode_Wide_Character (S (J), Result, Ptr); 85 end loop; 86 87 Length := Ptr - Result'First; 88 end Encode_Wide_String; 89 90 ----------------------------- 91 -- Encode_Wide_Wide_String -- 92 ----------------------------- 93 94 function Encode_Wide_Wide_String (S : Wide_Wide_String) return String is 95 Long : constant Natural := WC_Longest_Sequences (Encoding_Method); 96 Result : String (1 .. S'Length * Long); 97 Length : Natural; 98 begin 99 Encode_Wide_Wide_String (S, Result, Length); 100 return Result (1 .. Length); 101 end Encode_Wide_Wide_String; 102 103 procedure Encode_Wide_Wide_String 104 (S : Wide_Wide_String; 105 Result : out String; 106 Length : out Natural) 107 is 108 Ptr : Natural; 109 110 begin 111 Ptr := Result'First; 112 for J in S'Range loop 113 Encode_Wide_Wide_Character (S (J), Result, Ptr); 114 end loop; 115 116 Length := Ptr - Result'First; 117 end Encode_Wide_Wide_String; 118 119 --------------------------- 120 -- Encode_Wide_Character -- 121 --------------------------- 122 123 procedure Encode_Wide_Character 124 (Char : Wide_Character; 125 Result : in out String; 126 Ptr : in out Natural) 127 is 128 begin 129 Encode_Wide_Wide_Character 130 (Wide_Wide_Character'Val (Wide_Character'Pos (Char)), Result, Ptr); 131 132 exception 133 when Constraint_Error => 134 Bad; 135 end Encode_Wide_Character; 136 137 -------------------------------- 138 -- Encode_Wide_Wide_Character -- 139 -------------------------------- 140 141 procedure Encode_Wide_Wide_Character 142 (Char : Wide_Wide_Character; 143 Result : in out String; 144 Ptr : in out Natural) 145 is 146 U : Unsigned_32; 147 148 procedure Out_Char (C : Character); 149 pragma Inline (Out_Char); 150 -- Procedure to store one character for instantiation below 151 152 -------------- 153 -- Out_Char -- 154 -------------- 155 156 procedure Out_Char (C : Character) is 157 begin 158 if Ptr > Result'Last then 159 Past_End; 160 else 161 Result (Ptr) := C; 162 Ptr := Ptr + 1; 163 end if; 164 end Out_Char; 165 166 -- Start of processing for Encode_Wide_Wide_Character; 167 168 begin 169 -- Efficient code for UTF-8 case 170 171 if Encoding_Method = WCEM_UTF8 then 172 173 -- Note: for details of UTF8 encoding see RFC 3629 174 175 U := Unsigned_32 (Wide_Wide_Character'Pos (Char)); 176 177 -- 16#00_0000#-16#00_007F#: 0xxxxxxx 178 179 if U <= 16#00_007F# then 180 Out_Char (Character'Val (U)); 181 182 -- 16#00_0080#-16#00_07FF#: 110xxxxx 10xxxxxx 183 184 elsif U <= 16#00_07FF# then 185 Out_Char (Character'Val (2#11000000# or Shift_Right (U, 6))); 186 Out_Char (Character'Val (2#10000000# or (U and 2#00111111#))); 187 188 -- 16#00_0800#-16#00_FFFF#: 1110xxxx 10xxxxxx 10xxxxxx 189 190 elsif U <= 16#00_FFFF# then 191 Out_Char (Character'Val (2#11100000# or Shift_Right (U, 12))); 192 Out_Char (Character'Val (2#10000000# or (Shift_Right (U, 6) 193 and 2#00111111#))); 194 Out_Char (Character'Val (2#10000000# or (U and 2#00111111#))); 195 196 -- 16#01_0000#-16#10_FFFF#: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx 197 198 elsif U <= 16#10_FFFF# then 199 Out_Char (Character'Val (2#11110000# or Shift_Right (U, 18))); 200 Out_Char (Character'Val (2#10000000# or (Shift_Right (U, 12) 201 and 2#00111111#))); 202 Out_Char (Character'Val (2#10000000# or (Shift_Right (U, 6) 203 and 2#00111111#))); 204 Out_Char (Character'Val (2#10000000# or (U and 2#00111111#))); 205 206 -- 16#0020_0000#-16#03FF_FFFF#: 111110xx 10xxxxxx 10xxxxxx 207 -- 10xxxxxx 10xxxxxx 208 209 elsif U <= 16#03FF_FFFF# then 210 Out_Char (Character'Val (2#11111000# or Shift_Right (U, 24))); 211 Out_Char (Character'Val (2#10000000# or (Shift_Right (U, 18) 212 and 2#00111111#))); 213 Out_Char (Character'Val (2#10000000# or (Shift_Right (U, 12) 214 and 2#00111111#))); 215 Out_Char (Character'Val (2#10000000# or (Shift_Right (U, 6) 216 and 2#00111111#))); 217 Out_Char (Character'Val (2#10000000# or (U and 2#00111111#))); 218 219 -- All other cases are invalid character codes, not this includes: 220 221 -- 16#0400_0000#-16#7FFF_FFFF#: 1111110x 10xxxxxx 10xxxxxx 222 -- 10xxxxxx 10xxxxxx 10xxxxxx 223 224 -- since Wide_Wide_Character values cannot exceed 16#3F_FFFF# 225 226 else 227 Bad; 228 end if; 229 230 -- All encoding methods other than UTF-8 231 232 else 233 Non_UTF8 : declare 234 procedure UTF_32_To_String is 235 new UTF_32_To_Char_Sequence (Out_Char); 236 -- Instantiate conversion procedure with above Out_Char routine 237 238 begin 239 UTF_32_To_String 240 (UTF_32_Code (Wide_Wide_Character'Pos (Char)), Encoding_Method); 241 242 exception 243 when Constraint_Error => 244 Bad; 245 end Non_UTF8; 246 end if; 247 end Encode_Wide_Wide_Character; 248 249 -------------- 250 -- Past_End -- 251 -------------- 252 253 procedure Past_End is 254 begin 255 raise Constraint_Error with "past end of string"; 256 end Past_End; 257 258end GNAT.Encode_String; 259