1------------------------------------------------------------------------------ 2-- -- 3-- GNAT RUN-TIME COMPONENTS -- 4-- -- 5-- A D A . S T R I N G S . U T F _ E N C O D I N G -- 6-- -- 7-- B o d y -- 8-- -- 9-- Copyright (C) 2010, Free Software Foundation, Inc. -- 10-- -- 11-- GNAT is free software; you can redistribute it and/or modify it under -- 12-- terms of the GNU General Public License as published by the Free Soft- -- 13-- ware Foundation; either version 3, or (at your option) any later ver- -- 14-- sion. GNAT is distributed in the hope that it will be useful, but WITH- -- 15-- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -- 16-- or FITNESS FOR A PARTICULAR PURPOSE. -- 17-- -- 18-- As a special exception under Section 7 of GPL version 3, you are granted -- 19-- additional permissions described in the GCC Runtime Library Exception, -- 20-- version 3.1, as published by the Free Software Foundation. -- 21-- -- 22-- You should have received a copy of the GNU General Public License and -- 23-- a copy of the GCC Runtime Library Exception along with this program; -- 24-- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -- 25-- <http://www.gnu.org/licenses/>. -- 26-- -- 27-- GNAT was originally developed by the GNAT team at New York University. -- 28-- Extensive contributions were provided by Ada Core Technologies Inc. -- 29-- -- 30------------------------------------------------------------------------------ 31 32package body Ada.Strings.UTF_Encoding is 33 use Interfaces; 34 35 -------------- 36 -- Encoding -- 37 -------------- 38 39 function Encoding 40 (Item : UTF_String; 41 Default : Encoding_Scheme := UTF_8) return Encoding_Scheme 42 is 43 begin 44 if Item'Length >= 2 then 45 if Item (Item'First .. Item'First + 1) = BOM_16BE then 46 return UTF_16BE; 47 48 elsif Item (Item'First .. Item'First + 1) = BOM_16LE then 49 return UTF_16LE; 50 51 elsif Item'Length >= 3 52 and then Item (Item'First .. Item'First + 2) = BOM_8 53 then 54 return UTF_8; 55 end if; 56 end if; 57 58 return Default; 59 end Encoding; 60 61 ----------------- 62 -- From_UTF_16 -- 63 ----------------- 64 65 function From_UTF_16 66 (Item : UTF_16_Wide_String; 67 Output_Scheme : UTF_XE_Encoding; 68 Output_BOM : Boolean := False) return UTF_String 69 is 70 BSpace : constant Natural := 2 * Boolean'Pos (Output_BOM); 71 Result : UTF_String (1 .. 2 * Item'Length + BSpace); 72 Len : Natural; 73 C : Unsigned_16; 74 Iptr : Natural; 75 76 begin 77 if Output_BOM then 78 Result (1 .. 2) := 79 (if Output_Scheme = UTF_16BE then BOM_16BE else BOM_16LE); 80 Len := 2; 81 else 82 Len := 0; 83 end if; 84 85 -- Skip input BOM 86 87 Iptr := Item'First; 88 89 if Iptr <= Item'Last and then Item (Iptr) = BOM_16 (1) then 90 Iptr := Iptr + 1; 91 end if; 92 93 -- UTF-16BE case 94 95 if Output_Scheme = UTF_16BE then 96 while Iptr <= Item'Last loop 97 C := To_Unsigned_16 (Item (Iptr)); 98 Result (Len + 1) := Character'Val (Shift_Right (C, 8)); 99 Result (Len + 2) := Character'Val (C and 16#00_FF#); 100 Len := Len + 2; 101 Iptr := Iptr + 1; 102 end loop; 103 104 -- UTF-16LE case 105 106 else 107 while Iptr <= Item'Last loop 108 C := To_Unsigned_16 (Item (Iptr)); 109 Result (Len + 1) := Character'Val (C and 16#00_FF#); 110 Result (Len + 2) := Character'Val (Shift_Right (C, 8)); 111 Len := Len + 2; 112 Iptr := Iptr + 1; 113 end loop; 114 end if; 115 116 return Result (1 .. Len); 117 end From_UTF_16; 118 119 -------------------------- 120 -- Raise_Encoding_Error -- 121 -------------------------- 122 123 procedure Raise_Encoding_Error (Index : Natural) is 124 Val : constant String := Index'Img; 125 begin 126 raise Encoding_Error with 127 "bad input at Item (" & Val (Val'First + 1 .. Val'Last) & ')'; 128 end Raise_Encoding_Error; 129 130 --------------- 131 -- To_UTF_16 -- 132 --------------- 133 134 function To_UTF_16 135 (Item : UTF_String; 136 Input_Scheme : UTF_XE_Encoding; 137 Output_BOM : Boolean := False) return UTF_16_Wide_String 138 is 139 Result : UTF_16_Wide_String (1 .. Item'Length / 2 + 1); 140 Len : Natural; 141 Iptr : Natural; 142 143 begin 144 if Item'Length mod 2 /= 0 then 145 raise Encoding_Error with "UTF-16BE/LE string has odd length"; 146 end if; 147 148 -- Deal with input BOM, skip if OK, error if bad BOM 149 150 Iptr := Item'First; 151 152 if Item'Length >= 2 then 153 if Item (Iptr .. Iptr + 1) = BOM_16BE then 154 if Input_Scheme = UTF_16BE then 155 Iptr := Iptr + 2; 156 else 157 Raise_Encoding_Error (Iptr); 158 end if; 159 160 elsif Item (Iptr .. Iptr + 1) = BOM_16LE then 161 if Input_Scheme = UTF_16LE then 162 Iptr := Iptr + 2; 163 else 164 Raise_Encoding_Error (Iptr); 165 end if; 166 167 elsif Item'Length >= 3 and then Item (Iptr .. Iptr + 2) = BOM_8 then 168 Raise_Encoding_Error (Iptr); 169 end if; 170 end if; 171 172 -- Output BOM if specified 173 174 if Output_BOM then 175 Result (1) := BOM_16 (1); 176 Len := 1; 177 else 178 Len := 0; 179 end if; 180 181 -- UTF-16BE case 182 183 if Input_Scheme = UTF_16BE then 184 while Iptr < Item'Last loop 185 Len := Len + 1; 186 Result (Len) := 187 Wide_Character'Val 188 (Character'Pos (Item (Iptr)) * 256 + 189 Character'Pos (Item (Iptr + 1))); 190 Iptr := Iptr + 2; 191 end loop; 192 193 -- UTF-16LE case 194 195 else 196 while Iptr < Item'Last loop 197 Len := Len + 1; 198 Result (Len) := 199 Wide_Character'Val 200 (Character'Pos (Item (Iptr)) + 201 Character'Pos (Item (Iptr + 1)) * 256); 202 Iptr := Iptr + 2; 203 end loop; 204 end if; 205 206 return Result (1 .. Len); 207 end To_UTF_16; 208 209end Ada.Strings.UTF_Encoding; 210