1------------------------------------------------------------------------------
2--                                                                          --
3--                         GNAT RUN-TIME COMPONENTS                         --
4--                                                                          --
5--              A D A . S T R I N G S . U T F _ E N C O D I N G             --
6--                                                                          --
7--                                 B o d y                                  --
8--                                                                          --
9--             Copyright (C) 2010, Free Software Foundation, Inc.           --
10--                                                                          --
11-- GNAT is free software;  you can  redistribute it  and/or modify it under --
12-- terms of the  GNU General Public License as published  by the Free Soft- --
13-- ware  Foundation;  either version 3,  or (at your option) any later ver- --
14-- sion.  GNAT is distributed in the hope that it will be useful, but WITH- --
15-- OUT ANY WARRANTY;  without even the  implied warranty of MERCHANTABILITY --
16-- or FITNESS FOR A PARTICULAR PURPOSE.                                     --
17--                                                                          --
18-- As a special exception under Section 7 of GPL version 3, you are granted --
19-- additional permissions described in the GCC Runtime Library Exception,   --
20-- version 3.1, as published by the Free Software Foundation.               --
21--                                                                          --
22-- You should have received a copy of the GNU General Public License and    --
23-- a copy of the GCC Runtime Library Exception along with this program;     --
24-- see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see    --
25-- <http://www.gnu.org/licenses/>.                                          --
26--                                                                          --
27-- GNAT was originally developed  by the GNAT team at  New York University. --
28-- Extensive contributions were provided by Ada Core Technologies Inc.      --
29--                                                                          --
30------------------------------------------------------------------------------
31
32package body Ada.Strings.UTF_Encoding is
33   use Interfaces;
34
35   --------------
36   -- Encoding --
37   --------------
38
39   function Encoding
40     (Item    : UTF_String;
41      Default : Encoding_Scheme := UTF_8) return Encoding_Scheme
42   is
43   begin
44      if Item'Length >= 2 then
45         if Item (Item'First .. Item'First + 1) = BOM_16BE then
46            return UTF_16BE;
47
48         elsif Item (Item'First .. Item'First + 1) = BOM_16LE then
49            return UTF_16LE;
50
51         elsif Item'Length >= 3
52           and then Item (Item'First .. Item'First + 2) = BOM_8
53         then
54            return UTF_8;
55         end if;
56      end if;
57
58      return Default;
59   end Encoding;
60
61   -----------------
62   -- From_UTF_16 --
63   -----------------
64
65   function From_UTF_16
66     (Item          : UTF_16_Wide_String;
67      Output_Scheme : UTF_XE_Encoding;
68      Output_BOM    : Boolean := False) return UTF_String
69   is
70      BSpace : constant Natural := 2 * Boolean'Pos (Output_BOM);
71      Result : UTF_String (1 .. 2 * Item'Length + BSpace);
72      Len    : Natural;
73      C      : Unsigned_16;
74      Iptr   : Natural;
75
76   begin
77      if Output_BOM then
78         Result (1 .. 2) :=
79           (if Output_Scheme = UTF_16BE then BOM_16BE else BOM_16LE);
80         Len := 2;
81      else
82         Len := 0;
83      end if;
84
85      --  Skip input BOM
86
87      Iptr := Item'First;
88
89      if Iptr <= Item'Last and then Item (Iptr) = BOM_16 (1) then
90         Iptr := Iptr + 1;
91      end if;
92
93      --  UTF-16BE case
94
95      if Output_Scheme = UTF_16BE then
96         while Iptr <= Item'Last loop
97            C := To_Unsigned_16 (Item (Iptr));
98            Result (Len + 1) := Character'Val (Shift_Right (C, 8));
99            Result (Len + 2) := Character'Val (C and 16#00_FF#);
100            Len := Len + 2;
101            Iptr := Iptr + 1;
102         end loop;
103
104      --  UTF-16LE case
105
106      else
107         while Iptr <= Item'Last loop
108            C := To_Unsigned_16 (Item (Iptr));
109            Result (Len + 1) := Character'Val (C and 16#00_FF#);
110            Result (Len + 2) := Character'Val (Shift_Right (C, 8));
111            Len := Len + 2;
112            Iptr := Iptr + 1;
113         end loop;
114      end if;
115
116      return Result (1 .. Len);
117   end From_UTF_16;
118
119   --------------------------
120   -- Raise_Encoding_Error --
121   --------------------------
122
123   procedure Raise_Encoding_Error (Index : Natural) is
124      Val : constant String := Index'Img;
125   begin
126      raise Encoding_Error with
127        "bad input at Item (" & Val (Val'First + 1 .. Val'Last) & ')';
128   end Raise_Encoding_Error;
129
130   ---------------
131   -- To_UTF_16 --
132   ---------------
133
134   function To_UTF_16
135     (Item         : UTF_String;
136      Input_Scheme : UTF_XE_Encoding;
137      Output_BOM   : Boolean := False) return UTF_16_Wide_String
138   is
139      Result : UTF_16_Wide_String (1 .. Item'Length / 2 + 1);
140      Len    : Natural;
141      Iptr   : Natural;
142
143   begin
144      if Item'Length mod 2 /= 0 then
145         raise Encoding_Error with "UTF-16BE/LE string has odd length";
146      end if;
147
148      --  Deal with input BOM, skip if OK, error if bad BOM
149
150      Iptr := Item'First;
151
152      if Item'Length >= 2 then
153         if Item (Iptr .. Iptr + 1) = BOM_16BE then
154            if Input_Scheme = UTF_16BE then
155               Iptr := Iptr + 2;
156            else
157               Raise_Encoding_Error (Iptr);
158            end if;
159
160         elsif Item (Iptr .. Iptr + 1) = BOM_16LE then
161            if Input_Scheme = UTF_16LE then
162               Iptr := Iptr + 2;
163            else
164               Raise_Encoding_Error (Iptr);
165            end if;
166
167         elsif Item'Length >= 3 and then Item (Iptr .. Iptr + 2) = BOM_8 then
168            Raise_Encoding_Error (Iptr);
169         end if;
170      end if;
171
172      --  Output BOM if specified
173
174      if Output_BOM then
175         Result (1) := BOM_16 (1);
176         Len := 1;
177      else
178         Len := 0;
179      end if;
180
181      --  UTF-16BE case
182
183      if Input_Scheme = UTF_16BE then
184         while Iptr < Item'Last loop
185            Len := Len + 1;
186            Result (Len) :=
187              Wide_Character'Val
188                (Character'Pos (Item (Iptr)) * 256 +
189                   Character'Pos (Item (Iptr + 1)));
190            Iptr := Iptr + 2;
191         end loop;
192
193      --  UTF-16LE case
194
195      else
196         while Iptr < Item'Last loop
197            Len := Len + 1;
198            Result (Len) :=
199              Wide_Character'Val
200                (Character'Pos (Item (Iptr)) +
201                 Character'Pos (Item (Iptr + 1)) * 256);
202            Iptr := Iptr + 2;
203         end loop;
204      end if;
205
206      return Result (1 .. Len);
207   end To_UTF_16;
208
209end Ada.Strings.UTF_Encoding;
210