1------------------------------------------------------------------------------
2--                                                                          --
3--                         GNAT RUN-TIME COMPONENTS                         --
4--                                                                          --
5--                    G N A T . E N C O D E _ S T R I N G                   --
6--                                                                          --
7--                                 B o d y                                  --
8--                                                                          --
9--           Copyright (C) 2007-2012, Free Software Foundation, Inc.        --
10--                                                                          --
11-- GNAT is free software;  you can  redistribute it  and/or modify it under --
12-- terms of the  GNU General Public License as published  by the Free Soft- --
13-- ware  Foundation;  either version 3,  or (at your option) any later ver- --
14-- sion.  GNAT is distributed in the hope that it will be useful, but WITH- --
15-- OUT ANY WARRANTY;  without even the  implied warranty of MERCHANTABILITY --
16-- or FITNESS FOR A PARTICULAR PURPOSE.                                     --
17--                                                                          --
18-- As a special exception under Section 7 of GPL version 3, you are granted --
19-- additional permissions described in the GCC Runtime Library Exception,   --
20-- version 3.1, as published by the Free Software Foundation.               --
21--                                                                          --
22-- You should have received a copy of the GNU General Public License and    --
23-- a copy of the GCC Runtime Library Exception along with this program;     --
24-- see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see    --
25-- <http://www.gnu.org/licenses/>.                                          --
26--                                                                          --
27-- GNAT was originally developed  by the GNAT team at  New York University. --
28-- Extensive contributions were provided by Ada Core Technologies Inc.      --
29--                                                                          --
30------------------------------------------------------------------------------
31
32with Interfaces; use Interfaces;
33
34with System.WCh_Con; use System.WCh_Con;
35with System.WCh_Cnv; use System.WCh_Cnv;
36
37package body GNAT.Encode_String is
38
39   -----------------------
40   -- Local Subprograms --
41   -----------------------
42
43   procedure Bad;
44   pragma No_Return (Bad);
45   --  Raise error for bad character code
46
47   procedure Past_End;
48   pragma No_Return (Past_End);
49   --  Raise error for off end of string
50
51   ---------
52   -- Bad --
53   ---------
54
55   procedure Bad is
56   begin
57      raise Constraint_Error with
58        "character cannot be encoded with given Encoding_Method";
59   end Bad;
60
61   ------------------------
62   -- Encode_Wide_String --
63   ------------------------
64
65   function Encode_Wide_String (S : Wide_String) return String is
66      Long : constant Natural := WC_Longest_Sequences (Encoding_Method);
67      Result : String (1 .. S'Length * Long);
68      Length : Natural;
69   begin
70      Encode_Wide_String (S, Result, Length);
71      return Result (1 .. Length);
72   end Encode_Wide_String;
73
74   procedure Encode_Wide_String
75     (S      : Wide_String;
76      Result : out String;
77      Length : out Natural)
78   is
79      Ptr : Natural;
80
81   begin
82      Ptr := S'First;
83      for J in S'Range loop
84         Encode_Wide_Character (S (J), Result, Ptr);
85      end loop;
86
87      Length := Ptr - S'First;
88   end Encode_Wide_String;
89
90   -----------------------------
91   -- Encode_Wide_Wide_String --
92   -----------------------------
93
94   function Encode_Wide_Wide_String (S : Wide_Wide_String) return String is
95      Long : constant Natural := WC_Longest_Sequences (Encoding_Method);
96      Result : String (1 .. S'Length * Long);
97      Length : Natural;
98   begin
99      Encode_Wide_Wide_String (S, Result, Length);
100      return Result (1 .. Length);
101   end Encode_Wide_Wide_String;
102
103   procedure Encode_Wide_Wide_String
104     (S      : Wide_Wide_String;
105      Result : out String;
106      Length : out Natural)
107   is
108      Ptr : Natural;
109
110   begin
111      Ptr := S'First;
112      for J in S'Range loop
113         Encode_Wide_Wide_Character (S (J), Result, Ptr);
114      end loop;
115
116      Length := Ptr - S'First;
117   end Encode_Wide_Wide_String;
118
119   ---------------------------
120   -- Encode_Wide_Character --
121   ---------------------------
122
123   procedure Encode_Wide_Character
124     (Char   : Wide_Character;
125      Result : in out String;
126      Ptr    : in out Natural)
127   is
128   begin
129      Encode_Wide_Wide_Character
130        (Wide_Wide_Character'Val (Wide_Character'Pos (Char)), Result, Ptr);
131
132   exception
133      when Constraint_Error =>
134         Bad;
135   end Encode_Wide_Character;
136
137   --------------------------------
138   -- Encode_Wide_Wide_Character --
139   --------------------------------
140
141   procedure Encode_Wide_Wide_Character
142     (Char   : Wide_Wide_Character;
143      Result : in out String;
144      Ptr    : in out Natural)
145   is
146      U : Unsigned_32;
147
148      procedure Out_Char (C : Character);
149      pragma Inline (Out_Char);
150      --  Procedure to store one character for instantiation below
151
152      --------------
153      -- Out_Char --
154      --------------
155
156      procedure Out_Char (C : Character) is
157      begin
158         if Ptr > Result'Last then
159            Past_End;
160         else
161            Result (Ptr) := C;
162            Ptr := Ptr + 1;
163         end if;
164      end Out_Char;
165
166   --  Start of processing for Encode_Wide_Wide_Character;
167
168   begin
169      --  Efficient code for UTF-8 case
170
171      if Encoding_Method = WCEM_UTF8 then
172
173         --  Note: for details of UTF8 encoding see RFC 3629
174
175         U := Unsigned_32 (Wide_Wide_Character'Pos (Char));
176
177         --  16#00_0000#-16#00_007F#: 0xxxxxxx
178
179         if U <= 16#00_007F# then
180            Out_Char (Character'Val (U));
181
182         --  16#00_0080#-16#00_07FF#: 110xxxxx 10xxxxxx
183
184         elsif U <= 16#00_07FF# then
185            Out_Char (Character'Val (2#11000000# or Shift_Right (U, 6)));
186            Out_Char (Character'Val (2#10000000# or (U and 2#00111111#)));
187
188         --  16#00_0800#-16#00_FFFF#: 1110xxxx 10xxxxxx 10xxxxxx
189
190         elsif U <= 16#00_FFFF# then
191            Out_Char (Character'Val (2#11100000# or Shift_Right (U, 12)));
192            Out_Char (Character'Val (2#10000000# or (Shift_Right (U, 6)
193                                                          and 2#00111111#)));
194            Out_Char (Character'Val (2#10000000# or (U and 2#00111111#)));
195
196         --  16#01_0000#-16#10_FFFF#: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
197
198         elsif U <= 16#10_FFFF# then
199            Out_Char (Character'Val (2#11110000# or Shift_Right (U, 18)));
200            Out_Char (Character'Val (2#10000000# or (Shift_Right (U, 12)
201                                                          and 2#00111111#)));
202            Out_Char (Character'Val (2#10000000# or (Shift_Right (U, 6)
203                                                       and 2#00111111#)));
204            Out_Char (Character'Val (2#10000000# or (U and 2#00111111#)));
205
206         --  16#0020_0000#-16#03FF_FFFF#: 111110xx 10xxxxxx 10xxxxxx
207         --                               10xxxxxx 10xxxxxx
208
209         elsif U <= 16#03FF_FFFF# then
210            Out_Char (Character'Val (2#11111000# or Shift_Right (U, 24)));
211            Out_Char (Character'Val (2#10000000# or (Shift_Right (U, 18)
212                                                       and 2#00111111#)));
213            Out_Char (Character'Val (2#10000000# or (Shift_Right (U, 12)
214                                                       and 2#00111111#)));
215            Out_Char (Character'Val (2#10000000# or (Shift_Right (U, 6)
216                                                       and 2#00111111#)));
217            Out_Char (Character'Val (2#10000000# or (U and 2#00111111#)));
218
219         --  All other cases are invalid character codes, not this includes:
220
221         --  16#0400_0000#-16#7FFF_FFFF#: 1111110x 10xxxxxx 10xxxxxx
222         --                               10xxxxxx 10xxxxxx 10xxxxxx
223
224         --  since Wide_Wide_Character values cannot exceed 16#3F_FFFF#
225
226         else
227            Bad;
228         end if;
229
230      --  All encoding methods other than UTF-8
231
232      else
233         Non_UTF8 : declare
234            procedure UTF_32_To_String is
235              new UTF_32_To_Char_Sequence (Out_Char);
236            --  Instantiate conversion procedure with above Out_Char routine
237
238         begin
239            UTF_32_To_String
240              (UTF_32_Code (Wide_Wide_Character'Pos (Char)), Encoding_Method);
241
242         exception
243            when Constraint_Error =>
244               Bad;
245         end Non_UTF8;
246      end if;
247   end Encode_Wide_Wide_Character;
248
249   --------------
250   -- Past_End --
251   --------------
252
253   procedure Past_End is
254   begin
255      raise Constraint_Error with "past end of string";
256   end Past_End;
257
258end GNAT.Encode_String;
259