1------------------------------------------------------------------------------
2--                                                                          --
3--                         GNAT RUN-TIME COMPONENTS                         --
4--                                                                          --
5--              A D A . C H A R A C T E R S . H A N D L I N G               --
6--                                                                          --
7--                                 B o d y                                  --
8--                                                                          --
9--          Copyright (C) 1992-2012, Free Software Foundation, Inc.         --
10--                                                                          --
11-- GNAT is free software;  you can  redistribute it  and/or modify it under --
12-- terms of the  GNU General Public License as published  by the Free Soft- --
13-- ware  Foundation;  either version 3,  or (at your option) any later ver- --
14-- sion.  GNAT is distributed in the hope that it will be useful, but WITH- --
15-- OUT ANY WARRANTY;  without even the  implied warranty of MERCHANTABILITY --
16-- or FITNESS FOR A PARTICULAR PURPOSE.                                     --
17--                                                                          --
18-- As a special exception under Section 7 of GPL version 3, you are granted --
19-- additional permissions described in the GCC Runtime Library Exception,   --
20-- version 3.1, as published by the Free Software Foundation.               --
21--                                                                          --
22-- You should have received a copy of the GNU General Public License and    --
23-- a copy of the GCC Runtime Library Exception along with this program;     --
24-- see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see    --
25-- <http://www.gnu.org/licenses/>.                                          --
26--                                                                          --
27-- GNAT was originally developed  by the GNAT team at  New York University. --
28-- Extensive contributions were provided by Ada Core Technologies Inc.      --
29--                                                                          --
30------------------------------------------------------------------------------
31
32with Ada.Characters.Latin_1;     use Ada.Characters.Latin_1;
33with Ada.Strings.Maps;           use Ada.Strings.Maps;
34with Ada.Strings.Maps.Constants; use Ada.Strings.Maps.Constants;
35
36package body Ada.Characters.Handling is
37
38   ------------------------------------
39   -- Character Classification Table --
40   ------------------------------------
41
42   type Character_Flags is mod 256;
43   for Character_Flags'Size use 8;
44
45   Control    : constant Character_Flags := 1;
46   Lower      : constant Character_Flags := 2;
47   Upper      : constant Character_Flags := 4;
48   Basic      : constant Character_Flags := 8;
49   Hex_Digit  : constant Character_Flags := 16;
50   Digit      : constant Character_Flags := 32;
51   Special    : constant Character_Flags := 64;
52
53   Letter     : constant Character_Flags := Lower or Upper;
54   Alphanum   : constant Character_Flags := Letter or Digit;
55   Graphic    : constant Character_Flags := Alphanum or Special;
56
57   Char_Map : constant array (Character) of Character_Flags :=
58   (
59     NUL                         => Control,
60     SOH                         => Control,
61     STX                         => Control,
62     ETX                         => Control,
63     EOT                         => Control,
64     ENQ                         => Control,
65     ACK                         => Control,
66     BEL                         => Control,
67     BS                          => Control,
68     HT                          => Control,
69     LF                          => Control,
70     VT                          => Control,
71     FF                          => Control,
72     CR                          => Control,
73     SO                          => Control,
74     SI                          => Control,
75
76     DLE                         => Control,
77     DC1                         => Control,
78     DC2                         => Control,
79     DC3                         => Control,
80     DC4                         => Control,
81     NAK                         => Control,
82     SYN                         => Control,
83     ETB                         => Control,
84     CAN                         => Control,
85     EM                          => Control,
86     SUB                         => Control,
87     ESC                         => Control,
88     FS                          => Control,
89     GS                          => Control,
90     RS                          => Control,
91     US                          => Control,
92
93     Space                       => Special,
94     Exclamation                 => Special,
95     Quotation                   => Special,
96     Number_Sign                 => Special,
97     Dollar_Sign                 => Special,
98     Percent_Sign                => Special,
99     Ampersand                   => Special,
100     Apostrophe                  => Special,
101     Left_Parenthesis            => Special,
102     Right_Parenthesis           => Special,
103     Asterisk                    => Special,
104     Plus_Sign                   => Special,
105     Comma                       => Special,
106     Hyphen                      => Special,
107     Full_Stop                   => Special,
108     Solidus                     => Special,
109
110     '0' .. '9'                  => Digit + Hex_Digit,
111
112     Colon                       => Special,
113     Semicolon                   => Special,
114     Less_Than_Sign              => Special,
115     Equals_Sign                 => Special,
116     Greater_Than_Sign           => Special,
117     Question                    => Special,
118     Commercial_At               => Special,
119
120     'A' .. 'F'                  => Upper + Basic + Hex_Digit,
121     'G' .. 'Z'                  => Upper + Basic,
122
123     Left_Square_Bracket         => Special,
124     Reverse_Solidus             => Special,
125     Right_Square_Bracket        => Special,
126     Circumflex                  => Special,
127     Low_Line                    => Special,
128     Grave                       => Special,
129
130     'a' .. 'f'                  => Lower + Basic + Hex_Digit,
131     'g' .. 'z'                  => Lower + Basic,
132
133     Left_Curly_Bracket          => Special,
134     Vertical_Line               => Special,
135     Right_Curly_Bracket         => Special,
136     Tilde                       => Special,
137
138     DEL                         => Control,
139     Reserved_128                => Control,
140     Reserved_129                => Control,
141     BPH                         => Control,
142     NBH                         => Control,
143     Reserved_132                => Control,
144     NEL                         => Control,
145     SSA                         => Control,
146     ESA                         => Control,
147     HTS                         => Control,
148     HTJ                         => Control,
149     VTS                         => Control,
150     PLD                         => Control,
151     PLU                         => Control,
152     RI                          => Control,
153     SS2                         => Control,
154     SS3                         => Control,
155
156     DCS                         => Control,
157     PU1                         => Control,
158     PU2                         => Control,
159     STS                         => Control,
160     CCH                         => Control,
161     MW                          => Control,
162     SPA                         => Control,
163     EPA                         => Control,
164
165     SOS                         => Control,
166     Reserved_153                => Control,
167     SCI                         => Control,
168     CSI                         => Control,
169     ST                          => Control,
170     OSC                         => Control,
171     PM                          => Control,
172     APC                         => Control,
173
174     No_Break_Space              => Special,
175     Inverted_Exclamation        => Special,
176     Cent_Sign                   => Special,
177     Pound_Sign                  => Special,
178     Currency_Sign               => Special,
179     Yen_Sign                    => Special,
180     Broken_Bar                  => Special,
181     Section_Sign                => Special,
182     Diaeresis                   => Special,
183     Copyright_Sign              => Special,
184     Feminine_Ordinal_Indicator  => Special,
185     Left_Angle_Quotation        => Special,
186     Not_Sign                    => Special,
187     Soft_Hyphen                 => Special,
188     Registered_Trade_Mark_Sign  => Special,
189     Macron                      => Special,
190     Degree_Sign                 => Special,
191     Plus_Minus_Sign             => Special,
192     Superscript_Two             => Special,
193     Superscript_Three           => Special,
194     Acute                       => Special,
195     Micro_Sign                  => Special,
196     Pilcrow_Sign                => Special,
197     Middle_Dot                  => Special,
198     Cedilla                     => Special,
199     Superscript_One             => Special,
200     Masculine_Ordinal_Indicator => Special,
201     Right_Angle_Quotation       => Special,
202     Fraction_One_Quarter        => Special,
203     Fraction_One_Half           => Special,
204     Fraction_Three_Quarters     => Special,
205     Inverted_Question           => Special,
206
207     UC_A_Grave                  => Upper,
208     UC_A_Acute                  => Upper,
209     UC_A_Circumflex             => Upper,
210     UC_A_Tilde                  => Upper,
211     UC_A_Diaeresis              => Upper,
212     UC_A_Ring                   => Upper,
213     UC_AE_Diphthong             => Upper + Basic,
214     UC_C_Cedilla                => Upper,
215     UC_E_Grave                  => Upper,
216     UC_E_Acute                  => Upper,
217     UC_E_Circumflex             => Upper,
218     UC_E_Diaeresis              => Upper,
219     UC_I_Grave                  => Upper,
220     UC_I_Acute                  => Upper,
221     UC_I_Circumflex             => Upper,
222     UC_I_Diaeresis              => Upper,
223     UC_Icelandic_Eth            => Upper + Basic,
224     UC_N_Tilde                  => Upper,
225     UC_O_Grave                  => Upper,
226     UC_O_Acute                  => Upper,
227     UC_O_Circumflex             => Upper,
228     UC_O_Tilde                  => Upper,
229     UC_O_Diaeresis              => Upper,
230
231     Multiplication_Sign         => Special,
232
233     UC_O_Oblique_Stroke         => Upper,
234     UC_U_Grave                  => Upper,
235     UC_U_Acute                  => Upper,
236     UC_U_Circumflex             => Upper,
237     UC_U_Diaeresis              => Upper,
238     UC_Y_Acute                  => Upper,
239     UC_Icelandic_Thorn          => Upper + Basic,
240
241     LC_German_Sharp_S           => Lower + Basic,
242     LC_A_Grave                  => Lower,
243     LC_A_Acute                  => Lower,
244     LC_A_Circumflex             => Lower,
245     LC_A_Tilde                  => Lower,
246     LC_A_Diaeresis              => Lower,
247     LC_A_Ring                   => Lower,
248     LC_AE_Diphthong             => Lower + Basic,
249     LC_C_Cedilla                => Lower,
250     LC_E_Grave                  => Lower,
251     LC_E_Acute                  => Lower,
252     LC_E_Circumflex             => Lower,
253     LC_E_Diaeresis              => Lower,
254     LC_I_Grave                  => Lower,
255     LC_I_Acute                  => Lower,
256     LC_I_Circumflex             => Lower,
257     LC_I_Diaeresis              => Lower,
258     LC_Icelandic_Eth            => Lower + Basic,
259     LC_N_Tilde                  => Lower,
260     LC_O_Grave                  => Lower,
261     LC_O_Acute                  => Lower,
262     LC_O_Circumflex             => Lower,
263     LC_O_Tilde                  => Lower,
264     LC_O_Diaeresis              => Lower,
265
266     Division_Sign               => Special,
267
268     LC_O_Oblique_Stroke         => Lower,
269     LC_U_Grave                  => Lower,
270     LC_U_Acute                  => Lower,
271     LC_U_Circumflex             => Lower,
272     LC_U_Diaeresis              => Lower,
273     LC_Y_Acute                  => Lower,
274     LC_Icelandic_Thorn          => Lower + Basic,
275     LC_Y_Diaeresis              => Lower
276   );
277
278   ---------------------
279   -- Is_Alphanumeric --
280   ---------------------
281
282   function Is_Alphanumeric (Item : Character) return Boolean is
283   begin
284      return (Char_Map (Item) and Alphanum) /= 0;
285   end Is_Alphanumeric;
286
287   --------------
288   -- Is_Basic --
289   --------------
290
291   function Is_Basic (Item : Character) return Boolean is
292   begin
293      return (Char_Map (Item) and Basic) /= 0;
294   end Is_Basic;
295
296   ------------------
297   -- Is_Character --
298   ------------------
299
300   function Is_Character (Item : Wide_Character) return Boolean is
301   begin
302      return Wide_Character'Pos (Item) < 256;
303   end Is_Character;
304
305   ----------------
306   -- Is_Control --
307   ----------------
308
309   function Is_Control (Item : Character) return Boolean is
310   begin
311      return (Char_Map (Item) and Control) /= 0;
312   end Is_Control;
313
314   --------------
315   -- Is_Digit --
316   --------------
317
318   function Is_Digit (Item : Character) return Boolean is
319   begin
320      return Item in '0' .. '9';
321   end Is_Digit;
322
323   ----------------
324   -- Is_Graphic --
325   ----------------
326
327   function Is_Graphic (Item : Character) return Boolean is
328   begin
329      return (Char_Map (Item) and Graphic) /= 0;
330   end Is_Graphic;
331
332   --------------------------
333   -- Is_Hexadecimal_Digit --
334   --------------------------
335
336   function Is_Hexadecimal_Digit (Item : Character) return Boolean is
337   begin
338      return (Char_Map (Item) and Hex_Digit) /= 0;
339   end Is_Hexadecimal_Digit;
340
341   ----------------
342   -- Is_ISO_646 --
343   ----------------
344
345   function Is_ISO_646 (Item : Character) return Boolean is
346   begin
347      return Item in ISO_646;
348   end Is_ISO_646;
349
350   --  Note: much more efficient coding of the following function is possible
351   --  by testing several 16#80# bits in a complete word in a single operation
352
353   function Is_ISO_646 (Item : String) return Boolean is
354   begin
355      for J in Item'Range loop
356         if Item (J) not in ISO_646 then
357            return False;
358         end if;
359      end loop;
360
361      return True;
362   end Is_ISO_646;
363
364   ---------------
365   -- Is_Letter --
366   ---------------
367
368   function Is_Letter (Item : Character) return Boolean is
369   begin
370      return (Char_Map (Item) and Letter) /= 0;
371   end Is_Letter;
372
373   --------------
374   -- Is_Lower --
375   --------------
376
377   function Is_Lower (Item : Character) return Boolean is
378   begin
379      return (Char_Map (Item) and Lower) /= 0;
380   end Is_Lower;
381
382   ----------------
383   -- Is_Special --
384   ----------------
385
386   function Is_Special (Item : Character) return Boolean is
387   begin
388      return (Char_Map (Item) and Special) /= 0;
389   end Is_Special;
390
391   ---------------
392   -- Is_String --
393   ---------------
394
395   function Is_String (Item : Wide_String) return Boolean is
396   begin
397      for J in Item'Range loop
398         if Wide_Character'Pos (Item (J)) >= 256 then
399            return False;
400         end if;
401      end loop;
402
403      return True;
404   end Is_String;
405
406   --------------
407   -- Is_Upper --
408   --------------
409
410   function Is_Upper (Item : Character) return Boolean is
411   begin
412      return (Char_Map (Item) and Upper) /= 0;
413   end Is_Upper;
414
415   --------------
416   -- To_Basic --
417   --------------
418
419   function To_Basic (Item : Character) return Character is
420   begin
421      return Value (Basic_Map, Item);
422   end To_Basic;
423
424   function To_Basic (Item : String) return String is
425   begin
426      return Result : String (1 .. Item'Length) do
427         for J in Item'Range loop
428            Result (J - (Item'First - 1)) := Value (Basic_Map, Item (J));
429         end loop;
430      end return;
431   end To_Basic;
432
433   ------------------
434   -- To_Character --
435   ------------------
436
437   function To_Character
438     (Item       : Wide_Character;
439      Substitute : Character := ' ') return Character
440   is
441   begin
442      if Is_Character (Item) then
443         return Character'Val (Wide_Character'Pos (Item));
444      else
445         return Substitute;
446      end if;
447   end To_Character;
448
449   ----------------
450   -- To_ISO_646 --
451   ----------------
452
453   function To_ISO_646
454     (Item       : Character;
455      Substitute : ISO_646 := ' ') return ISO_646
456   is
457   begin
458      return (if Item in ISO_646 then Item else Substitute);
459   end To_ISO_646;
460
461   function To_ISO_646
462     (Item       : String;
463      Substitute : ISO_646 := ' ') return String
464   is
465      Result : String (1 .. Item'Length);
466
467   begin
468      for J in Item'Range loop
469         Result (J - (Item'First - 1)) :=
470           (if Item (J) in ISO_646 then Item (J) else Substitute);
471      end loop;
472
473      return Result;
474   end To_ISO_646;
475
476   --------------
477   -- To_Lower --
478   --------------
479
480   function To_Lower (Item : Character) return Character is
481   begin
482      return Value (Lower_Case_Map, Item);
483   end To_Lower;
484
485   function To_Lower (Item : String) return String is
486   begin
487      return Result : String (1 .. Item'Length) do
488         for J in Item'Range loop
489            Result (J - (Item'First - 1)) := Value (Lower_Case_Map, Item (J));
490         end loop;
491      end return;
492   end To_Lower;
493
494   ---------------
495   -- To_String --
496   ---------------
497
498   function To_String
499     (Item       : Wide_String;
500      Substitute : Character := ' ') return String
501   is
502      Result : String (1 .. Item'Length);
503
504   begin
505      for J in Item'Range loop
506         Result (J - (Item'First - 1)) := To_Character (Item (J), Substitute);
507      end loop;
508
509      return Result;
510   end To_String;
511
512   --------------
513   -- To_Upper --
514   --------------
515
516   function To_Upper
517     (Item : Character) return Character
518   is
519   begin
520      return Value (Upper_Case_Map, Item);
521   end To_Upper;
522
523   function To_Upper
524     (Item : String) return String
525   is
526   begin
527      return Result : String (1 .. Item'Length) do
528         for J in Item'Range loop
529            Result (J - (Item'First - 1)) := Value (Upper_Case_Map, Item (J));
530         end loop;
531      end return;
532   end To_Upper;
533
534   -----------------------
535   -- To_Wide_Character --
536   -----------------------
537
538   function To_Wide_Character
539     (Item : Character) return Wide_Character
540   is
541   begin
542      return Wide_Character'Val (Character'Pos (Item));
543   end To_Wide_Character;
544
545   --------------------
546   -- To_Wide_String --
547   --------------------
548
549   function To_Wide_String
550     (Item : String) return Wide_String
551   is
552      Result : Wide_String (1 .. Item'Length);
553
554   begin
555      for J in Item'Range loop
556         Result (J - (Item'First - 1)) := To_Wide_Character (Item (J));
557      end loop;
558
559      return Result;
560   end To_Wide_String;
561
562end Ada.Characters.Handling;
563