1------------------------------------------------------------------------------
2--                                                                          --
3--                         GNAT RUN-TIME COMPONENTS                         --
4--                                                                          --
5--              A D A . C H A R A C T E R S . H A N D L I N G               --
6--                                                                          --
7--                                 B o d y                                  --
8--                                                                          --
9--          Copyright (C) 1992-2013, Free Software Foundation, Inc.         --
10--                                                                          --
11-- GNAT is free software;  you can  redistribute it  and/or modify it under --
12-- terms of the  GNU General Public License as published  by the Free Soft- --
13-- ware  Foundation;  either version 3,  or (at your option) any later ver- --
14-- sion.  GNAT is distributed in the hope that it will be useful, but WITH- --
15-- OUT ANY WARRANTY;  without even the  implied warranty of MERCHANTABILITY --
16-- or FITNESS FOR A PARTICULAR PURPOSE.                                     --
17--                                                                          --
18-- As a special exception under Section 7 of GPL version 3, you are granted --
19-- additional permissions described in the GCC Runtime Library Exception,   --
20-- version 3.1, as published by the Free Software Foundation.               --
21--                                                                          --
22-- You should have received a copy of the GNU General Public License and    --
23-- a copy of the GCC Runtime Library Exception along with this program;     --
24-- see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see    --
25-- <http://www.gnu.org/licenses/>.                                          --
26--                                                                          --
27-- GNAT was originally developed  by the GNAT team at  New York University. --
28-- Extensive contributions were provided by Ada Core Technologies Inc.      --
29--                                                                          --
30------------------------------------------------------------------------------
31
32with Ada.Characters.Latin_1;     use Ada.Characters.Latin_1;
33with Ada.Strings.Maps;           use Ada.Strings.Maps;
34with Ada.Strings.Maps.Constants; use Ada.Strings.Maps.Constants;
35
36package body Ada.Characters.Handling is
37
38   ------------------------------------
39   -- Character Classification Table --
40   ------------------------------------
41
42   type Character_Flags is mod 256;
43   for Character_Flags'Size use 8;
44
45   Control    : constant Character_Flags := 1;
46   Lower      : constant Character_Flags := 2;
47   Upper      : constant Character_Flags := 4;
48   Basic      : constant Character_Flags := 8;
49   Hex_Digit  : constant Character_Flags := 16;
50   Digit      : constant Character_Flags := 32;
51   Special    : constant Character_Flags := 64;
52   Line_Term  : constant Character_Flags := 128;
53
54   Letter     : constant Character_Flags := Lower or Upper;
55   Alphanum   : constant Character_Flags := Letter or Digit;
56   Graphic    : constant Character_Flags := Alphanum or Special;
57
58   Char_Map : constant array (Character) of Character_Flags :=
59   (
60     NUL                         => Control,
61     SOH                         => Control,
62     STX                         => Control,
63     ETX                         => Control,
64     EOT                         => Control,
65     ENQ                         => Control,
66     ACK                         => Control,
67     BEL                         => Control,
68     BS                          => Control,
69     HT                          => Control,
70     LF                          => Control + Line_Term,
71     VT                          => Control + Line_Term,
72     FF                          => Control + Line_Term,
73     CR                          => Control + Line_Term,
74     SO                          => Control,
75     SI                          => Control,
76
77     DLE                         => Control,
78     DC1                         => Control,
79     DC2                         => Control,
80     DC3                         => Control,
81     DC4                         => Control,
82     NAK                         => Control,
83     SYN                         => Control,
84     ETB                         => Control,
85     CAN                         => Control,
86     EM                          => Control,
87     SUB                         => Control,
88     ESC                         => Control,
89     FS                          => Control,
90     GS                          => Control,
91     RS                          => Control,
92     US                          => Control,
93
94     Space                       => Special,
95     Exclamation                 => Special,
96     Quotation                   => Special,
97     Number_Sign                 => Special,
98     Dollar_Sign                 => Special,
99     Percent_Sign                => Special,
100     Ampersand                   => Special,
101     Apostrophe                  => Special,
102     Left_Parenthesis            => Special,
103     Right_Parenthesis           => Special,
104     Asterisk                    => Special,
105     Plus_Sign                   => Special,
106     Comma                       => Special,
107     Hyphen                      => Special,
108     Full_Stop                   => Special,
109     Solidus                     => Special,
110
111     '0' .. '9'                  => Digit + Hex_Digit,
112
113     Colon                       => Special,
114     Semicolon                   => Special,
115     Less_Than_Sign              => Special,
116     Equals_Sign                 => Special,
117     Greater_Than_Sign           => Special,
118     Question                    => Special,
119     Commercial_At               => Special,
120
121     'A' .. 'F'                  => Upper + Basic + Hex_Digit,
122     'G' .. 'Z'                  => Upper + Basic,
123
124     Left_Square_Bracket         => Special,
125     Reverse_Solidus             => Special,
126     Right_Square_Bracket        => Special,
127     Circumflex                  => Special,
128     Low_Line                    => Special,
129     Grave                       => Special,
130
131     'a' .. 'f'                  => Lower + Basic + Hex_Digit,
132     'g' .. 'z'                  => Lower + Basic,
133
134     Left_Curly_Bracket          => Special,
135     Vertical_Line               => Special,
136     Right_Curly_Bracket         => Special,
137     Tilde                       => Special,
138
139     DEL                         => Control,
140     Reserved_128                => Control,
141     Reserved_129                => Control,
142     BPH                         => Control,
143     NBH                         => Control,
144     Reserved_132                => Control,
145     NEL                         => Control + Line_Term,
146     SSA                         => Control,
147     ESA                         => Control,
148     HTS                         => Control,
149     HTJ                         => Control,
150     VTS                         => Control,
151     PLD                         => Control,
152     PLU                         => Control,
153     RI                          => Control,
154     SS2                         => Control,
155     SS3                         => Control,
156
157     DCS                         => Control,
158     PU1                         => Control,
159     PU2                         => Control,
160     STS                         => Control,
161     CCH                         => Control,
162     MW                          => Control,
163     SPA                         => Control,
164     EPA                         => Control,
165
166     SOS                         => Control,
167     Reserved_153                => Control,
168     SCI                         => Control,
169     CSI                         => Control,
170     ST                          => Control,
171     OSC                         => Control,
172     PM                          => Control,
173     APC                         => Control,
174
175     No_Break_Space              => Special,
176     Inverted_Exclamation        => Special,
177     Cent_Sign                   => Special,
178     Pound_Sign                  => Special,
179     Currency_Sign               => Special,
180     Yen_Sign                    => Special,
181     Broken_Bar                  => Special,
182     Section_Sign                => Special,
183     Diaeresis                   => Special,
184     Copyright_Sign              => Special,
185     Feminine_Ordinal_Indicator  => Special,
186     Left_Angle_Quotation        => Special,
187     Not_Sign                    => Special,
188     Soft_Hyphen                 => Special,
189     Registered_Trade_Mark_Sign  => Special,
190     Macron                      => Special,
191     Degree_Sign                 => Special,
192     Plus_Minus_Sign             => Special,
193     Superscript_Two             => Special,
194     Superscript_Three           => Special,
195     Acute                       => Special,
196     Micro_Sign                  => Special,
197     Pilcrow_Sign                => Special,
198     Middle_Dot                  => Special,
199     Cedilla                     => Special,
200     Superscript_One             => Special,
201     Masculine_Ordinal_Indicator => Special,
202     Right_Angle_Quotation       => Special,
203     Fraction_One_Quarter        => Special,
204     Fraction_One_Half           => Special,
205     Fraction_Three_Quarters     => Special,
206     Inverted_Question           => Special,
207
208     UC_A_Grave                  => Upper,
209     UC_A_Acute                  => Upper,
210     UC_A_Circumflex             => Upper,
211     UC_A_Tilde                  => Upper,
212     UC_A_Diaeresis              => Upper,
213     UC_A_Ring                   => Upper,
214     UC_AE_Diphthong             => Upper + Basic,
215     UC_C_Cedilla                => Upper,
216     UC_E_Grave                  => Upper,
217     UC_E_Acute                  => Upper,
218     UC_E_Circumflex             => Upper,
219     UC_E_Diaeresis              => Upper,
220     UC_I_Grave                  => Upper,
221     UC_I_Acute                  => Upper,
222     UC_I_Circumflex             => Upper,
223     UC_I_Diaeresis              => Upper,
224     UC_Icelandic_Eth            => Upper + Basic,
225     UC_N_Tilde                  => Upper,
226     UC_O_Grave                  => Upper,
227     UC_O_Acute                  => Upper,
228     UC_O_Circumflex             => Upper,
229     UC_O_Tilde                  => Upper,
230     UC_O_Diaeresis              => Upper,
231
232     Multiplication_Sign         => Special,
233
234     UC_O_Oblique_Stroke         => Upper,
235     UC_U_Grave                  => Upper,
236     UC_U_Acute                  => Upper,
237     UC_U_Circumflex             => Upper,
238     UC_U_Diaeresis              => Upper,
239     UC_Y_Acute                  => Upper,
240     UC_Icelandic_Thorn          => Upper + Basic,
241
242     LC_German_Sharp_S           => Lower + Basic,
243     LC_A_Grave                  => Lower,
244     LC_A_Acute                  => Lower,
245     LC_A_Circumflex             => Lower,
246     LC_A_Tilde                  => Lower,
247     LC_A_Diaeresis              => Lower,
248     LC_A_Ring                   => Lower,
249     LC_AE_Diphthong             => Lower + Basic,
250     LC_C_Cedilla                => Lower,
251     LC_E_Grave                  => Lower,
252     LC_E_Acute                  => Lower,
253     LC_E_Circumflex             => Lower,
254     LC_E_Diaeresis              => Lower,
255     LC_I_Grave                  => Lower,
256     LC_I_Acute                  => Lower,
257     LC_I_Circumflex             => Lower,
258     LC_I_Diaeresis              => Lower,
259     LC_Icelandic_Eth            => Lower + Basic,
260     LC_N_Tilde                  => Lower,
261     LC_O_Grave                  => Lower,
262     LC_O_Acute                  => Lower,
263     LC_O_Circumflex             => Lower,
264     LC_O_Tilde                  => Lower,
265     LC_O_Diaeresis              => Lower,
266
267     Division_Sign               => Special,
268
269     LC_O_Oblique_Stroke         => Lower,
270     LC_U_Grave                  => Lower,
271     LC_U_Acute                  => Lower,
272     LC_U_Circumflex             => Lower,
273     LC_U_Diaeresis              => Lower,
274     LC_Y_Acute                  => Lower,
275     LC_Icelandic_Thorn          => Lower + Basic,
276     LC_Y_Diaeresis              => Lower
277   );
278
279   ---------------------
280   -- Is_Alphanumeric --
281   ---------------------
282
283   function Is_Alphanumeric (Item : Character) return Boolean is
284   begin
285      return (Char_Map (Item) and Alphanum) /= 0;
286   end Is_Alphanumeric;
287
288   --------------
289   -- Is_Basic --
290   --------------
291
292   function Is_Basic (Item : Character) return Boolean is
293   begin
294      return (Char_Map (Item) and Basic) /= 0;
295   end Is_Basic;
296
297   ------------------
298   -- Is_Character --
299   ------------------
300
301   function Is_Character (Item : Wide_Character) return Boolean is
302   begin
303      return Wide_Character'Pos (Item) < 256;
304   end Is_Character;
305
306   ----------------
307   -- Is_Control --
308   ----------------
309
310   function Is_Control (Item : Character) return Boolean is
311   begin
312      return (Char_Map (Item) and Control) /= 0;
313   end Is_Control;
314
315   --------------
316   -- Is_Digit --
317   --------------
318
319   function Is_Digit (Item : Character) return Boolean is
320   begin
321      return Item in '0' .. '9';
322   end Is_Digit;
323
324   ----------------
325   -- Is_Graphic --
326   ----------------
327
328   function Is_Graphic (Item : Character) return Boolean is
329   begin
330      return (Char_Map (Item) and Graphic) /= 0;
331   end Is_Graphic;
332
333   --------------------------
334   -- Is_Hexadecimal_Digit --
335   --------------------------
336
337   function Is_Hexadecimal_Digit (Item : Character) return Boolean is
338   begin
339      return (Char_Map (Item) and Hex_Digit) /= 0;
340   end Is_Hexadecimal_Digit;
341
342   ----------------
343   -- Is_ISO_646 --
344   ----------------
345
346   function Is_ISO_646 (Item : Character) return Boolean is
347   begin
348      return Item in ISO_646;
349   end Is_ISO_646;
350
351   --  Note: much more efficient coding of the following function is possible
352   --  by testing several 16#80# bits in a complete word in a single operation
353
354   function Is_ISO_646 (Item : String) return Boolean is
355   begin
356      for J in Item'Range loop
357         if Item (J) not in ISO_646 then
358            return False;
359         end if;
360      end loop;
361
362      return True;
363   end Is_ISO_646;
364
365   ---------------
366   -- Is_Letter --
367   ---------------
368
369   function Is_Letter (Item : Character) return Boolean is
370   begin
371      return (Char_Map (Item) and Letter) /= 0;
372   end Is_Letter;
373
374   ------------------------
375   -- Is_Line_Terminator --
376   ------------------------
377
378   function Is_Line_Terminator (Item : Character) return Boolean is
379   begin
380      return (Char_Map (Item) and Line_Term) /= 0;
381   end Is_Line_Terminator;
382
383   --------------
384   -- Is_Lower --
385   --------------
386
387   function Is_Lower (Item : Character) return Boolean is
388   begin
389      return (Char_Map (Item) and Lower) /= 0;
390   end Is_Lower;
391
392   -------------
393   -- Is_Mark --
394   -------------
395
396   function Is_Mark (Item : Character) return Boolean is
397      pragma Unreferenced (Item);
398   begin
399      return False;
400   end Is_Mark;
401
402   ---------------------
403   -- Is_Other_Format --
404   ---------------------
405
406   function Is_Other_Format (Item : Character) return Boolean is
407   begin
408      return Item = Soft_Hyphen;
409   end Is_Other_Format;
410
411   ------------------------------
412   -- Is_Punctuation_Connector --
413   ------------------------------
414
415   function Is_Punctuation_Connector (Item : Character) return Boolean is
416   begin
417      return Item = '_';
418   end Is_Punctuation_Connector;
419
420   --------------
421   -- Is_Space --
422   --------------
423
424   function Is_Space (Item : Character) return Boolean is
425   begin
426      return Item = ' ' or else Item = No_Break_Space;
427   end Is_Space;
428
429   ----------------
430   -- Is_Special --
431   ----------------
432
433   function Is_Special (Item : Character) return Boolean is
434   begin
435      return (Char_Map (Item) and Special) /= 0;
436   end Is_Special;
437
438   ---------------
439   -- Is_String --
440   ---------------
441
442   function Is_String (Item : Wide_String) return Boolean is
443   begin
444      for J in Item'Range loop
445         if Wide_Character'Pos (Item (J)) >= 256 then
446            return False;
447         end if;
448      end loop;
449
450      return True;
451   end Is_String;
452
453   --------------
454   -- Is_Upper --
455   --------------
456
457   function Is_Upper (Item : Character) return Boolean is
458   begin
459      return (Char_Map (Item) and Upper) /= 0;
460   end Is_Upper;
461
462   --------------
463   -- To_Basic --
464   --------------
465
466   function To_Basic (Item : Character) return Character is
467   begin
468      return Value (Basic_Map, Item);
469   end To_Basic;
470
471   function To_Basic (Item : String) return String is
472   begin
473      return Result : String (1 .. Item'Length) do
474         for J in Item'Range loop
475            Result (J - (Item'First - 1)) := Value (Basic_Map, Item (J));
476         end loop;
477      end return;
478   end To_Basic;
479
480   ------------------
481   -- To_Character --
482   ------------------
483
484   function To_Character
485     (Item       : Wide_Character;
486      Substitute : Character := ' ') return Character
487   is
488   begin
489      if Is_Character (Item) then
490         return Character'Val (Wide_Character'Pos (Item));
491      else
492         return Substitute;
493      end if;
494   end To_Character;
495
496   ----------------
497   -- To_ISO_646 --
498   ----------------
499
500   function To_ISO_646
501     (Item       : Character;
502      Substitute : ISO_646 := ' ') return ISO_646
503   is
504   begin
505      return (if Item in ISO_646 then Item else Substitute);
506   end To_ISO_646;
507
508   function To_ISO_646
509     (Item       : String;
510      Substitute : ISO_646 := ' ') return String
511   is
512      Result : String (1 .. Item'Length);
513
514   begin
515      for J in Item'Range loop
516         Result (J - (Item'First - 1)) :=
517           (if Item (J) in ISO_646 then Item (J) else Substitute);
518      end loop;
519
520      return Result;
521   end To_ISO_646;
522
523   --------------
524   -- To_Lower --
525   --------------
526
527   function To_Lower (Item : Character) return Character is
528   begin
529      return Value (Lower_Case_Map, Item);
530   end To_Lower;
531
532   function To_Lower (Item : String) return String is
533   begin
534      return Result : String (1 .. Item'Length) do
535         for J in Item'Range loop
536            Result (J - (Item'First - 1)) := Value (Lower_Case_Map, Item (J));
537         end loop;
538      end return;
539   end To_Lower;
540
541   ---------------
542   -- To_String --
543   ---------------
544
545   function To_String
546     (Item       : Wide_String;
547      Substitute : Character := ' ') return String
548   is
549      Result : String (1 .. Item'Length);
550
551   begin
552      for J in Item'Range loop
553         Result (J - (Item'First - 1)) := To_Character (Item (J), Substitute);
554      end loop;
555
556      return Result;
557   end To_String;
558
559   --------------
560   -- To_Upper --
561   --------------
562
563   function To_Upper
564     (Item : Character) return Character
565   is
566   begin
567      return Value (Upper_Case_Map, Item);
568   end To_Upper;
569
570   function To_Upper
571     (Item : String) return String
572   is
573   begin
574      return Result : String (1 .. Item'Length) do
575         for J in Item'Range loop
576            Result (J - (Item'First - 1)) := Value (Upper_Case_Map, Item (J));
577         end loop;
578      end return;
579   end To_Upper;
580
581   -----------------------
582   -- To_Wide_Character --
583   -----------------------
584
585   function To_Wide_Character
586     (Item : Character) return Wide_Character
587   is
588   begin
589      return Wide_Character'Val (Character'Pos (Item));
590   end To_Wide_Character;
591
592   --------------------
593   -- To_Wide_String --
594   --------------------
595
596   function To_Wide_String
597     (Item : String) return Wide_String
598   is
599      Result : Wide_String (1 .. Item'Length);
600
601   begin
602      for J in Item'Range loop
603         Result (J - (Item'First - 1)) := To_Wide_Character (Item (J));
604      end loop;
605
606      return Result;
607   end To_Wide_String;
608
609end Ada.Characters.Handling;
610