1------------------------------------------------------------------------------ 2-- -- 3-- GNAT RUN-TIME COMPONENTS -- 4-- -- 5-- A D A . C H A R A C T E R S . H A N D L I N G -- 6-- -- 7-- B o d y -- 8-- -- 9-- Copyright (C) 1992-2012, Free Software Foundation, Inc. -- 10-- -- 11-- GNAT is free software; you can redistribute it and/or modify it under -- 12-- terms of the GNU General Public License as published by the Free Soft- -- 13-- ware Foundation; either version 3, or (at your option) any later ver- -- 14-- sion. GNAT is distributed in the hope that it will be useful, but WITH- -- 15-- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -- 16-- or FITNESS FOR A PARTICULAR PURPOSE. -- 17-- -- 18-- As a special exception under Section 7 of GPL version 3, you are granted -- 19-- additional permissions described in the GCC Runtime Library Exception, -- 20-- version 3.1, as published by the Free Software Foundation. -- 21-- -- 22-- You should have received a copy of the GNU General Public License and -- 23-- a copy of the GCC Runtime Library Exception along with this program; -- 24-- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -- 25-- <http://www.gnu.org/licenses/>. -- 26-- -- 27-- GNAT was originally developed by the GNAT team at New York University. -- 28-- Extensive contributions were provided by Ada Core Technologies Inc. -- 29-- -- 30------------------------------------------------------------------------------ 31 32with Ada.Characters.Latin_1; use Ada.Characters.Latin_1; 33with Ada.Strings.Maps; use Ada.Strings.Maps; 34with Ada.Strings.Maps.Constants; use Ada.Strings.Maps.Constants; 35 36package body Ada.Characters.Handling is 37 38 ------------------------------------ 39 -- Character Classification Table -- 40 ------------------------------------ 41 42 type Character_Flags is mod 256; 43 for Character_Flags'Size use 8; 44 45 Control : constant Character_Flags := 1; 46 Lower : constant Character_Flags := 2; 47 Upper : constant Character_Flags := 4; 48 Basic : constant Character_Flags := 8; 49 Hex_Digit : constant Character_Flags := 16; 50 Digit : constant Character_Flags := 32; 51 Special : constant Character_Flags := 64; 52 53 Letter : constant Character_Flags := Lower or Upper; 54 Alphanum : constant Character_Flags := Letter or Digit; 55 Graphic : constant Character_Flags := Alphanum or Special; 56 57 Char_Map : constant array (Character) of Character_Flags := 58 ( 59 NUL => Control, 60 SOH => Control, 61 STX => Control, 62 ETX => Control, 63 EOT => Control, 64 ENQ => Control, 65 ACK => Control, 66 BEL => Control, 67 BS => Control, 68 HT => Control, 69 LF => Control, 70 VT => Control, 71 FF => Control, 72 CR => Control, 73 SO => Control, 74 SI => Control, 75 76 DLE => Control, 77 DC1 => Control, 78 DC2 => Control, 79 DC3 => Control, 80 DC4 => Control, 81 NAK => Control, 82 SYN => Control, 83 ETB => Control, 84 CAN => Control, 85 EM => Control, 86 SUB => Control, 87 ESC => Control, 88 FS => Control, 89 GS => Control, 90 RS => Control, 91 US => Control, 92 93 Space => Special, 94 Exclamation => Special, 95 Quotation => Special, 96 Number_Sign => Special, 97 Dollar_Sign => Special, 98 Percent_Sign => Special, 99 Ampersand => Special, 100 Apostrophe => Special, 101 Left_Parenthesis => Special, 102 Right_Parenthesis => Special, 103 Asterisk => Special, 104 Plus_Sign => Special, 105 Comma => Special, 106 Hyphen => Special, 107 Full_Stop => Special, 108 Solidus => Special, 109 110 '0' .. '9' => Digit + Hex_Digit, 111 112 Colon => Special, 113 Semicolon => Special, 114 Less_Than_Sign => Special, 115 Equals_Sign => Special, 116 Greater_Than_Sign => Special, 117 Question => Special, 118 Commercial_At => Special, 119 120 'A' .. 'F' => Upper + Basic + Hex_Digit, 121 'G' .. 'Z' => Upper + Basic, 122 123 Left_Square_Bracket => Special, 124 Reverse_Solidus => Special, 125 Right_Square_Bracket => Special, 126 Circumflex => Special, 127 Low_Line => Special, 128 Grave => Special, 129 130 'a' .. 'f' => Lower + Basic + Hex_Digit, 131 'g' .. 'z' => Lower + Basic, 132 133 Left_Curly_Bracket => Special, 134 Vertical_Line => Special, 135 Right_Curly_Bracket => Special, 136 Tilde => Special, 137 138 DEL => Control, 139 Reserved_128 => Control, 140 Reserved_129 => Control, 141 BPH => Control, 142 NBH => Control, 143 Reserved_132 => Control, 144 NEL => Control, 145 SSA => Control, 146 ESA => Control, 147 HTS => Control, 148 HTJ => Control, 149 VTS => Control, 150 PLD => Control, 151 PLU => Control, 152 RI => Control, 153 SS2 => Control, 154 SS3 => Control, 155 156 DCS => Control, 157 PU1 => Control, 158 PU2 => Control, 159 STS => Control, 160 CCH => Control, 161 MW => Control, 162 SPA => Control, 163 EPA => Control, 164 165 SOS => Control, 166 Reserved_153 => Control, 167 SCI => Control, 168 CSI => Control, 169 ST => Control, 170 OSC => Control, 171 PM => Control, 172 APC => Control, 173 174 No_Break_Space => Special, 175 Inverted_Exclamation => Special, 176 Cent_Sign => Special, 177 Pound_Sign => Special, 178 Currency_Sign => Special, 179 Yen_Sign => Special, 180 Broken_Bar => Special, 181 Section_Sign => Special, 182 Diaeresis => Special, 183 Copyright_Sign => Special, 184 Feminine_Ordinal_Indicator => Special, 185 Left_Angle_Quotation => Special, 186 Not_Sign => Special, 187 Soft_Hyphen => Special, 188 Registered_Trade_Mark_Sign => Special, 189 Macron => Special, 190 Degree_Sign => Special, 191 Plus_Minus_Sign => Special, 192 Superscript_Two => Special, 193 Superscript_Three => Special, 194 Acute => Special, 195 Micro_Sign => Special, 196 Pilcrow_Sign => Special, 197 Middle_Dot => Special, 198 Cedilla => Special, 199 Superscript_One => Special, 200 Masculine_Ordinal_Indicator => Special, 201 Right_Angle_Quotation => Special, 202 Fraction_One_Quarter => Special, 203 Fraction_One_Half => Special, 204 Fraction_Three_Quarters => Special, 205 Inverted_Question => Special, 206 207 UC_A_Grave => Upper, 208 UC_A_Acute => Upper, 209 UC_A_Circumflex => Upper, 210 UC_A_Tilde => Upper, 211 UC_A_Diaeresis => Upper, 212 UC_A_Ring => Upper, 213 UC_AE_Diphthong => Upper + Basic, 214 UC_C_Cedilla => Upper, 215 UC_E_Grave => Upper, 216 UC_E_Acute => Upper, 217 UC_E_Circumflex => Upper, 218 UC_E_Diaeresis => Upper, 219 UC_I_Grave => Upper, 220 UC_I_Acute => Upper, 221 UC_I_Circumflex => Upper, 222 UC_I_Diaeresis => Upper, 223 UC_Icelandic_Eth => Upper + Basic, 224 UC_N_Tilde => Upper, 225 UC_O_Grave => Upper, 226 UC_O_Acute => Upper, 227 UC_O_Circumflex => Upper, 228 UC_O_Tilde => Upper, 229 UC_O_Diaeresis => Upper, 230 231 Multiplication_Sign => Special, 232 233 UC_O_Oblique_Stroke => Upper, 234 UC_U_Grave => Upper, 235 UC_U_Acute => Upper, 236 UC_U_Circumflex => Upper, 237 UC_U_Diaeresis => Upper, 238 UC_Y_Acute => Upper, 239 UC_Icelandic_Thorn => Upper + Basic, 240 241 LC_German_Sharp_S => Lower + Basic, 242 LC_A_Grave => Lower, 243 LC_A_Acute => Lower, 244 LC_A_Circumflex => Lower, 245 LC_A_Tilde => Lower, 246 LC_A_Diaeresis => Lower, 247 LC_A_Ring => Lower, 248 LC_AE_Diphthong => Lower + Basic, 249 LC_C_Cedilla => Lower, 250 LC_E_Grave => Lower, 251 LC_E_Acute => Lower, 252 LC_E_Circumflex => Lower, 253 LC_E_Diaeresis => Lower, 254 LC_I_Grave => Lower, 255 LC_I_Acute => Lower, 256 LC_I_Circumflex => Lower, 257 LC_I_Diaeresis => Lower, 258 LC_Icelandic_Eth => Lower + Basic, 259 LC_N_Tilde => Lower, 260 LC_O_Grave => Lower, 261 LC_O_Acute => Lower, 262 LC_O_Circumflex => Lower, 263 LC_O_Tilde => Lower, 264 LC_O_Diaeresis => Lower, 265 266 Division_Sign => Special, 267 268 LC_O_Oblique_Stroke => Lower, 269 LC_U_Grave => Lower, 270 LC_U_Acute => Lower, 271 LC_U_Circumflex => Lower, 272 LC_U_Diaeresis => Lower, 273 LC_Y_Acute => Lower, 274 LC_Icelandic_Thorn => Lower + Basic, 275 LC_Y_Diaeresis => Lower 276 ); 277 278 --------------------- 279 -- Is_Alphanumeric -- 280 --------------------- 281 282 function Is_Alphanumeric (Item : Character) return Boolean is 283 begin 284 return (Char_Map (Item) and Alphanum) /= 0; 285 end Is_Alphanumeric; 286 287 -------------- 288 -- Is_Basic -- 289 -------------- 290 291 function Is_Basic (Item : Character) return Boolean is 292 begin 293 return (Char_Map (Item) and Basic) /= 0; 294 end Is_Basic; 295 296 ------------------ 297 -- Is_Character -- 298 ------------------ 299 300 function Is_Character (Item : Wide_Character) return Boolean is 301 begin 302 return Wide_Character'Pos (Item) < 256; 303 end Is_Character; 304 305 ---------------- 306 -- Is_Control -- 307 ---------------- 308 309 function Is_Control (Item : Character) return Boolean is 310 begin 311 return (Char_Map (Item) and Control) /= 0; 312 end Is_Control; 313 314 -------------- 315 -- Is_Digit -- 316 -------------- 317 318 function Is_Digit (Item : Character) return Boolean is 319 begin 320 return Item in '0' .. '9'; 321 end Is_Digit; 322 323 ---------------- 324 -- Is_Graphic -- 325 ---------------- 326 327 function Is_Graphic (Item : Character) return Boolean is 328 begin 329 return (Char_Map (Item) and Graphic) /= 0; 330 end Is_Graphic; 331 332 -------------------------- 333 -- Is_Hexadecimal_Digit -- 334 -------------------------- 335 336 function Is_Hexadecimal_Digit (Item : Character) return Boolean is 337 begin 338 return (Char_Map (Item) and Hex_Digit) /= 0; 339 end Is_Hexadecimal_Digit; 340 341 ---------------- 342 -- Is_ISO_646 -- 343 ---------------- 344 345 function Is_ISO_646 (Item : Character) return Boolean is 346 begin 347 return Item in ISO_646; 348 end Is_ISO_646; 349 350 -- Note: much more efficient coding of the following function is possible 351 -- by testing several 16#80# bits in a complete word in a single operation 352 353 function Is_ISO_646 (Item : String) return Boolean is 354 begin 355 for J in Item'Range loop 356 if Item (J) not in ISO_646 then 357 return False; 358 end if; 359 end loop; 360 361 return True; 362 end Is_ISO_646; 363 364 --------------- 365 -- Is_Letter -- 366 --------------- 367 368 function Is_Letter (Item : Character) return Boolean is 369 begin 370 return (Char_Map (Item) and Letter) /= 0; 371 end Is_Letter; 372 373 -------------- 374 -- Is_Lower -- 375 -------------- 376 377 function Is_Lower (Item : Character) return Boolean is 378 begin 379 return (Char_Map (Item) and Lower) /= 0; 380 end Is_Lower; 381 382 ---------------- 383 -- Is_Special -- 384 ---------------- 385 386 function Is_Special (Item : Character) return Boolean is 387 begin 388 return (Char_Map (Item) and Special) /= 0; 389 end Is_Special; 390 391 --------------- 392 -- Is_String -- 393 --------------- 394 395 function Is_String (Item : Wide_String) return Boolean is 396 begin 397 for J in Item'Range loop 398 if Wide_Character'Pos (Item (J)) >= 256 then 399 return False; 400 end if; 401 end loop; 402 403 return True; 404 end Is_String; 405 406 -------------- 407 -- Is_Upper -- 408 -------------- 409 410 function Is_Upper (Item : Character) return Boolean is 411 begin 412 return (Char_Map (Item) and Upper) /= 0; 413 end Is_Upper; 414 415 -------------- 416 -- To_Basic -- 417 -------------- 418 419 function To_Basic (Item : Character) return Character is 420 begin 421 return Value (Basic_Map, Item); 422 end To_Basic; 423 424 function To_Basic (Item : String) return String is 425 begin 426 return Result : String (1 .. Item'Length) do 427 for J in Item'Range loop 428 Result (J - (Item'First - 1)) := Value (Basic_Map, Item (J)); 429 end loop; 430 end return; 431 end To_Basic; 432 433 ------------------ 434 -- To_Character -- 435 ------------------ 436 437 function To_Character 438 (Item : Wide_Character; 439 Substitute : Character := ' ') return Character 440 is 441 begin 442 if Is_Character (Item) then 443 return Character'Val (Wide_Character'Pos (Item)); 444 else 445 return Substitute; 446 end if; 447 end To_Character; 448 449 ---------------- 450 -- To_ISO_646 -- 451 ---------------- 452 453 function To_ISO_646 454 (Item : Character; 455 Substitute : ISO_646 := ' ') return ISO_646 456 is 457 begin 458 return (if Item in ISO_646 then Item else Substitute); 459 end To_ISO_646; 460 461 function To_ISO_646 462 (Item : String; 463 Substitute : ISO_646 := ' ') return String 464 is 465 Result : String (1 .. Item'Length); 466 467 begin 468 for J in Item'Range loop 469 Result (J - (Item'First - 1)) := 470 (if Item (J) in ISO_646 then Item (J) else Substitute); 471 end loop; 472 473 return Result; 474 end To_ISO_646; 475 476 -------------- 477 -- To_Lower -- 478 -------------- 479 480 function To_Lower (Item : Character) return Character is 481 begin 482 return Value (Lower_Case_Map, Item); 483 end To_Lower; 484 485 function To_Lower (Item : String) return String is 486 begin 487 return Result : String (1 .. Item'Length) do 488 for J in Item'Range loop 489 Result (J - (Item'First - 1)) := Value (Lower_Case_Map, Item (J)); 490 end loop; 491 end return; 492 end To_Lower; 493 494 --------------- 495 -- To_String -- 496 --------------- 497 498 function To_String 499 (Item : Wide_String; 500 Substitute : Character := ' ') return String 501 is 502 Result : String (1 .. Item'Length); 503 504 begin 505 for J in Item'Range loop 506 Result (J - (Item'First - 1)) := To_Character (Item (J), Substitute); 507 end loop; 508 509 return Result; 510 end To_String; 511 512 -------------- 513 -- To_Upper -- 514 -------------- 515 516 function To_Upper 517 (Item : Character) return Character 518 is 519 begin 520 return Value (Upper_Case_Map, Item); 521 end To_Upper; 522 523 function To_Upper 524 (Item : String) return String 525 is 526 begin 527 return Result : String (1 .. Item'Length) do 528 for J in Item'Range loop 529 Result (J - (Item'First - 1)) := Value (Upper_Case_Map, Item (J)); 530 end loop; 531 end return; 532 end To_Upper; 533 534 ----------------------- 535 -- To_Wide_Character -- 536 ----------------------- 537 538 function To_Wide_Character 539 (Item : Character) return Wide_Character 540 is 541 begin 542 return Wide_Character'Val (Character'Pos (Item)); 543 end To_Wide_Character; 544 545 -------------------- 546 -- To_Wide_String -- 547 -------------------- 548 549 function To_Wide_String 550 (Item : String) return Wide_String 551 is 552 Result : Wide_String (1 .. Item'Length); 553 554 begin 555 for J in Item'Range loop 556 Result (J - (Item'First - 1)) := To_Wide_Character (Item (J)); 557 end loop; 558 559 return Result; 560 end To_Wide_String; 561 562end Ada.Characters.Handling; 563