1------------------------------------------------------------------------------ 2-- -- 3-- GNAT RUN-TIME COMPONENTS -- 4-- -- 5-- A D A . C H A R A C T E R S . H A N D L I N G -- 6-- -- 7-- B o d y -- 8-- -- 9-- Copyright (C) 1992-2018, Free Software Foundation, Inc. -- 10-- -- 11-- GNAT is free software; you can redistribute it and/or modify it under -- 12-- terms of the GNU General Public License as published by the Free Soft- -- 13-- ware Foundation; either version 3, or (at your option) any later ver- -- 14-- sion. GNAT is distributed in the hope that it will be useful, but WITH- -- 15-- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -- 16-- or FITNESS FOR A PARTICULAR PURPOSE. -- 17-- -- 18-- As a special exception under Section 7 of GPL version 3, you are granted -- 19-- additional permissions described in the GCC Runtime Library Exception, -- 20-- version 3.1, as published by the Free Software Foundation. -- 21-- -- 22-- You should have received a copy of the GNU General Public License and -- 23-- a copy of the GCC Runtime Library Exception along with this program; -- 24-- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -- 25-- <http://www.gnu.org/licenses/>. -- 26-- -- 27-- GNAT was originally developed by the GNAT team at New York University. -- 28-- Extensive contributions were provided by Ada Core Technologies Inc. -- 29-- -- 30------------------------------------------------------------------------------ 31 32with Ada.Characters.Latin_1; use Ada.Characters.Latin_1; 33with Ada.Strings.Maps; use Ada.Strings.Maps; 34with Ada.Strings.Maps.Constants; use Ada.Strings.Maps.Constants; 35 36package body Ada.Characters.Handling is 37 38 ------------------------------------ 39 -- Character Classification Table -- 40 ------------------------------------ 41 42 type Character_Flags is mod 256; 43 for Character_Flags'Size use 8; 44 45 Control : constant Character_Flags := 1; 46 Lower : constant Character_Flags := 2; 47 Upper : constant Character_Flags := 4; 48 Basic : constant Character_Flags := 8; 49 Hex_Digit : constant Character_Flags := 16; 50 Digit : constant Character_Flags := 32; 51 Special : constant Character_Flags := 64; 52 Line_Term : constant Character_Flags := 128; 53 54 Letter : constant Character_Flags := Lower or Upper; 55 Alphanum : constant Character_Flags := Letter or Digit; 56 Graphic : constant Character_Flags := Alphanum or Special; 57 58 Char_Map : constant array (Character) of Character_Flags := 59 ( 60 NUL => Control, 61 SOH => Control, 62 STX => Control, 63 ETX => Control, 64 EOT => Control, 65 ENQ => Control, 66 ACK => Control, 67 BEL => Control, 68 BS => Control, 69 HT => Control, 70 LF => Control + Line_Term, 71 VT => Control + Line_Term, 72 FF => Control + Line_Term, 73 CR => Control + Line_Term, 74 SO => Control, 75 SI => Control, 76 77 DLE => Control, 78 DC1 => Control, 79 DC2 => Control, 80 DC3 => Control, 81 DC4 => Control, 82 NAK => Control, 83 SYN => Control, 84 ETB => Control, 85 CAN => Control, 86 EM => Control, 87 SUB => Control, 88 ESC => Control, 89 FS => Control, 90 GS => Control, 91 RS => Control, 92 US => Control, 93 94 Space => Special, 95 Exclamation => Special, 96 Quotation => Special, 97 Number_Sign => Special, 98 Dollar_Sign => Special, 99 Percent_Sign => Special, 100 Ampersand => Special, 101 Apostrophe => Special, 102 Left_Parenthesis => Special, 103 Right_Parenthesis => Special, 104 Asterisk => Special, 105 Plus_Sign => Special, 106 Comma => Special, 107 Hyphen => Special, 108 Full_Stop => Special, 109 Solidus => Special, 110 111 '0' .. '9' => Digit + Hex_Digit, 112 113 Colon => Special, 114 Semicolon => Special, 115 Less_Than_Sign => Special, 116 Equals_Sign => Special, 117 Greater_Than_Sign => Special, 118 Question => Special, 119 Commercial_At => Special, 120 121 'A' .. 'F' => Upper + Basic + Hex_Digit, 122 'G' .. 'Z' => Upper + Basic, 123 124 Left_Square_Bracket => Special, 125 Reverse_Solidus => Special, 126 Right_Square_Bracket => Special, 127 Circumflex => Special, 128 Low_Line => Special, 129 Grave => Special, 130 131 'a' .. 'f' => Lower + Basic + Hex_Digit, 132 'g' .. 'z' => Lower + Basic, 133 134 Left_Curly_Bracket => Special, 135 Vertical_Line => Special, 136 Right_Curly_Bracket => Special, 137 Tilde => Special, 138 139 DEL => Control, 140 Reserved_128 => Control, 141 Reserved_129 => Control, 142 BPH => Control, 143 NBH => Control, 144 Reserved_132 => Control, 145 NEL => Control + Line_Term, 146 SSA => Control, 147 ESA => Control, 148 HTS => Control, 149 HTJ => Control, 150 VTS => Control, 151 PLD => Control, 152 PLU => Control, 153 RI => Control, 154 SS2 => Control, 155 SS3 => Control, 156 157 DCS => Control, 158 PU1 => Control, 159 PU2 => Control, 160 STS => Control, 161 CCH => Control, 162 MW => Control, 163 SPA => Control, 164 EPA => Control, 165 166 SOS => Control, 167 Reserved_153 => Control, 168 SCI => Control, 169 CSI => Control, 170 ST => Control, 171 OSC => Control, 172 PM => Control, 173 APC => Control, 174 175 No_Break_Space => Special, 176 Inverted_Exclamation => Special, 177 Cent_Sign => Special, 178 Pound_Sign => Special, 179 Currency_Sign => Special, 180 Yen_Sign => Special, 181 Broken_Bar => Special, 182 Section_Sign => Special, 183 Diaeresis => Special, 184 Copyright_Sign => Special, 185 Feminine_Ordinal_Indicator => Special, 186 Left_Angle_Quotation => Special, 187 Not_Sign => Special, 188 Soft_Hyphen => Special, 189 Registered_Trade_Mark_Sign => Special, 190 Macron => Special, 191 Degree_Sign => Special, 192 Plus_Minus_Sign => Special, 193 Superscript_Two => Special, 194 Superscript_Three => Special, 195 Acute => Special, 196 Micro_Sign => Special, 197 Pilcrow_Sign => Special, 198 Middle_Dot => Special, 199 Cedilla => Special, 200 Superscript_One => Special, 201 Masculine_Ordinal_Indicator => Special, 202 Right_Angle_Quotation => Special, 203 Fraction_One_Quarter => Special, 204 Fraction_One_Half => Special, 205 Fraction_Three_Quarters => Special, 206 Inverted_Question => Special, 207 208 UC_A_Grave => Upper, 209 UC_A_Acute => Upper, 210 UC_A_Circumflex => Upper, 211 UC_A_Tilde => Upper, 212 UC_A_Diaeresis => Upper, 213 UC_A_Ring => Upper, 214 UC_AE_Diphthong => Upper + Basic, 215 UC_C_Cedilla => Upper, 216 UC_E_Grave => Upper, 217 UC_E_Acute => Upper, 218 UC_E_Circumflex => Upper, 219 UC_E_Diaeresis => Upper, 220 UC_I_Grave => Upper, 221 UC_I_Acute => Upper, 222 UC_I_Circumflex => Upper, 223 UC_I_Diaeresis => Upper, 224 UC_Icelandic_Eth => Upper + Basic, 225 UC_N_Tilde => Upper, 226 UC_O_Grave => Upper, 227 UC_O_Acute => Upper, 228 UC_O_Circumflex => Upper, 229 UC_O_Tilde => Upper, 230 UC_O_Diaeresis => Upper, 231 232 Multiplication_Sign => Special, 233 234 UC_O_Oblique_Stroke => Upper, 235 UC_U_Grave => Upper, 236 UC_U_Acute => Upper, 237 UC_U_Circumflex => Upper, 238 UC_U_Diaeresis => Upper, 239 UC_Y_Acute => Upper, 240 UC_Icelandic_Thorn => Upper + Basic, 241 242 LC_German_Sharp_S => Lower + Basic, 243 LC_A_Grave => Lower, 244 LC_A_Acute => Lower, 245 LC_A_Circumflex => Lower, 246 LC_A_Tilde => Lower, 247 LC_A_Diaeresis => Lower, 248 LC_A_Ring => Lower, 249 LC_AE_Diphthong => Lower + Basic, 250 LC_C_Cedilla => Lower, 251 LC_E_Grave => Lower, 252 LC_E_Acute => Lower, 253 LC_E_Circumflex => Lower, 254 LC_E_Diaeresis => Lower, 255 LC_I_Grave => Lower, 256 LC_I_Acute => Lower, 257 LC_I_Circumflex => Lower, 258 LC_I_Diaeresis => Lower, 259 LC_Icelandic_Eth => Lower + Basic, 260 LC_N_Tilde => Lower, 261 LC_O_Grave => Lower, 262 LC_O_Acute => Lower, 263 LC_O_Circumflex => Lower, 264 LC_O_Tilde => Lower, 265 LC_O_Diaeresis => Lower, 266 267 Division_Sign => Special, 268 269 LC_O_Oblique_Stroke => Lower, 270 LC_U_Grave => Lower, 271 LC_U_Acute => Lower, 272 LC_U_Circumflex => Lower, 273 LC_U_Diaeresis => Lower, 274 LC_Y_Acute => Lower, 275 LC_Icelandic_Thorn => Lower + Basic, 276 LC_Y_Diaeresis => Lower 277 ); 278 279 --------------------- 280 -- Is_Alphanumeric -- 281 --------------------- 282 283 function Is_Alphanumeric (Item : Character) return Boolean is 284 begin 285 return (Char_Map (Item) and Alphanum) /= 0; 286 end Is_Alphanumeric; 287 288 -------------- 289 -- Is_Basic -- 290 -------------- 291 292 function Is_Basic (Item : Character) return Boolean is 293 begin 294 return (Char_Map (Item) and Basic) /= 0; 295 end Is_Basic; 296 297 ------------------ 298 -- Is_Character -- 299 ------------------ 300 301 function Is_Character (Item : Wide_Character) return Boolean is 302 begin 303 return Wide_Character'Pos (Item) < 256; 304 end Is_Character; 305 306 ---------------- 307 -- Is_Control -- 308 ---------------- 309 310 function Is_Control (Item : Character) return Boolean is 311 begin 312 return (Char_Map (Item) and Control) /= 0; 313 end Is_Control; 314 315 -------------- 316 -- Is_Digit -- 317 -------------- 318 319 function Is_Digit (Item : Character) return Boolean is 320 begin 321 return Item in '0' .. '9'; 322 end Is_Digit; 323 324 ---------------- 325 -- Is_Graphic -- 326 ---------------- 327 328 function Is_Graphic (Item : Character) return Boolean is 329 begin 330 return (Char_Map (Item) and Graphic) /= 0; 331 end Is_Graphic; 332 333 -------------------------- 334 -- Is_Hexadecimal_Digit -- 335 -------------------------- 336 337 function Is_Hexadecimal_Digit (Item : Character) return Boolean is 338 begin 339 return (Char_Map (Item) and Hex_Digit) /= 0; 340 end Is_Hexadecimal_Digit; 341 342 ---------------- 343 -- Is_ISO_646 -- 344 ---------------- 345 346 function Is_ISO_646 (Item : Character) return Boolean is 347 begin 348 return Item in ISO_646; 349 end Is_ISO_646; 350 351 -- Note: much more efficient coding of the following function is possible 352 -- by testing several 16#80# bits in a complete word in a single operation 353 354 function Is_ISO_646 (Item : String) return Boolean is 355 begin 356 for J in Item'Range loop 357 if Item (J) not in ISO_646 then 358 return False; 359 end if; 360 end loop; 361 362 return True; 363 end Is_ISO_646; 364 365 --------------- 366 -- Is_Letter -- 367 --------------- 368 369 function Is_Letter (Item : Character) return Boolean is 370 begin 371 return (Char_Map (Item) and Letter) /= 0; 372 end Is_Letter; 373 374 ------------------------ 375 -- Is_Line_Terminator -- 376 ------------------------ 377 378 function Is_Line_Terminator (Item : Character) return Boolean is 379 begin 380 return (Char_Map (Item) and Line_Term) /= 0; 381 end Is_Line_Terminator; 382 383 -------------- 384 -- Is_Lower -- 385 -------------- 386 387 function Is_Lower (Item : Character) return Boolean is 388 begin 389 return (Char_Map (Item) and Lower) /= 0; 390 end Is_Lower; 391 392 ------------- 393 -- Is_Mark -- 394 ------------- 395 396 function Is_Mark (Item : Character) return Boolean is 397 pragma Unreferenced (Item); 398 begin 399 return False; 400 end Is_Mark; 401 402 --------------------- 403 -- Is_Other_Format -- 404 --------------------- 405 406 function Is_Other_Format (Item : Character) return Boolean is 407 begin 408 return Item = Soft_Hyphen; 409 end Is_Other_Format; 410 411 ------------------------------ 412 -- Is_Punctuation_Connector -- 413 ------------------------------ 414 415 function Is_Punctuation_Connector (Item : Character) return Boolean is 416 begin 417 return Item = '_'; 418 end Is_Punctuation_Connector; 419 420 -------------- 421 -- Is_Space -- 422 -------------- 423 424 function Is_Space (Item : Character) return Boolean is 425 begin 426 return Item = ' ' or else Item = No_Break_Space; 427 end Is_Space; 428 429 ---------------- 430 -- Is_Special -- 431 ---------------- 432 433 function Is_Special (Item : Character) return Boolean is 434 begin 435 return (Char_Map (Item) and Special) /= 0; 436 end Is_Special; 437 438 --------------- 439 -- Is_String -- 440 --------------- 441 442 function Is_String (Item : Wide_String) return Boolean is 443 begin 444 for J in Item'Range loop 445 if Wide_Character'Pos (Item (J)) >= 256 then 446 return False; 447 end if; 448 end loop; 449 450 return True; 451 end Is_String; 452 453 -------------- 454 -- Is_Upper -- 455 -------------- 456 457 function Is_Upper (Item : Character) return Boolean is 458 begin 459 return (Char_Map (Item) and Upper) /= 0; 460 end Is_Upper; 461 462 -------------- 463 -- To_Basic -- 464 -------------- 465 466 function To_Basic (Item : Character) return Character is 467 begin 468 return Value (Basic_Map, Item); 469 end To_Basic; 470 471 function To_Basic (Item : String) return String is 472 begin 473 return Result : String (1 .. Item'Length) do 474 for J in Item'Range loop 475 Result (J - (Item'First - 1)) := Value (Basic_Map, Item (J)); 476 end loop; 477 end return; 478 end To_Basic; 479 480 ------------------ 481 -- To_Character -- 482 ------------------ 483 484 function To_Character 485 (Item : Wide_Character; 486 Substitute : Character := ' ') return Character 487 is 488 begin 489 if Is_Character (Item) then 490 return Character'Val (Wide_Character'Pos (Item)); 491 else 492 return Substitute; 493 end if; 494 end To_Character; 495 496 ---------------- 497 -- To_ISO_646 -- 498 ---------------- 499 500 function To_ISO_646 501 (Item : Character; 502 Substitute : ISO_646 := ' ') return ISO_646 503 is 504 begin 505 return (if Item in ISO_646 then Item else Substitute); 506 end To_ISO_646; 507 508 function To_ISO_646 509 (Item : String; 510 Substitute : ISO_646 := ' ') return String 511 is 512 Result : String (1 .. Item'Length); 513 514 begin 515 for J in Item'Range loop 516 Result (J - (Item'First - 1)) := 517 (if Item (J) in ISO_646 then Item (J) else Substitute); 518 end loop; 519 520 return Result; 521 end To_ISO_646; 522 523 -------------- 524 -- To_Lower -- 525 -------------- 526 527 function To_Lower (Item : Character) return Character is 528 begin 529 return Value (Lower_Case_Map, Item); 530 end To_Lower; 531 532 function To_Lower (Item : String) return String is 533 begin 534 return Result : String (1 .. Item'Length) do 535 for J in Item'Range loop 536 Result (J - (Item'First - 1)) := Value (Lower_Case_Map, Item (J)); 537 end loop; 538 end return; 539 end To_Lower; 540 541 --------------- 542 -- To_String -- 543 --------------- 544 545 function To_String 546 (Item : Wide_String; 547 Substitute : Character := ' ') return String 548 is 549 Result : String (1 .. Item'Length); 550 551 begin 552 for J in Item'Range loop 553 Result (J - (Item'First - 1)) := To_Character (Item (J), Substitute); 554 end loop; 555 556 return Result; 557 end To_String; 558 559 -------------- 560 -- To_Upper -- 561 -------------- 562 563 function To_Upper 564 (Item : Character) return Character 565 is 566 begin 567 return Value (Upper_Case_Map, Item); 568 end To_Upper; 569 570 function To_Upper 571 (Item : String) return String 572 is 573 begin 574 return Result : String (1 .. Item'Length) do 575 for J in Item'Range loop 576 Result (J - (Item'First - 1)) := Value (Upper_Case_Map, Item (J)); 577 end loop; 578 end return; 579 end To_Upper; 580 581 ----------------------- 582 -- To_Wide_Character -- 583 ----------------------- 584 585 function To_Wide_Character 586 (Item : Character) return Wide_Character 587 is 588 begin 589 return Wide_Character'Val (Character'Pos (Item)); 590 end To_Wide_Character; 591 592 -------------------- 593 -- To_Wide_String -- 594 -------------------- 595 596 function To_Wide_String 597 (Item : String) return Wide_String 598 is 599 Result : Wide_String (1 .. Item'Length); 600 601 begin 602 for J in Item'Range loop 603 Result (J - (Item'First - 1)) := To_Wide_Character (Item (J)); 604 end loop; 605 606 return Result; 607 end To_Wide_String; 608 609end Ada.Characters.Handling; 610