1 /* 2 This file is part of GNU APL, a free implementation of the 3 ISO/IEC Standard 13751, "Programming Language APL, Extended" 4 5 Copyright (C) 2008-2015 Dr. Jürgen Sauermann 6 7 This program is free software: you can redistribute it and/or modify 8 it under the terms of the GNU General Public License as published by 9 the Free Software Foundation, either version 3 of the License, or 10 (at your option) any later version. 11 12 This program is distributed in the hope that it will be useful, 13 but WITHOUT ANY WARRANTY; without even the implied warranty of 14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 GNU General Public License for more details. 16 17 You should have received a copy of the GNU General Public License 18 along with this program. If not, see <http://www.gnu.org/licenses/>. 19 */ 20 21 #ifndef __AVEC_HH_DEFINED__ 22 #define __AVEC_HH_DEFINED__ 23 24 #include "Common.hh" 25 26 class Token; 27 28 29 /// The valid indices the Atomic Vector of the APL interpreter 30 enum CHT_Index 31 { 32 Invalid_CHT = -1, 33 #define char_def( n, _u, _t, _f, _p) AV_ ## n, 34 #define char_df1(_n, _u, _t, _f, _p) 35 #include "Avec.def" 36 MAX_AV, 37 }; 38 39 /// Some flags helping to classify characters 40 enum CharacterFlag 41 { 42 FLG_NONE = 0x0000, ///< no flag 43 FLG_SYMBOL = 0x0001, ///< valid char in a user defined name 44 FLG_DIGIT = 0x0002, ///< 0-9 45 FLG_NO_SPACE_AFTER = 0x0004, ///< never need a space after this char 46 FLG_NO_SPACE_BEFORE = 0x0008, ///< never need a space before this char 47 48 FLG_NO_SPACE = FLG_NO_SPACE_AFTER | FLG_NO_SPACE_BEFORE, 49 FLG_NUMERIC = FLG_DIGIT | FLG_SYMBOL, ///< 0-9, A-Z, a-z, ∆, ⍙ 50 }; 51 52 /** 53 class Avec is a collection of static functions related to the Atomic 54 Vector of the APL interpreter 55 */ 56 /// Static helper functions related to ⎕AV 57 class Avec 58 { 59 public: 60 /// init the static tables of this class and check them 61 static void init(); 62 63 /// Return the UNICODE of char table entry \b av 64 static Unicode unicode(CHT_Index av); 65 66 /// Return the UNICODE of char table entry \b av 67 static uint32_t get_av_pos(CHT_Index av); 68 69 /// Return a token containing \b av 70 static Token uni_to_token(Unicode & av, const char * loc); 71 72 /// Return \b true iff \b av is a valid char in a user defined symbol 73 static bool is_symbol_char(Unicode av); 74 75 /// return \b true iff \b av is a whitespace char (ASCII 0..32 (including)) is_white(Unicode av)76 static bool is_white(Unicode av) 77 { return av >= 0 && av <= ' '; } 78 79 /// return \b true iff \b av is one of the single quote characters ' ‘ or ’ is_single_quote(Unicode av)80 static bool is_single_quote(Unicode av) 81 { return av == UNI_SINGLE_QUOTE || 82 av == UNI_SINGLE_QUOTE1 || 83 av == UNI_SINGLE_QUOTE2; } 84 85 /// return \b true iff \b av is one of the various diamond characters is_diamond(Unicode av)86 static bool is_diamond(Unicode av) 87 { return av == UNI_DIAMOND || av == 0x22C4 || av == 0x2662 || 88 av == 0x2B25 || av == 0x2B26 || av == 0x2B27; } 89 90 /// return \b true iff \b av is a control char (ASCII 0..32 (excluding)) is_control(Unicode av)91 static bool is_control(Unicode av) 92 { return av >= 0 && av < ' '; } 93 94 /// Return \b true iff \b av is a valid char in a user defined symbol 95 static bool is_first_symbol_char(Unicode uni); 96 97 /// Return \b true iff \b av is a digit (i.e. 0 ≤ av ≤ 9) is_digit(Unicode uni)98 static bool is_digit(Unicode uni) 99 { return (uni <= UNI_ASCII_9 && uni >= UNI_ASCII_0); } 100 101 /// Return \b true iff \b av is a digit (i.e. 0 ≤ av ≤ 9) is_hex_digit(Unicode uni)102 static bool is_hex_digit(Unicode uni) 103 { return is_digit(uni) 104 || (uni <= UNI_ASCII_F && uni >= UNI_ASCII_A) 105 || (uni <= UNI_ASCII_f && uni >= UNI_ASCII_a); } 106 107 /// Return \b true iff \b av is a digit or a space is_digit_or_space(Unicode uni)108 static bool is_digit_or_space(Unicode uni) 109 { return is_digit(uni) || is_white(uni); } 110 111 /// return \b true iff \b av is a number char (digit, .) is_number(Unicode uni)112 static bool is_number(Unicode uni) 113 { return is_digit(uni) || (uni == UNI_OVERBAR); } 114 115 /// return true if unicode \b is defined by a char_def() or char_df1() macro 116 static bool is_known_char(Unicode uni); 117 118 /// return true if unicode \b uni is a quad (⎕ or ▯) is_quad(Unicode uni)119 static bool is_quad(Unicode uni) 120 { return uni == UNI_Quad_Quad || uni == UNI_Quad_Quad1; } 121 122 /// return true if unicode \b uni needs ⎕UCS in 2 ⎕TF or )OUT 123 static bool need_UCS(Unicode uni); 124 125 /// return \b true iff a token printed after \b av never needs a space 126 static bool no_space_after(Unicode uni); 127 128 /// return \b true iff a token printed before \b av never needs a space 129 static bool no_space_before(Unicode av); 130 131 /// return the subscript char for digit i 132 static Unicode subscript(uint32_t i); 133 134 /// return the superscript char for digit i 135 static Unicode superscript(uint32_t i); 136 137 /// Find \b av in \b character_table. Return AV position if found or MAX_AV 138 static uint32_t find_av_pos(Unicode av); 139 140 /// Find \b av in \b character_table. Return position or Invalid_CHT 141 static CHT_Index find_char(Unicode av); 142 143 /// Find \b return position of \b alt_av, or Invalid_AV if not found 144 static CHT_Index map_alternative_char(Unicode alt_av); 145 146 /// a pointer to 256 Unicode characters that are exactly the APL2 character 147 /// set (⎕AV) shown in lrm Appendix A page 470. The ⎕AV of GNU APL is 148 /// similar, but contains characters like ≢ that are not in IBM's ⎕AV 149 /// IBM's ⎕AV is used in the )IN command 150 static const Unicode * IBM_quad_AV(); 151 152 /// search uni in \b inverse_ibm_av and return its position in 153 /// the IBM ⎕AV (= its code in .ATF files). Return 0xB0 if not found. 154 static unsigned char unicode_to_cp(Unicode uni); 155 156 /// recompute \b inverse_ibm_av from \b ibm_av and print it 157 static void print_inverse_IBM_quad_AV(); 158 159 protected: 160 /// a Unicode and its position in the ⎕AV of IBM APL2 161 struct Unicode_to_IBM_codepoint 162 { 163 uint32_t uni; ///< the Unicode 164 uint32_t cp; ///< the IBM char for uni 165 }; 166 167 /// Unicode_to_IBM_codepoint table sorted by Unicode (for bsearch()) 168 static Unicode_to_IBM_codepoint inverse_ibm_av[]; 169 170 /// print an error position on cerr, and then Assert(0); 171 static void show_error_pos(int i, int line, bool cond, int def_line); 172 173 /// check that the character table that is used in this class is correct 174 static void check_av_table(); 175 176 /// check that all characters in the UTF-8 encoded file are known 177 /// (through char_def() or char_df1() macros) 178 static void check_file(const char * filename); 179 180 /// compare the unicodes of two entries ua and u2 in \b inverse_IBM_quad_AV 181 static int compare_uni(const void * u1, const void * u2); 182 }; 183 184 #endif // __AVEC_HH_DEFINED__ 185