1 /* 2 This file is part of GNU APL, a free implementation of the 3 ISO/IEC Standard 13751, "Programming Language APL, Extended" 4 5 Copyright (C) 2008-2015 Dr. Jürgen Sauermann 6 7 This program is free software: you can redistribute it and/or modify 8 it under the terms of the GNU General Public License as published by 9 the Free Software Foundation, either version 3 of the License, or 10 (at your option) any later version. 11 12 This program is distributed in the hope that it will be useful, 13 but WITHOUT ANY WARRANTY; without even the implied warranty of 14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 GNU General Public License for more details. 16 17 You should have received a copy of the GNU General Public License 18 along with this program. If not, see <http://www.gnu.org/licenses/>. 19 */ 20 21 #ifndef __TOKENIZER_HH_DEFINED__ 22 #define __TOKENIZER_HH_DEFINED__ 23 24 #include "Token.hh" 25 #include "UCS_string.hh" 26 27 class Token; 28 29 //----------------------------------------------------------------------------- 30 /// An iterator for UCS_string 31 class Unicode_source 32 { 33 public: 34 /// constructor: iterate over the entire string. Unicode_source(const UCS_string & s)35 Unicode_source(const UCS_string & s) 36 : str(s), 37 idx(0), 38 end(s.size()) 39 {} 40 41 /// constructor: iterate from \b from to \b to Unicode_source(const Unicode_source & src,int32_t from,int32_t to)42 Unicode_source(const Unicode_source & src, int32_t from, int32_t to) 43 : str(src.str), 44 idx(src.idx + from), 45 end(src.idx + from + to) 46 { 47 if (end > src.str.size()) end = src.str.size(); 48 if (idx > end) idx = end; 49 } 50 51 /// return the number of remaining items rest() const52 int32_t rest() const 53 { return end - idx; } 54 55 /// lookup next item operator [](int32_t i) const56 const Unicode & operator[](int32_t i) const 57 { i += idx; Assert(uint32_t(i) < uint32_t(end)); return str[i]; } 58 59 /// get next item get()60 const Unicode & get() 61 { Assert(idx < end); return str[idx++]; } 62 63 /// lookup next item without removing it operator *() const64 const Unicode & operator *() const 65 { Assert(idx < end); return str[idx]; } 66 67 /// skip the first element operator ++()68 void operator ++() 69 { Assert(idx < end); ++idx; } 70 71 /// undo skip of the current element operator --()72 void operator --() 73 { Assert(idx > 0); --idx; } 74 75 /// shrink the source to rest \b new_rest set_rest(int32_t new_rest)76 void set_rest(int32_t new_rest) 77 { Assert(new_rest <= rest()); end = idx + new_rest; } 78 79 /// skip \b count elements skip(int32_t count)80 void skip(int32_t count) 81 { idx += count; if (idx > end) idx = end; } 82 83 protected: 84 /// the source string 85 const UCS_string & str; 86 87 /// the current position 88 int32_t idx; 89 90 /// the end position (excluding) 91 int32_t end; 92 }; 93 //----------------------------------------------------------------------------- 94 /// The converter from APL input characters to APL tokens 95 class Tokenizer 96 { 97 public: 98 /// Constructor Tokenizer(ParseMode pm,const char * _loc,bool mac)99 Tokenizer(ParseMode pm, const char * _loc, bool mac) 100 : pmode(pm), 101 macro(mac), 102 loc(_loc), 103 rest_1(0), 104 rest_2(0) 105 {} 106 107 /// tokenize UTF-8 string \b input into token string \b tos. 108 ErrorCode tokenize(const UCS_string & input, Token_string & tos); 109 110 /// tokenize a primitive (1-character) function 111 static Token tokenize_function(Unicode uni); 112 113 protected: 114 /// tokenize UCS string \b input into token string \b tos. 115 void do_tokenize(const UCS_string & input, Token_string & tos); 116 117 /// tokenize a function 118 void tokenize_function(Unicode_source & src, Token_string & tos); 119 120 /// tokenize a Quad function or variable 121 void tokenize_quad(Unicode_source & src, Token_string & tos); 122 123 /// tokenize a single quoted string 124 void tokenize_string1(Unicode_source & src, Token_string & tos); 125 126 /// tokenize a double quoted string 127 void tokenize_string2(Unicode_source & src, Token_string & tos); 128 129 /// tokenize a number (integer, floating point, or complex). 130 void tokenize_number(Unicode_source & src, Token_string & tos); 131 132 /// tokenize a real number (integer or floating point). 133 bool tokenize_real(Unicode_source &src, bool & need_float, 134 APL_Float & flt_val, APL_Integer & int_val); 135 136 /// a locale-independent sscanf() 137 static int scan_real(const char * strg, APL_Float & result, 138 int E_pos, int minus_pos); 139 140 /// tokenize a symbol 141 void tokenize_symbol(Unicode_source & src, Token_string & tos); 142 143 /// the parsing mode of this parser 144 const ParseMode pmode; 145 146 /// tokenize macro code 147 const bool macro; 148 149 /// caller of this Tokenizer 150 const char * loc; 151 152 /// the characters afer caret 1 153 int rest_1; 154 155 /// the characters afer caret 2 156 int rest_2; 157 }; 158 159 #endif // __TOKENIZER_HH_DEFINED__ 160