1 /* 2 Copyright (C) 2004 - 2009 by Philippe Plantier <ayin@anathas.org> 3 Copyright (C) 2010 - 2018 by Guillaume Melquiond <guillaume.melquiond@gmail.com> 4 Part of the Battle for Wesnoth Project https://www.wesnoth.org 5 6 This program is free software; you can redistribute it and/or modify 7 it under the terms of the GNU General Public License as published by 8 the Free Software Foundation; either version 2 of the License, or 9 (at your option) any later version. 10 This program is distributed in the hope that it will be useful, 11 but WITHOUT ANY WARRANTY. 12 13 See the COPYING file for more details. 14 */ 15 16 #pragma once 17 18 //#define DEBUG_TOKENIZER 19 20 #include "buffered_istream.hpp" 21 22 #include <istream> 23 #include <string> 24 25 struct token 26 { tokentoken27 token() : 28 type(END), 29 value() 30 {} 31 32 enum token_type 33 { 34 STRING, 35 QSTRING, 36 UNTERMINATED_QSTRING, 37 MISC, 38 39 LF = '\n', 40 EQUALS = '=', 41 COMMA = ',', 42 PLUS = '+', 43 SLASH = '/', 44 OPEN_BRACKET = '[', 45 CLOSE_BRACKET = ']', 46 UNDERSCORE = '_', 47 END 48 }; 49 50 token_type type; 51 std::string value; 52 }; 53 54 /** Abstract baseclass for the tokenizer. */ 55 class tokenizer 56 { 57 public: 58 tokenizer(std::istream& in); 59 ~tokenizer(); 60 61 const token &next_token(); 62 current_token() const63 const token ¤t_token() const 64 { 65 return token_; 66 } 67 68 #ifdef DEBUG_TOKENIZER previous_token() const69 const token &previous_token() const 70 { 71 return previous_token_; 72 } 73 #endif 74 textdomain() const75 const std::string &textdomain() const 76 { 77 return textdomain_; 78 } 79 get_file() const80 const std::string &get_file() const 81 { 82 return file_; 83 } 84 get_start_line() const85 int get_start_line() const 86 { 87 return startlineno_; 88 } 89 90 private: 91 tokenizer(); 92 int current_; 93 int lineno_; 94 int startlineno_; 95 next_char()96 void next_char() 97 { 98 if (current_ == '\n') 99 ++lineno_; 100 next_char_fast(); 101 } 102 next_char_fast()103 void next_char_fast() 104 { 105 do { 106 current_ = in_.get(); 107 } while (current_ == '\r'); 108 #if 0 109 /// @todo disabled until the campaign server is fixed 110 if(in_.good()) { 111 current_ = in_.get(); 112 if (current_ == '\r') 113 { 114 // we assume that there is only one '\r' 115 if(in_.good()) { 116 current_ = in_.get(); 117 } else { 118 current_ = EOF; 119 } 120 } 121 } else { 122 current_ = EOF; 123 } 124 #endif 125 } 126 peek_char()127 int peek_char() 128 { 129 return in_.peek(); 130 } 131 132 enum 133 { 134 TOK_NONE = 0, 135 TOK_SPACE = 1, 136 TOK_NUMERIC = 2, 137 TOK_ALPHA = 4 138 }; 139 char_type(unsigned c) const140 int char_type(unsigned c) const 141 { 142 return c < 128 ? char_types_[c] : 0; 143 } 144 is_space(int c) const145 bool is_space(int c) const 146 { 147 return (char_type(c) & TOK_SPACE) == TOK_SPACE; 148 } 149 is_num(int c) const150 bool is_num(int c) const 151 { 152 return (char_type(c) & TOK_NUMERIC) == TOK_NUMERIC; 153 } 154 is_alnum(int c) const155 bool is_alnum(int c) const 156 { 157 return (char_type(c) & (TOK_ALPHA | TOK_NUMERIC)) != TOK_NONE; 158 } 159 160 void skip_comment(); 161 162 /** 163 * Returns true if the next characters are the one from @a cmd 164 * followed by a space. Skips all the matching characters. 165 */ 166 bool skip_command(char const *cmd); 167 168 std::string textdomain_; 169 std::string file_; 170 token token_; 171 #ifdef DEBUG_TOKENIZER 172 token previous_token_; 173 #endif 174 buffered_istream in_; 175 char char_types_[128]; 176 }; 177