1 2 /* 3 * File CParser.hpp. 4 * 5 * This file is part of the source code of the software program 6 * Vampire. It is protected by applicable 7 * copyright laws. 8 * 9 * This source code is distributed under the licence found here 10 * https://vprover.github.io/license.html 11 * and in the source directory 12 * 13 * In summary, you are allowed to use Vampire for non-commercial 14 * purposes but not allowed to distribute, modify, copy, create derivatives, 15 * or use in competitions. 16 * For other uses of Vampire please contact developers for a different 17 * licence, which we will make an effort to provide. 18 */ 19 /** 20 * @file CParser.hpp 21 * Defines class CParser for lexical analysis of C programs. 22 * 23 * @since 13/01/2011 Manchester 24 */ 25 26 #ifndef __CParser__ 27 #define __CParser__ 28 29 #include <vector> 30 31 #include "Lib/Exception.hpp" 32 #include "Lib/VString.hpp" 33 34 using namespace std; 35 36 namespace Shell { 37 38 /** 39 * Class CParser, implements a C language parser. 40 * @since 13/01/2011 Manchester 41 */ 42 class CParser 43 { 44 public: 45 /** 46 * Implements lexer exceptions. 47 * @since 14/01/2011 Manchester 48 */ 49 class LexerException 50 : public Lib::Exception 51 { 52 public: 53 LexerException(const CParser&,unsigned pos,Lib::vstring message); 54 void cry(ostream&); ~LexerException()55 ~LexerException() {} 56 protected: 57 Lib::vstring _message; 58 unsigned _pos; 59 }; // CParser::LexerException 60 61 /** 62 * Implements parser exceptions. 63 * @since 17/01/2011 Manchester 64 */ 65 class ParserException 66 : public Lib::Exception 67 { 68 public: 69 ParserException(const CParser&,unsigned pos,Lib::vstring message); 70 void cry(ostream&); ~ParserException()71 ~ParserException() {} 72 protected: 73 Lib::vstring _message; 74 unsigned _pos; 75 }; // CParser::ParserException 76 77 /** lexer token types */ 78 enum LTType { 79 /** identifier */ 80 LT_IDENTIFIER, 81 82 /** keyword auto */ 83 LT_AUTO, 84 /** keyword break */ 85 LT_BREAK, 86 /** keyword case */ 87 LT_CASE, 88 /** keyword char */ 89 LT_CHAR, 90 /** keyword const */ 91 LT_CONST, 92 /** keyword continue */ 93 LT_CONTINUE, 94 /** keyword default */ 95 LT_DEFAULT, 96 /** keyword do */ 97 LT_DO, 98 /** keyword double */ 99 LT_DOUBLE, 100 /** keyword else */ 101 LT_ELSE, 102 /** keyword enum */ 103 LT_ENUM, 104 /** keyword extern */ 105 LT_EXTERN, 106 /** keyword float */ 107 LT_FLOAT, 108 /** keyword for */ 109 LT_FOR, 110 /** keyword goto */ 111 LT_GOTO, 112 /** keyword if */ 113 LT_IF, 114 /** keyword inline */ 115 LT_INLINE, 116 /** keyword int */ 117 LT_INT, 118 /** keyword long */ 119 LT_LONG, 120 /** keyword register */ 121 LT_REGISTER, 122 /** keyword restrict */ 123 LT_RESTRICT, 124 /** keyword return */ 125 LT_RETURN, 126 /** keyword short */ 127 LT_SHORT, 128 /** keyword signed */ 129 LT_SIGNED, 130 /** keyword sizeof */ 131 LT_SIZEOF, 132 /** keyword struct */ 133 LT_STRUCT, 134 /** keyword switch */ 135 LT_SWITCH, 136 /** keyword typedef */ 137 LT_TYPEDEF, 138 /** keyword union */ 139 LT_UNION, 140 /** keyword unsigned */ 141 LT_UNSIGNED, 142 /** keyword void */ 143 LT_VOID, 144 /** keyword volatile */ 145 LT_VOLATILE, 146 /** keyword while */ 147 LT_WHILE, 148 149 /** { */ 150 LT_LBRACE, 151 /** } */ 152 LT_RBRACE, 153 /** { */ 154 LT_LPAR, 155 /** } */ 156 LT_RPAR, 157 /** ; */ 158 LT_SEMICOLON, 159 /** == */ 160 LT_EQ_OP, 161 /** = */ 162 LT_ASSIGN, 163 /** += */ 164 LT_ADD_ASSIGN, 165 /** ++ */ 166 LT_INC_OP, 167 /** + */ 168 LT_ADD, 169 /** *= */ 170 LT_MULT_ASSIGN, 171 /** * */ 172 LT_MULT, 173 /** ... */ 174 LT_ELLIPSIS, 175 /** dot */ 176 LT_DOT, 177 /** >= */ 178 LT_GE_OP, 179 /** > */ 180 LT_GREATER, 181 /** >>= */ 182 LT_RIGHT_ASSIGN, 183 /** >> */ 184 LT_RIGHT_OP, 185 /** <= */ 186 LT_LE_OP, 187 /** [ */ 188 LT_LBRACKET, 189 /** < */ 190 LT_LESS, 191 /** <<= */ 192 LT_LEFT_ASSIGN, 193 /** << */ 194 LT_LEFT_OP, 195 /** -= */ 196 LT_SUB_ASSIGN, 197 /** -- */ 198 LT_DEC_OP, 199 /** -> */ 200 LT_PTR_OP, 201 /** - */ 202 LT_MINUS, 203 /** /= */ 204 LT_DIV_ASSIGN, 205 /** / */ 206 LT_DIV, 207 /** %= */ 208 LT_MOD_ASSIGN, 209 /** % */ 210 LT_MOD, 211 /** &= */ 212 LT_AND_ASSIGN, 213 /** && */ 214 LT_AND_OP, 215 /** & */ 216 LT_AMP, 217 /** |= */ 218 LT_OR_ASSIGN, 219 /** || */ 220 LT_OR_OP, 221 /** | */ 222 LT_BAR, 223 /** ^= */ 224 LT_XOR_ASSIGN, 225 /** ^ */ 226 LT_XOR, 227 /** != */ 228 LT_NE_OP, 229 /** ! */ 230 LT_EXCLAMATION, 231 /** : */ 232 LT_COLON, 233 /** ] */ 234 LT_RBRACKET, 235 /** , */ 236 LT_COMMA, 237 /** ~ */ 238 LT_TILDE, 239 /** ? */ 240 LT_QUESTION, 241 242 /** an integer constant */ 243 LT_INT_CONST, 244 /** a long constant */ 245 LT_LONG_CONST, 246 /** an unsigned integer constant */ 247 LT_UINT_CONST, 248 /** an unsigned long constant */ 249 LT_ULONG_CONST, 250 /** a floating point constant */ 251 LT_FLOAT_CONST, 252 /** a double floating point constant */ 253 LT_DOUBLE_CONST, 254 /** a string constant */ 255 LT_STRING_CONST, 256 /** a character constant */ 257 LT_CHAR_CONST, 258 /** end-of-input */ 259 LT_EOF, 260 }; 261 262 struct Token { 263 LTType type; 264 unsigned start; 265 unsigned end; 266 }; 267 268 /** parser token types */ 269 enum PTType { 270 /** constant expression */ 271 PT_CONSTANT_EXPRESSION, 272 /** array application */ 273 PT_ARRAY_APPLICATION, 274 /** identifier */ 275 PT_IDENTIFIER, 276 }; 277 278 /** the base class for all parsed expressions */ 279 class Unit { 280 public: 281 /** the parser type of this unit */ type() const282 PTType type() const { return _type; } 283 /** create a new unit of a given type */ Unit(PTType tp,unsigned start,unsigned end)284 Unit(PTType tp,unsigned start,unsigned end) 285 : _type(tp), _start(start), _end(end) 286 {} 287 /** the start position of the unit in the array of tokens */ start() const288 unsigned start() const { return _start; } 289 /** the end position of the unit in the array of tokens */ end() const290 unsigned end() const { return _end; } 291 protected: 292 /** unit type */ 293 PTType _type; 294 /** start position in the array of tokens */ 295 unsigned _start; 296 /** end position in the array of tokens */ 297 unsigned _end; 298 }; 299 300 /** constant expression */ 301 class ConstantExpression 302 : public Unit 303 { 304 public: 305 /** create a new constant expression */ 306 ConstantExpression(unsigned start,unsigned end); 307 }; 308 309 /** identifier */ 310 class Identifier 311 : public Unit 312 { 313 public: 314 /** create a new identifier */ 315 Identifier(unsigned start,unsigned end); 316 }; 317 318 /** identifier */ 319 class ArrayApplication 320 : public Unit 321 { 322 public: 323 /** create a new array application expression */ ArrayApplication(unsigned start,unsigned end,Unit * lhs,Unit * rhs)324 ArrayApplication(unsigned start,unsigned end,Unit* lhs,Unit* rhs): 325 Unit(PT_ARRAY_APPLICATION,start,end) 326 {} 327 }; 328 329 CParser(const char* input); 330 ~CParser(); 331 void tokenize(); 332 /** return the input string */ input() const333 const char* input() const { return _input; } 334 335 #if VDEBUG 336 void output(ostream& str); 337 static const char* toString(LTType); 338 #endif 339 340 private: 341 // Lexer procedures 342 unsigned skipWhiteSpacesAndComments(unsigned pos); 343 unsigned skipToEndOfLine(unsigned pos); 344 unsigned skipToEndOfComment(unsigned pos); 345 346 LTType keyword(unsigned pos,unsigned end); 347 static bool keyword(const char* txt,const char* word,int chars); 348 349 unsigned integerTypeSuffix(unsigned start,LTType&); 350 unsigned decimalLiteral(unsigned start,LTType&); 351 unsigned octalLiteral(unsigned start,LTType&); 352 unsigned hexLiteral(unsigned start,LTType&); 353 unsigned floatingPointLiteral(unsigned start,LTType&); 354 unsigned exponent(unsigned start); 355 unsigned identifier(unsigned start); 356 unsigned numericConstant(unsigned start,LTType&); 357 unsigned stringConstant(unsigned start); 358 unsigned charConstant(unsigned start); 359 360 /** true if c is a digit 0-9 */ digit(char c)361 static bool digit(char c) { return c >= '0' && c <= '9'; } 362 static bool letter(char c); 363 static bool floatTypeSuffix(char c,LTType&); 364 static bool hexDigit(char c); 365 366 // parser procedures 367 unsigned primaryExpression(unsigned pos,bool backtrack); 368 unsigned postfixExpression(unsigned pos,bool backtrack); 369 unsigned unaryExpression(unsigned pos,bool backtrack); 370 unsigned multiplicativeExpression(unsigned pos,bool backtrack); 371 unsigned additiveExpression(unsigned pos,bool backtrack); 372 unsigned shiftExpression(unsigned pos,bool backtrack); 373 unsigned relationalExpression(unsigned pos,bool backtrack); 374 unsigned equalityExpression(unsigned pos,bool backtrack); 375 unsigned andExpression(unsigned pos,bool backtrack); 376 unsigned xorExpression(unsigned pos,bool backtrack); 377 unsigned orExpression(unsigned pos,bool backtrack); 378 unsigned logicalAndExpression(unsigned pos,bool backtrack); 379 unsigned logicalOrExpression(unsigned pos,bool backtrack); 380 unsigned conditionalExpression(unsigned pos,bool backtrack); 381 unsigned assignmentExpression(unsigned pos,bool backtrack); 382 unsigned expression(unsigned pos,bool backtrack); 383 bool consumeToken(LTType t,unsigned pos,bool backtrack); 384 unsigned argumentExpressionList(unsigned pos,bool backtrack); 385 386 /** the input string, must be null-terminated */ 387 const char* _input; 388 /** the collected list of tokens */ 389 vector<Token> _tokens; 390 /** the collected list of parsed units */ 391 vector<Unit*> _units; 392 }; // class CParser 393 394 } 395 396 #endif 397 398