1 /* 2 * This program source code file is part of KICAD, a free EDA CAD application. 3 * 4 * Copyright (C) 2007-2010 SoftPLC Corporation, Dick Hollenbeck <dick@softplc.com> 5 * Copyright (C) 2007-2021 Kicad Developers, see change_log.txt for contributors. 6 * 7 * This program is free software; you can redistribute it and/or 8 * modify it under the terms of the GNU General Public License 9 * as published by the Free Software Foundation; either version 2 10 * of the License, or (at your option) any later version. 11 * 12 * This program is distributed in the hope that it will be useful, 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 * GNU General Public License for more details. 16 * 17 * You should have received a copy of the GNU General Public License 18 * along with this program; if not, you may find one here: 19 * http://www.gnu.org/licenses/old-licenses/gpl-2.0.html 20 * or you may search the http://www.gnu.org website for the version 2 license, 21 * or you may write to the Free Software Foundation, Inc., 22 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA 23 */ 24 25 #ifndef DSNLEXER_H_ 26 #define DSNLEXER_H_ 27 28 #include <cstdio> 29 #include <hashtables.h> 30 #include <string> 31 #include <vector> 32 33 #include <richio.h> 34 35 #ifndef SWIG 36 /** 37 * Hold a keyword string and its unique integer token. 38 */ 39 struct KEYWORD 40 { 41 const char* name; ///< unique keyword. 42 int token; ///< a zero based index into an array of KEYWORDs 43 }; 44 #endif // SWIG 45 46 // something like this macro can be used to help initialize a KEYWORD table. 47 // see SPECCTRA_DB::keywords[] as an example. 48 49 //#define TOKDEF(x) { #x, T_##x } 50 51 52 /** 53 * List all the DSN lexer's tokens that are supported in lexing. 54 * 55 * It is up to the parser if it wants also to support them. 56 */ 57 enum DSN_SYNTAX_T 58 { 59 DSN_NONE = -11, 60 DSN_COMMENT = -10, 61 DSN_STRING_QUOTE = -9, 62 DSN_QUOTE_DEF = -8, 63 DSN_DASH = -7, 64 DSN_SYMBOL = -6, 65 DSN_NUMBER = -5, 66 DSN_RIGHT = -4, // right bracket, ')' 67 DSN_LEFT = -3, // left bracket, '(' 68 DSN_STRING = -2, // a quoted string, stripped of the quotes 69 DSN_EOF = -1 // special case for end of file 70 }; 71 72 73 /** 74 * Implement a lexical analyzer for the SPECCTRA DSN file format. 75 * 76 * It reads lexical tokens from the current #LINE_READER through the #NextTok() function. 77 */ 78 class DSNLEXER 79 { 80 public: 81 82 /** 83 * Initialize a DSN lexer and prepares to read from aFile which is already open and has 84 * \a aFilename. 85 * 86 * @param aKeywordTable is an array of KEYWORDS holding \a aKeywordCount. This 87 * token table need not contain the lexer separators such as '(' ')', etc. 88 * @param aKeywordCount is the count of tokens in aKeywordTable. 89 * @param aFile is an open file, which will be closed when this is destructed. 90 * @param aFileName is the name of the file 91 */ 92 DSNLEXER( const KEYWORD* aKeywordTable, unsigned aKeywordCount, 93 FILE* aFile, const wxString& aFileName ); 94 95 /** 96 * Initialize a DSN lexer and prepares to read from @a aSExpression. 97 * 98 * @param aKeywordTable is an array of KEYWORDS holding \a aKeywordCount. This 99 * token table need not contain the lexer separators such as '(' ')', etc. 100 * @param aKeywordCount is the count of tokens in aKeywordTable. 101 * @param aSExpression is text to feed through a STRING_LINE_READER 102 * @param aSource is a description of aSExpression, used for error reporting. 103 */ 104 DSNLEXER( const KEYWORD* aKeywordTable, unsigned aKeywordCount, 105 const std::string& aSExpression, const wxString& aSource = wxEmptyString ); 106 107 /** 108 * Initialize a DSN lexer and prepares to read from @a aSExpression. 109 * 110 * Use this one without a keyword table with the DOM parser in ptree.h. 111 * 112 * @param aSExpression is text to feed through a #STRING_LINE_READER 113 * @param aSource is a description of aSExpression, used for error reporting. 114 */ 115 DSNLEXER( const std::string& aSExpression, const wxString& aSource = wxEmptyString ); 116 117 /** 118 * Initialize a DSN lexer and prepares to read from @a aLineReader which is already 119 * open, and may be in use by other DSNLEXERs also. 120 * 121 * No ownership is taken of @a aLineReader. This enables it to be used by other DSNLEXERs. 122 * 123 * @param aKeywordTable is an array of #KEYWORDS holding \a aKeywordCount. This 124 * token table need not contain the lexer separators such as '(' ')', etc. 125 * @param aKeywordCount is the count of tokens in aKeywordTable. 126 * @param aLineReader is any subclassed instance of LINE_READER, such as 127 * #STRING_LINE_READER or #FILE_LINE_READER. No ownership is taken. 128 */ 129 DSNLEXER( const KEYWORD* aKeywordTable, unsigned aKeywordCount, 130 LINE_READER* aLineReader = nullptr ); 131 132 virtual ~DSNLEXER(); 133 134 /** 135 * Reinit variables used during parsing, to ensure od states are not used in a new parsing 136 * must be called before parsing a new file after parsing an old file to avoid 137 * starting with some variables in a non initial state 138 */ 139 void InitParserState(); 140 141 /** 142 * Usable only for DSN lexers which share the same #LINE_READER. 143 * 144 * Synchronizes the pointers handling the data read by the #LINE_READER. Allows 2 145 * #DNSLEXER objects to share the same current line, when switching from a #DNSLEXER 146 * to another #DNSLEXER 147 * @param aLexer the model. 148 * @return true if the sync can be made ( at least the same line reader ). 149 */ 150 bool SyncLineReaderWith( DSNLEXER& aLexer ); 151 152 /** 153 * Change the behavior of this lexer into or out of "specctra mode". 154 * 155 * If specctra mode, then: 156 * -#) stringDelimiter can be changed. 157 * -#) KiCad quoting protocol is not in effect. 158 * -#) space_in_quoted_tokens is functional else none of the above are true. 159 * 160 * The default mode is non-specctra mode, meaning: 161 * -#) stringDelimiter cannot be changed. 162 * -#) KiCad quoting protocol is in effect. 163 * -#) space_in_quoted_tokens is not functional. 164 */ 165 void SetSpecctraMode( bool aMode ); 166 167 /** 168 * Manage a stack of LINE_READERs in order to handle nested file inclusion. 169 * 170 * This function pushes aLineReader onto the top of a stack of LINE_READERs and makes 171 * it the current #LINE_READER with its own #GetSource(), line number and line text. 172 * A grammar must be designed such that the "include" token (whatever its various names), 173 * and any of its parameters are not followed by anything on that same line, 174 * because PopReader always starts reading from a new line upon returning to 175 * the original #LINE_READER. 176 */ 177 void PushReader( LINE_READER* aLineReader ); 178 179 /** 180 * Delete the top most #LINE_READER from an internal stack of LINE_READERs and 181 * in the case of #FILE_LINE_READER this means the associated FILE is closed. 182 * 183 * The most recently used former #LINE_READER on the stack becomes the 184 * current #LINE_READER and its previous position in its input stream and the 185 * its latest line number should pertain. PopReader always starts reading 186 * from a new line upon returning to the previous #LINE_READER. A pop is only 187 * possible if there are at least 2 #LINE_READERs on the stack, since popping 188 * the last one is not supported. 189 * 190 * @return the LINE_READER that was in use before the pop, or NULL 191 * if there was not at least two readers on the stack and therefore the 192 * pop failed. 193 */ 194 LINE_READER* PopReader(); 195 196 /** 197 * Return the next token found in the input file or DSN_EOF when reaching the end of 198 * file. 199 * 200 * Users should wrap this function to return an enum to aid in grammar debugging while 201 * running under a debugger, but leave this lower level function returning an int (so 202 * the enum does not collide with another usage). 203 * 204 * @return the type of token found next. 205 * @throw IO_ERROR only if the #LINE_READER throws it. 206 */ 207 int NextTok(); 208 209 /** 210 * Call #NextTok() and then verifies that the token read in satisfies #IsSymbol(). 211 * 212 * @return the actual token read in. 213 * @throw IO_ERROR if the next token does not satisfy IsSymbol(). 214 */ 215 int NeedSYMBOL(); 216 217 /** 218 * Call #NextTok() and then verifies that the token read in satisfies bool IsSymbol() or 219 * the next token is #DSN_NUMBER. 220 * 221 * @return the actual token read in. 222 * @throw IO_ERROR if the next token does not satisfy the above test. 223 */ 224 int NeedSYMBOLorNUMBER(); 225 226 /** 227 * Call #NextTok() and then verifies that the token read is type #DSN_NUMBER. 228 * 229 * @return the actual token read in. 230 * @throw IO_ERROR if the next token does not satisfy the above test. 231 */ 232 int NeedNUMBER( const char* aExpectation ); 233 234 /** 235 * Return whatever #NextTok() returned the last time it was called. 236 */ CurTok()237 int CurTok() const 238 { 239 return curTok; 240 } 241 242 /** 243 * Return whatever NextTok() returned the 2nd to last time it was called. 244 */ PrevTok()245 int PrevTok() const 246 { 247 return prevTok; 248 } 249 250 /** 251 * Used to support "loose" matches (quoted tokens). 252 */ GetCurStrAsToken()253 int GetCurStrAsToken() const 254 { 255 return findToken( curText ); 256 } 257 258 /** 259 * Change the string delimiter from the default " to some other character and return 260 * the old value. 261 * 262 * @param aStringDelimiter The character in lowest 8 bits. 263 * @return The old delimiter in the lowest 8 bits. 264 */ SetStringDelimiter(char aStringDelimiter)265 char SetStringDelimiter( char aStringDelimiter ) 266 { 267 int old = stringDelimiter; 268 269 if( specctraMode ) 270 stringDelimiter = aStringDelimiter; 271 272 return old; 273 } 274 275 /** 276 * Change the setting controlling whether a space in a quoted string isa terminator. 277 * 278 * @param val If true, means 279 */ SetSpaceInQuotedTokens(bool val)280 bool SetSpaceInQuotedTokens( bool val ) 281 { 282 bool old = space_in_quoted_tokens; 283 284 if( specctraMode ) 285 space_in_quoted_tokens = val; 286 287 return old; 288 } 289 290 /** 291 * Change the handling of comments. 292 * 293 * If set true, comments are returned as single line strings with a terminating newline. 294 * Otherwise they are consumed by the lexer and not returned. 295 */ SetCommentsAreTokens(bool val)296 bool SetCommentsAreTokens( bool val ) 297 { 298 bool old = commentsAreTokens; 299 commentsAreTokens = val; 300 return old; 301 } 302 303 /** 304 * Check the next sequence of tokens and reads them into a wxArrayString if they are 305 * comments. 306 * 307 * Reading continues until a non-comment token is encountered, and such last read token 308 * remains as #CurTok() and as #CurText(). No push back or "un get" mechanism is used 309 * for this support. Upon return you simply avoid calling NextTok() for the next token, 310 * but rather #CurTok(). 311 * 312 * @return Heap allocated block of comments or NULL if none. The caller owns the 313 * allocation and must delete if not NULL. 314 */ 315 wxArrayString* ReadCommentLines(); 316 317 /** 318 * Test a token to see if it is a symbol. 319 * 320 * This means it cannot be a special delimiter character such as #DSN_LEFT, #DSN_RIGHT, 321 * #DSN_QUOTE, etc. It may however, coincidentally match a keyword and still be a symbol. 322 */ 323 static bool IsSymbol( int aTok ); 324 325 /** 326 * Throw an #IO_ERROR exception with an input file specific error message. 327 * 328 * @param aTok is the token/keyword type which was expected at the current input location. 329 * @throw IO_ERROR with the location within the input file of the problem. 330 */ 331 void Expecting( int aTok ) const; 332 333 /** 334 * Throw an #IO_ERROR exception with an input file specific error message. 335 * 336 * @param aTokenList is the token/keyword type which was expected at the 337 * current input location, e.g. "pin|graphic|property". 338 * @throw IO_ERROR with the location within the input file of the problem. 339 */ 340 void Expecting( const char* aTokenList ) const; 341 342 /** 343 * Throw an #IO_ERROR exception with an input file specific error message. 344 * 345 * @param aTok is the token/keyword type which was not expected at the 346 * current input location. 347 * @throw IO_ERROR with the location within the input file of the problem. 348 */ 349 void Unexpected( int aTok ) const; 350 351 /** 352 * Throw an #IO_ERROR exception with an input file specific error message. 353 * 354 * @param aToken is the token which was not expected at the current input location. 355 * @throw IO_ERROR with the location within the input file of the problem. 356 */ 357 void Unexpected( const char* aToken ) const; 358 359 /** 360 * Throw an #IO_ERROR exception with a message saying specifically that \a aTok 361 * is a duplicate of one already seen in current context. 362 * 363 * @param aTok is the token/keyword type which was not expected at the current input 364 * location. 365 * @throw IO_ERROR with the location within the input file of the problem. 366 */ 367 void Duplicate( int aTok ); 368 369 /** 370 * Call #NextTok() and then verifies that the token read in is a #DSN_LEFT. 371 * 372 * @throw IO_ERROR if the next token is not a #DSN_LEFT 373 */ 374 void NeedLEFT(); 375 376 /** 377 * Call #NextTok() and then verifies that the token read in is a #DSN_RIGHT. 378 * 379 * @throw IO_ERROR if the next token is not a #DSN_RIGHT 380 */ 381 void NeedRIGHT(); 382 383 /** 384 * Return the C string representation of a #DSN_T value. 385 */ 386 const char* GetTokenText( int aTok ) const; 387 388 /** 389 * Return a quote wrapped wxString representation of a token value. 390 */ 391 wxString GetTokenString( int aTok ) const; 392 393 static const char* Syntax( int aTok ); 394 395 /** 396 * Return a pointer to the current token's text. 397 */ CurText()398 const char* CurText() const 399 { 400 return curText.c_str(); 401 } 402 403 /** 404 * Return a reference to current token in std::string form. 405 */ CurStr()406 const std::string& CurStr() const 407 { 408 return curText; 409 } 410 411 /** 412 * Return the current token text as a wxString, assuming that the input byte stream 413 * is UTF8 encoded. 414 */ FromUTF8()415 wxString FromUTF8() const 416 { 417 return wxString::FromUTF8( curText.c_str() ); 418 } 419 420 /** 421 * Return the current line number within my #LINE_READER. 422 */ CurLineNumber()423 int CurLineNumber() const 424 { 425 return reader->LineNumber(); 426 } 427 428 /** 429 * Return the current line of text from which the #CurText() would return its token. 430 */ CurLine()431 const char* CurLine() const 432 { 433 return (const char*)(*reader); 434 } 435 436 /** 437 * Return the current #LINE_READER source. 438 * 439 * @return source of the lines of text, e.g. a filename or "clipboard". 440 */ CurSource()441 const wxString& CurSource() const 442 { 443 return reader->GetSource(); 444 } 445 446 /** 447 * Return the byte offset within the current line, using a 1 based index. 448 * 449 * @return a one based index into the current line. 450 */ CurOffset()451 int CurOffset() const 452 { 453 return curOffset + 1; 454 } 455 456 #ifndef SWIG 457 458 protected: 459 void init(); 460 readLine()461 int readLine() 462 { 463 if( reader ) 464 { 465 reader->ReadLine(); 466 467 unsigned len = reader->Length(); 468 469 // start may have changed in ReadLine(), which can resize and 470 // relocate reader's line buffer. 471 start = reader->Line(); 472 473 next = start; 474 limit = next + len; 475 476 return len; 477 } 478 return 0; 479 } 480 481 /** 482 * Take @a aToken string and looks up the string in the keywords table. 483 * 484 * @param aToken is a string to lookup in the keywords table. 485 * @return with a value from the enum #DSN_T matching the keyword text, 486 * or #DSN_SYMBOL if @a aToken is not in the keywords table. 487 */ 488 int findToken( const std::string& aToken ) const; 489 isStringTerminator(char cc)490 bool isStringTerminator( char cc ) const 491 { 492 if( !space_in_quoted_tokens && cc == ' ' ) 493 return true; 494 495 if( cc == stringDelimiter ) 496 return true; 497 498 return false; 499 } 500 501 bool iOwnReaders; ///< on readerStack, should I delete them? 502 const char* start; 503 const char* next; 504 const char* limit; 505 char dummy[1]; ///< when there is no reader. 506 507 typedef std::vector<LINE_READER*> READER_STACK; 508 509 READER_STACK readerStack; ///< all the LINE_READERs by pointer. 510 511 ///< no ownership. ownership is via readerStack, maybe, if iOwnReaders 512 LINE_READER* reader; 513 514 bool specctraMode; ///< if true, then: 515 ///< 1) stringDelimiter can be changed 516 ///< 2) Kicad quoting protocol is not in effect 517 ///< 3) space_in_quoted_tokens is functional 518 ///< else not. 519 520 char stringDelimiter; 521 bool space_in_quoted_tokens; ///< blank spaces within quoted strings 522 523 bool commentsAreTokens; ///< true if should return comments as tokens 524 525 int prevTok; ///< curTok from previous NextTok() call. 526 int curOffset; ///< offset within current line of the current token 527 528 int curTok; ///< the current token obtained on last NextTok() 529 std::string curText; ///< the text of the current token 530 531 const KEYWORD* keywords; ///< table sorted by CMake for bsearch() 532 unsigned keywordCount; ///< count of keywords table 533 KEYWORD_MAP keyword_hash; ///< fast, specialized "C string" hashtable 534 #endif // SWIG 535 }; 536 537 #endif // DSNLEXER_H_ 538