1 /*************************************************************************** 2 syntaxreader.h - description 3 ------------------- 4 begin : Wed Nov 28 2001 5 copyright : (C) 2001-2021 by Andre Simon 6 email : a.simon@mailbox.org 7 ***************************************************************************/ 8 9 10 /* 11 This file is part of Highlight. 12 13 Highlight is free software: you can redistribute it and/or modify 14 it under the terms of the GNU General Public License as published by 15 the Free Software Foundation, either version 3 of the License, or 16 (at your option) any later version. 17 18 Highlight is distributed in the hope that it will be useful, 19 but WITHOUT ANY WARRANTY; without even the implied warranty of 20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 21 GNU General Public License for more details. 22 23 You should have received a copy of the GNU General Public License 24 along with Highlight. If not, see <http://www.gnu.org/licenses/>. 25 */ 26 27 28 #ifndef SYNTAXREADER_H 29 #define SYNTAXREADER_H 30 31 #include <vector> 32 #include <string> 33 #include <map> 34 #include <iostream> 35 #include <fstream> 36 #include <iterator> 37 #include <sstream> 38 39 #include <Diluculum/LuaState.hpp> 40 #include <Diluculum/LuaVariable.hpp> 41 #include <Diluculum/LuaFunction.hpp> 42 43 #include "regexelement.h" 44 #include "platform_fs.h" 45 #include "enums.h" 46 47 #define GLOBAL_SR_INSTANCE_NAME "HL_SRInstance" 48 49 using namespace std; 50 51 52 namespace highlight 53 { 54 class RegexElement; 55 56 /** maps keywords and the corresponding class IDs*/ 57 typedef map <string, int> KeywordMap; 58 59 /** maps embedded language names to exit delimiter regexes*/ 60 typedef map <string, string> DelimiterMap; 61 62 typedef map <string, bool> AllowInnerSectionsMap; 63 64 65 /**\brief Contains specific data of the programming language being processed. 66 67 * @author Andre Simon 68 */ 69 class SyntaxReader 70 { 71 72 public: 73 74 SyntaxReader(); 75 76 ~SyntaxReader(); 77 78 79 /** Load new language definition 80 Will only read a new language definition if the given 81 file path is not equal to the path of the current language definition. 82 \param langDefPath Path of language definition 83 \param pluginReadFilePath path to file which is read by plugin 84 \param outputType output format 85 \return LoadResult */ 86 LoadResult load( const string& langDefPath, const string& pluginReadFilePath, OutputType outputType ); 87 88 /** \return True if the next load() call would load a new language definition 89 \param langDefPath Path to language definition */ needsReload(const string & langDefPath)90 bool needsReload ( const string &langDefPath ) const 91 { 92 return currentPath!=langDefPath; 93 } 94 95 /** \return Failed regular expression */ getFailedRegex()96 string getFailedRegex() const 97 { 98 return regexErrorMsg; 99 } 100 101 /** \return Failed Lua exception description */ getLuaErrorText()102 string getLuaErrorText() const 103 { 104 return luaErrorMsg; 105 } 106 107 /** \return Prefix of raw strings */ getRawStringPrefix()108 unsigned char getRawStringPrefix() const 109 { 110 return rawStringPrefix; 111 } 112 113 /** \return Continuation Character */ getContinuationChar()114 unsigned char getContinuationChar() const 115 { 116 return continuationChar; 117 } 118 119 /** \return true if syntax highlighting is enabled*/ highlightingEnabled()120 bool highlightingEnabled() const 121 { 122 return !disableHighlighting; 123 } 124 125 /** \return True if language is case sensitive */ isIgnoreCase()126 bool isIgnoreCase() const 127 { 128 return ignoreCase; 129 } 130 131 /** \param s String 132 \return true if s is not a known keyword */ 133 bool isKeyword ( const string &s ) ; 134 135 /** \param s String 136 \return keyword list group id */ 137 int getKeywordListGroup ( const string &s ); 138 139 /** \return True if multi line comments may be nested */ allowNestedMLComments()140 bool allowNestedMLComments() const 141 { 142 return allowNestedComments; 143 } 144 145 /** \return True if highlighting is disabled 146 TODO remove method */ highlightingDisabled()147 bool highlightingDisabled() const 148 { 149 return disableHighlighting; 150 } 151 152 /** \return True if current language may be reformatted (c, c++, c#, java) */ enableReformatting()153 bool enableReformatting() const 154 { 155 return reformatCode; 156 } 157 assertDelimEqualLength()158 bool assertDelimEqualLength() const 159 { 160 return assertEqualLength; 161 } 162 163 /** \return keywords*/ getKeywords()164 const KeywordMap& getKeywords() const 165 { 166 return keywords; 167 } 168 169 /** \return keyword classes*/ getKeywordClasses()170 const vector<string>& getKeywordClasses() const 171 { 172 return keywordClasses; 173 } 174 175 /** \return regular expressions */ getRegexElements()176 const vector<RegexElement*>& getRegexElements() const 177 { 178 return regex; 179 } 180 181 /** \return list of Lua code snippets to be stored on disk */ getPersistentSnippets()182 const vector<string>& getPersistentSnippets() const 183 { 184 return persistentSnippets; 185 } 186 187 /** \return number of Lua code snippets to be stored on disk */ getPersistentSnippetsNum()188 int getPersistentSnippetsNum() const 189 { 190 return persistentSnippets.size(); 191 } 192 193 /** \return list of format override flags defined in syntax definitions */ getOverrideStyleAttributes()194 vector<int>& getOverrideStyleAttributes() 195 { 196 return overrideStyles; 197 } 198 199 /** \return description of the programming language */ getDescription()200 const string & getDescription () const 201 { 202 return langDesc; 203 } 204 getCategoryDescription()205 const string & getCategoryDescription() const 206 { 207 return categories; 208 } 209 210 211 /** \return header string defined by a plug-in */ getHeaderInjection()212 const string & getHeaderInjection () const 213 { 214 return headerInjection; 215 } 216 217 /** \return footer string defined by a plug-in */ getFooterInjection()218 const string & getFooterInjection () const 219 { 220 return footerInjection; 221 } 222 223 /** \param delimID delimiter id 224 \return true, if no closing delimiter exists (open and close delimiters are equal) 225 */ delimiterIsDistinct(int delimID)226 bool delimiterIsDistinct ( int delimID ) 227 { 228 return delimiterDistinct[delimID]; 229 } 230 231 /** \param delimID delimiter id 232 \return true, if delimiter indicates a raw string 233 */ delimiterIsRawString(int delimID)234 bool delimiterIsRawString ( int delimID ) 235 { 236 return rawStringOpenDelims[delimID]; 237 } 238 239 /** Pairs of open/close delimiters have a unique ID to test if two tokens act as delimiters 240 \param token delimiter token 241 \param s State of delimiter 242 \return delimiter ID 243 */ 244 int getOpenDelimiterID ( const string& token, State s); 245 246 /** Pairs of open/close delimiters have a unique ID to test if two tokens act as delimiters 247 \param token delimiter token 248 \param s State of delimiter 249 \param openDelimId opening delimiter retrieved with getOpenDelimiterID 250 \return true if delimiter id of token matches openDelimID 251 */ 252 bool matchesOpenDelimiter ( const string& token, State s, int openDelimId); 253 254 /** initializes end delimiter regex to switch back to host language 255 \param langPath path of embedded language definition 256 */ 257 void restoreLangEndDelim(const string&langPath); 258 259 bool allowsInnerSection(const string& langPath); 260 261 bool requiresTwoPassRun(); 262 263 bool requiresParamUpdate(); 264 265 266 string getPersistentHookConditions(); 267 268 void clearPersistentSnippets(); 269 270 /** 271 \param lang language definition name (no path, no ".lang" extension) 272 \return absolute path based on the previously loaded definition 273 */ 274 string getNewPath(const string& lang); 275 276 /** 277 \return absolute path of currently loaded definition 278 */ getCurrentPath()279 string getCurrentPath() const 280 { 281 return currentPath; 282 } 283 284 /** 285 \return encoding which is normally used for input files of this syntax 286 */ getEncodingHint()287 string getEncodingHint() const 288 { 289 return encodingHint; 290 } 291 292 /** 293 \return test function 294 */ getOverrideConfigVal(const string & name)295 string getOverrideConfigVal(const string& name) const 296 { 297 return pluginConfigOverride.count(name) ? pluginConfigOverride[name] : ""; 298 } 299 300 /** 301 \return pointer to state validation function 302 */ getValidateStateChangeFct()303 Diluculum::LuaFunction* getValidateStateChangeFct() const 304 { 305 return validateStateChangeFct; 306 } 307 /** 308 \return pointer to state decorate function 309 */ getDecorateFct()310 Diluculum::LuaFunction* getDecorateFct() const 311 { 312 return decorateFct; 313 } 314 315 /** 316 \return pointer to line begin decorate function 317 */ getDecorateLineBeginFct()318 Diluculum::LuaFunction* getDecorateLineBeginFct() const 319 { 320 return decorateLineBeginFct; 321 } 322 323 /** 324 \return pointer to line end decorate function 325 */ getDecorateLineEndFct()326 Diluculum::LuaFunction* getDecorateLineEndFct() const 327 { 328 return decorateLineEndFct; 329 } 330 331 /** 332 \return pointer to Lua state 333 */ getLuaState()334 Diluculum::LuaState* getLuaState() const 335 { 336 return luaState; 337 } 338 339 /** 340 \param chunk Lua function to be added to the function list 341 */ addUserChunk(const Diluculum::LuaFunction & chunk)342 void addUserChunk(const Diluculum::LuaFunction& chunk) 343 { 344 pluginChunks.push_back(new Diluculum::LuaFunction(chunk)); 345 } 346 347 /** 348 \param fn name of the processed input file 349 */ setInputFileName(const string & fn)350 void setInputFileName(const string& fn) { currentInputFile=fn; } 351 352 /** 353 \return name of the processed input file 354 */ 355 getInputFileName()356 string getInputFileName() const { return currentInputFile; } 357 358 /** 359 \param groupID keyword group to be stored on disk 360 \param kw keyword token to be stored on disk 361 */ 362 363 void addPersistentKeyword(unsigned int groupID, const string& kw); 364 365 /** 366 \param groupID keyword group to be stored on disk 367 \param column start of range within line 368 \param length length of range 369 \param lineNumber line number 370 \param fileName file name of processed file containing the line 371 */ 372 void addPersistentStateRange(unsigned int groupID, unsigned int column,unsigned int length, unsigned int lineNumber, const string& fileName); 373 374 /** 375 \param ls Lua state to be initialized with constants 376 \param langDefPath absolute path of language definition 377 \param pluginReadFilePath absolute path of plugin input file 378 */ 379 static void initLuaState(Diluculum::LuaState& ls, const string& langDefPath, const string& pluginReadFilePath, OutputType outputType=HTML ); 380 381 // generate a keyword class 382 unsigned int generateNewKWClass ( int classID, const char *prefix="kw" ); 383 384 int getKeywordCount() const; 385 386 private: 387 388 static const string REGEX_IDENTIFIER; 389 static const string REGEX_NUMBER; 390 static const string REGEX_ESCSEQ; 391 392 // path to loaded language definition 393 string currentPath; 394 395 // name of file being processed 396 string currentInputFile; 397 398 // Language description 399 string langDesc, categories, encodingHint; 400 401 string headerInjection, footerInjection; 402 403 string regexErrorMsg, luaErrorMsg; 404 405 KeywordMap keywords; 406 407 vector <string> keywordClasses; 408 static vector <string> persistentSnippets; 409 static set <string> persistentSyntaxDescriptions; 410 411 vector <RegexElement*> regex; 412 413 vector <int>overrideStyles; 414 415 // collect delimiters or get current delimiter in CodeGenerator::loadEmbeddedLang 416 static DelimiterMap nestedStateEndDelimiters; 417 418 static DelimiterMap pluginConfigOverride; 419 420 static AllowInnerSectionsMap allowInnerSections; 421 422 // saves if delimiter pair consists of the same delimiter symbol 423 map <int, bool> delimiterDistinct; 424 425 map <int, bool> rawStringOpenDelims; 426 427 map <int, int> matchingDelimiters; 428 429 // keywords are not case sensitive if set 430 bool ignoreCase, 431 432 // highlighting is disabled 433 disableHighlighting, 434 435 // allow nested multi line comment blocks 436 allowNestedComments, 437 438 // code formatting is enabled if set 439 reformatCode, 440 441 // string open and close delimiters must have the same length 442 assertEqualLength, 443 444 paramsNeedUpdate; 445 446 // character which is prefix of raw string (c#) 447 unsigned char rawStringPrefix; 448 449 //character which continues current style on next line 450 unsigned char continuationChar; 451 452 int keywordCount; 453 454 bool readFlag(const Diluculum::LuaVariable& var) ; 455 456 // interface for plug-ins: add keywords dynamically 457 static int luaAddKeyword (lua_State *L); 458 459 // interface for plug-ins: remove keywords dynamically 460 static int luaRemoveKeyword (lua_State *L); 461 462 static int luaAddPersistentState (lua_State *L); 463 464 static int luaOverrideParam (lua_State *L); 465 466 467 468 void addKeyword(unsigned int groupID, const string& kw); 469 470 void removeKeyword(const string& kw); 471 472 void overrideParam(const string& name, const string& val); 473 474 475 // Functions accessible in Lua State 476 Diluculum::LuaFunction* validateStateChangeFct; 477 Diluculum::LuaFunction* decorateFct, *decorateLineBeginFct, *decorateLineEndFct; 478 479 Diluculum::LuaState* luaState; // make member to allow interaction with codeparser instance 480 481 static vector<Diluculum::LuaFunction*> pluginChunks; 482 }; 483 484 } 485 #endif 486