1 /***************************************************************************
2                           syntaxreader.h  -  description
3                              -------------------
4     begin                : Wed Nov 28 2001
5     copyright            : (C) 2001-2021 by Andre Simon
6     email                : a.simon@mailbox.org
7  ***************************************************************************/
8 
9 
10 /*
11 This file is part of Highlight.
12 
13 Highlight is free software: you can redistribute it and/or modify
14 it under the terms of the GNU General Public License as published by
15 the Free Software Foundation, either version 3 of the License, or
16 (at your option) any later version.
17 
18 Highlight is distributed in the hope that it will be useful,
19 but WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 GNU General Public License for more details.
22 
23 You should have received a copy of the GNU General Public License
24 along with Highlight.  If not, see <http://www.gnu.org/licenses/>.
25 */
26 
27 
28 #ifndef SYNTAXREADER_H
29 #define SYNTAXREADER_H
30 
#include <vector>
#include <string>
#include <map>
#include <set>
#include <iostream>
#include <fstream>
#include <iterator>
#include <sstream>

#include <Diluculum/LuaState.hpp>
#include <Diluculum/LuaVariable.hpp>
#include <Diluculum/LuaFunction.hpp>

#include "regexelement.h"
#include "platform_fs.h"
#include "enums.h"
46 
// String constant "HL_SRInstance".
// NOTE(review): presumably the name of the Lua global under which a
// SyntaxReader instance is registered for the plug-in callbacks - confirm
// against the implementation file.
#define GLOBAL_SR_INSTANCE_NAME "HL_SRInstance"

// NOTE(review): a using-directive at header scope leaks into every file that
// includes this header. It is kept because including code may rely on
// unqualified standard library names.
using namespace std;
50 
51 
52 namespace highlight
53 {
class RegexElement;

/** maps keywords to the corresponding class IDs */
typedef std::map<std::string, int> KeywordMap;

/** maps embedded language names to exit delimiter regexes */
typedef std::map<std::string, std::string> DelimiterMap;

/** maps a string key to a flag
    (NOTE(review): presumably language definition path -> "inner sections
    allowed", matching SyntaxReader::allowsInnerSection - confirm) */
typedef std::map<std::string, bool> AllowInnerSectionsMap;
63 
64 
65 /**\brief Contains specific data of the programming language being processed.
66 
67 * @author Andre  Simon
68 */
69 class SyntaxReader
70 {
71 
72 public:
73 
74     SyntaxReader();
75 
76     ~SyntaxReader();
77 
78 
79     /** Load new language definition
80         Will only read a new language definition if the given
81           file path is not equal to the path of the current language definition.
82         \param langDefPath Path of language definition
83         \param pluginReadFilePath path to file which is read by plugin
84         \param outputType output format
85         \return LoadResult  */
86     LoadResult load( const string& langDefPath, const string& pluginReadFilePath,  OutputType outputType );
87 
88     /** \return True if the next load() call would load a new language definition
89         \param  langDefPath Path to language definition  */
needsReload(const string & langDefPath)90     bool needsReload ( const string &langDefPath ) const
91     {
92         return currentPath!=langDefPath;
93     }
94 
95     /** \return Failed regular expression */
getFailedRegex()96     string getFailedRegex() const
97     {
98         return regexErrorMsg;
99     }
100 
101     /** \return Failed Lua exception description */
getLuaErrorText()102     string getLuaErrorText() const
103     {
104         return luaErrorMsg;
105     }
106 
107     /** \return Prefix of raw strings */
getRawStringPrefix()108     unsigned char getRawStringPrefix() const
109     {
110         return rawStringPrefix;
111     }
112 
113     /** \return Continuation Character */
getContinuationChar()114     unsigned char getContinuationChar() const
115     {
116         return continuationChar;
117     }
118 
119     /** \return true if syntax highlighting is enabled*/
highlightingEnabled()120     bool highlightingEnabled() const
121     {
122         return !disableHighlighting;
123     }
124 
125     /** \return True if language is case sensitive */
isIgnoreCase()126     bool isIgnoreCase() const
127     {
128         return ignoreCase;
129     }
130 
131     /** \param s String
132          \return true if s is not a known keyword */
133     bool isKeyword ( const string &s ) ;
134 
135     /** \param s String
136          \return keyword list group id */
137     int getKeywordListGroup ( const string &s );
138 
139     /** \return True if multi line comments may be nested */
allowNestedMLComments()140     bool allowNestedMLComments() const
141     {
142         return allowNestedComments;
143     }
144 
145     /** \return True if highlighting is disabled
146                             TODO remove method */
highlightingDisabled()147     bool highlightingDisabled() const
148     {
149         return disableHighlighting;
150     }
151 
152     /** \return True if current language may be reformatted (c, c++, c#, java) */
enableReformatting()153     bool enableReformatting() const
154     {
155         return reformatCode;
156     }
157 
assertDelimEqualLength()158     bool assertDelimEqualLength() const
159     {
160       return assertEqualLength;
161     }
162 
163     /** \return keywords*/
getKeywords()164     const KeywordMap& getKeywords() const
165     {
166         return keywords;
167     }
168 
169     /** \return keyword classes*/
getKeywordClasses()170     const vector<string>& getKeywordClasses() const
171     {
172         return keywordClasses;
173     }
174 
175     /** \return regular expressions */
getRegexElements()176     const vector<RegexElement*>& getRegexElements() const
177     {
178         return regex;
179     }
180 
181     /** \return list of Lua code snippets to be stored on disk */
getPersistentSnippets()182     const vector<string>& getPersistentSnippets() const
183     {
184         return persistentSnippets;
185     }
186 
187     /** \return number of Lua code snippets to be stored on disk */
getPersistentSnippetsNum()188     int getPersistentSnippetsNum() const
189     {
190         return persistentSnippets.size();
191     }
192 
193     /** \return list of format override flags defined in syntax definitions */
getOverrideStyleAttributes()194     vector<int>& getOverrideStyleAttributes()
195     {
196         return overrideStyles;
197     }
198 
199     /** \return description of the programming language */
getDescription()200     const string & getDescription () const
201     {
202         return langDesc;
203     }
204 
getCategoryDescription()205     const string & getCategoryDescription() const
206     {
207         return categories;
208     }
209 
210 
211     /** \return header string defined by a plug-in */
getHeaderInjection()212     const string & getHeaderInjection () const
213     {
214         return headerInjection;
215     }
216 
217     /** \return footer string defined by a plug-in */
getFooterInjection()218     const string & getFooterInjection () const
219     {
220         return footerInjection;
221     }
222 
223     /**  \param delimID delimiter id
224          \return true,  if no closing delimiter exists (open and close delimiters are equal)
225      */
delimiterIsDistinct(int delimID)226     bool delimiterIsDistinct ( int delimID )
227     {
228         return delimiterDistinct[delimID];
229     }
230 
231     /**  \param delimID delimiter id
232          \return true,  if delimiter indicates a raw string
233      */
delimiterIsRawString(int delimID)234     bool delimiterIsRawString ( int delimID )
235     {
236         return rawStringOpenDelims[delimID];
237     }
238 
239     /**  Pairs of open/close delimiters have a unique ID to test if two tokens act as delimiters
240          \param token delimiter token
241                      \param s State of delimiter
242          \return delimiter ID
243      */
244     int getOpenDelimiterID ( const string& token, State s);
245 
246     /**  Pairs of open/close delimiters have a unique ID to test if two tokens act as delimiters
247          \param token delimiter token
248                      \param s State of delimiter
249                      \param openDelimId opening delimiter retrieved with getOpenDelimiterID
250          \return true if delimiter id of token matches openDelimID
251      */
252     bool matchesOpenDelimiter ( const string& token, State s, int openDelimId);
253 
254     /** initializes end delimiter regex to switch back to host language
255     	\param langPath path of embedded language definition
256     */
257     void restoreLangEndDelim(const string&langPath);
258 
259     bool allowsInnerSection(const string& langPath);
260 
261     bool requiresTwoPassRun();
262 
263     bool requiresParamUpdate();
264 
265 
266     string getPersistentHookConditions();
267 
268     void clearPersistentSnippets();
269 
270     /**
271     	\param lang language definition name  (no path, no ".lang" extension)
272     	\return absolute path based on the previously loaded definition
273     */
274     string getNewPath(const string& lang);
275 
276     /**
277     	\return absolute path of currently loaded definition
278     */
getCurrentPath()279     string getCurrentPath() const
280     {
281         return currentPath;
282     }
283 
284     /**
285     	\return encoding which is normally used for input files of this syntax
286     */
getEncodingHint()287     string getEncodingHint() const
288     {
289         return encodingHint;
290     }
291 
292      /**
293     	\return test function
294     */
getOverrideConfigVal(const string & name)295     string getOverrideConfigVal(const string& name) const
296     {
297         return pluginConfigOverride.count(name) ? pluginConfigOverride[name] : "";
298     }
299 
300     /**
301     	\return pointer to state validation function
302     */
getValidateStateChangeFct()303     Diluculum::LuaFunction* getValidateStateChangeFct() const
304     {
305         return validateStateChangeFct;
306     }
307     /**
308     	\return pointer to state decorate function
309     */
getDecorateFct()310     Diluculum::LuaFunction* getDecorateFct() const
311     {
312         return decorateFct;
313     }
314 
315     /**
316     	\return pointer to line begin decorate function
317     */
getDecorateLineBeginFct()318     Diluculum::LuaFunction* getDecorateLineBeginFct() const
319     {
320         return decorateLineBeginFct;
321     }
322 
323     /**
324     	\return pointer to line end decorate function
325     */
getDecorateLineEndFct()326     Diluculum::LuaFunction* getDecorateLineEndFct() const
327     {
328         return decorateLineEndFct;
329     }
330 
331     /**
332     	\return pointer to Lua state
333     */
getLuaState()334     Diluculum::LuaState* getLuaState() const
335     {
336         return luaState;
337     }
338 
339     /**
340     	\param chunk Lua function to be added to the function list
341     */
addUserChunk(const Diluculum::LuaFunction & chunk)342     void addUserChunk(const Diluculum::LuaFunction& chunk)
343     {
344         pluginChunks.push_back(new Diluculum::LuaFunction(chunk));
345     }
346 
347     /**
348     	\param fn name of the processed input file
349     */
setInputFileName(const string & fn)350     void setInputFileName(const string& fn) { currentInputFile=fn; }
351 
352     /**
353     	\return name of the processed input file
354     */
355 
getInputFileName()356     string getInputFileName() const { return currentInputFile; }
357 
358     /**
359     	\param groupID keyword group to be stored on disk
360     	\param kw keyword token to be stored on disk
361     */
362 
363     void addPersistentKeyword(unsigned int groupID, const string& kw);
364 
365     /**
366     	\param groupID keyword group to be stored on disk
367     	\param column start of range within line
368     	\param length length of range
369     	\param lineNumber line number
370     	\param fileName file name of processed file containing the line
371     	*/
372     void addPersistentStateRange(unsigned int groupID, unsigned int column,unsigned int length, unsigned int lineNumber, const string& fileName);
373 
374     /**
375         \param ls Lua state to be initialized with constants
376         \param langDefPath absolute path of language definition
377         \param pluginReadFilePath absolute path of plugin input file
378     */
379     static void initLuaState(Diluculum::LuaState& ls, const string& langDefPath, const string& pluginReadFilePath, OutputType outputType=HTML );
380 
381     // generate a keyword class
382     unsigned int generateNewKWClass ( int classID, const char *prefix="kw" );
383 
384     int getKeywordCount() const;
385 
386 private:
387 
388     static const string REGEX_IDENTIFIER;
389     static const string REGEX_NUMBER;
390     static const string REGEX_ESCSEQ;
391 
392     // path to loaded language definition
393     string currentPath;
394 
395     // name of file being processed
396     string currentInputFile;
397 
398     // Language description
399     string langDesc, categories, encodingHint;
400 
401     string headerInjection, footerInjection;
402 
403     string regexErrorMsg, luaErrorMsg;
404 
405     KeywordMap keywords;
406 
407     vector <string> keywordClasses;
408     static vector <string> persistentSnippets;
409     static set <string> persistentSyntaxDescriptions;
410 
411     vector <RegexElement*> regex;
412 
413     vector <int>overrideStyles;
414 
415     // collect delimiters or get current delimiter in CodeGenerator::loadEmbeddedLang
416     static DelimiterMap nestedStateEndDelimiters;
417 
418     static DelimiterMap pluginConfigOverride;
419 
420     static AllowInnerSectionsMap allowInnerSections;
421 
422     // saves if delimiter pair consists of the same delimiter symbol
423     map <int, bool> delimiterDistinct;
424 
425     map <int, bool> rawStringOpenDelims;
426 
427     map <int, int> matchingDelimiters;
428 
429     // keywords are not case sensitive if set
430     bool ignoreCase,
431 
432          // highlighting is disabled
433          disableHighlighting,
434 
435          // allow nested multi line comment blocks
436          allowNestedComments,
437 
438          // code formatting is enabled if set
439          reformatCode,
440 
441          // string open and close delimiters must have the same length
442          assertEqualLength,
443 
444          paramsNeedUpdate;
445 
446     // character which is prefix of raw string (c#)
447     unsigned char rawStringPrefix;
448 
449     //character which continues current style on next line
450     unsigned char continuationChar;
451 
452     int keywordCount;
453 
454     bool readFlag(const Diluculum::LuaVariable& var) ;
455 
456     // interface for plug-ins: add keywords dynamically
457     static int luaAddKeyword (lua_State *L);
458 
459     // interface for plug-ins: remove keywords dynamically
460     static int luaRemoveKeyword (lua_State *L);
461 
462     static int luaAddPersistentState (lua_State *L);
463 
464     static int luaOverrideParam (lua_State *L);
465 
466 
467 
468     void addKeyword(unsigned int groupID, const string& kw);
469 
470     void removeKeyword(const string& kw);
471 
472     void overrideParam(const string& name, const string& val);
473 
474 
475     // Functions accessible in Lua State
476     Diluculum::LuaFunction* validateStateChangeFct;
477     Diluculum::LuaFunction* decorateFct, *decorateLineBeginFct, *decorateLineEndFct;
478 
479     Diluculum::LuaState* luaState; // make member to allow interaction with codeparser instance
480 
481     static vector<Diluculum::LuaFunction*> pluginChunks;
482 };
483 
484 }
485 #endif
486