1 /***************************************************************************
2                           codegenerator.cpp  -  description
3                              -------------------
4     begin                : Die Jul 9 2002
5     copyright            : (C) 2002-2021 by Andre Simon
6     email                : a.simon@mailbox.org
7  ***************************************************************************/
8 
9 
10 /*
11 This file is part of Highlight.
12 
13 Highlight is free software: you can redistribute it and/or modify
14 it under the terms of the GNU General Public License as published by
15 the Free Software Foundation, either version 3 of the License, or
16 (at your option) any later version.
17 
18 Highlight is distributed in the hope that it will be useful,
19 but WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 GNU General Public License for more details.
22 
23 You should have received a copy of the GNU General Public License
24 along with Highlight.  If not, see <http://www.gnu.org/licenses/>.
25 */
26 
27 
28 #include <climits>
29 #include <memory>
30 
31 #include <chrono>
32 #include <thread>
33 
34 
35 #include <boost/xpressive/xpressive_dynamic.hpp>
36 
37 #include "codegenerator.h"
38 
39 #include "htmlgenerator.h"
40 #include "xhtmlgenerator.h"
41 #include "rtfgenerator.h"
42 #include "latexgenerator.h"
43 #include "texgenerator.h"
44 #include "svggenerator.h"
45 #include "bbcodegenerator.h"
46 #include "pangogenerator.h"
47 #include "odtgenerator.h"
48 #include "astyle/astyle.h"
49 
50 #if !defined (QT)
51 #include "ansigenerator.h"
52 #include "xterm256generator.h"
53 #endif
54 
55 namespace highlight
56 {
57 const unsigned int CodeGenerator::NUMBER_BUILTIN_STATES = highlight::KEYWORD;
58 
59 // must not start with kw, st, sm prefixes
60 const string CodeGenerator::STY_NAME_STD="def";
61 const string CodeGenerator::STY_NAME_STR="sng";
62 const string CodeGenerator::STY_NAME_NUM="num";
63 const string CodeGenerator::STY_NAME_SLC="slc";
64 const string CodeGenerator::STY_NAME_COM="com";
65 const string CodeGenerator::STY_NAME_ESC="esc";
66 const string CodeGenerator::STY_NAME_DIR="ppc"; //preprocessor
67 const string CodeGenerator::STY_NAME_DST="pps"; //preprocessor string
68 const string CodeGenerator::STY_NAME_LIN="lin";
69 const string CodeGenerator::STY_NAME_SYM="opt"; //operator
70 const string CodeGenerator::STY_NAME_IPL="ipl"; //interpolation
71 
72 const string CodeGenerator::STY_NAME_HVR="hvr";
73 const string CodeGenerator::STY_NAME_ERR="err";
74 const string CodeGenerator::STY_NAME_ERM="erm";
75 
76 vector<Diluculum::LuaFunction*> CodeGenerator::pluginChunks;
77 
78 
getInstance(OutputType type)79 CodeGenerator * CodeGenerator::getInstance ( OutputType type )
80 {
81     CodeGenerator* generator=NULL;
82     switch ( type ) {
83     case HTML:
84         generator = new HtmlGenerator();
85         break;
86     case XHTML:
87         generator = new XHtmlGenerator();
88         break;
89     case TEX:
90         generator = new TexGenerator ();
91         break;
92     case LATEX:
93         generator = new LatexGenerator();
94         break;
95     case RTF:
96         generator = new RtfGenerator ();
97         break;
98     case SVG:
99         generator = new SVGGenerator();
100         break;
101     case BBCODE:
102         generator = new BBCodeGenerator();
103         break;
104     case PANGO:
105         generator = new PangoGenerator();
106         break;
107     case ODTFLAT:
108         generator = new ODTGenerator();
109         break;
110     case ESC_ANSI:
111         generator = new AnsiGenerator();
112         break;
113     case ESC_XTERM256:
114     case ESC_TRUECOLOR:
115         generator = new Xterm256Generator();
116         generator->setESCTrueColor(type==ESC_TRUECOLOR);
117         break;
118     default:
119         break;
120     }
121     return generator;
122 }
123 
124 
CodeGenerator(highlight::OutputType type)125 CodeGenerator::CodeGenerator ( highlight::OutputType type )
126     :currentSyntax(NULL),
127      in ( NULL ),
128      out ( NULL ),
129      encoding ( "none" ),
130      docTitle ( "Source file" ),
131      maskWs ( false ),
132      excludeWs ( false ),
133      fragmentOutput ( false ),
134      keepInjections( false ),
135      showLineNumbers ( false ),
136      lineNumberFillZeroes ( false ),
137      printNewLines(true),
138      omitVersionComment(false),
139      isolateTags(false),
140      disableStyleCache(false),
141      baseFontSize("10"),
142      lineNumber ( 0 ),
143      lineNumberOffset ( 0 ),
144      currentState ( _UNKNOWN ),
145      currentKeywordClass ( 0 ),
146      includeStyleDef ( false ),
147      numberCurrentLine ( false ),
148      lineIndex ( 0 ),
149      lastLineLength( 0 ),
150      syntaxChangeIndex(UINT_MAX),
151      syntaxChangeLineNo(UINT_MAX),
152      lineNumberWidth ( 5 ),
153      startLineCnt( 1 ),
154      startLineCntCurFile( 1 ),
155      maxLineCnt ( UINT_MAX ),
156      inputFilesCnt (0),
157      processedFilesCnt (0),
158      kwOffset(0),
159      noTrailingNewLine(0),
160 
161      terminatingChar ( '\0' ),
162      formatter ( NULL ),
163      streamIterator ( NULL ),
164      formattingEnabled ( false ),
165      formattingPossible ( false ),
166      validateInput ( false ),
167      numberWrappedLines ( true ),
168      resultOfHook(false),
169      lineContainedTestCase(false),
170      lineContainedStmt(false),
171      applySyntaxTestCase(false),
172      toggleDynRawString(false),
173      lsEnableHoverRequests(false),
174      lsCheckSemanticTokens(false),
175      lsCheckSyntaxErrors(false),
176 
177      keywordCase ( StringTools::CASE_UNCHANGED ),
178      eolDelimiter ('\n'),
179      outputType ( type )
180 {
181 }
182 
183 
~CodeGenerator()184 CodeGenerator::~CodeGenerator()
185 {
186     delete formatter;
187     delete streamIterator;
188 
189     resetSyntaxReaders();
190 
191     for (unsigned int i=0; i<pluginChunks.size(); i++) {
192         delete pluginChunks[i];
193     }
194     pluginChunks.clear();
195 }
196 
197 
initTheme(const string & themePath,bool loadSemanticStyles)198 bool CodeGenerator::initTheme ( const string& themePath, bool loadSemanticStyles)
199 {
200     this->themePath=themePath;
201     bool loadOK = docStyle.load ( themePath, outputType, loadSemanticStyles );
202     initOutputTags();
203     return loadOK;
204 }
205 
initLanguageServer(const string & executable,const vector<string> & options,const string & workspace,const string & syntax,int delay,int logLevel)206 LSResult CodeGenerator::initLanguageServer ( const string& executable, const vector<string> &options,
207                                              const string& workspace, const string& syntax,
208                                              int delay, int logLevel )
209 {
210     if (LSPClient.isInitialized()) {
211         return LSResult::INIT_OK;
212     }
213 
214     LSPClient.setLogging(logLevel>1);
215 
216     LSPClient.setExecutable(executable);
217     LSPClient.setWorkspace(workspace);
218     LSPClient.setOptions(options);
219     LSPClient.setSyntax(syntax);
220     LSPClient.setInitializeDelay(delay);
221     if (!LSPClient.connect()){
222         return LSResult::INIT_BAD_PIPE;
223     }
224 
225     if (!LSPClient.runInitialize()){
226         return LSResult::INIT_BAD_REQUEST;
227     }
228     for (int i=0; i<docStyle.getSemanticTokenStyleCount();i++) {
229         currentSyntax->generateNewKWClass(i+1, "st");
230     }
231     LSPClient.runInitialized();
232     updateKeywordClasses();
233     return LSResult::INIT_OK;
234 }
235 
lsOpenDocument(const string & fileName,const string & suffix)236 bool CodeGenerator::lsOpenDocument(const string& fileName, const string & suffix){
237     lsDocumentPath = fileName;
238     return LSPClient.runDidOpen(fileName, suffix);
239 }
240 
lsCloseDocument(const string & fileName,const string & suffix)241 bool CodeGenerator::lsCloseDocument(const string& fileName, const string & suffix){
242     lsDocumentPath.clear();
243     return LSPClient.runDidClose(fileName, suffix);
244 }
245 
lsAddSemanticInfo(const string & fileName,const string & suffix)246 bool CodeGenerator::lsAddSemanticInfo(const string& fileName, const string & suffix){
247     lsCheckSemanticTokens = LSPClient.runSemanticTokensFull(fileName);
248     return lsCheckSemanticTokens;
249 }
250 
isHoverProvider()251 bool CodeGenerator::isHoverProvider(){
252     return LSPClient.isHoverProvider();
253 }
254 
isSemanticTokensProvider()255 bool CodeGenerator::isSemanticTokensProvider(){
256     return LSPClient.isSemanticTokensProvider();
257 }
258 
lsAddHoverInfo(bool hover)259 void CodeGenerator::lsAddHoverInfo(bool hover){
260     lsEnableHoverRequests = hover;
261 }
262 
lsAddSyntaxErrorInfo(bool error)263 void CodeGenerator::lsAddSyntaxErrorInfo(bool error) {
264     lsCheckSyntaxErrors = error;;
265 }
266 
267 
exitLanguageServer()268 void CodeGenerator::exitLanguageServer () {
269     LSPClient.runShutdown();
270     LSPClient.runExit();
271 }
272 
getStyleName()273 const string& CodeGenerator::getStyleName()
274 {
275     return themePath;
276 }
277 
setLineNumberWidth(int w)278 void CodeGenerator::setLineNumberWidth ( int w )
279 {
280     lineNumberWidth=w;
281 }
282 
getLineNumberWidth()283 int CodeGenerator::getLineNumberWidth()
284 {
285     return lineNumberWidth;
286 }
287 
setPrintLineNumbers(bool flag,unsigned int startCnt)288 void CodeGenerator::setPrintLineNumbers ( bool flag, unsigned int startCnt )
289 {
290     showLineNumbers=flag;
291     lineNumberOffset = startCnt-1;
292 }
293 
getPrintLineNumbers()294 bool CodeGenerator::getPrintLineNumbers()
295 {
296     return showLineNumbers;
297 }
298 
setPrintZeroes(bool flag)299 void CodeGenerator::setPrintZeroes ( bool flag )
300 {
301     lineNumberFillZeroes=flag;
302 }
303 
getPrintZeroes()304 bool CodeGenerator::getPrintZeroes()
305 {
306     return lineNumberFillZeroes;
307 }
308 
setIncludeStyle(bool flag)309 void CodeGenerator::setIncludeStyle ( bool flag )
310 {
311     includeStyleDef = flag;
312 }
313 
disableTrailingNL(int flag)314 void CodeGenerator::disableTrailingNL ( int flag )
315 {
316     noTrailingNewLine = flag;
317 }
318 
setStyleInputPath(const string & path)319 void CodeGenerator::setStyleInputPath ( const string& path )
320 {
321     styleInputPath = path;
322 }
323 
setStyleOutputPath(const string & path)324 void CodeGenerator::setStyleOutputPath ( const string& path )
325 {
326     styleOutputPath = path;
327 }
328 
setPluginParameter(const string & param)329 void CodeGenerator::setPluginParameter ( const string& param )
330 {
331     pluginParameter = param;
332 }
333 
getStyleInputPath()334 const string&  CodeGenerator::getStyleInputPath()
335 {
336     return styleInputPath;
337 }
338 
getStyleOutputPath()339 const string&  CodeGenerator::getStyleOutputPath()
340 {
341     return styleOutputPath;
342 }
343 
setFragmentCode(bool flag)344 void CodeGenerator::setFragmentCode ( bool flag )
345 {
346     fragmentOutput=flag;
347 }
348 
getFragmentCode()349 bool CodeGenerator::getFragmentCode()
350 {
351     return fragmentOutput;
352 }
setKeepInjections(bool flag)353 void CodeGenerator::setKeepInjections ( bool flag )
354 {
355     keepInjections=flag;
356 }
357 
getKeepInjections()358 bool CodeGenerator::getKeepInjections()
359 {
360     return keepInjections;
361 }
setValidateInput(bool flag)362 void CodeGenerator::setValidateInput ( bool flag )
363 {
364     validateInput=flag;
365 }
366 
getValidateInput()367 bool CodeGenerator::getValidateInput()
368 {
369     return validateInput;
370 }
371 
setNumberWrappedLines(bool flag)372 void CodeGenerator::setNumberWrappedLines ( bool flag )
373 {
374     numberWrappedLines=flag;
375 }
376 
getNumberWrappedLines()377 bool CodeGenerator::getNumberWrappedLines()
378 {
379     return numberWrappedLines;
380 }
381 
setOmitVersionComment(bool flag)382 void CodeGenerator::setOmitVersionComment ( bool flag )
383 {
384     omitVersionComment=flag;
385 }
386 
getOmitVersionComment()387 bool CodeGenerator::getOmitVersionComment ()
388 {
389     return omitVersionComment;
390 }
391 
setIsolateTags(bool flag)392 void CodeGenerator::setIsolateTags ( bool flag )
393 {
394     isolateTags=flag;
395 }
396 
getIsolateTags()397 bool CodeGenerator::getIsolateTags ()
398 {
399     return isolateTags;
400 }
401 
setBaseFont(const string & fontName)402 void CodeGenerator::setBaseFont ( const string& fontName )
403 {
404     baseFont = fontName;
405 }
406 
setBaseFontSize(const string & fontSize)407 void CodeGenerator::setBaseFontSize ( const string& fontSize)
408 {
409     baseFontSize = fontSize;
410 }
411 
setStyleCaching(bool flag)412 void CodeGenerator::setStyleCaching ( bool flag )
413 {
414     disableStyleCache=!flag;
415 }
416 
getBaseFont() const417 const string CodeGenerator::getBaseFont() const
418 {
419     if ( !baseFont.empty() ) return baseFont;
420     switch ( outputType ) {
421     case HTML:
422     case XHTML:
423     case SVG:
424         return "'Courier New',monospace";
425         break;
426     case LATEX:
427         return "ttfamily";
428         break;
429     case TEX:
430         return "tt";
431         break;
432     default:
433         return "Courier New";
434     }
435 }
436 
getBaseFontSize()437 const string CodeGenerator::getBaseFontSize()
438 {
439     return baseFontSize;
440 }
441 
setTitle(const string & title)442 void CodeGenerator::setTitle ( const string & title )
443 {
444     if ( !title.empty() ) docTitle= title;
445 }
446 
getTitle()447 string CodeGenerator::getTitle()
448 {
449     return docTitle;
450 }
451 
setEncoding(const string & encodingName)452 void CodeGenerator::setEncoding ( const string& encodingName )
453 {
454     encoding = encodingName;
455 }
456 
formattingDisabled()457 bool CodeGenerator::formattingDisabled()
458 {
459     return !formattingEnabled;
460 }
461 
setStartingInputLine(unsigned int begin)462 void CodeGenerator::setStartingInputLine ( unsigned int begin )
463 {
464     startLineCnt = startLineCntCurFile = begin;
465 }
466 
setMaxInputLineCnt(unsigned int cnt)467 void CodeGenerator::setMaxInputLineCnt ( unsigned int cnt )
468 {
469     maxLineCnt = cnt;
470 }
471 
setFilesCnt(unsigned int cnt)472 void CodeGenerator::setFilesCnt ( unsigned int cnt )
473 {
474     inputFilesCnt = cnt;
475     processedFilesCnt = 0;
476 }
477 
formattingIsPossible()478 bool CodeGenerator::formattingIsPossible()
479 {
480     return formattingPossible;
481 }
482 
setPreformatting(WrapMode lineWrappingStyle,unsigned int lineLength,int numberSpaces)483 void CodeGenerator::setPreformatting ( WrapMode lineWrappingStyle,
484                                        unsigned int lineLength,
485                                        int numberSpaces )
486 {
487     bool enableWrap = lineWrappingStyle!=WRAP_DISABLED;
488     bool replaceTabs = numberSpaces > 0;
489 
490     if ( enableWrap || replaceTabs ) {
491         preFormatter.setWrap ( enableWrap );
492         preFormatter.setWrapIndentBraces ( lineWrappingStyle==WRAP_DEFAULT );
493         preFormatter.setWrapLineLength ( lineLength );
494         preFormatter.setReplaceTabs ( replaceTabs );
495         preFormatter.setNumberSpaces ( numberSpaces );
496     }
497 }
498 
setKeyWordCase(StringTools::KeywordCase keyCase)499 void CodeGenerator::setKeyWordCase ( StringTools::KeywordCase keyCase )
500 {
501     keywordCase = keyCase;
502 }
503 
setEOLDelimiter(char delim)504 void CodeGenerator::setEOLDelimiter(char delim)
505 {
506     eolDelimiter = delim;
507 }
508 
reset()509 void CodeGenerator::reset()
510 {
511     lineIndex = 0;
512     lineNumber = 0;
513     line.clear();
514     preFormatter.reset();
515     inFile.clear();
516     outFile.clear();
517     embedLangDefPath.clear();
518     printNewLines=true;
519     syntaxChangeIndex = syntaxChangeLineNo = UINT_MAX;
520     startLineCntCurFile = startLineCnt;
521     applySyntaxTestCase=lineContainedTestCase=false;
522     if (currentSyntax){
523         vector<int> overrideStyleAttrs=currentSyntax->getOverrideStyleAttributes();
524         docStyle.overrideAttributes(overrideStyleAttrs);
525         if (overrideStyleAttrs.size())
526             disableStyleCache = true;
527     }
528 }
529 
getThemeInitError()530 string CodeGenerator::getThemeInitError()
531 {
532     return docStyle.getErrorMessage();
533 }
534 
getPluginScriptError()535 string CodeGenerator::getPluginScriptError()
536 {
537     return userScriptError;
538 }
539 
getSyntaxRegexError()540 string CodeGenerator::getSyntaxRegexError()
541 {
542     return (currentSyntax)? currentSyntax->getFailedRegex(): "syntax undef";
543 }
getSyntaxLuaError()544 string CodeGenerator::getSyntaxLuaError()
545 {
546     return (currentSyntax)? currentSyntax->getLuaErrorText(): "syntax undef";
547 
548 }
getSyntaxDescription()549 string CodeGenerator::getSyntaxDescription()
550 {
551     return (currentSyntax)? currentSyntax->getDescription(): "syntax undef";
552 
553 }
getSyntaxEncodingHint()554 string CodeGenerator::getSyntaxEncodingHint()
555 {
556     return (currentSyntax)? currentSyntax->getEncodingHint(): "";
557 
558 }
getThemeDescription()559 string CodeGenerator::getThemeDescription()
560 {
561     return docStyle.getDescription();
562 }
563 
getSyntaxCatDescription()564 string CodeGenerator::getSyntaxCatDescription(){
565     return (currentSyntax)? currentSyntax->getCategoryDescription(): "";
566 }
567 
getThemeCatDescription()568 string CodeGenerator::getThemeCatDescription()
569 {
570     return docStyle.getCategoryDescription();
571 }
572 
getThemeContrast()573 float CodeGenerator::getThemeContrast()
574 {
575     return docStyle.getContrast();
576 }
577 
getLineNumber()578 unsigned int CodeGenerator::getLineNumber()
579 {
580     return lineNumber;
581 }
582 
readNewLine(string & newLine)583 bool CodeGenerator::readNewLine ( string &newLine )
584 {
585     bool eof=false;
586 
587     if ( lineIndex ) terminatingChar=newLine[lineIndex-1];
588 
589     while (!eof && startLineCntCurFile>0) {
590         if ( formattingPossible && formattingEnabled ) {
591             eof=!formatter->hasMoreLines();
592             if ( !eof ) {
593                 newLine = formatter->nextLine();
594             }
595         } else {
596             eof = ! getline ( *in, newLine, eolDelimiter );
597         }
598         --startLineCntCurFile;
599     }
600     startLineCntCurFile=1;
601 #ifndef _WIN32
602     // drop CR of CRLF files
603     if (!newLine.empty() && newLine[newLine.size() - 1] == '\r')
604         newLine.erase(newLine.size() - 1);
605 #endif
606 
607     return eof || ( lineNumber == maxLineCnt );
608 }
609 
matchRegex(const string & line,State skipState)610 void CodeGenerator::matchRegex ( const string &line, State skipState)
611 {
612     regexGroups.clear();
613     int matchBegin=0;
614     int groupID=0;
615 
616     // cycle through all regex, save the start and ending indices of matches to report them later
617     for ( unsigned int i=0; i<currentSyntax->getRegexElements().size(); i++ ) {
618         RegexElement *regexElem = currentSyntax->getRegexElements() [i];
619 
620         if (regexElem->open == skipState) continue;
621 
622         if (regexElem->constraintLineNum && regexElem->constraintLineNum != lineNumber) {
623             continue;
624         }
625 
626         if (regexElem->constraintFilename.size() && regexElem->constraintFilename != inFile) {
627             continue;
628         }
629 
630         boost::xpressive::sregex_iterator cur( line.begin(), line.end(), regexElem->rex );
631         boost::xpressive::sregex_iterator end;
632 
633         for( ; cur != end; ++cur )  {
634             groupID = ( regexElem->capturingGroup<0 ) ? cur->size()-1 : regexElem->capturingGroup;
635             matchBegin = cur->position(groupID);
636 
637             regexGroups.insert (
638                 make_pair ( matchBegin + 1, RegexToken ( regexElem->open, cur->length(groupID), regexElem->kwClass, regexElem->langName ) ) );
639 
640             // priority regex (match required)
641             if (regexElem->priority) {
642                 return;
643             }
644         }
645     }
646 }
647 
getInputChar()648 unsigned char CodeGenerator::getInputChar()
649 {
650     // end of line?
651     if ( lineIndex == line.length() ) {
652 
653         //more testing required:
654         if (outputType==ESC_TRUECOLOR || outputType==ESC_XTERM256)
655             lastLineLength=StringTools::utf8_strlen(line + lsSyntaxErrorDesc);
656 
657         bool eof=false;
658         if ( preFormatter.isEnabled() ) {
659             if ( !preFormatter.hasMoreLines() ) {
660                 eof=readNewLine ( line );
661                 preFormatter.setLine ( line );
662                 ++lineNumber;
663                 numberCurrentLine = true;
664             } else {
665                 if (numberWrappedLines)
666                     ++lineNumber;
667                 numberCurrentLine = numberWrappedLines;
668             }
669 
670             line = preFormatter.getNextLine();
671         } else {
672             eof=readNewLine ( line );
673             ++lineNumber;
674 
675             numberCurrentLine = true;
676         }
677         lineIndex=0;
678 
679         if (!lineContainedTestCase && applySyntaxTestCase){
680             stateTraceTest = stateTraceCurrent;
681             stateTraceCurrent.clear();
682         }
683 
684         lineContainedTestCase=false;
685         lineContainedStmt=false;
686         matchRegex ( line );
687 
688         return ( eof ) ?'\0':'\n';
689     }
690 
691     return line[lineIndex++];
692 }
693 
/** Determines the state at the current input position.
 *
 *  changing this method requires regression testing with nested syntax files (HTML+PHP+JS+CSS,
 *  Coffeescript with block regex, Pas + ASM)
 *  especially nested syntax in one line
 *
 *  If no token is pending, the next character is read with getInputChar(); otherwise
 *  lineIndex is moved back so that only the first character of the pending token
 *  counts as consumed. The checks are made in this order: end of line, end of file,
 *  white space, syntax test marker, pending change to an embedded syntax, language
 *  server syntax error, language server semantic token, regex match at lineIndex.
 *
 *  \param oldState state before this call; used for the test marker check and passed
 *         to validateState()
 *  \return the detected state; token is assigned for every result except _EOL and _EOF
 */
State CodeGenerator::getCurrentState (State oldState)
{
    unsigned char c='\0';

    if ( token.length() ==0 ) {
        c=getInputChar();
    } else {
        // re-evaluate a pending token: step back to the position after its first character
        lineIndex-= ( token.length()-1 );
        c=token[0];
    }
    if ( c=='\n' ) {
        return _EOL;   // End of line
    }

    if ( c=='\0' ) {
        return _EOF;   // End of file
    }

    if ( c==' ' || c=='\t' ) {
        token= c;
        return _WS;    // White space
    }

    // test markers are only recognised when the previous state was a comment
    if ( applySyntaxTestCase && ( c=='^' || c=='<') && (oldState == ML_COMMENT || oldState==SL_COMMENT)  ) {
        token= c;
        return _TESTPOS;
    }

    // at this position the syntax change takes place
    if (lineIndex >= syntaxChangeIndex-1 || syntaxChangeLineNo < lineNumber){
        loadEmbeddedLang(embedLangDefPath);  // load new syntax
        matchRegex(line);                    // recognize new patterns in the (remaining) line
        syntaxChangeIndex = syntaxChangeLineNo = UINT_MAX;
    }

SKIP_EMBEDDED:

    // lineIndex already points behind c, hence the -1 in the substr calls below
    if (lsCheckSyntaxErrors && LSPClient.errorExists(lineNumber, lineIndex)) {
        highlight::SemanticToken errorToken = LSPClient.getError(lineNumber, lineIndex);
        token = line.substr ( lineIndex-1, errorToken.length);
        lineIndex += errorToken.length-1;
        lsSyntaxErrorDesc = errorToken.id;

        //std::cerr <<"error num "<<lineNumber<< " idx "<<lineIndex<< " error "<<errorToken.id<< "\n";
        return SYNTAX_ERROR;
    }

    if (lsCheckSemanticTokens && LSPClient.tokenExists(lineNumber, lineIndex)) {
        highlight::SemanticToken semToken = LSPClient.getToken(lineNumber, lineIndex);
        int semStyleKwId = docStyle.getSemanticStyle(semToken.id);
        // a zero style id means the token is not handled here and the regex check follows
        if (semStyleKwId) {
            token = line.substr ( lineIndex-1, semToken.length);
            lineIndex += semToken.length-1;

            currentKeywordClass = semStyleKwId + kwOffset;  // +offset of missing kw groups in the theme
            //std::cerr <<"l "<<lineNumber<<  "t "<<token<< " semStyleKwId "<< semStyleKwId << "  off "<<kwOffset<<" -> "  << semToken.id <<"\n";
            return KEYWORD;
        }
    }

    // Test if a regular expression was found at the current position
    if ( !regexGroups.empty() ) {
        if ( regexGroups.count ( lineIndex ) ) {
            token = line.substr ( lineIndex-1, regexGroups[lineIndex].length );

            // oldIndex keeps the key of the match while lineIndex is advanced to the end of the token
            unsigned int oldIndex= lineIndex;
            if ( regexGroups[oldIndex].length>1 ) lineIndex+= regexGroups[oldIndex].length-1;

            if ( regexGroups[oldIndex].state==EMBEDDED_CODE_BEGIN ) {
                //do not handle a nested section if the syntax is marked as "sealed"
                if (embedLangDefPath.length()==0 || currentSyntax->allowsInnerSection(embedLangDefPath) ) {
                    embedLangDefPath = currentSyntax->getNewPath(regexGroups[oldIndex].name);
                    //remember position
                    syntaxChangeIndex = lineIndex+2;
                    syntaxChangeLineNo = lineNumber;
                }

                // repeat parsing of this line without nested state recognition to highlight opening delimiter in the host syntax
                matchRegex(line, EMBEDDED_CODE_BEGIN);
                lineIndex = oldIndex;
                goto SKIP_EMBEDDED; // this is how it should be done
            }

            if ( regexGroups[oldIndex].state==IDENTIFIER_BEGIN || regexGroups[oldIndex].state==KEYWORD ) {
                string reservedWord= ( currentSyntax->isIgnoreCase() ) ? StringTools::change_case ( token ) :token;
                currentKeywordClass=currentSyntax->getKeywordListGroup ( reservedWord ); //check in lists (no regex)

                // not in a keyword list: fall back to the class of the keyword regex
                if ( !currentKeywordClass && regexGroups[oldIndex].state==KEYWORD ){
                    currentKeywordClass = regexGroups[oldIndex].kwClass;
                }
                return validateState(( currentKeywordClass ) ? KEYWORD : STANDARD, oldState );
            } else {
                return validateState(regexGroups[oldIndex].state, oldState);
            }
        }
    }

    // Character not referring to any state
    token = c;
    return STANDARD;
}
799 
validateState(State newState,State oldState)800 State CodeGenerator::validateState(State newState, State oldState)
801 {
802 
803     if (currentSyntax->getValidateStateChangeFct()) {
804         Diluculum::LuaValueList params;
805         params.push_back(Diluculum::LuaValue(oldState));
806         params.push_back(Diluculum::LuaValue(newState));
807         params.push_back(Diluculum::LuaValue(token));
808         params.push_back(Diluculum::LuaValue(getCurrentKeywordClassId()) );
809         params.push_back(Diluculum::LuaValue(lineNumber) );
810         params.push_back(Diluculum::LuaValue(lineIndex-(unsigned int)token.length()) );
811 
812         Diluculum::LuaValueList res=
813             currentSyntax->getLuaState()->call ( *currentSyntax->getValidateStateChangeFct(),
814                     params,"getValidateStateChangeFct call")  ;
815 
816         resultOfHook = res.size()>=1;
817         if (resultOfHook) {
818 
819             setOverrideParams();
820 
821             State validatedState = (State)res[0].asInteger();
822             if ( validatedState== _REJECT) {
823 
824                 // proceed using only the first character of the token
825                 if (res.size()==1) {
826                     lineIndex -= (token.length() -1);
827                     token=token.substr(0, 1);
828                 }
829 
830                 //experimental for slim.lang: evaluate second return arg after _REJECT
831                 if (res.size()>=2) {
832                     lineIndex -= (token.length() );
833                     token.clear();
834                     return (State)res[1].asInteger();
835                 }
836                 return oldState;
837             }
838 
839             return validatedState;
840         }
841     }
842     resultOfHook  = false;
843 
844     return newState;
845 }
846 
getCurrentKeywordClassId()847 unsigned int CodeGenerator::getCurrentKeywordClassId(){
848     unsigned int kwClassId=0;
849 
850     // this vector contains the defined keyword classes, and currentKeywordClass is its index:
851     vector<string> kwClasses=currentSyntax->getKeywordClasses();
852 
853     if (currentKeywordClass && currentKeywordClass<=kwClasses.size()) {
854         string kwClassName=kwClasses[currentKeywordClass-1];
855         if (kwClassName.size()==3)
856             kwClassId = kwClassName[2] - 'a' + 1;
857     }
858     return kwClassId;
859 }
860 
861 //it is faster to pass ostream reference
maskString(ostream & ss,const string & s)862 void CodeGenerator::maskString ( ostream& ss, const string & s )
863 {
864     string escHoverText;
865 
866     if (lsEnableHoverRequests && (currentState==STANDARD || currentState==NUMBER || currentState==KEYWORD)) {
867 
868         string hoverText = LSPClient.runHover(lsDocumentPath, lineIndex - s.size(), lineNumber-1);
869 
870         for(const auto &c : hoverText)
871         {
872             if (isascii(c))
873                 escHoverText.append(maskCharacter(c));
874         }
875     }
876 
877     if (escHoverText.size()) {
878         ss << getHoverTagOpen(escHoverText);
879     }
880 
881     for (const auto &c : s)
882     {
883         ss << maskCharacter ( c );
884     }
885 
886     if (escHoverText.size()) {
887         ss << getHoverTagClose();
888     }
889 
890     // The test markers position should also be deternmined by calculating the code points
891     if ( applySyntaxTestCase ) {
892 
893         PositionState ps(currentState, getCurrentKeywordClassId(), false);
894 
895         //TODO avoid repeated string comparison:
896         int slen = encoding=="utf-8" ? StringTools::utf8_strlen(s) : s.length();
897         for (int i=0; i< slen; i++ ) {
898             stateTraceCurrent.push_back(ps);
899         }
900         if (stateTraceCurrent.size()>200)
901             stateTraceCurrent.erase(stateTraceCurrent.begin(), stateTraceCurrent.begin() + 100 );
902     }
903 }
904 
printSyntaxError(ostream & ss)905 void CodeGenerator::printSyntaxError ( ostream& ss ) {
906     if ( !lsSyntaxErrorDesc.empty()) {
907         ss << openTags[ highlight::SYNTAX_ERROR_MSG ];
908 
909         for(const auto &c : lsSyntaxErrorDesc)
910         {
911             ss << maskCharacter ( c );
912         }
913 
914         ss << closeTags[ highlight::SYNTAX_ERROR_MSG ];
915         lsSyntaxErrorDesc.clear();
916     }
917 }
918 
callDecorateFct(const string & token)919 Diluculum::LuaValueList CodeGenerator::callDecorateFct(const string& token)
920 {
921 
922     Diluculum::LuaValueList params;
923     params.push_back(Diluculum::LuaValue(token));
924     params.push_back(Diluculum::LuaValue(currentState));
925     params.push_back(Diluculum::LuaValue(currentKeywordClass));
926     params.push_back(Diluculum::LuaValue(lineContainedStmt));
927     params.push_back(Diluculum::LuaValue(lineNumber) );
928     params.push_back(Diluculum::LuaValue(lineIndex-(unsigned int)token.length()) );
929 
930     return currentSyntax->getLuaState()->call ( *currentSyntax->getDecorateFct(),
931             params,"getDecorateFct call")  ;
932 }
933 
printMaskedToken(bool flushWhiteSpace,StringTools::KeywordCase tcase)934 void CodeGenerator::printMaskedToken (bool flushWhiteSpace, StringTools::KeywordCase tcase )
935 {
936     if ( flushWhiteSpace )
937         flushWs(1);
938     string caseToken = StringTools::change_case ( token, tcase );
939     if (currentSyntax->getDecorateFct()) {
940 
941         Diluculum::LuaValueList res=callDecorateFct(caseToken);
942         if (res.size()==1) {
943             *out<<res[0].asString();
944         } else {
945             maskString ( *out, caseToken );
946         }
947     } else {
948         maskString ( *out, caseToken );
949     }
950 
951     // check this *after* the decorate call
952     if (   currentState == STANDARD || currentState == KEYWORD || currentState == NUMBER
953         || currentState == STRING || currentState == IDENTIFIER_BEGIN) {
954         lineContainedStmt = true;
955     }
956     token.clear();
957 }
958 
styleFound()959 bool CodeGenerator::styleFound()
960 {
961     return docStyle.found();
962 }
963 
printIndexFile(const vector<string> & fileList,const string & outPath)964 bool CodeGenerator::printIndexFile ( const vector<string> &fileList, const string &outPath )
965 {
966     return true;
967 }
968 
initIndentationScheme(const string & indentScheme)969 bool CodeGenerator::initIndentationScheme ( const string &indentScheme )
970 {
971 
972     if ( formatter!=NULL ) {
973         return true;
974     }
975 
976     if ( !indentScheme.size() ) return false;
977 
978     formatter=new astyle::ASFormatter();
979 
980     if ( indentScheme=="allman" || indentScheme=="bsd" || indentScheme=="ansi" ) {
981         formatter->setFormattingStyle ( astyle::STYLE_ALLMAN );
982     } else if ( indentScheme=="kr"||indentScheme=="k&r"||indentScheme=="k/r" ) {
983         formatter->setFormattingStyle ( astyle::STYLE_KR );
984     } else if ( indentScheme=="java" ) {
985         formatter->setFormattingStyle ( astyle::STYLE_JAVA );
986     } else if ( indentScheme=="stroustrup" ) {
987         formatter->setFormattingStyle ( astyle::STYLE_STROUSTRUP );
988     } else if ( indentScheme=="whitesmith" ) {
989         formatter->setFormattingStyle ( astyle::STYLE_WHITESMITH );
990     } else if ( indentScheme=="banner" || indentScheme=="ratliff") {
991         formatter->setFormattingStyle ( astyle::STYLE_RATLIFF );
992     } else if ( indentScheme=="gnu" ) {
993         formatter->setFormattingStyle ( astyle::STYLE_GNU );
994     } else if ( indentScheme=="linux" ) {
995         formatter->setFormattingStyle ( astyle::STYLE_LINUX );
996     } else if ( indentScheme=="horstmann" ) {
997         formatter->setFormattingStyle ( astyle::STYLE_HORSTMANN );
998     } else if ( indentScheme=="otbs" ||  indentScheme=="1tbs") {
999         formatter->setFormattingStyle ( astyle::STYLE_1TBS );
1000     } else if ( indentScheme=="google") {
1001         formatter->setFormattingStyle ( astyle::STYLE_GOOGLE );
1002     } else if ( indentScheme=="pico" ||  indentScheme=="a11") {
1003         formatter->setFormattingStyle ( astyle::STYLE_PICO );
1004     } else if ( indentScheme=="lisp" ||  indentScheme=="python"||  indentScheme=="a12") {
1005         formatter->setFormattingStyle ( astyle::STYLE_LISP );
1006     } else if ( indentScheme=="vtk") {
1007         formatter->setFormattingStyle ( astyle::STYLE_VTK );
1008     } else if ( indentScheme=="mozilla") {
1009         formatter->setFormattingStyle ( astyle::STYLE_MOZILLA );
1010     } else if ( indentScheme=="webkit") {
1011         formatter->setFormattingStyle ( astyle::STYLE_WEBKIT );
1012     } else if ( indentScheme!="user" ){
1013         return false;
1014     }
1015     return formattingEnabled=true;
1016 }
1017 
loadLanguage(const string & langDefPath,bool embedded)1018 LoadResult CodeGenerator::loadLanguage ( const string& langDefPath, bool embedded )
1019 {
1020 
1021     if (!embedded) {
1022         while (!nestedLangs.empty()) {
1023             nestedLangs.pop();
1024         }
1025     }
1026 
1027     bool reloadNecessary= currentSyntax ? currentSyntax->needsReload ( langDefPath ): true;
1028     LoadResult result=LOAD_OK;
1029     if ( reloadNecessary ) {
1030         if (syntaxReaders.count(langDefPath)) {
1031             currentSyntax=syntaxReaders[langDefPath];
1032             result=LOAD_OK;
1033         } else {
1034 
1035             currentSyntax=new SyntaxReader();
1036             result=currentSyntax->load(langDefPath, pluginParameter, outputType);
1037             syntaxReaders[langDefPath]=currentSyntax;
1038         }
1039 
1040         if ( result==LOAD_OK ) {
1041             formattingPossible=currentSyntax->enableReformatting();
1042             updateKeywordClasses();
1043         }
1044     }
1045 
1046     kwOffset=currentSyntax->getKeywordCount() - docStyle.getKeywordStyleCount();
1047 
1048     return result;
1049 }
1050 
validateInputStream()1051 bool CodeGenerator::validateInputStream()
1052 {
1053     if ( !in ) return false;
1054 
1055     // it is not possible to move stream pointer back with stdin
1056     if ( ( int ) in->tellg() == -1 ) // -1 : stdin
1057         return true;
1058 
1059     // Sources: http://en.wikipedia.org/wiki/Magic_number_(programming)
1060     // Magic configuration of "file"
1061     // This is intended for web plugins - only check filetypes often found in the net
1062     char magic_gif[]    = {'G','I','F','8', 0};
1063     char magic_png[]    = {'\x89','P','N','G', 0};
1064     char magic_java[]   = {'\xCA','\xFE','\xBA','\xBE', 0};
1065     char magic_jpeg[]   = {'\xFF','\xD8','\xFF', 0};
1066     char magic_bmp[]    = {'B','M', 0};
1067     char magic_pdf[]    = {'%','P','D','F', 0};
1068     char magic_utf8[]   = {'\xEF','\xBB','\xBF',0};
1069     char magic_rar[]    = {'R','a','r','!', 0};
1070     char magic_zip[]    = {'P','K','\x03','\x04', 0};
1071     char magic_ace[]    = {'*','*','A','C','E','*','*', 0};
1072     char magic_tgz[]    = {'\x8b','\x1f', '\x00', '\x08', 0};
1073     char magic_bzip[]   = {'B','Z', 0};
1074 
1075     char* magic_table[] = {magic_utf8,
1076                            magic_gif, magic_png, magic_jpeg, magic_bmp, magic_pdf,
1077                            magic_java,
1078                            magic_rar, magic_zip, magic_ace, magic_tgz, magic_bzip,
1079                            0
1080                           };
1081 
1082     char buffer [10]= {0};
1083     in->read ( buffer,8 );  //only read the first 8 bytes of input stream
1084 
1085     int magic_index=0;
1086     while ( magic_table[magic_index] ) {
1087         if ( !strncmp ( buffer, magic_table[magic_index], strlen ( magic_table[magic_index] ) ) ) {
1088             break;
1089         }
1090         magic_index++;
1091     }
1092     int streamReadPos=0;
1093     if ( magic_table[magic_index] == magic_utf8 ) {
1094         //setEncoding("utf-8");
1095         streamReadPos=3; // remove UTF-8 magic number from output
1096     }
1097 
1098     in -> seekg ( streamReadPos, ios::beg );
1099     in-> clear();  // clear fail bit to continue reading
1100 
1101     return !magic_table[magic_index] // points to 0 if no pattern was found
1102            || magic_table[magic_index] == magic_utf8;
1103 }
1104 
applyPluginChunk(const string & fctName,string * result,bool * keepDefault)1105 void CodeGenerator::applyPluginChunk(const string& fctName, string *result, bool *keepDefault) {
1106 
1107     if (currentSyntax && pluginChunks.size()) {
1108 
1109         Diluculum::LuaState luaState;
1110 
1111         Diluculum::LuaValueList chunkParams;
1112         chunkParams.push_back(currentSyntax->getDescription());
1113         for (unsigned int i=0; i<pluginChunks.size(); i++) {
1114             luaState.call(*pluginChunks[i], chunkParams, "format user function");
1115         }
1116 
1117         if (luaState.globals().count(fctName)) {
1118             Diluculum::LuaFunction* documentFct=new Diluculum::LuaFunction(luaState[fctName].value().asFunction());
1119 
1120             luaState["HL_PLUGIN_PARAM"] = pluginParameter;
1121             luaState["HL_OUTPUT"] = outputType;
1122             luaState["HL_FORMAT_HTML"]=HTML;
1123             luaState["HL_FORMAT_XHTML"]=XHTML;
1124             luaState["HL_FORMAT_TEX"]=TEX;
1125             luaState["HL_FORMAT_LATEX"]=LATEX;
1126             luaState["HL_FORMAT_RTF"]=RTF;
1127             luaState["HL_FORMAT_ANSI"]=ESC_ANSI;
1128             luaState["HL_FORMAT_XTERM256"]=ESC_XTERM256;
1129             luaState["HL_FORMAT_TRUECOLOR"]=ESC_TRUECOLOR;
1130             luaState["HL_FORMAT_SVG"]=SVG;
1131             luaState["HL_FORMAT_BBCODE"]=BBCODE;
1132             luaState["HL_FORMAT_PANGO"]=PANGO;
1133             luaState["HL_FORMAT_ODT"]=ODTFLAT;
1134 
1135             Diluculum::LuaValueList params;
1136             Diluculum::LuaValueMap options;
1137             options[Diluculum::LuaValue("title")] =  Diluculum::LuaValue( docTitle );
1138             options[Diluculum::LuaValue("encoding")] =  Diluculum::LuaValue(encoding);
1139             options[Diluculum::LuaValue("fragment")] =  Diluculum::LuaValue(fragmentOutput);
1140             options[Diluculum::LuaValue("font")] =  Diluculum::LuaValue(getBaseFont());
1141             options[Diluculum::LuaValue("fontsize")] =  Diluculum::LuaValue(getBaseFontSize());
1142 
1143             params.push_back(inputFilesCnt);
1144             params.push_back(processedFilesCnt);
1145             params.push_back(options);
1146 
1147             Diluculum::LuaValueList res=luaState.call ( *documentFct, params, fctName+" call");
1148             if (res.size()>=1) {
1149                 *keepDefault=false;
1150                 *result = res[0].asString();
1151                 if (res.size()==2)
1152                     *keepDefault = res[1].asBoolean();
1153             }
1154             delete documentFct;
1155         }
1156     }
1157 }
1158 
printHeader()1159 void CodeGenerator::printHeader()
1160 {
1161     bool keepDefaultHeader=true;
1162     string pluginHeader;
1163 
1164     processedFilesCnt++;
1165 
1166     applyPluginChunk("DocumentHeader", &pluginHeader, &keepDefaultHeader);
1167 
1168     if ( ! fragmentOutput && keepDefaultHeader)
1169         *out << getHeader();
1170 
1171     *out << pluginHeader;
1172 
1173     if ( !fragmentOutput || keepInjections)
1174         *out << currentSyntax->getHeaderInjection();
1175 }
1176 
printFooter()1177 void CodeGenerator::printFooter()
1178 {
1179 
1180     bool keepDefaultFooter=true;
1181     string pluginFooter;
1182 
1183     applyPluginChunk("DocumentFooter", &pluginFooter, &keepDefaultFooter);
1184 
1185     if ( !fragmentOutput || keepInjections)
1186         *out << currentSyntax->getFooterInjection();
1187 
1188     *out << pluginFooter;
1189 
1190     if ( ! fragmentOutput && keepDefaultFooter )
1191         *out << getFooter();
1192 }
1193 
generateFile(const string & inFileName,const string & outFileName)1194 ParseError CodeGenerator::generateFile ( const string &inFileName,
1195         const string &outFileName )
1196 {
1197     if ( !docStyle.found() ) {
1198         return BAD_STYLE;
1199     }
1200 
1201     reset();
1202 
1203     ParseError error=PARSE_OK;
1204 
1205     inFile=inFileName;
1206     outFile=outFileName;
1207 
1208     in = ( inFileName.empty() ? &cin :new ifstream ( inFileName.c_str() ) );
1209 
1210     if ( validateInput )
1211         if ( !validateInputStream() ) error= BAD_INPUT;
1212 
1213     if ( !in->fail() && error==PARSE_OK ) {
1214         out = ( outFileName.empty() ? &cout :new ofstream ( outFileName.c_str() ) );
1215         if ( out->fail() ) {
1216             error=BAD_OUTPUT;
1217         }
1218     }
1219 
1220     if ( in->fail() ) {
1221         error=BAD_INPUT;
1222     }
1223 
1224     if ( error==PARSE_OK ) {
1225         initASStream();
1226         currentSyntax->setInputFileName(inFile);
1227         printHeader();
1228         printBody();
1229         printFooter();
1230     }
1231 
1232     if ( !outFileName.empty() ) {
1233         delete out;
1234         out=NULL;
1235     }
1236     if ( !inFileName.empty() ) {
1237         delete in;
1238         in=NULL;
1239     }
1240     return error;
1241 }
1242 
// Highlights the given string and returns the generated document.
// input: source code to highlight
// Returns an empty string if no document style is loaded or the internal
// string streams could not be set up.
string CodeGenerator::generateString ( const string &input )
{

    if ( !docStyle.found() ) {
        return "";
    }

    reset();

    in = new istringstream ( input );
    out = new ostringstream ();

    if ( in->fail() || out->fail() ) {
        // release the streams on the error path as well, so they neither
        // leak nor stay reachable through the members
        delete out;
        out=NULL;
        delete in;
        in=NULL;
        return "";
    }

    initASStream();

    printHeader();
    printBody();
    printFooter();

    string result = static_cast<ostringstream*> ( out )->str();

    delete out;
    out=NULL;
    delete in;
    in=NULL;

    return result;
}
1274 
initASStream()1275 void CodeGenerator::initASStream() {
1276     if ( formatter != NULL ) {
1277         if (streamIterator) delete streamIterator;
1278         streamIterator =  new astyle::ASStreamIterator ( in );
1279         formatter->init ( streamIterator );
1280     }
1281 }
1282 
// Highlights the content of a file and returns the generated document.
// inFileName: path of the input file
// Returns an empty string if no document style is loaded or the file
// cannot be opened, and the text "ERROR: detected binary input" if input
// validation is enabled and rejects the file.
string CodeGenerator::generateStringFromFile ( const string &inFileName )
{

    if ( !docStyle.found() ) {
        return "";
    }

    reset();

    inFile = inFileName;

    in = new ifstream ( inFileName.c_str() );
    out = new ostringstream ();

    // Determine the early-exit result first so that the streams are released
    // on every path; the checks run in the same order as before.
    bool failed = false;
    string result;

    if ( in->fail() || out->fail() ) {
        failed = true;
    } else if ( validateInput && !validateInputStream() ) {
        failed = true;
        result = "ERROR: detected binary input";
    }

    if ( !failed ) {
        initASStream();

        currentSyntax->setInputFileName(inFile);

        printHeader();
        printBody();
        printFooter();

        result = static_cast<ostringstream*> ( out )->str();
    }

    delete out;
    out=NULL;
    delete in;
    in=NULL;

    return result;
}
1322 
getStyleID(State s,unsigned int kwClassID)1323 unsigned int CodeGenerator::getStyleID ( State s, unsigned int kwClassID )
1324 {
1325     if ( s==KEYWORD && kwClassID ) {
1326         return NUMBER_BUILTIN_STATES + kwClassID-1;
1327     }
1328     return ( unsigned int ) s ;
1329 }
1330 
openTag(State s)1331 void CodeGenerator::openTag ( State s )
1332 {
1333     *out << openTags[ ( unsigned int ) s];
1334     currentState=s;
1335 }
1336 
closeTag(State s)1337 void CodeGenerator::closeTag ( State s )
1338 {
1339     *out << closeTags[ ( unsigned int ) s];
1340     flushWs(2);
1341     currentState=_UNKNOWN;
1342 }
1343 
openKWTag(unsigned int kwClassID)1344 void CodeGenerator::openKWTag ( unsigned int kwClassID )
1345 {
1346     *out << openTags.at(getStyleID ( KEYWORD, kwClassID ) );
1347     currentState=KEYWORD;
1348 }
1349 
closeKWTag(unsigned int kwClassID)1350 void CodeGenerator::closeKWTag ( unsigned int kwClassID )
1351 {
1352     *out << closeTags.at(getStyleID ( KEYWORD, kwClassID ) );
1353     flushWs(3);
1354     currentState=_UNKNOWN;
1355 }
1356 
loadEmbeddedLang(const string & embedLangDefPath)1357 bool CodeGenerator::loadEmbeddedLang(const string&embedLangDefPath)
1358 {
1359     if (nestedLangs.empty()) {
1360         nestedLangs.push(currentSyntax->getCurrentPath() );
1361     }
1362     if (nestedLangs.top() != embedLangDefPath) {
1363         nestedLangs.push(embedLangDefPath);
1364     }
1365     LoadResult res = loadLanguage(embedLangDefPath, true);
1366     //pass end delimiter regex to syntax description
1367     currentSyntax->restoreLangEndDelim(embedLangDefPath);
1368     return res == LOAD_OK;
1369 }
1370 
1371 ///////////////////////////////////////////////////////////////////////////////
1372 
// Top-level loop of the highlighter: reads states until the end of input
// and dispatches each one to its handler. The STANDARD tag is closed
// before a handler runs and reopened afterwards. If highlighting is
// disabled for the current syntax, the input lines are only masked and
// written out.
void CodeGenerator::processRootState()
{
    bool eof=false,
         firstLine=true; // avoid newline before printing the first output line

    // syntax test cases are applied to files whose name contains "syntax_test_"
    applySyntaxTestCase = inFile.find("syntax_test_")!=string::npos;

    // plain output path: no state detection, lines are written line by line
    if ( currentSyntax->highlightingDisabled() ) {
        string line;
        while ( getline ( *in, line ) && lineNumber < maxLineCnt ) {
            ++lineNumber;
            insertLineNumber ( !firstLine );
            flushWs(4);
            firstLine=false;
            // only lines inside the requested line range are written
            if (lineNumber>=startLineCntCurFile && lineNumber <=maxLineCnt)
                maskString ( *out, line );
        }
        *out << flush;
        return;
    }

    State state=STANDARD;
    openTag ( STANDARD );

    do {
        // determine next state
        state= getCurrentState(STANDARD);

        // handle current state
        // (each handler returns true when it reached the end of input)
        switch ( state ) {
        case KEYWORD:
            closeTag ( STANDARD );
            eof=processKeywordState ( state );
            openTag ( STANDARD );
            break;
        case NUMBER:
            closeTag ( STANDARD );
            eof=processNumberState();
            openTag ( STANDARD );
            break;
        case ML_COMMENT:
            closeTag ( STANDARD );
            eof=processMultiLineCommentState();
            openTag ( STANDARD );
            break;
        case SL_COMMENT:
            closeTag ( STANDARD );
            eof=processSingleLineCommentState();
            openTag ( STANDARD );
            break;
        case STRING:
            closeTag ( STANDARD );
            eof=processStringState ( STANDARD );
            openTag ( STANDARD );
            break;
        case DIRECTIVE:
            closeTag ( STANDARD );
            eof=processDirectiveState();
            openTag ( STANDARD );
            break;
        case ESC_CHAR:
            closeTag ( STANDARD );
            eof=processEscapeCharState();
            openTag ( STANDARD );
            break;
        case SYMBOL:
            closeTag ( STANDARD );
            eof=processSymbolState();
            openTag ( STANDARD );
            break;
        case EMBEDDED_CODE_END:
            closeTag ( STANDARD );
            eof=processSyntaxChangeState(state);
            openTag ( STANDARD );
            break;
        case SYNTAX_ERROR:
            closeTag ( STANDARD );
            eof=processSyntaxErrorState();
            openTag ( STANDARD );
            break;

        case _EOL:
            // XTERM256 fix (issue with less cmd)
            if  (!firstLine || showLineNumbers) {
                closeTag ( STANDARD );
            }
            insertLineNumber(!firstLine);
            if (!firstLine || showLineNumbers) {
                flushWs(5);
                // the state trace is kept per line
                stateTraceCurrent.clear();
                openTag ( STANDARD );
            }
            firstLine=false;
            break;
        case _EOF:
            eof=true;
            break;
        case _WS:
            processWsState();
            break;
        default:
            // any other state: write the token within the STANDARD tag
            printMaskedToken();
            break;
        }
    } while ( !eof );

    // the final close tag is skipped for the XTERM256 and TRUECOLOR output
    // types when there is no pending token and lineNumber is not above 1
    if (token.size() || lineNumber>1 || (outputType!=ESC_TRUECOLOR && outputType!=ESC_XTERM256))
        closeTag ( STANDARD );

    // give the syntax definition the chance to decorate the last line end
    if (currentSyntax->getDecorateLineEndFct()) {
        Diluculum::LuaValueList res=callDecorateLineFct(false);
        if (res.size()==1) {
            *out << res[0].asString();
        }
    }

    // noTrailingNewLine: 0 enables printNewLines unconditionally, 2 enables
    // it only if there is a pending token or lineNumber is above 1
    printNewLines = noTrailingNewLine==0 || ( noTrailingNewLine==2 && ( token.size() || lineNumber>1) );
    *out << getNewLine();
    *out << flush;
}
1493 
// Handles a token that marks a change of the syntax definition; the token
// is written with the KEYWORD tag.
// myState: the state that triggered the call. For EMBEDDED_CODE_END the
//          nested language on top of the stack is dropped and the syntax
//          below it (if any) is loaded again.
// Returns true if the end of input was reached.
bool CodeGenerator::processSyntaxChangeState(State myState)
{
    State newState=STANDARD;
    bool eof=false,
         exitState=false;

    openTag ( KEYWORD );
    do {

        // NOTE(review): this block is inside the loop, so it runs again on
        // every pass that is kept alive by whitespace (_WS does not set
        // exitState) and pops one more entry each time - confirm intended.
        if (myState==EMBEDDED_CODE_END) {
            if (!nestedLangs.empty()) {
                nestedLangs.pop();
            }
            // load host language syntax
            if (!nestedLangs.empty()) {
                loadLanguage(nestedLangs.top(), true);
            }
            matchRegex(line, EMBEDDED_CODE_BEGIN); // match remaining line using the host syntax
        }

        // whitespace is only flushed if the previous state was not _WS
        printMaskedToken ( newState!=_WS );

        newState= getCurrentState(myState);

        switch ( newState ) {
        case _WS:
            processWsState();
            break;
        case _EOL:
            insertLineNumber();
            exitState=true;
            break;
        case _EOF:
            eof = true;
            break;
        default:
            // every other state ends this one
            exitState=true;
            break;
        }
    } while (  !exitState  &&  !eof );
    closeTag ( KEYWORD );

    return eof;
}
1538 
1539 
processKeywordState(State myState)1540 bool CodeGenerator::processKeywordState ( State myState )
1541 {
1542     State newState=STANDARD;
1543     unsigned int myClassID=currentKeywordClass;
1544     bool eof=false,
1545          exitState=false;
1546 
1547     openKWTag ( myClassID );
1548     do {
1549         printMaskedToken ( newState!=_WS,
1550                            ( currentSyntax->isIgnoreCase() ) ? keywordCase : StringTools::CASE_UNCHANGED );
1551         newState= getCurrentState(myState);
1552         switch ( newState ) {
1553         case _WS:
1554             processWsState();
1555             exitState=isolateTags;
1556             break;
1557         case _EOL:
1558             insertLineNumber();
1559             exitState=true;
1560 
1561             break;
1562         case _EOF:
1563             eof = true;
1564             break;
1565         case KEYWORD_END:
1566             exitState=true;
1567             break;
1568         default:
1569             exitState= ( myClassID!=currentKeywordClass ) || ( myState!=newState );
1570             break;
1571         }
1572     } while ( !exitState  &&  !eof );
1573 
1574     closeKWTag ( myClassID );
1575 
1576     currentKeywordClass=0;
1577     return eof;
1578 }
1579 
processNumberState()1580 bool CodeGenerator::processNumberState()
1581 {
1582     State newState=STANDARD;
1583     bool eof=false,
1584          exitState=false;
1585     openTag ( NUMBER );
1586     do {
1587         printMaskedToken ( newState!=_WS );
1588         newState= getCurrentState(NUMBER);
1589         switch ( newState ) {
1590         case _WS:
1591             processWsState();
1592             exitState=isolateTags;
1593             break;
1594         case _EOL:
1595             insertLineNumber();
1596             exitState=true;
1597             break;
1598         case _EOF:
1599             eof = true;
1600             break;
1601         default:
1602             exitState=newState!=NUMBER;
1603             break;
1604         }
1605     } while ( !exitState && !eof );
1606 
1607     closeTag ( NUMBER );
1608     return eof;
1609 }
1610 
1611 
// Writes a multi line comment enclosed in the ML_COMMENT tag, starting
// with the opening delimiter held in "token".
// Nested comments are counted if the syntax allows them; the state ends
// when the counter of open comments drops to zero.
// Returns true if the end of input was reached.
bool CodeGenerator::processMultiLineCommentState()
{
    int commentCount=1; // number of currently open comments
    int openDelimID=currentSyntax->getOpenDelimiterID ( token, ML_COMMENT);
    State newState=STANDARD;
    bool eof=false, exitState=false, containedTestCase=false;
    // lineIndex minus the length of the opening delimiter token
    unsigned int startColumn=lineIndex - token.size() ;
    openTag ( ML_COMMENT );
    do {
        printMaskedToken (newState!=_WS );
        newState= getCurrentState(ML_COMMENT);

        switch ( newState ) {
        case _WS:
            processWsState();
            break;
        case _EOL:
            // close and reopen the tag around the line break via wsBuffer
            wsBuffer += closeTags[ML_COMMENT];
            insertLineNumber();
            wsBuffer += openTags[ML_COMMENT];
            startColumn=0;
            break;
        case _EOF:
            eof = true;
            break;
        case _TESTPOS:
            // the token "<" selects startColumn, any other token the
            // column before lineIndex
            runSyntaxTestcases(token=="<" ? startColumn : lineIndex - 1 );
            printMaskedToken();
            containedTestCase=true;
            break;
        case ML_COMMENT:

            if ( currentSyntax->allowNestedMLComments() ) {
                ++commentCount;
            }
            // if delimiters are equal, close the comment by continuing to
            // ML_COMMENT_END section
            if (currentSyntax->delimiterIsDistinct(currentSyntax->getOpenDelimiterID ( token, ML_COMMENT  ))) break;

            // intentional fall through to ML_COMMENT_END
        case ML_COMMENT_END:

            // ignore end delimiters that do not belong to the opening delimiter
            if (!currentSyntax->matchesOpenDelimiter (token,  ML_COMMENT_END, openDelimID)) {
                break;
            }
            commentCount--;
            if ( !commentCount ) {
                printMaskedToken();
                exitState=true;
            }
            break;
        default:
            break;
        }
    } while ( !exitState  &&  !eof );

    closeTag ( ML_COMMENT );

    // discard the state trace if a test case marker was processed
    if (containedTestCase){
        stateTraceCurrent.clear();
    }
    return eof;
}
1674 
1675 
processSingleLineCommentState()1676 bool CodeGenerator::processSingleLineCommentState()
1677 {
1678     State newState=STANDARD;
1679     bool eof=false, exitState=false, containedTestCase=false;
1680     unsigned int startColumn = lineIndex - token.size() ;
1681 
1682     openTag ( SL_COMMENT );
1683     do {
1684         printMaskedToken ( newState!=_WS );
1685         newState= getCurrentState(SL_COMMENT);
1686 
1687         switch ( newState ) {
1688         case _WS:
1689             processWsState();
1690             break;
1691         case _EOL:
1692             printMaskedToken();
1693             if ( preFormatter.isEnabled() && preFormatter.isWrappedLine ( lineNumber-1 ) ) {
1694                 exitState=false;
1695             } else {
1696                 exitState=true;
1697             }
1698             if ( !exitState ) wsBuffer += closeTags[SL_COMMENT];
1699             insertLineNumber();
1700             if ( !exitState ) wsBuffer += openTags[SL_COMMENT];
1701 
1702             break;
1703         case _EOF:
1704             eof = true;
1705             break;
1706         case _TESTPOS:
1707             runSyntaxTestcases(token=="<" ? startColumn : lineIndex - 1 );
1708             printMaskedToken();
1709             containedTestCase=true;
1710             break;
1711 
1712         default:
1713             break;
1714         }
1715     } while ( !exitState  &&  !eof );
1716 
1717     closeTag ( SL_COMMENT );
1718 
1719     if (containedTestCase) {
1720         stateTraceCurrent.clear();
1721     }
1722 
1723     return eof;
1724 }
1725 
// Writes a directive enclosed in the DIRECTIVE tag.
// Comments and strings inside the directive are handled by their own
// state functions; the DIRECTIVE tag is closed before and reopened after
// them. Returns true if the end of input was reached.
bool CodeGenerator::processDirectiveState()
{
    State  newState=STANDARD;
    bool eof=false, exitState=false;

    openTag ( DIRECTIVE );
    do {
        printMaskedToken ( newState!=_WS );
        newState= getCurrentState(DIRECTIVE);
        switch ( newState ) {
        case _WS:
            processWsState();
            break;
        case DIRECTIVE_END:
            printMaskedToken();
            exitState=true;
            break;
        case _EOL:
            printMaskedToken();

            // a line wrapped by the pre-formatter never ends the directive
            if ( preFormatter.isEnabled() && preFormatter.isWrappedLine ( lineNumber-1 ) ) {
                exitState=false;
            } else {
                // with a continuation character of 0x13 exitState is left
                // unchanged; otherwise the directive ends unless the line
                // was terminated by the continuation character
                if (currentSyntax->getContinuationChar()!=0x13){
                    exitState= ( terminatingChar!=currentSyntax->getContinuationChar() );
                }
            }
            // if the directive continues, close and reopen the tag around
            // the line break via wsBuffer
            if ( !exitState ) wsBuffer += closeTags[DIRECTIVE];
            insertLineNumber();
            if ( !exitState ) wsBuffer += openTags[DIRECTIVE];
            break;
        case ML_COMMENT:
            closeTag ( DIRECTIVE );
            eof= processMultiLineCommentState();
            openTag ( DIRECTIVE );
            break;
        case SL_COMMENT:
            closeTag ( DIRECTIVE );
            eof= processSingleLineCommentState();
            openTag ( DIRECTIVE );
            // a single line comment also ends the directive
            exitState=true;
            break;
        case STRING:
            closeTag ( DIRECTIVE );
            eof=processStringState ( DIRECTIVE );
            openTag ( DIRECTIVE );
            break;
        case _EOF:
            eof = true;
            break;
        default:
            break;
        }
    } while ( !exitState && !eof );

    closeTag ( DIRECTIVE );
    return eof;
}
1784 
// Writes a string literal, starting with the opening delimiter held in
// "token".
// oldState: state that contained the string; DIRECTIVE selects the
//           DIRECTIVE_STRING tag, any other value the STRING tag.
// Escape sequences and interpolations are handled by their own state
// functions. toggleDynRawString is reset on exit.
// Returns true if the end of input was reached.
bool CodeGenerator::processStringState ( State oldState )
{
    State newState=STANDARD;
    bool eof=false, exitState=false;
    bool returnedFromOtherState=false;

    State myState= ( oldState==DIRECTIVE ) ? DIRECTIVE_STRING : STRING;

    // remember the opening delimiter to recognize the matching end delimiter
    int openDelimID=currentSyntax->getOpenDelimiterID ( token, myState);
    string openDelim=token;

    //Raw String by definition:
    bool isRawString=currentSyntax->delimiterIsRawString(openDelimID) || toggleDynRawString;

    // Test if character before string open delimiter token equals to the
    // raw string prefix (Example: r" ", r""" """ in Python)

    //Raw String Prefix:
    if ( lineIndex>token.length() &&line[lineIndex-token.length()-1]==currentSyntax->getRawStringPrefix() ) {
        isRawString=true;
    }

    openTag ( myState );
    do {
        // true if last token was an escape char
        // (or an interpolation; the token was then handled by the other state)
        if ( !returnedFromOtherState ) {
            printMaskedToken (newState!=_WS );
        }
        returnedFromOtherState=false;
        newState= getCurrentState(myState);

        switch ( newState ) {
        case _WS:
            processWsState();
            break;
        case _EOL:
            // close and reopen the tag around the line break via wsBuffer
            wsBuffer += closeTags[myState];
            insertLineNumber();
            wsBuffer += openTags[myState];
            break;
        case STRING_END:
            // accept the end delimiter if resultOfHook is set or if it
            // matches the opening delimiter
            if (resultOfHook || currentSyntax->matchesOpenDelimiter (token,  STRING_END, openDelimID)) {
                if (currentSyntax->assertDelimEqualLength()) {
                    exitState= openDelim.length()==token.length();
                } else {
                    exitState= true;
                }
                printMaskedToken();
            }
            break;
        case STRING:
            // if there exist multiple string delimiters, close string if
            // current delimiter is equal to the opening delimiter
            exitState=currentSyntax->delimiterIsDistinct(currentSyntax->getOpenDelimiterID ( token, STRING  )) && token==openDelim;
            printMaskedToken();
            break;
        case ESC_CHAR:
            if ( !isRawString ) {
                closeTag ( myState );
                eof=processEscapeCharState();
                openTag ( myState );
                returnedFromOtherState=true;
            } else {
                // raw string: the escape token is written as part of the
                // string; the string ends if the second character of the
                // token equals the first character of the opening delimiter
                // FIXME not a fix for Python r"""\"""
                exitState=token.size()>1 && token[1] == openDelim[0];
                printMaskedToken();
            }
            break;
        case STRING_INTERPOLATION:
            closeTag ( myState );
            eof=processInterpolationState();
            openTag ( myState );
            returnedFromOtherState=true;
            break;

        case _EOF:
            eof = true;
            break;
        default:
            printMaskedToken();
            break;
        }
    } while ( !exitState && !eof );

    closeTag ( myState );

    toggleDynRawString = false;

    return eof;
}
1875 
processSymbolState()1876 bool CodeGenerator::processSymbolState()
1877 {
1878     State newState=STANDARD;
1879     bool eof=false,
1880          exitState=false;
1881 
1882     openTag ( SYMBOL );
1883     do {
1884         printMaskedToken ( newState!=_WS );
1885         newState= getCurrentState(SYMBOL);
1886         switch ( newState ) {
1887         case _WS:
1888             processWsState();
1889             exitState=isolateTags;
1890             break;
1891         case _EOL:
1892             insertLineNumber();
1893             exitState=true;
1894             break;
1895         case _EOF:
1896             eof = true;
1897             break;
1898         default:
1899             exitState=newState!=SYMBOL;
1900             break;
1901         }
1902     } while ( !exitState && !eof );
1903 
1904     closeTag ( SYMBOL );
1905     return eof;
1906 }
1907 
processSyntaxErrorState()1908 bool CodeGenerator::processSyntaxErrorState()
1909 {
1910     State newState=STANDARD;
1911     bool eof=false,
1912     exitState=false;
1913 
1914     openTag ( SYNTAX_ERROR );
1915     do {
1916         printMaskedToken ( newState!=_WS );
1917         newState= getCurrentState(SYNTAX_ERROR);
1918         switch ( newState ) {
1919             case _WS:
1920                 processWsState();
1921                 exitState=isolateTags;
1922                 break;
1923             case _EOL:
1924                 insertLineNumber();
1925                 exitState=true;
1926                 break;
1927             case _EOF:
1928                 eof = true;
1929                 break;
1930             default:
1931                 exitState=newState!=SYMBOL;
1932                 break;
1933         }
1934     } while ( !exitState && !eof );
1935 
1936     closeTag ( SYNTAX_ERROR );
1937     return eof;
1938 }
1939 
processEscapeCharState()1940 bool CodeGenerator::processEscapeCharState()
1941 {
1942     State newState=STANDARD;
1943     bool eof=false, exitState=false;
1944     openTag ( ESC_CHAR );
1945     do {
1946         printMaskedToken (newState!=_WS );
1947         newState= getCurrentState(ESC_CHAR);
1948         switch ( newState ) {
1949         case _EOL:
1950             insertLineNumber();
1951             exitState=true;
1952             break;
1953         case _WS:
1954             processWsState();
1955             exitState=isolateTags;
1956             break;
1957         case _EOF:
1958             eof = true;
1959             break;
1960         default:
1961             exitState=newState!=ESC_CHAR;
1962             break;
1963         }
1964     } while ( !exitState && !eof );
1965 
1966     closeTag ( ESC_CHAR );
1967     return eof;
1968 }
1969 
processInterpolationState()1970 bool CodeGenerator::processInterpolationState()
1971 {
1972     State newState=STANDARD;
1973     bool eof=false, exitState=false;
1974     openTag ( STRING_INTERPOLATION );
1975     do {
1976         printMaskedToken (newState!=_WS );
1977         newState= getCurrentState(STRING_INTERPOLATION);
1978         switch ( newState ) {
1979         case _EOL:
1980             insertLineNumber();
1981             exitState=true;
1982             break;
1983         case _WS:
1984             processWsState();
1985             exitState=isolateTags;
1986             break;
1987         case _EOF:
1988             eof = true;
1989             break;
1990         default:
1991             exitState=newState!=STRING_INTERPOLATION;
1992             break;
1993         }
1994     } while ( !exitState && !eof );
1995 
1996     closeTag ( STRING_INTERPOLATION );
1997     return eof;
1998 }
1999 
processWsState()2000 void CodeGenerator::processWsState()
2001 {
2002 
2003     if ( !maskWs ) {
2004         wsBuffer += token;
2005         token.clear();
2006         return;
2007     }
2008 
2009     flushWs(6);
2010 
2011     int cntWs=0;
2012     lineIndex--;
2013     PositionState ps(currentState, 0, true);
2014 
2015     while ( line[lineIndex]==' ' || line[lineIndex]=='\t' ) {
2016         ++cntWs;
2017         ++lineIndex;
2018     }
2019 
2020     if ( cntWs>1 ) {
2021 
2022         unsigned int styleID=getStyleID ( currentState, currentKeywordClass );
2023         if ( excludeWs && styleID!=_UNKNOWN ) {
2024             *out << closeTags[styleID];
2025         }
2026 
2027         *out << maskWsBegin;
2028         for ( int i=0; i<cntWs; i++ ) {
2029             *out << spacer;
2030             if (applySyntaxTestCase){
2031                 stateTraceCurrent.push_back(ps);
2032             }
2033         }
2034         *out << maskWsEnd;
2035         if ( excludeWs && styleID!=_UNKNOWN ) {
2036             *out << openTags[styleID];
2037         }
2038     } else {
2039 
2040         *out << spacer; //Bugfix fehlender Space nach Strings
2041         if (applySyntaxTestCase){
2042             stateTraceCurrent.push_back(ps);
2043         }
2044     }
2045 
2046     spacer = initialSpacer;
2047 
2048     token.clear();
2049 }
2050 
flushWs(int arg)2051 void CodeGenerator::flushWs(int arg)
2052 {
2053     PositionState ps(currentState, 0, true);
2054     //workaround condition
2055     for ( size_t i=0; i<wsBuffer.size() && ((arg > 3) || ( (arg<4) && lineIndex>1)) && applySyntaxTestCase ; i++ ) {
2056         stateTraceCurrent.push_back(ps);
2057         //std::cerr <<"\nflush >"<<wsBuffer<<"< arg:"<<arg;
2058     }
2059 
2060     //fix canvas whitespace
2061     if (wsBuffer.length() && (outputType==ESC_XTERM256 || outputType==ESC_TRUECOLOR) ){
2062         *out<<maskWsBegin;
2063     }
2064 
2065     *out << wsBuffer;
2066     wsBuffer.clear();
2067 }
2068 
getTestcaseName(State s,unsigned int kwClass)2069 string CodeGenerator::getTestcaseName(State s, unsigned int kwClass) {
2070     switch (s) {
2071 
2072         case STANDARD:
2073             return STY_NAME_STD;
2074         case STRING:
2075             return STY_NAME_STR;
2076         case NUMBER:
2077             return STY_NAME_NUM;
2078         case SL_COMMENT:
2079             return STY_NAME_SLC;
2080         case ML_COMMENT:
2081             return STY_NAME_COM;
2082         case ESC_CHAR:
2083             return STY_NAME_ESC;
2084         case DIRECTIVE:
2085             return STY_NAME_DIR;
2086         case DIRECTIVE_STRING:
2087             return STY_NAME_DST;
2088         case SYMBOL:
2089             return STY_NAME_SYM;
2090         case STRING_INTERPOLATION:
2091             return STY_NAME_IPL;
2092         case SYNTAX_ERROR:
2093             return STY_NAME_ERR;
2094         case _WS:
2095             return "ws";
2096         case KEYWORD: {
2097 
2098             if (!kwClass)
2099                 return "ws";
2100 
2101             char kwName[20] = {0};
2102             snprintf(kwName, sizeof(kwName), "keyword %c", ('a'+kwClass-1));
2103 
2104             return string(kwName);
2105         }
2106         default:
2107             return "unknown_test";
2108     }
2109 }
2110 
printTrace(const string & s)2111 void CodeGenerator::printTrace(const string &s){
2112     std::cout<<"\n curr "<<lineNumber<<" "<<s<<": ";
2113     for (unsigned int i=0; i< stateTraceCurrent.size(); i++) {
2114         std::cout<<" "<<stateTraceCurrent[i].state;
2115     }
2116     std::cout<<"\n test "<<lineNumber<<" "<<s<<": ";
2117     for (unsigned int i=0; i< stateTraceTest.size(); i++) {
2118         std::cout<<" "<<stateTraceTest[i].state;
2119     }
2120     std::cout<<"\n";
2121 }
2122 
2123 //column: lineIndex (not a UTF-8 validated string position)
runSyntaxTestcases(unsigned int column)2124 void CodeGenerator::runSyntaxTestcases(unsigned int column){
2125 
2126     if (encoding=="utf-8")
2127         column = StringTools::utf8_strlen(line.substr(0, column));
2128 
2129     unsigned int assertGroup=0;
2130     size_t typeDescPos=line.find_first_not_of("\t ^", lineIndex);
2131     State assertState=_UNKNOWN;
2132     bool negation=false;
2133     bool testFailed=false;
2134 
2135     ostringstream errMsg;
2136     string prefix;
2137     //printTrace("trace 2");
2138 
2139     if (typeDescPos!=string::npos) {
2140 
2141         if (line[typeDescPos]=='~') {
2142 
2143             negation=true;
2144             prefix="~";
2145             ++typeDescPos;
2146         }
2147 
2148         if (line.find(STY_NAME_NUM, typeDescPos)==typeDescPos)
2149             assertState=NUMBER;
2150         //TODO temp. fix to allow old and new string classes
2151         else if (line.find(STY_NAME_STR, typeDescPos)==typeDescPos || line.find("str", typeDescPos)==typeDescPos)
2152             assertState=STRING;
2153         else if (line.find(STY_NAME_ESC, typeDescPos)==typeDescPos)
2154             assertState=ESC_CHAR;
2155         else if (line.find(STY_NAME_IPL, typeDescPos)==typeDescPos)
2156             assertState=STRING_INTERPOLATION;
2157         else if (line.find(STY_NAME_SYM, typeDescPos)==typeDescPos)
2158             assertState=SYMBOL;
2159         else if (line.find(STY_NAME_DIR, typeDescPos)==typeDescPos)
2160             assertState=DIRECTIVE;
2161         else if (line.find(STY_NAME_SLC, typeDescPos)==typeDescPos)
2162             assertState=SL_COMMENT;
2163         else if (line.find(STY_NAME_COM, typeDescPos)==typeDescPos)
2164             assertState=ML_COMMENT;
2165         else if (line.find("ws", typeDescPos)==typeDescPos)
2166             assertState=_WS;
2167         //TODO temp. fix to allow old and new default classes
2168         else if (line.find(STY_NAME_STD, typeDescPos)==typeDescPos  || line.find("std", typeDescPos)==typeDescPos)
2169             assertState=STANDARD;
2170         else if (line.find(STY_NAME_DST, typeDescPos)==typeDescPos)
2171             assertState=DIRECTIVE_STRING;
2172 
2173         else if (line.find("kw", typeDescPos)==typeDescPos || line.find("st", typeDescPos)==typeDescPos) {
2174             assertState=KEYWORD;
2175             if (isalpha(line[typeDescPos+2]))
2176                 assertGroup=line[typeDescPos+2] - 'a' +1;
2177         }
2178 
2179        if (   (assertState!=_WS && stateTraceTest[column].state != assertState && !stateTraceTest[column].isWhiteSpace )
2180             || (assertState==_WS && !stateTraceTest[column].isWhiteSpace)
2181             || assertGroup != stateTraceTest[column].kwClass) {
2182 
2183             testFailed=!negation;
2184 
2185         } else if (negation ) {
2186 
2187             //TODO Fix ~ws
2188             if (assertState!=_WS  && !stateTraceTest[column].isWhiteSpace )
2189                 testFailed=true;
2190         }
2191 
2192         if (testFailed) {
2193             errMsg << inFile << " line " << lineNumber << ", column "<< column
2194                     << ": got " << getTestcaseName(stateTraceTest[column].state, stateTraceTest[column].kwClass)
2195                     << " instead of " << prefix << getTestcaseName(assertState, assertGroup);
2196 
2197             failedPosTests.push_back(errMsg.str());
2198         }
2199 
2200     }
2201 
2202     lineContainedTestCase=true;
2203 }
2204 
getNewLine()2205 string CodeGenerator::getNewLine()
2206 {
2207     ostringstream ss;
2208     printSyntaxError(ss);
2209     if (printNewLines)
2210         ss << newLineTag;
2211     return ss.str();
2212 }
2213 
callDecorateLineFct(bool isLineStart)2214 Diluculum::LuaValueList CodeGenerator::callDecorateLineFct(bool isLineStart)
2215 {
2216 
2217     Diluculum::LuaValueList params;
2218     params.push_back(Diluculum::LuaValue(lineNumber));
2219 
2220     return currentSyntax->getLuaState()->call ( isLineStart ?
2221             *currentSyntax->getDecorateLineBeginFct(): *currentSyntax->getDecorateLineEndFct(),
2222             params,"getDecorateLineFct call");
2223 }
2224 
setOverrideParams()2225 void CodeGenerator::setOverrideParams() {
2226     if (currentSyntax->requiresParamUpdate()) {
2227         if ( currentSyntax->getOverrideConfigVal("state.string.raw")=="true"){
2228             toggleDynRawString=true; // reset to false in string state fct
2229         }
2230         if ( currentSyntax->getOverrideConfigVal("format.maskws")=="true") {
2231             maskWs=true;
2232         }
2233         if ( currentSyntax->getOverrideConfigVal("format.spacer").size()) {
2234             spacer=currentSyntax->getOverrideConfigVal("format.spacer");
2235         }
2236     }
2237 }
2238 
insertLineNumber(bool insertNewLine)2239 void CodeGenerator::insertLineNumber ( bool insertNewLine )
2240 {
2241     if ( insertNewLine ) {
2242         if (currentSyntax->getDecorateLineEndFct()) {
2243             Diluculum::LuaValueList res=callDecorateLineFct(false);
2244             if (res.size()==1) {
2245                 setOverrideParams();
2246                 wsBuffer +=res[0].asString();
2247             }
2248         }
2249         wsBuffer += getNewLine();
2250     }
2251 
2252     if (currentSyntax->getDecorateLineBeginFct()) {
2253         Diluculum::LuaValueList res=callDecorateLineFct(true);
2254         if (res.size()==1) {
2255             setOverrideParams();
2256             wsBuffer += res[0].asString();
2257         }
2258     }
2259 
2260     if ( showLineNumbers ) {
2261         ostringstream os;
2262         ostringstream numberPrefix;
2263 
2264         os << setw ( getLineNumberWidth() ) << right;
2265         if( numberCurrentLine ) {
2266             if ( lineNumberFillZeroes ) {
2267                 os.fill ( '0' );
2268             }
2269             os << lineNumber+lineNumberOffset;
2270         } else {
2271             os << "";
2272         }
2273 
2274         numberPrefix << openTags[LINENUMBER];
2275         maskString ( numberPrefix, os.str() );
2276 
2277         //use initialSpacer here, spacer can be overridden by plug-in (format.spacer)
2278         numberPrefix << initialSpacer << closeTags[LINENUMBER];
2279         wsBuffer += numberPrefix.str();
2280     }
2281 }
2282 
getLineIndex()2283 unsigned int CodeGenerator::getLineIndex()
2284 {
2285     return lineIndex;
2286 }
getLastLineLength()2287 unsigned int CodeGenerator::getLastLineLength()
2288 {
2289     return lastLineLength;
2290 }
2291 
requiresTwoPassParsing() const2292 bool CodeGenerator::requiresTwoPassParsing() const {
2293     if (!currentSyntax) return false;
2294     return currentSyntax->getPersistentSnippetsNum()>0;
2295 }
2296 
2297 
printExternalStyle(const string & outFile)2298 bool CodeGenerator::printExternalStyle ( const string &outFile )
2299 {
2300     if ( !includeStyleDef ) {
2301         ostream *cssOutFile = ( outFile.empty() ? &cout :new ofstream ( outFile.c_str() ) );
2302         if ( !cssOutFile->fail() ) {
2303             if (!omitVersionComment) {
2304                 *cssOutFile << styleCommentOpen
2305                             <<" Style definition file generated by highlight "
2306                             << HIGHLIGHT_VERSION << ", " << HIGHLIGHT_URL
2307                             << " " << styleCommentClose << "\n";
2308             }
2309             *cssOutFile << getStyleDefinition()
2310                         << "\n";
2311             *cssOutFile << readUserStyleDef();
2312             if ( !outFile.empty() ) delete cssOutFile;
2313         } else {
2314             return false;
2315         }
2316     }
2317     return true;
2318 }
2319 
printPersistentState(const string & outFile)2320 bool CodeGenerator::printPersistentState ( const string &outFile )
2321 {
2322     if (!currentSyntax) return false;
2323 
2324     ofstream pluginOutFile( outFile.c_str());
2325     if ( !pluginOutFile.fail() ) {
2326 
2327         pluginOutFile   <<"Description=\"Plugin generated by highlight using the --two-pass option\"\n\n"
2328                         <<"Categories = {\"two-pass\" }\n\n"
2329                         <<"function syntaxUpdate(desc)\n\n";
2330 
2331         pluginOutFile << currentSyntax->getPersistentHookConditions();
2332 
2333         for (auto snippet: currentSyntax->getPersistentSnippets())
2334         {
2335             pluginOutFile << snippet <<"\n\n";
2336         }
2337 
2338         pluginOutFile<<"end\n\n"
2339                      <<"Plugins={\n"
2340                      <<"  { Type=\"lang\", Chunk=syntaxUpdate }\n"
2341                      <<"}\n";
2342     } else {
2343         return false;
2344     }
2345 
2346     return true;
2347 }
2348 
readUserStyleDef()2349 string CodeGenerator::readUserStyleDef()
2350 {
2351     ostringstream ostr;
2352     if ( !styleInputPath.empty() ) {
2353         ifstream userStyleDef ( styleInputPath.c_str() );
2354         if ( userStyleDef ) {
2355             ostr << "\n" << styleCommentOpen
2356                 << " Content of " << styleInputPath
2357                 << ": " <<styleCommentClose << "\n";
2358             string line;
2359             while ( getline ( userStyleDef, line ) ) {
2360                 ostr << line << "\n";
2361             }
2362             userStyleDef.close();
2363         } else {
2364             ostr << styleCommentOpen
2365                 << " ERROR: Could not include " << styleInputPath
2366                 << "." << styleCommentClose << "\n";
2367         }
2368     }
2369 
2370     string injections=docStyle.getInjections();
2371     if (!injections.empty()) {
2372         ostr 	<< "\n" << styleCommentOpen
2373                 << " Plug-in theme injections: " <<styleCommentClose << "\n";
2374         ostr << injections<<"\n";
2375     }
2376     return ostr.str();
2377 }
2378 
initPluginScript(const string & script)2379 bool CodeGenerator::initPluginScript(const string& script)
2380 {
2381 
2382     if (script.empty()) return true;
2383 
2384     try {
2385 
2386         userScriptError="";
2387         Diluculum::LuaState ls;
2388 
2389         ls.doFile (script);
2390         int listIdx=1;
2391 
2392         while (ls["Plugins"][listIdx].value() !=Diluculum::Nil) {
2393 
2394             // Theme plugins
2395             if (ls["Plugins"][listIdx]["Type"].value().asString()=="theme") {
2396                 if (ls["Plugins"][listIdx]["Chunk"].value().type()==LUA_TFUNCTION) {
2397                     docStyle.addUserChunk(ls["Plugins"][listIdx]["Chunk"].value().asFunction());
2398                 }
2399             }
2400             // Syntax plugins
2401             else if (ls["Plugins"][listIdx]["Type"].value().asString()=="lang") {
2402                 if (ls["Plugins"][listIdx]["Chunk"].value().type()==LUA_TFUNCTION) {
2403                     currentSyntax->addUserChunk(ls["Plugins"][listIdx]["Chunk"].value().asFunction());
2404                 }
2405             }
2406             // Format plugins
2407             else if (ls["Plugins"][listIdx]["Type"].value().asString()=="format") {
2408                 if (ls["Plugins"][listIdx]["Chunk"].value().type()==LUA_TFUNCTION) {
2409                     addUserChunk(ls["Plugins"][listIdx]["Chunk"].value().asFunction());
2410                 }
2411             }
2412 
2413             listIdx++;
2414         }
2415     }  catch (Diluculum::LuaError &err) {
2416         userScriptError=err.what();
2417         return false;
2418     }
2419     return true;
2420 }
2421 
resetSyntaxReaders()2422 void CodeGenerator::resetSyntaxReaders() {
2423     for ( map<string, SyntaxReader*>::iterator it=syntaxReaders.begin(); it!=syntaxReaders.end(); it++ ) {
2424         delete it->second;
2425     }
2426     currentSyntax=NULL;
2427     syntaxReaders.clear();
2428 }
2429 
syntaxRequiresTwoPassRun()2430 bool CodeGenerator::syntaxRequiresTwoPassRun() {
2431     if (!currentSyntax) return false;
2432     return currentSyntax->requiresTwoPassRun();
2433 }
2434 
clearPersistentSnippets()2435 void CodeGenerator::clearPersistentSnippets(){
2436     if (currentSyntax) {
2437         currentSyntax->clearPersistentSnippets();
2438     }
2439 }
2440 
updateKeywordClasses()2441 void CodeGenerator::updateKeywordClasses(){
2442 
2443     if (openTags.size()) {
2444         if ( openTags.size() >NUMBER_BUILTIN_STATES ) {
2445             // remove dynamic keyword tag delimiters of the old language definition
2446             vector<string>::iterator keyStyleOpenBegin =
2447             openTags.begin() + NUMBER_BUILTIN_STATES;
2448             vector<string>::iterator keyStyleCloseBegin =
2449             closeTags.begin() + NUMBER_BUILTIN_STATES;
2450             openTags.erase ( keyStyleOpenBegin, openTags.end() );
2451             closeTags.erase ( keyStyleCloseBegin, closeTags.end() );
2452         }
2453         // add new keyword tag delimiters
2454 
2455         for ( unsigned int i=0; i< currentSyntax->getKeywordClasses().size(); i++ ) {
2456             openTags.push_back ( getKeywordOpenTag ( i ) );
2457             closeTags.push_back ( getKeywordCloseTag ( i ) );
2458         }
2459     }
2460 }
2461 
2462 
2463 }
2464