1 /***************************************************************************
2 codegenerator.cpp - description
3 -------------------
4 begin : Die Jul 9 2002
5 copyright : (C) 2002-2021 by Andre Simon
6 email : a.simon@mailbox.org
7 ***************************************************************************/
8
9
10 /*
11 This file is part of Highlight.
12
13 Highlight is free software: you can redistribute it and/or modify
14 it under the terms of the GNU General Public License as published by
15 the Free Software Foundation, either version 3 of the License, or
16 (at your option) any later version.
17
18 Highlight is distributed in the hope that it will be useful,
19 but WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 GNU General Public License for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with Highlight. If not, see <http://www.gnu.org/licenses/>.
25 */
26
27
28 #include <climits>
29 #include <memory>
30
31 #include <chrono>
32 #include <thread>
33
34
35 #include <boost/xpressive/xpressive_dynamic.hpp>
36
37 #include "codegenerator.h"
38
39 #include "htmlgenerator.h"
40 #include "xhtmlgenerator.h"
41 #include "rtfgenerator.h"
42 #include "latexgenerator.h"
43 #include "texgenerator.h"
44 #include "svggenerator.h"
45 #include "bbcodegenerator.h"
46 #include "pangogenerator.h"
47 #include "odtgenerator.h"
48 #include "astyle/astyle.h"
49
50 #if !defined (QT)
51 #include "ansigenerator.h"
52 #include "xterm256generator.h"
53 #endif
54
55 namespace highlight
56 {
57 const unsigned int CodeGenerator::NUMBER_BUILTIN_STATES = highlight::KEYWORD;
58
59 // must not start with kw, st, sm prefixes
60 const string CodeGenerator::STY_NAME_STD="def";
61 const string CodeGenerator::STY_NAME_STR="sng";
62 const string CodeGenerator::STY_NAME_NUM="num";
63 const string CodeGenerator::STY_NAME_SLC="slc";
64 const string CodeGenerator::STY_NAME_COM="com";
65 const string CodeGenerator::STY_NAME_ESC="esc";
66 const string CodeGenerator::STY_NAME_DIR="ppc"; //preprocessor
67 const string CodeGenerator::STY_NAME_DST="pps"; //preprocessor string
68 const string CodeGenerator::STY_NAME_LIN="lin";
69 const string CodeGenerator::STY_NAME_SYM="opt"; //operator
70 const string CodeGenerator::STY_NAME_IPL="ipl"; //interpolation
71
72 const string CodeGenerator::STY_NAME_HVR="hvr";
73 const string CodeGenerator::STY_NAME_ERR="err";
74 const string CodeGenerator::STY_NAME_ERM="erm";
75
76 vector<Diluculum::LuaFunction*> CodeGenerator::pluginChunks;
77
78
getInstance(OutputType type)79 CodeGenerator * CodeGenerator::getInstance ( OutputType type )
80 {
81 CodeGenerator* generator=NULL;
82 switch ( type ) {
83 case HTML:
84 generator = new HtmlGenerator();
85 break;
86 case XHTML:
87 generator = new XHtmlGenerator();
88 break;
89 case TEX:
90 generator = new TexGenerator ();
91 break;
92 case LATEX:
93 generator = new LatexGenerator();
94 break;
95 case RTF:
96 generator = new RtfGenerator ();
97 break;
98 case SVG:
99 generator = new SVGGenerator();
100 break;
101 case BBCODE:
102 generator = new BBCodeGenerator();
103 break;
104 case PANGO:
105 generator = new PangoGenerator();
106 break;
107 case ODTFLAT:
108 generator = new ODTGenerator();
109 break;
110 case ESC_ANSI:
111 generator = new AnsiGenerator();
112 break;
113 case ESC_XTERM256:
114 case ESC_TRUECOLOR:
115 generator = new Xterm256Generator();
116 generator->setESCTrueColor(type==ESC_TRUECOLOR);
117 break;
118 default:
119 break;
120 }
121 return generator;
122 }
123
124
CodeGenerator(highlight::OutputType type)125 CodeGenerator::CodeGenerator ( highlight::OutputType type )
126 :currentSyntax(NULL),
127 in ( NULL ),
128 out ( NULL ),
129 encoding ( "none" ),
130 docTitle ( "Source file" ),
131 maskWs ( false ),
132 excludeWs ( false ),
133 fragmentOutput ( false ),
134 keepInjections( false ),
135 showLineNumbers ( false ),
136 lineNumberFillZeroes ( false ),
137 printNewLines(true),
138 omitVersionComment(false),
139 isolateTags(false),
140 disableStyleCache(false),
141 baseFontSize("10"),
142 lineNumber ( 0 ),
143 lineNumberOffset ( 0 ),
144 currentState ( _UNKNOWN ),
145 currentKeywordClass ( 0 ),
146 includeStyleDef ( false ),
147 numberCurrentLine ( false ),
148 lineIndex ( 0 ),
149 lastLineLength( 0 ),
150 syntaxChangeIndex(UINT_MAX),
151 syntaxChangeLineNo(UINT_MAX),
152 lineNumberWidth ( 5 ),
153 startLineCnt( 1 ),
154 startLineCntCurFile( 1 ),
155 maxLineCnt ( UINT_MAX ),
156 inputFilesCnt (0),
157 processedFilesCnt (0),
158 kwOffset(0),
159 noTrailingNewLine(0),
160
161 terminatingChar ( '\0' ),
162 formatter ( NULL ),
163 streamIterator ( NULL ),
164 formattingEnabled ( false ),
165 formattingPossible ( false ),
166 validateInput ( false ),
167 numberWrappedLines ( true ),
168 resultOfHook(false),
169 lineContainedTestCase(false),
170 lineContainedStmt(false),
171 applySyntaxTestCase(false),
172 toggleDynRawString(false),
173 lsEnableHoverRequests(false),
174 lsCheckSemanticTokens(false),
175 lsCheckSyntaxErrors(false),
176
177 keywordCase ( StringTools::CASE_UNCHANGED ),
178 eolDelimiter ('\n'),
179 outputType ( type )
180 {
181 }
182
183
~CodeGenerator()184 CodeGenerator::~CodeGenerator()
185 {
186 delete formatter;
187 delete streamIterator;
188
189 resetSyntaxReaders();
190
191 for (unsigned int i=0; i<pluginChunks.size(); i++) {
192 delete pluginChunks[i];
193 }
194 pluginChunks.clear();
195 }
196
197
initTheme(const string & themePath,bool loadSemanticStyles)198 bool CodeGenerator::initTheme ( const string& themePath, bool loadSemanticStyles)
199 {
200 this->themePath=themePath;
201 bool loadOK = docStyle.load ( themePath, outputType, loadSemanticStyles );
202 initOutputTags();
203 return loadOK;
204 }
205
initLanguageServer(const string & executable,const vector<string> & options,const string & workspace,const string & syntax,int delay,int logLevel)206 LSResult CodeGenerator::initLanguageServer ( const string& executable, const vector<string> &options,
207 const string& workspace, const string& syntax,
208 int delay, int logLevel )
209 {
210 if (LSPClient.isInitialized()) {
211 return LSResult::INIT_OK;
212 }
213
214 LSPClient.setLogging(logLevel>1);
215
216 LSPClient.setExecutable(executable);
217 LSPClient.setWorkspace(workspace);
218 LSPClient.setOptions(options);
219 LSPClient.setSyntax(syntax);
220 LSPClient.setInitializeDelay(delay);
221 if (!LSPClient.connect()){
222 return LSResult::INIT_BAD_PIPE;
223 }
224
225 if (!LSPClient.runInitialize()){
226 return LSResult::INIT_BAD_REQUEST;
227 }
228 for (int i=0; i<docStyle.getSemanticTokenStyleCount();i++) {
229 currentSyntax->generateNewKWClass(i+1, "st");
230 }
231 LSPClient.runInitialized();
232 updateKeywordClasses();
233 return LSResult::INIT_OK;
234 }
235
lsOpenDocument(const string & fileName,const string & suffix)236 bool CodeGenerator::lsOpenDocument(const string& fileName, const string & suffix){
237 lsDocumentPath = fileName;
238 return LSPClient.runDidOpen(fileName, suffix);
239 }
240
lsCloseDocument(const string & fileName,const string & suffix)241 bool CodeGenerator::lsCloseDocument(const string& fileName, const string & suffix){
242 lsDocumentPath.clear();
243 return LSPClient.runDidClose(fileName, suffix);
244 }
245
lsAddSemanticInfo(const string & fileName,const string & suffix)246 bool CodeGenerator::lsAddSemanticInfo(const string& fileName, const string & suffix){
247 lsCheckSemanticTokens = LSPClient.runSemanticTokensFull(fileName);
248 return lsCheckSemanticTokens;
249 }
250
isHoverProvider()251 bool CodeGenerator::isHoverProvider(){
252 return LSPClient.isHoverProvider();
253 }
254
isSemanticTokensProvider()255 bool CodeGenerator::isSemanticTokensProvider(){
256 return LSPClient.isSemanticTokensProvider();
257 }
258
lsAddHoverInfo(bool hover)259 void CodeGenerator::lsAddHoverInfo(bool hover){
260 lsEnableHoverRequests = hover;
261 }
262
lsAddSyntaxErrorInfo(bool error)263 void CodeGenerator::lsAddSyntaxErrorInfo(bool error) {
264 lsCheckSyntaxErrors = error;;
265 }
266
267
exitLanguageServer()268 void CodeGenerator::exitLanguageServer () {
269 LSPClient.runShutdown();
270 LSPClient.runExit();
271 }
272
getStyleName()273 const string& CodeGenerator::getStyleName()
274 {
275 return themePath;
276 }
277
setLineNumberWidth(int w)278 void CodeGenerator::setLineNumberWidth ( int w )
279 {
280 lineNumberWidth=w;
281 }
282
getLineNumberWidth()283 int CodeGenerator::getLineNumberWidth()
284 {
285 return lineNumberWidth;
286 }
287
setPrintLineNumbers(bool flag,unsigned int startCnt)288 void CodeGenerator::setPrintLineNumbers ( bool flag, unsigned int startCnt )
289 {
290 showLineNumbers=flag;
291 lineNumberOffset = startCnt-1;
292 }
293
getPrintLineNumbers()294 bool CodeGenerator::getPrintLineNumbers()
295 {
296 return showLineNumbers;
297 }
298
setPrintZeroes(bool flag)299 void CodeGenerator::setPrintZeroes ( bool flag )
300 {
301 lineNumberFillZeroes=flag;
302 }
303
getPrintZeroes()304 bool CodeGenerator::getPrintZeroes()
305 {
306 return lineNumberFillZeroes;
307 }
308
setIncludeStyle(bool flag)309 void CodeGenerator::setIncludeStyle ( bool flag )
310 {
311 includeStyleDef = flag;
312 }
313
disableTrailingNL(int flag)314 void CodeGenerator::disableTrailingNL ( int flag )
315 {
316 noTrailingNewLine = flag;
317 }
318
setStyleInputPath(const string & path)319 void CodeGenerator::setStyleInputPath ( const string& path )
320 {
321 styleInputPath = path;
322 }
323
setStyleOutputPath(const string & path)324 void CodeGenerator::setStyleOutputPath ( const string& path )
325 {
326 styleOutputPath = path;
327 }
328
setPluginParameter(const string & param)329 void CodeGenerator::setPluginParameter ( const string& param )
330 {
331 pluginParameter = param;
332 }
333
getStyleInputPath()334 const string& CodeGenerator::getStyleInputPath()
335 {
336 return styleInputPath;
337 }
338
getStyleOutputPath()339 const string& CodeGenerator::getStyleOutputPath()
340 {
341 return styleOutputPath;
342 }
343
setFragmentCode(bool flag)344 void CodeGenerator::setFragmentCode ( bool flag )
345 {
346 fragmentOutput=flag;
347 }
348
getFragmentCode()349 bool CodeGenerator::getFragmentCode()
350 {
351 return fragmentOutput;
352 }
setKeepInjections(bool flag)353 void CodeGenerator::setKeepInjections ( bool flag )
354 {
355 keepInjections=flag;
356 }
357
getKeepInjections()358 bool CodeGenerator::getKeepInjections()
359 {
360 return keepInjections;
361 }
setValidateInput(bool flag)362 void CodeGenerator::setValidateInput ( bool flag )
363 {
364 validateInput=flag;
365 }
366
getValidateInput()367 bool CodeGenerator::getValidateInput()
368 {
369 return validateInput;
370 }
371
setNumberWrappedLines(bool flag)372 void CodeGenerator::setNumberWrappedLines ( bool flag )
373 {
374 numberWrappedLines=flag;
375 }
376
getNumberWrappedLines()377 bool CodeGenerator::getNumberWrappedLines()
378 {
379 return numberWrappedLines;
380 }
381
setOmitVersionComment(bool flag)382 void CodeGenerator::setOmitVersionComment ( bool flag )
383 {
384 omitVersionComment=flag;
385 }
386
getOmitVersionComment()387 bool CodeGenerator::getOmitVersionComment ()
388 {
389 return omitVersionComment;
390 }
391
setIsolateTags(bool flag)392 void CodeGenerator::setIsolateTags ( bool flag )
393 {
394 isolateTags=flag;
395 }
396
getIsolateTags()397 bool CodeGenerator::getIsolateTags ()
398 {
399 return isolateTags;
400 }
401
setBaseFont(const string & fontName)402 void CodeGenerator::setBaseFont ( const string& fontName )
403 {
404 baseFont = fontName;
405 }
406
setBaseFontSize(const string & fontSize)407 void CodeGenerator::setBaseFontSize ( const string& fontSize)
408 {
409 baseFontSize = fontSize;
410 }
411
setStyleCaching(bool flag)412 void CodeGenerator::setStyleCaching ( bool flag )
413 {
414 disableStyleCache=!flag;
415 }
416
getBaseFont() const417 const string CodeGenerator::getBaseFont() const
418 {
419 if ( !baseFont.empty() ) return baseFont;
420 switch ( outputType ) {
421 case HTML:
422 case XHTML:
423 case SVG:
424 return "'Courier New',monospace";
425 break;
426 case LATEX:
427 return "ttfamily";
428 break;
429 case TEX:
430 return "tt";
431 break;
432 default:
433 return "Courier New";
434 }
435 }
436
getBaseFontSize()437 const string CodeGenerator::getBaseFontSize()
438 {
439 return baseFontSize;
440 }
441
setTitle(const string & title)442 void CodeGenerator::setTitle ( const string & title )
443 {
444 if ( !title.empty() ) docTitle= title;
445 }
446
getTitle()447 string CodeGenerator::getTitle()
448 {
449 return docTitle;
450 }
451
setEncoding(const string & encodingName)452 void CodeGenerator::setEncoding ( const string& encodingName )
453 {
454 encoding = encodingName;
455 }
456
formattingDisabled()457 bool CodeGenerator::formattingDisabled()
458 {
459 return !formattingEnabled;
460 }
461
setStartingInputLine(unsigned int begin)462 void CodeGenerator::setStartingInputLine ( unsigned int begin )
463 {
464 startLineCnt = startLineCntCurFile = begin;
465 }
466
setMaxInputLineCnt(unsigned int cnt)467 void CodeGenerator::setMaxInputLineCnt ( unsigned int cnt )
468 {
469 maxLineCnt = cnt;
470 }
471
setFilesCnt(unsigned int cnt)472 void CodeGenerator::setFilesCnt ( unsigned int cnt )
473 {
474 inputFilesCnt = cnt;
475 processedFilesCnt = 0;
476 }
477
formattingIsPossible()478 bool CodeGenerator::formattingIsPossible()
479 {
480 return formattingPossible;
481 }
482
setPreformatting(WrapMode lineWrappingStyle,unsigned int lineLength,int numberSpaces)483 void CodeGenerator::setPreformatting ( WrapMode lineWrappingStyle,
484 unsigned int lineLength,
485 int numberSpaces )
486 {
487 bool enableWrap = lineWrappingStyle!=WRAP_DISABLED;
488 bool replaceTabs = numberSpaces > 0;
489
490 if ( enableWrap || replaceTabs ) {
491 preFormatter.setWrap ( enableWrap );
492 preFormatter.setWrapIndentBraces ( lineWrappingStyle==WRAP_DEFAULT );
493 preFormatter.setWrapLineLength ( lineLength );
494 preFormatter.setReplaceTabs ( replaceTabs );
495 preFormatter.setNumberSpaces ( numberSpaces );
496 }
497 }
498
setKeyWordCase(StringTools::KeywordCase keyCase)499 void CodeGenerator::setKeyWordCase ( StringTools::KeywordCase keyCase )
500 {
501 keywordCase = keyCase;
502 }
503
setEOLDelimiter(char delim)504 void CodeGenerator::setEOLDelimiter(char delim)
505 {
506 eolDelimiter = delim;
507 }
508
reset()509 void CodeGenerator::reset()
510 {
511 lineIndex = 0;
512 lineNumber = 0;
513 line.clear();
514 preFormatter.reset();
515 inFile.clear();
516 outFile.clear();
517 embedLangDefPath.clear();
518 printNewLines=true;
519 syntaxChangeIndex = syntaxChangeLineNo = UINT_MAX;
520 startLineCntCurFile = startLineCnt;
521 applySyntaxTestCase=lineContainedTestCase=false;
522 if (currentSyntax){
523 vector<int> overrideStyleAttrs=currentSyntax->getOverrideStyleAttributes();
524 docStyle.overrideAttributes(overrideStyleAttrs);
525 if (overrideStyleAttrs.size())
526 disableStyleCache = true;
527 }
528 }
529
getThemeInitError()530 string CodeGenerator::getThemeInitError()
531 {
532 return docStyle.getErrorMessage();
533 }
534
getPluginScriptError()535 string CodeGenerator::getPluginScriptError()
536 {
537 return userScriptError;
538 }
539
getSyntaxRegexError()540 string CodeGenerator::getSyntaxRegexError()
541 {
542 return (currentSyntax)? currentSyntax->getFailedRegex(): "syntax undef";
543 }
getSyntaxLuaError()544 string CodeGenerator::getSyntaxLuaError()
545 {
546 return (currentSyntax)? currentSyntax->getLuaErrorText(): "syntax undef";
547
548 }
getSyntaxDescription()549 string CodeGenerator::getSyntaxDescription()
550 {
551 return (currentSyntax)? currentSyntax->getDescription(): "syntax undef";
552
553 }
getSyntaxEncodingHint()554 string CodeGenerator::getSyntaxEncodingHint()
555 {
556 return (currentSyntax)? currentSyntax->getEncodingHint(): "";
557
558 }
getThemeDescription()559 string CodeGenerator::getThemeDescription()
560 {
561 return docStyle.getDescription();
562 }
563
getSyntaxCatDescription()564 string CodeGenerator::getSyntaxCatDescription(){
565 return (currentSyntax)? currentSyntax->getCategoryDescription(): "";
566 }
567
getThemeCatDescription()568 string CodeGenerator::getThemeCatDescription()
569 {
570 return docStyle.getCategoryDescription();
571 }
572
getThemeContrast()573 float CodeGenerator::getThemeContrast()
574 {
575 return docStyle.getContrast();
576 }
577
getLineNumber()578 unsigned int CodeGenerator::getLineNumber()
579 {
580 return lineNumber;
581 }
582
readNewLine(string & newLine)583 bool CodeGenerator::readNewLine ( string &newLine )
584 {
585 bool eof=false;
586
587 if ( lineIndex ) terminatingChar=newLine[lineIndex-1];
588
589 while (!eof && startLineCntCurFile>0) {
590 if ( formattingPossible && formattingEnabled ) {
591 eof=!formatter->hasMoreLines();
592 if ( !eof ) {
593 newLine = formatter->nextLine();
594 }
595 } else {
596 eof = ! getline ( *in, newLine, eolDelimiter );
597 }
598 --startLineCntCurFile;
599 }
600 startLineCntCurFile=1;
601 #ifndef _WIN32
602 // drop CR of CRLF files
603 if (!newLine.empty() && newLine[newLine.size() - 1] == '\r')
604 newLine.erase(newLine.size() - 1);
605 #endif
606
607 return eof || ( lineNumber == maxLineCnt );
608 }
609
matchRegex(const string & line,State skipState)610 void CodeGenerator::matchRegex ( const string &line, State skipState)
611 {
612 regexGroups.clear();
613 int matchBegin=0;
614 int groupID=0;
615
616 // cycle through all regex, save the start and ending indices of matches to report them later
617 for ( unsigned int i=0; i<currentSyntax->getRegexElements().size(); i++ ) {
618 RegexElement *regexElem = currentSyntax->getRegexElements() [i];
619
620 if (regexElem->open == skipState) continue;
621
622 if (regexElem->constraintLineNum && regexElem->constraintLineNum != lineNumber) {
623 continue;
624 }
625
626 if (regexElem->constraintFilename.size() && regexElem->constraintFilename != inFile) {
627 continue;
628 }
629
630 boost::xpressive::sregex_iterator cur( line.begin(), line.end(), regexElem->rex );
631 boost::xpressive::sregex_iterator end;
632
633 for( ; cur != end; ++cur ) {
634 groupID = ( regexElem->capturingGroup<0 ) ? cur->size()-1 : regexElem->capturingGroup;
635 matchBegin = cur->position(groupID);
636
637 regexGroups.insert (
638 make_pair ( matchBegin + 1, RegexToken ( regexElem->open, cur->length(groupID), regexElem->kwClass, regexElem->langName ) ) );
639
640 // priority regex (match required)
641 if (regexElem->priority) {
642 return;
643 }
644 }
645 }
646 }
647
getInputChar()648 unsigned char CodeGenerator::getInputChar()
649 {
650 // end of line?
651 if ( lineIndex == line.length() ) {
652
653 //more testing required:
654 if (outputType==ESC_TRUECOLOR || outputType==ESC_XTERM256)
655 lastLineLength=StringTools::utf8_strlen(line + lsSyntaxErrorDesc);
656
657 bool eof=false;
658 if ( preFormatter.isEnabled() ) {
659 if ( !preFormatter.hasMoreLines() ) {
660 eof=readNewLine ( line );
661 preFormatter.setLine ( line );
662 ++lineNumber;
663 numberCurrentLine = true;
664 } else {
665 if (numberWrappedLines)
666 ++lineNumber;
667 numberCurrentLine = numberWrappedLines;
668 }
669
670 line = preFormatter.getNextLine();
671 } else {
672 eof=readNewLine ( line );
673 ++lineNumber;
674
675 numberCurrentLine = true;
676 }
677 lineIndex=0;
678
679 if (!lineContainedTestCase && applySyntaxTestCase){
680 stateTraceTest = stateTraceCurrent;
681 stateTraceCurrent.clear();
682 }
683
684 lineContainedTestCase=false;
685 lineContainedStmt=false;
686 matchRegex ( line );
687
688 return ( eof ) ?'\0':'\n';
689 }
690
691 return line[lineIndex++];
692 }
693
694 /** changing this method requires regression testing with nested syntax files (HTML+PHP+JS+CSS,
695 * Coffeescript with block regex, Pas + ASM)
696 * especially nested syntax in one line
697 */
getCurrentState(State oldState)698 State CodeGenerator::getCurrentState (State oldState)
699 {
700 unsigned char c='\0';
701
702 if ( token.length() ==0 ) {
703 c=getInputChar();
704 } else {
705 lineIndex-= ( token.length()-1 );
706 c=token[0];
707 }
708 if ( c=='\n' ) {
709 return _EOL; // End of line
710 }
711
712 if ( c=='\0' ) {
713 return _EOF; // End of file
714 }
715
716 if ( c==' ' || c=='\t' ) {
717 token= c;
718 return _WS; // White space
719 }
720
721 if ( applySyntaxTestCase && ( c=='^' || c=='<') && (oldState == ML_COMMENT || oldState==SL_COMMENT) ) {
722 token= c;
723 return _TESTPOS;
724 }
725
726 // at this position the syntax change takes place
727 if (lineIndex >= syntaxChangeIndex-1 || syntaxChangeLineNo < lineNumber){
728 loadEmbeddedLang(embedLangDefPath); // load new syntax
729 matchRegex(line); // recognize new patterns in the (remaining) line
730 syntaxChangeIndex = syntaxChangeLineNo = UINT_MAX;
731 }
732
733 SKIP_EMBEDDED:
734
735 if (lsCheckSyntaxErrors && LSPClient.errorExists(lineNumber, lineIndex)) {
736 highlight::SemanticToken errorToken = LSPClient.getError(lineNumber, lineIndex);
737 token = line.substr ( lineIndex-1, errorToken.length);
738 lineIndex += errorToken.length-1;
739 lsSyntaxErrorDesc = errorToken.id;
740
741 //std::cerr <<"error num "<<lineNumber<< " idx "<<lineIndex<< " error "<<errorToken.id<< "\n";
742 return SYNTAX_ERROR;
743 }
744
745 if (lsCheckSemanticTokens && LSPClient.tokenExists(lineNumber, lineIndex)) {
746 highlight::SemanticToken semToken = LSPClient.getToken(lineNumber, lineIndex);
747 int semStyleKwId = docStyle.getSemanticStyle(semToken.id);
748 if (semStyleKwId) {
749 token = line.substr ( lineIndex-1, semToken.length);
750 lineIndex += semToken.length-1;
751
752 currentKeywordClass = semStyleKwId + kwOffset; // +offset of missing kw groups in the theme
753 //std::cerr <<"l "<<lineNumber<< "t "<<token<< " semStyleKwId "<< semStyleKwId << " off "<<kwOffset<<" -> " << semToken.id <<"\n";
754 return KEYWORD;
755 }
756 }
757
758 // Test if a regular expression was found at the current position
759 if ( !regexGroups.empty() ) {
760 if ( regexGroups.count ( lineIndex ) ) {
761 token = line.substr ( lineIndex-1, regexGroups[lineIndex].length );
762
763 unsigned int oldIndex= lineIndex;
764 if ( regexGroups[oldIndex].length>1 ) lineIndex+= regexGroups[oldIndex].length-1;
765
766 if ( regexGroups[oldIndex].state==EMBEDDED_CODE_BEGIN ) {
767 //do not handle a nested section if the syntax is marked as "sealed"
768 if (embedLangDefPath.length()==0 || currentSyntax->allowsInnerSection(embedLangDefPath) ) {
769 embedLangDefPath = currentSyntax->getNewPath(regexGroups[oldIndex].name);
770 //remember position
771 syntaxChangeIndex = lineIndex+2;
772 syntaxChangeLineNo = lineNumber;
773 }
774
775 // repeat parsing of this line without nested state recognition to highlight opening delimiter in the host syntax
776 matchRegex(line, EMBEDDED_CODE_BEGIN);
777 lineIndex = oldIndex;
778 goto SKIP_EMBEDDED; // this is how it should be done
779 }
780
781 if ( regexGroups[oldIndex].state==IDENTIFIER_BEGIN || regexGroups[oldIndex].state==KEYWORD ) {
782 string reservedWord= ( currentSyntax->isIgnoreCase() ) ? StringTools::change_case ( token ) :token;
783 currentKeywordClass=currentSyntax->getKeywordListGroup ( reservedWord ); //check in lists (no regex)
784
785 if ( !currentKeywordClass && regexGroups[oldIndex].state==KEYWORD ){
786 currentKeywordClass = regexGroups[oldIndex].kwClass;
787 }
788 return validateState(( currentKeywordClass ) ? KEYWORD : STANDARD, oldState );
789 } else {
790 return validateState(regexGroups[oldIndex].state, oldState);
791 }
792 }
793 }
794
795 // Character not referring to any state
796 token = c;
797 return STANDARD;
798 }
799
validateState(State newState,State oldState)800 State CodeGenerator::validateState(State newState, State oldState)
801 {
802
803 if (currentSyntax->getValidateStateChangeFct()) {
804 Diluculum::LuaValueList params;
805 params.push_back(Diluculum::LuaValue(oldState));
806 params.push_back(Diluculum::LuaValue(newState));
807 params.push_back(Diluculum::LuaValue(token));
808 params.push_back(Diluculum::LuaValue(getCurrentKeywordClassId()) );
809 params.push_back(Diluculum::LuaValue(lineNumber) );
810 params.push_back(Diluculum::LuaValue(lineIndex-(unsigned int)token.length()) );
811
812 Diluculum::LuaValueList res=
813 currentSyntax->getLuaState()->call ( *currentSyntax->getValidateStateChangeFct(),
814 params,"getValidateStateChangeFct call") ;
815
816 resultOfHook = res.size()>=1;
817 if (resultOfHook) {
818
819 setOverrideParams();
820
821 State validatedState = (State)res[0].asInteger();
822 if ( validatedState== _REJECT) {
823
824 // proceed using only the first character of the token
825 if (res.size()==1) {
826 lineIndex -= (token.length() -1);
827 token=token.substr(0, 1);
828 }
829
830 //experimental for slim.lang: evaluate second return arg after _REJECT
831 if (res.size()>=2) {
832 lineIndex -= (token.length() );
833 token.clear();
834 return (State)res[1].asInteger();
835 }
836 return oldState;
837 }
838
839 return validatedState;
840 }
841 }
842 resultOfHook = false;
843
844 return newState;
845 }
846
getCurrentKeywordClassId()847 unsigned int CodeGenerator::getCurrentKeywordClassId(){
848 unsigned int kwClassId=0;
849
850 // this vector contains the defined keyword classes, and currentKeywordClass is its index:
851 vector<string> kwClasses=currentSyntax->getKeywordClasses();
852
853 if (currentKeywordClass && currentKeywordClass<=kwClasses.size()) {
854 string kwClassName=kwClasses[currentKeywordClass-1];
855 if (kwClassName.size()==3)
856 kwClassId = kwClassName[2] - 'a' + 1;
857 }
858 return kwClassId;
859 }
860
861 //it is faster to pass ostream reference
maskString(ostream & ss,const string & s)862 void CodeGenerator::maskString ( ostream& ss, const string & s )
863 {
864 string escHoverText;
865
866 if (lsEnableHoverRequests && (currentState==STANDARD || currentState==NUMBER || currentState==KEYWORD)) {
867
868 string hoverText = LSPClient.runHover(lsDocumentPath, lineIndex - s.size(), lineNumber-1);
869
870 for(const auto &c : hoverText)
871 {
872 if (isascii(c))
873 escHoverText.append(maskCharacter(c));
874 }
875 }
876
877 if (escHoverText.size()) {
878 ss << getHoverTagOpen(escHoverText);
879 }
880
881 for (const auto &c : s)
882 {
883 ss << maskCharacter ( c );
884 }
885
886 if (escHoverText.size()) {
887 ss << getHoverTagClose();
888 }
889
890 // The test markers position should also be deternmined by calculating the code points
891 if ( applySyntaxTestCase ) {
892
893 PositionState ps(currentState, getCurrentKeywordClassId(), false);
894
895 //TODO avoid repeated string comparison:
896 int slen = encoding=="utf-8" ? StringTools::utf8_strlen(s) : s.length();
897 for (int i=0; i< slen; i++ ) {
898 stateTraceCurrent.push_back(ps);
899 }
900 if (stateTraceCurrent.size()>200)
901 stateTraceCurrent.erase(stateTraceCurrent.begin(), stateTraceCurrent.begin() + 100 );
902 }
903 }
904
printSyntaxError(ostream & ss)905 void CodeGenerator::printSyntaxError ( ostream& ss ) {
906 if ( !lsSyntaxErrorDesc.empty()) {
907 ss << openTags[ highlight::SYNTAX_ERROR_MSG ];
908
909 for(const auto &c : lsSyntaxErrorDesc)
910 {
911 ss << maskCharacter ( c );
912 }
913
914 ss << closeTags[ highlight::SYNTAX_ERROR_MSG ];
915 lsSyntaxErrorDesc.clear();
916 }
917 }
918
callDecorateFct(const string & token)919 Diluculum::LuaValueList CodeGenerator::callDecorateFct(const string& token)
920 {
921
922 Diluculum::LuaValueList params;
923 params.push_back(Diluculum::LuaValue(token));
924 params.push_back(Diluculum::LuaValue(currentState));
925 params.push_back(Diluculum::LuaValue(currentKeywordClass));
926 params.push_back(Diluculum::LuaValue(lineContainedStmt));
927 params.push_back(Diluculum::LuaValue(lineNumber) );
928 params.push_back(Diluculum::LuaValue(lineIndex-(unsigned int)token.length()) );
929
930 return currentSyntax->getLuaState()->call ( *currentSyntax->getDecorateFct(),
931 params,"getDecorateFct call") ;
932 }
933
printMaskedToken(bool flushWhiteSpace,StringTools::KeywordCase tcase)934 void CodeGenerator::printMaskedToken (bool flushWhiteSpace, StringTools::KeywordCase tcase )
935 {
936 if ( flushWhiteSpace )
937 flushWs(1);
938 string caseToken = StringTools::change_case ( token, tcase );
939 if (currentSyntax->getDecorateFct()) {
940
941 Diluculum::LuaValueList res=callDecorateFct(caseToken);
942 if (res.size()==1) {
943 *out<<res[0].asString();
944 } else {
945 maskString ( *out, caseToken );
946 }
947 } else {
948 maskString ( *out, caseToken );
949 }
950
951 // check this *after* the decorate call
952 if ( currentState == STANDARD || currentState == KEYWORD || currentState == NUMBER
953 || currentState == STRING || currentState == IDENTIFIER_BEGIN) {
954 lineContainedStmt = true;
955 }
956 token.clear();
957 }
958
styleFound()959 bool CodeGenerator::styleFound()
960 {
961 return docStyle.found();
962 }
963
printIndexFile(const vector<string> & fileList,const string & outPath)964 bool CodeGenerator::printIndexFile ( const vector<string> &fileList, const string &outPath )
965 {
966 return true;
967 }
968
initIndentationScheme(const string & indentScheme)969 bool CodeGenerator::initIndentationScheme ( const string &indentScheme )
970 {
971
972 if ( formatter!=NULL ) {
973 return true;
974 }
975
976 if ( !indentScheme.size() ) return false;
977
978 formatter=new astyle::ASFormatter();
979
980 if ( indentScheme=="allman" || indentScheme=="bsd" || indentScheme=="ansi" ) {
981 formatter->setFormattingStyle ( astyle::STYLE_ALLMAN );
982 } else if ( indentScheme=="kr"||indentScheme=="k&r"||indentScheme=="k/r" ) {
983 formatter->setFormattingStyle ( astyle::STYLE_KR );
984 } else if ( indentScheme=="java" ) {
985 formatter->setFormattingStyle ( astyle::STYLE_JAVA );
986 } else if ( indentScheme=="stroustrup" ) {
987 formatter->setFormattingStyle ( astyle::STYLE_STROUSTRUP );
988 } else if ( indentScheme=="whitesmith" ) {
989 formatter->setFormattingStyle ( astyle::STYLE_WHITESMITH );
990 } else if ( indentScheme=="banner" || indentScheme=="ratliff") {
991 formatter->setFormattingStyle ( astyle::STYLE_RATLIFF );
992 } else if ( indentScheme=="gnu" ) {
993 formatter->setFormattingStyle ( astyle::STYLE_GNU );
994 } else if ( indentScheme=="linux" ) {
995 formatter->setFormattingStyle ( astyle::STYLE_LINUX );
996 } else if ( indentScheme=="horstmann" ) {
997 formatter->setFormattingStyle ( astyle::STYLE_HORSTMANN );
998 } else if ( indentScheme=="otbs" || indentScheme=="1tbs") {
999 formatter->setFormattingStyle ( astyle::STYLE_1TBS );
1000 } else if ( indentScheme=="google") {
1001 formatter->setFormattingStyle ( astyle::STYLE_GOOGLE );
1002 } else if ( indentScheme=="pico" || indentScheme=="a11") {
1003 formatter->setFormattingStyle ( astyle::STYLE_PICO );
1004 } else if ( indentScheme=="lisp" || indentScheme=="python"|| indentScheme=="a12") {
1005 formatter->setFormattingStyle ( astyle::STYLE_LISP );
1006 } else if ( indentScheme=="vtk") {
1007 formatter->setFormattingStyle ( astyle::STYLE_VTK );
1008 } else if ( indentScheme=="mozilla") {
1009 formatter->setFormattingStyle ( astyle::STYLE_MOZILLA );
1010 } else if ( indentScheme=="webkit") {
1011 formatter->setFormattingStyle ( astyle::STYLE_WEBKIT );
1012 } else if ( indentScheme!="user" ){
1013 return false;
1014 }
1015 return formattingEnabled=true;
1016 }
1017
loadLanguage(const string & langDefPath,bool embedded)1018 LoadResult CodeGenerator::loadLanguage ( const string& langDefPath, bool embedded )
1019 {
1020
1021 if (!embedded) {
1022 while (!nestedLangs.empty()) {
1023 nestedLangs.pop();
1024 }
1025 }
1026
1027 bool reloadNecessary= currentSyntax ? currentSyntax->needsReload ( langDefPath ): true;
1028 LoadResult result=LOAD_OK;
1029 if ( reloadNecessary ) {
1030 if (syntaxReaders.count(langDefPath)) {
1031 currentSyntax=syntaxReaders[langDefPath];
1032 result=LOAD_OK;
1033 } else {
1034
1035 currentSyntax=new SyntaxReader();
1036 result=currentSyntax->load(langDefPath, pluginParameter, outputType);
1037 syntaxReaders[langDefPath]=currentSyntax;
1038 }
1039
1040 if ( result==LOAD_OK ) {
1041 formattingPossible=currentSyntax->enableReformatting();
1042 updateKeywordClasses();
1043 }
1044 }
1045
1046 kwOffset=currentSyntax->getKeywordCount() - docStyle.getKeywordStyleCount();
1047
1048 return result;
1049 }
1050
validateInputStream()1051 bool CodeGenerator::validateInputStream()
1052 {
1053 if ( !in ) return false;
1054
1055 // it is not possible to move stream pointer back with stdin
1056 if ( ( int ) in->tellg() == -1 ) // -1 : stdin
1057 return true;
1058
1059 // Sources: http://en.wikipedia.org/wiki/Magic_number_(programming)
1060 // Magic configuration of "file"
1061 // This is intended for web plugins - only check filetypes often found in the net
1062 char magic_gif[] = {'G','I','F','8', 0};
1063 char magic_png[] = {'\x89','P','N','G', 0};
1064 char magic_java[] = {'\xCA','\xFE','\xBA','\xBE', 0};
1065 char magic_jpeg[] = {'\xFF','\xD8','\xFF', 0};
1066 char magic_bmp[] = {'B','M', 0};
1067 char magic_pdf[] = {'%','P','D','F', 0};
1068 char magic_utf8[] = {'\xEF','\xBB','\xBF',0};
1069 char magic_rar[] = {'R','a','r','!', 0};
1070 char magic_zip[] = {'P','K','\x03','\x04', 0};
1071 char magic_ace[] = {'*','*','A','C','E','*','*', 0};
1072 char magic_tgz[] = {'\x8b','\x1f', '\x00', '\x08', 0};
1073 char magic_bzip[] = {'B','Z', 0};
1074
1075 char* magic_table[] = {magic_utf8,
1076 magic_gif, magic_png, magic_jpeg, magic_bmp, magic_pdf,
1077 magic_java,
1078 magic_rar, magic_zip, magic_ace, magic_tgz, magic_bzip,
1079 0
1080 };
1081
1082 char buffer [10]= {0};
1083 in->read ( buffer,8 ); //only read the first 8 bytes of input stream
1084
1085 int magic_index=0;
1086 while ( magic_table[magic_index] ) {
1087 if ( !strncmp ( buffer, magic_table[magic_index], strlen ( magic_table[magic_index] ) ) ) {
1088 break;
1089 }
1090 magic_index++;
1091 }
1092 int streamReadPos=0;
1093 if ( magic_table[magic_index] == magic_utf8 ) {
1094 //setEncoding("utf-8");
1095 streamReadPos=3; // remove UTF-8 magic number from output
1096 }
1097
1098 in -> seekg ( streamReadPos, ios::beg );
1099 in-> clear(); // clear fail bit to continue reading
1100
1101 return !magic_table[magic_index] // points to 0 if no pattern was found
1102 || magic_table[magic_index] == magic_utf8;
1103 }
1104
applyPluginChunk(const string & fctName,string * result,bool * keepDefault)1105 void CodeGenerator::applyPluginChunk(const string& fctName, string *result, bool *keepDefault) {
1106
1107 if (currentSyntax && pluginChunks.size()) {
1108
1109 Diluculum::LuaState luaState;
1110
1111 Diluculum::LuaValueList chunkParams;
1112 chunkParams.push_back(currentSyntax->getDescription());
1113 for (unsigned int i=0; i<pluginChunks.size(); i++) {
1114 luaState.call(*pluginChunks[i], chunkParams, "format user function");
1115 }
1116
1117 if (luaState.globals().count(fctName)) {
1118 Diluculum::LuaFunction* documentFct=new Diluculum::LuaFunction(luaState[fctName].value().asFunction());
1119
1120 luaState["HL_PLUGIN_PARAM"] = pluginParameter;
1121 luaState["HL_OUTPUT"] = outputType;
1122 luaState["HL_FORMAT_HTML"]=HTML;
1123 luaState["HL_FORMAT_XHTML"]=XHTML;
1124 luaState["HL_FORMAT_TEX"]=TEX;
1125 luaState["HL_FORMAT_LATEX"]=LATEX;
1126 luaState["HL_FORMAT_RTF"]=RTF;
1127 luaState["HL_FORMAT_ANSI"]=ESC_ANSI;
1128 luaState["HL_FORMAT_XTERM256"]=ESC_XTERM256;
1129 luaState["HL_FORMAT_TRUECOLOR"]=ESC_TRUECOLOR;
1130 luaState["HL_FORMAT_SVG"]=SVG;
1131 luaState["HL_FORMAT_BBCODE"]=BBCODE;
1132 luaState["HL_FORMAT_PANGO"]=PANGO;
1133 luaState["HL_FORMAT_ODT"]=ODTFLAT;
1134
1135 Diluculum::LuaValueList params;
1136 Diluculum::LuaValueMap options;
1137 options[Diluculum::LuaValue("title")] = Diluculum::LuaValue( docTitle );
1138 options[Diluculum::LuaValue("encoding")] = Diluculum::LuaValue(encoding);
1139 options[Diluculum::LuaValue("fragment")] = Diluculum::LuaValue(fragmentOutput);
1140 options[Diluculum::LuaValue("font")] = Diluculum::LuaValue(getBaseFont());
1141 options[Diluculum::LuaValue("fontsize")] = Diluculum::LuaValue(getBaseFontSize());
1142
1143 params.push_back(inputFilesCnt);
1144 params.push_back(processedFilesCnt);
1145 params.push_back(options);
1146
1147 Diluculum::LuaValueList res=luaState.call ( *documentFct, params, fctName+" call");
1148 if (res.size()>=1) {
1149 *keepDefault=false;
1150 *result = res[0].asString();
1151 if (res.size()==2)
1152 *keepDefault = res[1].asBoolean();
1153 }
1154 delete documentFct;
1155 }
1156 }
1157 }
1158
printHeader()1159 void CodeGenerator::printHeader()
1160 {
1161 bool keepDefaultHeader=true;
1162 string pluginHeader;
1163
1164 processedFilesCnt++;
1165
1166 applyPluginChunk("DocumentHeader", &pluginHeader, &keepDefaultHeader);
1167
1168 if ( ! fragmentOutput && keepDefaultHeader)
1169 *out << getHeader();
1170
1171 *out << pluginHeader;
1172
1173 if ( !fragmentOutput || keepInjections)
1174 *out << currentSyntax->getHeaderInjection();
1175 }
1176
printFooter()1177 void CodeGenerator::printFooter()
1178 {
1179
1180 bool keepDefaultFooter=true;
1181 string pluginFooter;
1182
1183 applyPluginChunk("DocumentFooter", &pluginFooter, &keepDefaultFooter);
1184
1185 if ( !fragmentOutput || keepInjections)
1186 *out << currentSyntax->getFooterInjection();
1187
1188 *out << pluginFooter;
1189
1190 if ( ! fragmentOutput && keepDefaultFooter )
1191 *out << getFooter();
1192 }
1193
generateFile(const string & inFileName,const string & outFileName)1194 ParseError CodeGenerator::generateFile ( const string &inFileName,
1195 const string &outFileName )
1196 {
1197 if ( !docStyle.found() ) {
1198 return BAD_STYLE;
1199 }
1200
1201 reset();
1202
1203 ParseError error=PARSE_OK;
1204
1205 inFile=inFileName;
1206 outFile=outFileName;
1207
1208 in = ( inFileName.empty() ? &cin :new ifstream ( inFileName.c_str() ) );
1209
1210 if ( validateInput )
1211 if ( !validateInputStream() ) error= BAD_INPUT;
1212
1213 if ( !in->fail() && error==PARSE_OK ) {
1214 out = ( outFileName.empty() ? &cout :new ofstream ( outFileName.c_str() ) );
1215 if ( out->fail() ) {
1216 error=BAD_OUTPUT;
1217 }
1218 }
1219
1220 if ( in->fail() ) {
1221 error=BAD_INPUT;
1222 }
1223
1224 if ( error==PARSE_OK ) {
1225 initASStream();
1226 currentSyntax->setInputFileName(inFile);
1227 printHeader();
1228 printBody();
1229 printFooter();
1230 }
1231
1232 if ( !outFileName.empty() ) {
1233 delete out;
1234 out=NULL;
1235 }
1236 if ( !inFileName.empty() ) {
1237 delete in;
1238 in=NULL;
1239 }
1240 return error;
1241 }
1242
/**
 * Highlights the given text and returns the generated document as a string.
 *
 * \param input source text to be highlighted
 * \return the generated output; an empty string if no document style was
 *         found or one of the string streams is in a failed state
 */
string CodeGenerator::generateString ( const string &input )
{

    if ( !docStyle.found() ) {
        return "";
    }

    reset();

    in = new istringstream ( input );
    out = new ostringstream ();

    string result;

    if ( !in->fail() && !out->fail() ) {
        initASStream();

        printHeader();
        printBody();
        printFooter();

        result = static_cast<ostringstream*> ( out )->str();
    }

    // released on every path; the failure path used to return without
    // freeing the streams
    delete out;
    out=NULL;
    delete in;
    in=NULL;

    return result;
}
1274
initASStream()1275 void CodeGenerator::initASStream() {
1276 if ( formatter != NULL ) {
1277 if (streamIterator) delete streamIterator;
1278 streamIterator = new astyle::ASStreamIterator ( in );
1279 formatter->init ( streamIterator );
1280 }
1281 }
1282
/**
 * Highlights the content of a file and returns the generated document as a
 * string.
 *
 * \param inFileName path of the input file
 * \return the generated output; an empty string if no document style was
 *         found or a stream could not be opened; the text
 *         "ERROR: detected binary input" if input validation is enabled and
 *         rejects the file
 */
string CodeGenerator::generateStringFromFile ( const string &inFileName )
{

    if ( !docStyle.found() ) {
        return "";
    }

    reset();

    inFile = inFileName;

    in = new ifstream ( inFileName.c_str() );
    out = new ostringstream ();

    string result;

    if ( in->fail() || out->fail() ) {
        // result stays empty
    } else if ( validateInput && !validateInputStream() ) {
        result = "ERROR: detected binary input";
    } else {
        initASStream();

        currentSyntax->setInputFileName(inFile);

        printHeader();
        printBody();
        printFooter();

        result = static_cast<ostringstream*> ( out )->str();
    }

    // released on every path; the two error paths used to return without
    // freeing the streams and left the members pointing at them
    delete out;
    out=NULL;
    delete in;
    in=NULL;

    return result;
}
1322
getStyleID(State s,unsigned int kwClassID)1323 unsigned int CodeGenerator::getStyleID ( State s, unsigned int kwClassID )
1324 {
1325 if ( s==KEYWORD && kwClassID ) {
1326 return NUMBER_BUILTIN_STATES + kwClassID-1;
1327 }
1328 return ( unsigned int ) s ;
1329 }
1330
openTag(State s)1331 void CodeGenerator::openTag ( State s )
1332 {
1333 *out << openTags[ ( unsigned int ) s];
1334 currentState=s;
1335 }
1336
closeTag(State s)1337 void CodeGenerator::closeTag ( State s )
1338 {
1339 *out << closeTags[ ( unsigned int ) s];
1340 flushWs(2);
1341 currentState=_UNKNOWN;
1342 }
1343
openKWTag(unsigned int kwClassID)1344 void CodeGenerator::openKWTag ( unsigned int kwClassID )
1345 {
1346 *out << openTags.at(getStyleID ( KEYWORD, kwClassID ) );
1347 currentState=KEYWORD;
1348 }
1349
closeKWTag(unsigned int kwClassID)1350 void CodeGenerator::closeKWTag ( unsigned int kwClassID )
1351 {
1352 *out << closeTags.at(getStyleID ( KEYWORD, kwClassID ) );
1353 flushWs(3);
1354 currentState=_UNKNOWN;
1355 }
1356
loadEmbeddedLang(const string & embedLangDefPath)1357 bool CodeGenerator::loadEmbeddedLang(const string&embedLangDefPath)
1358 {
1359 if (nestedLangs.empty()) {
1360 nestedLangs.push(currentSyntax->getCurrentPath() );
1361 }
1362 if (nestedLangs.top() != embedLangDefPath) {
1363 nestedLangs.push(embedLangDefPath);
1364 }
1365 LoadResult res = loadLanguage(embedLangDefPath, true);
1366 //pass end delimiter regex to syntax description
1367 currentSyntax->restoreLangEndDelim(embedLangDefPath);
1368 return res == LOAD_OK;
1369 }
1370
1371 ///////////////////////////////////////////////////////////////////////////////
1372
processRootState()1373 void CodeGenerator::processRootState()
1374 {
1375 bool eof=false,
1376 firstLine=true; // avoid newline before printing the first output line
1377
1378 applySyntaxTestCase = inFile.find("syntax_test_")!=string::npos;
1379
1380 if ( currentSyntax->highlightingDisabled() ) {
1381 string line;
1382 while ( getline ( *in, line ) && lineNumber < maxLineCnt ) {
1383 ++lineNumber;
1384 insertLineNumber ( !firstLine );
1385 flushWs(4);
1386 firstLine=false;
1387 if (lineNumber>=startLineCntCurFile && lineNumber <=maxLineCnt)
1388 maskString ( *out, line );
1389 }
1390 *out << flush;
1391 return;
1392 }
1393
1394 State state=STANDARD;
1395 openTag ( STANDARD );
1396
1397 do {
1398 // determine next state
1399 state= getCurrentState(STANDARD);
1400
1401 // handle current state
1402 switch ( state ) {
1403 case KEYWORD:
1404 closeTag ( STANDARD );
1405 eof=processKeywordState ( state );
1406 openTag ( STANDARD );
1407 break;
1408 case NUMBER:
1409 closeTag ( STANDARD );
1410 eof=processNumberState();
1411 openTag ( STANDARD );
1412 break;
1413 case ML_COMMENT:
1414 closeTag ( STANDARD );
1415 eof=processMultiLineCommentState();
1416 openTag ( STANDARD );
1417 break;
1418 case SL_COMMENT:
1419 closeTag ( STANDARD );
1420 eof=processSingleLineCommentState();
1421 openTag ( STANDARD );
1422 break;
1423 case STRING:
1424 closeTag ( STANDARD );
1425 eof=processStringState ( STANDARD );
1426 openTag ( STANDARD );
1427 break;
1428 case DIRECTIVE:
1429 closeTag ( STANDARD );
1430 eof=processDirectiveState();
1431 openTag ( STANDARD );
1432 break;
1433 case ESC_CHAR:
1434 closeTag ( STANDARD );
1435 eof=processEscapeCharState();
1436 openTag ( STANDARD );
1437 break;
1438 case SYMBOL:
1439 closeTag ( STANDARD );
1440 eof=processSymbolState();
1441 openTag ( STANDARD );
1442 break;
1443 case EMBEDDED_CODE_END:
1444 closeTag ( STANDARD );
1445 eof=processSyntaxChangeState(state);
1446 openTag ( STANDARD );
1447 break;
1448 case SYNTAX_ERROR:
1449 closeTag ( STANDARD );
1450 eof=processSyntaxErrorState();
1451 openTag ( STANDARD );
1452 break;
1453
1454 case _EOL:
1455 // XTERM256 fix (issue with less cmd)
1456 if (!firstLine || showLineNumbers) {
1457 closeTag ( STANDARD );
1458 }
1459 insertLineNumber(!firstLine);
1460 if (!firstLine || showLineNumbers) {
1461 flushWs(5);
1462 stateTraceCurrent.clear();
1463 openTag ( STANDARD );
1464 }
1465 firstLine=false;
1466 break;
1467 case _EOF:
1468 eof=true;
1469 break;
1470 case _WS:
1471 processWsState();
1472 break;
1473 default:
1474 printMaskedToken();
1475 break;
1476 }
1477 } while ( !eof );
1478
1479 if (token.size() || lineNumber>1 || (outputType!=ESC_TRUECOLOR && outputType!=ESC_XTERM256))
1480 closeTag ( STANDARD );
1481
1482 if (currentSyntax->getDecorateLineEndFct()) {
1483 Diluculum::LuaValueList res=callDecorateLineFct(false);
1484 if (res.size()==1) {
1485 *out << res[0].asString();
1486 }
1487 }
1488
1489 printNewLines = noTrailingNewLine==0 || ( noTrailingNewLine==2 && ( token.size() || lineNumber>1) );
1490 *out << getNewLine();
1491 *out << flush;
1492 }
1493
/**
 * Handles the token that marks a change of syntax; the token is printed
 * inside the KEYWORD tag.
 *
 * \param myState state that triggered the call; EMBEDDED_CODE_END switches
 *                back to the previous entry of the nested language stack
 * \return true if the end of input was reached
 */
bool CodeGenerator::processSyntaxChangeState(State myState)
{
    State newState=STANDARD;
    bool eof=false,
         exitState=false;

    openTag ( KEYWORD );
    do {

        if (myState==EMBEDDED_CODE_END) {
            // drop the embedded language from the stack
            // NOTE(review): this block runs on every loop iteration, so a
            // following _WS token pops another entry - confirm this is intended
            if (!nestedLangs.empty()) {
                nestedLangs.pop();
            }
            // load host language syntax
            if (!nestedLangs.empty()) {
                loadLanguage(nestedLangs.top(), true);
            }
            matchRegex(line, EMBEDDED_CODE_BEGIN); // match remaining line using the host syntax
        }

        printMaskedToken ( newState!=_WS );

        newState= getCurrentState(myState);

        switch ( newState ) {
        case _WS:
            // whitespace does not end the state
            processWsState();
            break;
        case _EOL:
            insertLineNumber();
            exitState=true;
            break;
        case _EOF:
            eof = true;
            break;
        default:
            // any other state ends the syntax change token
            exitState=true;
            break;
        }
    } while ( !exitState && !eof );
    closeTag ( KEYWORD );

    return eof;
}
1538
1539
processKeywordState(State myState)1540 bool CodeGenerator::processKeywordState ( State myState )
1541 {
1542 State newState=STANDARD;
1543 unsigned int myClassID=currentKeywordClass;
1544 bool eof=false,
1545 exitState=false;
1546
1547 openKWTag ( myClassID );
1548 do {
1549 printMaskedToken ( newState!=_WS,
1550 ( currentSyntax->isIgnoreCase() ) ? keywordCase : StringTools::CASE_UNCHANGED );
1551 newState= getCurrentState(myState);
1552 switch ( newState ) {
1553 case _WS:
1554 processWsState();
1555 exitState=isolateTags;
1556 break;
1557 case _EOL:
1558 insertLineNumber();
1559 exitState=true;
1560
1561 break;
1562 case _EOF:
1563 eof = true;
1564 break;
1565 case KEYWORD_END:
1566 exitState=true;
1567 break;
1568 default:
1569 exitState= ( myClassID!=currentKeywordClass ) || ( myState!=newState );
1570 break;
1571 }
1572 } while ( !exitState && !eof );
1573
1574 closeKWTag ( myClassID );
1575
1576 currentKeywordClass=0;
1577 return eof;
1578 }
1579
processNumberState()1580 bool CodeGenerator::processNumberState()
1581 {
1582 State newState=STANDARD;
1583 bool eof=false,
1584 exitState=false;
1585 openTag ( NUMBER );
1586 do {
1587 printMaskedToken ( newState!=_WS );
1588 newState= getCurrentState(NUMBER);
1589 switch ( newState ) {
1590 case _WS:
1591 processWsState();
1592 exitState=isolateTags;
1593 break;
1594 case _EOL:
1595 insertLineNumber();
1596 exitState=true;
1597 break;
1598 case _EOF:
1599 eof = true;
1600 break;
1601 default:
1602 exitState=newState!=NUMBER;
1603 break;
1604 }
1605 } while ( !exitState && !eof );
1606
1607 closeTag ( NUMBER );
1608 return eof;
1609 }
1610
1611
/**
 * Prints a multi line comment inside the ML_COMMENT tag. The tag is closed
 * and reopened around every line break. Nested comments are counted when the
 * syntax allows them; the state ends when the counter drops to zero.
 * Syntax test positions (_TESTPOS) found in the comment are evaluated.
 *
 * \return true if the end of input was reached
 */
bool CodeGenerator::processMultiLineCommentState()
{
    int commentCount=1;     // nesting depth; the opening delimiter counts as 1
    int openDelimID=currentSyntax->getOpenDelimiterID ( token, ML_COMMENT);
    State newState=STANDARD;
    bool eof=false, exitState=false, containedTestCase=false;
    // column where the comment token starts in the current line
    unsigned int startColumn=lineIndex - token.size() ;
    openTag ( ML_COMMENT );
    do {
        printMaskedToken (newState!=_WS );
        newState= getCurrentState(ML_COMMENT);

        switch ( newState ) {
        case _WS:
            processWsState();
            break;
        case _EOL:
            // close the tag before the line break and reopen it afterwards
            wsBuffer += closeTags[ML_COMMENT];
            insertLineNumber();
            wsBuffer += openTags[ML_COMMENT];
            startColumn=0;
            break;
        case _EOF:
            eof = true;
            break;
        case _TESTPOS:
            // "<" refers to the start column of the comment, any other test
            // token to the column before the current line index
            runSyntaxTestcases(token=="<" ? startColumn : lineIndex - 1 );
            printMaskedToken();
            containedTestCase=true;
            break;
        case ML_COMMENT:

            if ( currentSyntax->allowNestedMLComments() ) {
                ++commentCount;
            }
            // if delimiters are equal, close the comment by continuing to
            // ML_COMMENT_END section
            if (currentSyntax->delimiterIsDistinct(currentSyntax->getOpenDelimiterID ( token, ML_COMMENT ))) break;
            // intentional fall through

        case ML_COMMENT_END:

            // ignore end delimiters that do not belong to the opening delimiter
            if (!currentSyntax->matchesOpenDelimiter (token, ML_COMMENT_END, openDelimID)) {
                break;
            }
            commentCount--;
            if ( !commentCount ) {
                printMaskedToken();
                exitState=true;
            }
            break;
        default:
            break;
        }
    } while ( !exitState && !eof );

    closeTag ( ML_COMMENT );

    // the state trace is discarded when the comment contained a test case
    if (containedTestCase){
        stateTraceCurrent.clear();
    }
    return eof;
}
1674
1675
processSingleLineCommentState()1676 bool CodeGenerator::processSingleLineCommentState()
1677 {
1678 State newState=STANDARD;
1679 bool eof=false, exitState=false, containedTestCase=false;
1680 unsigned int startColumn = lineIndex - token.size() ;
1681
1682 openTag ( SL_COMMENT );
1683 do {
1684 printMaskedToken ( newState!=_WS );
1685 newState= getCurrentState(SL_COMMENT);
1686
1687 switch ( newState ) {
1688 case _WS:
1689 processWsState();
1690 break;
1691 case _EOL:
1692 printMaskedToken();
1693 if ( preFormatter.isEnabled() && preFormatter.isWrappedLine ( lineNumber-1 ) ) {
1694 exitState=false;
1695 } else {
1696 exitState=true;
1697 }
1698 if ( !exitState ) wsBuffer += closeTags[SL_COMMENT];
1699 insertLineNumber();
1700 if ( !exitState ) wsBuffer += openTags[SL_COMMENT];
1701
1702 break;
1703 case _EOF:
1704 eof = true;
1705 break;
1706 case _TESTPOS:
1707 runSyntaxTestcases(token=="<" ? startColumn : lineIndex - 1 );
1708 printMaskedToken();
1709 containedTestCase=true;
1710 break;
1711
1712 default:
1713 break;
1714 }
1715 } while ( !exitState && !eof );
1716
1717 closeTag ( SL_COMMENT );
1718
1719 if (containedTestCase) {
1720 stateTraceCurrent.clear();
1721 }
1722
1723 return eof;
1724 }
1725
/**
 * Prints a directive inside the DIRECTIVE tag. Comments and strings that
 * occur within the directive are delegated to their own state handlers.
 * At a line end the directive continues if the pre-formatter wrapped the
 * line or the line ends with the continuation character of the syntax.
 *
 * \return true if the end of input was reached
 */
bool CodeGenerator::processDirectiveState()
{
    State newState=STANDARD;
    bool eof=false, exitState=false;

    openTag ( DIRECTIVE );
    do {
        printMaskedToken ( newState!=_WS );
        newState= getCurrentState(DIRECTIVE);
        switch ( newState ) {
        case _WS:
            processWsState();
            break;
        case DIRECTIVE_END:
            printMaskedToken();
            exitState=true;
            break;
        case _EOL:
            printMaskedToken();

            if ( preFormatter.isEnabled() && preFormatter.isWrappedLine ( lineNumber-1 ) ) {
                // wrapped lines never end the directive
                exitState=false;
            } else {
                // NOTE(review): 0x13 presumably means "no continuation
                // character defined"; exitState is left unchanged then - confirm
                if (currentSyntax->getContinuationChar()!=0x13){
                    exitState= ( terminatingChar!=currentSyntax->getContinuationChar() );
                }
            }
            // when the directive continues, close the tag before the line
            // break and reopen it afterwards
            if ( !exitState ) wsBuffer += closeTags[DIRECTIVE];
            insertLineNumber();
            if ( !exitState ) wsBuffer += openTags[DIRECTIVE];
            break;
        case ML_COMMENT:
            closeTag ( DIRECTIVE );
            eof= processMultiLineCommentState();
            openTag ( DIRECTIVE );
            break;
        case SL_COMMENT:
            // a single line comment also ends the directive
            closeTag ( DIRECTIVE );
            eof= processSingleLineCommentState();
            openTag ( DIRECTIVE );
            exitState=true;
            break;
        case STRING:
            closeTag ( DIRECTIVE );
            eof=processStringState ( DIRECTIVE );
            openTag ( DIRECTIVE );
            break;
        case _EOF:
            eof = true;
            break;
        default:
            break;
        }
    } while ( !exitState && !eof );

    closeTag ( DIRECTIVE );
    return eof;
}
1784
/**
 * Prints a string inside the STRING tag, or inside the DIRECTIVE_STRING tag
 * when called from the directive state. Escape sequences and interpolation
 * sequences are delegated to their own state handlers; escape sequences are
 * not delegated inside raw strings. Resets toggleDynRawString to false
 * before returning.
 *
 * \param oldState state from which the string state was entered
 * \return true if the end of input was reached
 */
bool CodeGenerator::processStringState ( State oldState )
{
    State newState=STANDARD;
    bool eof=false, exitState=false;
    bool returnedFromOtherState=false;

    State myState= ( oldState==DIRECTIVE ) ? DIRECTIVE_STRING : STRING;

    // remember the opening delimiter to recognise the matching end delimiter
    int openDelimID=currentSyntax->getOpenDelimiterID ( token, myState);
    string openDelim=token;

    //Raw String by definition:
    bool isRawString=currentSyntax->delimiterIsRawString(openDelimID) || toggleDynRawString;

    // Test if character before string open delimiter token equals to the
    // raw string prefix (Example: r" ", r""" """ in Python)

    //Raw String Prefix:
    if ( lineIndex>token.length() &&line[lineIndex-token.length()-1]==currentSyntax->getRawStringPrefix() ) {
        isRawString=true;
    }

    openTag ( myState );
    do {
        // the token was already printed by the escape or interpolation state
        // when the previous iteration delegated to one of them
        if ( !returnedFromOtherState ) {
            printMaskedToken (newState!=_WS );
        }
        returnedFromOtherState=false;
        newState= getCurrentState(myState);

        switch ( newState ) {
        case _WS:
            processWsState();
            break;
        case _EOL:
            // close the tag before the line break and reopen it afterwards
            wsBuffer += closeTags[myState];
            insertLineNumber();
            wsBuffer += openTags[myState];
            break;
        case STRING_END:
            if (resultOfHook || currentSyntax->matchesOpenDelimiter (token, STRING_END, openDelimID)) {
                // some syntaxes require open and close delimiter of equal length
                if (currentSyntax->assertDelimEqualLength()) {
                    exitState= openDelim.length()==token.length();
                } else {
                    exitState= true;
                }
                printMaskedToken();
            }
            break;
        case STRING:
            // if there exist multiple string delimiters, close string if
            // current delimiter is equal to the opening delimiter
            exitState=currentSyntax->delimiterIsDistinct(currentSyntax->getOpenDelimiterID ( token, STRING )) && token==openDelim;
            printMaskedToken();
            break;
        case ESC_CHAR:
            if ( !isRawString ) {
                closeTag ( myState );
                eof=processEscapeCharState();
                openTag ( myState );
                returnedFromOtherState=true;
            } else {
                // FIXME not a fix for Python r"""\"""
                // in a raw string the sequence ends the string if its second
                // character equals the first character of the open delimiter
                exitState=token.size()>1 && token[1] == openDelim[0];
                printMaskedToken();
            }
            break;
        case STRING_INTERPOLATION:
            closeTag ( myState );
            eof=processInterpolationState();
            openTag ( myState );
            returnedFromOtherState=true;
            break;

        case _EOF:
            eof = true;
            break;
        default:
            printMaskedToken();
            break;
        }
    } while ( !exitState && !eof );

    closeTag ( myState );

    toggleDynRawString = false;

    return eof;
}
1875
processSymbolState()1876 bool CodeGenerator::processSymbolState()
1877 {
1878 State newState=STANDARD;
1879 bool eof=false,
1880 exitState=false;
1881
1882 openTag ( SYMBOL );
1883 do {
1884 printMaskedToken ( newState!=_WS );
1885 newState= getCurrentState(SYMBOL);
1886 switch ( newState ) {
1887 case _WS:
1888 processWsState();
1889 exitState=isolateTags;
1890 break;
1891 case _EOL:
1892 insertLineNumber();
1893 exitState=true;
1894 break;
1895 case _EOF:
1896 eof = true;
1897 break;
1898 default:
1899 exitState=newState!=SYMBOL;
1900 break;
1901 }
1902 } while ( !exitState && !eof );
1903
1904 closeTag ( SYMBOL );
1905 return eof;
1906 }
1907
processSyntaxErrorState()1908 bool CodeGenerator::processSyntaxErrorState()
1909 {
1910 State newState=STANDARD;
1911 bool eof=false,
1912 exitState=false;
1913
1914 openTag ( SYNTAX_ERROR );
1915 do {
1916 printMaskedToken ( newState!=_WS );
1917 newState= getCurrentState(SYNTAX_ERROR);
1918 switch ( newState ) {
1919 case _WS:
1920 processWsState();
1921 exitState=isolateTags;
1922 break;
1923 case _EOL:
1924 insertLineNumber();
1925 exitState=true;
1926 break;
1927 case _EOF:
1928 eof = true;
1929 break;
1930 default:
1931 exitState=newState!=SYMBOL;
1932 break;
1933 }
1934 } while ( !exitState && !eof );
1935
1936 closeTag ( SYNTAX_ERROR );
1937 return eof;
1938 }
1939
processEscapeCharState()1940 bool CodeGenerator::processEscapeCharState()
1941 {
1942 State newState=STANDARD;
1943 bool eof=false, exitState=false;
1944 openTag ( ESC_CHAR );
1945 do {
1946 printMaskedToken (newState!=_WS );
1947 newState= getCurrentState(ESC_CHAR);
1948 switch ( newState ) {
1949 case _EOL:
1950 insertLineNumber();
1951 exitState=true;
1952 break;
1953 case _WS:
1954 processWsState();
1955 exitState=isolateTags;
1956 break;
1957 case _EOF:
1958 eof = true;
1959 break;
1960 default:
1961 exitState=newState!=ESC_CHAR;
1962 break;
1963 }
1964 } while ( !exitState && !eof );
1965
1966 closeTag ( ESC_CHAR );
1967 return eof;
1968 }
1969
processInterpolationState()1970 bool CodeGenerator::processInterpolationState()
1971 {
1972 State newState=STANDARD;
1973 bool eof=false, exitState=false;
1974 openTag ( STRING_INTERPOLATION );
1975 do {
1976 printMaskedToken (newState!=_WS );
1977 newState= getCurrentState(STRING_INTERPOLATION);
1978 switch ( newState ) {
1979 case _EOL:
1980 insertLineNumber();
1981 exitState=true;
1982 break;
1983 case _WS:
1984 processWsState();
1985 exitState=isolateTags;
1986 break;
1987 case _EOF:
1988 eof = true;
1989 break;
1990 default:
1991 exitState=newState!=STRING_INTERPOLATION;
1992 break;
1993 }
1994 } while ( !exitState && !eof );
1995
1996 closeTag ( STRING_INTERPOLATION );
1997 return eof;
1998 }
1999
processWsState()2000 void CodeGenerator::processWsState()
2001 {
2002
2003 if ( !maskWs ) {
2004 wsBuffer += token;
2005 token.clear();
2006 return;
2007 }
2008
2009 flushWs(6);
2010
2011 int cntWs=0;
2012 lineIndex--;
2013 PositionState ps(currentState, 0, true);
2014
2015 while ( line[lineIndex]==' ' || line[lineIndex]=='\t' ) {
2016 ++cntWs;
2017 ++lineIndex;
2018 }
2019
2020 if ( cntWs>1 ) {
2021
2022 unsigned int styleID=getStyleID ( currentState, currentKeywordClass );
2023 if ( excludeWs && styleID!=_UNKNOWN ) {
2024 *out << closeTags[styleID];
2025 }
2026
2027 *out << maskWsBegin;
2028 for ( int i=0; i<cntWs; i++ ) {
2029 *out << spacer;
2030 if (applySyntaxTestCase){
2031 stateTraceCurrent.push_back(ps);
2032 }
2033 }
2034 *out << maskWsEnd;
2035 if ( excludeWs && styleID!=_UNKNOWN ) {
2036 *out << openTags[styleID];
2037 }
2038 } else {
2039
2040 *out << spacer; //Bugfix fehlender Space nach Strings
2041 if (applySyntaxTestCase){
2042 stateTraceCurrent.push_back(ps);
2043 }
2044 }
2045
2046 spacer = initialSpacer;
2047
2048 token.clear();
2049 }
2050
flushWs(int arg)2051 void CodeGenerator::flushWs(int arg)
2052 {
2053 PositionState ps(currentState, 0, true);
2054 //workaround condition
2055 for ( size_t i=0; i<wsBuffer.size() && ((arg > 3) || ( (arg<4) && lineIndex>1)) && applySyntaxTestCase ; i++ ) {
2056 stateTraceCurrent.push_back(ps);
2057 //std::cerr <<"\nflush >"<<wsBuffer<<"< arg:"<<arg;
2058 }
2059
2060 //fix canvas whitespace
2061 if (wsBuffer.length() && (outputType==ESC_XTERM256 || outputType==ESC_TRUECOLOR) ){
2062 *out<<maskWsBegin;
2063 }
2064
2065 *out << wsBuffer;
2066 wsBuffer.clear();
2067 }
2068
getTestcaseName(State s,unsigned int kwClass)2069 string CodeGenerator::getTestcaseName(State s, unsigned int kwClass) {
2070 switch (s) {
2071
2072 case STANDARD:
2073 return STY_NAME_STD;
2074 case STRING:
2075 return STY_NAME_STR;
2076 case NUMBER:
2077 return STY_NAME_NUM;
2078 case SL_COMMENT:
2079 return STY_NAME_SLC;
2080 case ML_COMMENT:
2081 return STY_NAME_COM;
2082 case ESC_CHAR:
2083 return STY_NAME_ESC;
2084 case DIRECTIVE:
2085 return STY_NAME_DIR;
2086 case DIRECTIVE_STRING:
2087 return STY_NAME_DST;
2088 case SYMBOL:
2089 return STY_NAME_SYM;
2090 case STRING_INTERPOLATION:
2091 return STY_NAME_IPL;
2092 case SYNTAX_ERROR:
2093 return STY_NAME_ERR;
2094 case _WS:
2095 return "ws";
2096 case KEYWORD: {
2097
2098 if (!kwClass)
2099 return "ws";
2100
2101 char kwName[20] = {0};
2102 snprintf(kwName, sizeof(kwName), "keyword %c", ('a'+kwClass-1));
2103
2104 return string(kwName);
2105 }
2106 default:
2107 return "unknown_test";
2108 }
2109 }
2110
printTrace(const string & s)2111 void CodeGenerator::printTrace(const string &s){
2112 std::cout<<"\n curr "<<lineNumber<<" "<<s<<": ";
2113 for (unsigned int i=0; i< stateTraceCurrent.size(); i++) {
2114 std::cout<<" "<<stateTraceCurrent[i].state;
2115 }
2116 std::cout<<"\n test "<<lineNumber<<" "<<s<<": ";
2117 for (unsigned int i=0; i< stateTraceTest.size(); i++) {
2118 std::cout<<" "<<stateTraceTest[i].state;
2119 }
2120 std::cout<<"\n";
2121 }
2122
2123 //column: lineIndex (not a UTF-8 validated string position)
runSyntaxTestcases(unsigned int column)2124 void CodeGenerator::runSyntaxTestcases(unsigned int column){
2125
2126 if (encoding=="utf-8")
2127 column = StringTools::utf8_strlen(line.substr(0, column));
2128
2129 unsigned int assertGroup=0;
2130 size_t typeDescPos=line.find_first_not_of("\t ^", lineIndex);
2131 State assertState=_UNKNOWN;
2132 bool negation=false;
2133 bool testFailed=false;
2134
2135 ostringstream errMsg;
2136 string prefix;
2137 //printTrace("trace 2");
2138
2139 if (typeDescPos!=string::npos) {
2140
2141 if (line[typeDescPos]=='~') {
2142
2143 negation=true;
2144 prefix="~";
2145 ++typeDescPos;
2146 }
2147
2148 if (line.find(STY_NAME_NUM, typeDescPos)==typeDescPos)
2149 assertState=NUMBER;
2150 //TODO temp. fix to allow old and new string classes
2151 else if (line.find(STY_NAME_STR, typeDescPos)==typeDescPos || line.find("str", typeDescPos)==typeDescPos)
2152 assertState=STRING;
2153 else if (line.find(STY_NAME_ESC, typeDescPos)==typeDescPos)
2154 assertState=ESC_CHAR;
2155 else if (line.find(STY_NAME_IPL, typeDescPos)==typeDescPos)
2156 assertState=STRING_INTERPOLATION;
2157 else if (line.find(STY_NAME_SYM, typeDescPos)==typeDescPos)
2158 assertState=SYMBOL;
2159 else if (line.find(STY_NAME_DIR, typeDescPos)==typeDescPos)
2160 assertState=DIRECTIVE;
2161 else if (line.find(STY_NAME_SLC, typeDescPos)==typeDescPos)
2162 assertState=SL_COMMENT;
2163 else if (line.find(STY_NAME_COM, typeDescPos)==typeDescPos)
2164 assertState=ML_COMMENT;
2165 else if (line.find("ws", typeDescPos)==typeDescPos)
2166 assertState=_WS;
2167 //TODO temp. fix to allow old and new default classes
2168 else if (line.find(STY_NAME_STD, typeDescPos)==typeDescPos || line.find("std", typeDescPos)==typeDescPos)
2169 assertState=STANDARD;
2170 else if (line.find(STY_NAME_DST, typeDescPos)==typeDescPos)
2171 assertState=DIRECTIVE_STRING;
2172
2173 else if (line.find("kw", typeDescPos)==typeDescPos || line.find("st", typeDescPos)==typeDescPos) {
2174 assertState=KEYWORD;
2175 if (isalpha(line[typeDescPos+2]))
2176 assertGroup=line[typeDescPos+2] - 'a' +1;
2177 }
2178
2179 if ( (assertState!=_WS && stateTraceTest[column].state != assertState && !stateTraceTest[column].isWhiteSpace )
2180 || (assertState==_WS && !stateTraceTest[column].isWhiteSpace)
2181 || assertGroup != stateTraceTest[column].kwClass) {
2182
2183 testFailed=!negation;
2184
2185 } else if (negation ) {
2186
2187 //TODO Fix ~ws
2188 if (assertState!=_WS && !stateTraceTest[column].isWhiteSpace )
2189 testFailed=true;
2190 }
2191
2192 if (testFailed) {
2193 errMsg << inFile << " line " << lineNumber << ", column "<< column
2194 << ": got " << getTestcaseName(stateTraceTest[column].state, stateTraceTest[column].kwClass)
2195 << " instead of " << prefix << getTestcaseName(assertState, assertGroup);
2196
2197 failedPosTests.push_back(errMsg.str());
2198 }
2199
2200 }
2201
2202 lineContainedTestCase=true;
2203 }
2204
getNewLine()2205 string CodeGenerator::getNewLine()
2206 {
2207 ostringstream ss;
2208 printSyntaxError(ss);
2209 if (printNewLines)
2210 ss << newLineTag;
2211 return ss.str();
2212 }
2213
callDecorateLineFct(bool isLineStart)2214 Diluculum::LuaValueList CodeGenerator::callDecorateLineFct(bool isLineStart)
2215 {
2216
2217 Diluculum::LuaValueList params;
2218 params.push_back(Diluculum::LuaValue(lineNumber));
2219
2220 return currentSyntax->getLuaState()->call ( isLineStart ?
2221 *currentSyntax->getDecorateLineBeginFct(): *currentSyntax->getDecorateLineEndFct(),
2222 params,"getDecorateLineFct call");
2223 }
2224
setOverrideParams()2225 void CodeGenerator::setOverrideParams() {
2226 if (currentSyntax->requiresParamUpdate()) {
2227 if ( currentSyntax->getOverrideConfigVal("state.string.raw")=="true"){
2228 toggleDynRawString=true; // reset to false in string state fct
2229 }
2230 if ( currentSyntax->getOverrideConfigVal("format.maskws")=="true") {
2231 maskWs=true;
2232 }
2233 if ( currentSyntax->getOverrideConfigVal("format.spacer").size()) {
2234 spacer=currentSyntax->getOverrideConfigVal("format.spacer");
2235 }
2236 }
2237 }
2238
insertLineNumber(bool insertNewLine)2239 void CodeGenerator::insertLineNumber ( bool insertNewLine )
2240 {
2241 if ( insertNewLine ) {
2242 if (currentSyntax->getDecorateLineEndFct()) {
2243 Diluculum::LuaValueList res=callDecorateLineFct(false);
2244 if (res.size()==1) {
2245 setOverrideParams();
2246 wsBuffer +=res[0].asString();
2247 }
2248 }
2249 wsBuffer += getNewLine();
2250 }
2251
2252 if (currentSyntax->getDecorateLineBeginFct()) {
2253 Diluculum::LuaValueList res=callDecorateLineFct(true);
2254 if (res.size()==1) {
2255 setOverrideParams();
2256 wsBuffer += res[0].asString();
2257 }
2258 }
2259
2260 if ( showLineNumbers ) {
2261 ostringstream os;
2262 ostringstream numberPrefix;
2263
2264 os << setw ( getLineNumberWidth() ) << right;
2265 if( numberCurrentLine ) {
2266 if ( lineNumberFillZeroes ) {
2267 os.fill ( '0' );
2268 }
2269 os << lineNumber+lineNumberOffset;
2270 } else {
2271 os << "";
2272 }
2273
2274 numberPrefix << openTags[LINENUMBER];
2275 maskString ( numberPrefix, os.str() );
2276
2277 //use initialSpacer here, spacer can be overridden by plug-in (format.spacer)
2278 numberPrefix << initialSpacer << closeTags[LINENUMBER];
2279 wsBuffer += numberPrefix.str();
2280 }
2281 }
2282
getLineIndex()2283 unsigned int CodeGenerator::getLineIndex()
2284 {
2285 return lineIndex;
2286 }
getLastLineLength()2287 unsigned int CodeGenerator::getLastLineLength()
2288 {
2289 return lastLineLength;
2290 }
2291
requiresTwoPassParsing() const2292 bool CodeGenerator::requiresTwoPassParsing() const {
2293 if (!currentSyntax) return false;
2294 return currentSyntax->getPersistentSnippetsNum()>0;
2295 }
2296
2297
printExternalStyle(const string & outFile)2298 bool CodeGenerator::printExternalStyle ( const string &outFile )
2299 {
2300 if ( !includeStyleDef ) {
2301 ostream *cssOutFile = ( outFile.empty() ? &cout :new ofstream ( outFile.c_str() ) );
2302 if ( !cssOutFile->fail() ) {
2303 if (!omitVersionComment) {
2304 *cssOutFile << styleCommentOpen
2305 <<" Style definition file generated by highlight "
2306 << HIGHLIGHT_VERSION << ", " << HIGHLIGHT_URL
2307 << " " << styleCommentClose << "\n";
2308 }
2309 *cssOutFile << getStyleDefinition()
2310 << "\n";
2311 *cssOutFile << readUserStyleDef();
2312 if ( !outFile.empty() ) delete cssOutFile;
2313 } else {
2314 return false;
2315 }
2316 }
2317 return true;
2318 }
2319
printPersistentState(const string & outFile)2320 bool CodeGenerator::printPersistentState ( const string &outFile )
2321 {
2322 if (!currentSyntax) return false;
2323
2324 ofstream pluginOutFile( outFile.c_str());
2325 if ( !pluginOutFile.fail() ) {
2326
2327 pluginOutFile <<"Description=\"Plugin generated by highlight using the --two-pass option\"\n\n"
2328 <<"Categories = {\"two-pass\" }\n\n"
2329 <<"function syntaxUpdate(desc)\n\n";
2330
2331 pluginOutFile << currentSyntax->getPersistentHookConditions();
2332
2333 for (auto snippet: currentSyntax->getPersistentSnippets())
2334 {
2335 pluginOutFile << snippet <<"\n\n";
2336 }
2337
2338 pluginOutFile<<"end\n\n"
2339 <<"Plugins={\n"
2340 <<" { Type=\"lang\", Chunk=syntaxUpdate }\n"
2341 <<"}\n";
2342 } else {
2343 return false;
2344 }
2345
2346 return true;
2347 }
2348
readUserStyleDef()2349 string CodeGenerator::readUserStyleDef()
2350 {
2351 ostringstream ostr;
2352 if ( !styleInputPath.empty() ) {
2353 ifstream userStyleDef ( styleInputPath.c_str() );
2354 if ( userStyleDef ) {
2355 ostr << "\n" << styleCommentOpen
2356 << " Content of " << styleInputPath
2357 << ": " <<styleCommentClose << "\n";
2358 string line;
2359 while ( getline ( userStyleDef, line ) ) {
2360 ostr << line << "\n";
2361 }
2362 userStyleDef.close();
2363 } else {
2364 ostr << styleCommentOpen
2365 << " ERROR: Could not include " << styleInputPath
2366 << "." << styleCommentClose << "\n";
2367 }
2368 }
2369
2370 string injections=docStyle.getInjections();
2371 if (!injections.empty()) {
2372 ostr << "\n" << styleCommentOpen
2373 << " Plug-in theme injections: " <<styleCommentClose << "\n";
2374 ostr << injections<<"\n";
2375 }
2376 return ostr.str();
2377 }
2378
initPluginScript(const string & script)2379 bool CodeGenerator::initPluginScript(const string& script)
2380 {
2381
2382 if (script.empty()) return true;
2383
2384 try {
2385
2386 userScriptError="";
2387 Diluculum::LuaState ls;
2388
2389 ls.doFile (script);
2390 int listIdx=1;
2391
2392 while (ls["Plugins"][listIdx].value() !=Diluculum::Nil) {
2393
2394 // Theme plugins
2395 if (ls["Plugins"][listIdx]["Type"].value().asString()=="theme") {
2396 if (ls["Plugins"][listIdx]["Chunk"].value().type()==LUA_TFUNCTION) {
2397 docStyle.addUserChunk(ls["Plugins"][listIdx]["Chunk"].value().asFunction());
2398 }
2399 }
2400 // Syntax plugins
2401 else if (ls["Plugins"][listIdx]["Type"].value().asString()=="lang") {
2402 if (ls["Plugins"][listIdx]["Chunk"].value().type()==LUA_TFUNCTION) {
2403 currentSyntax->addUserChunk(ls["Plugins"][listIdx]["Chunk"].value().asFunction());
2404 }
2405 }
2406 // Format plugins
2407 else if (ls["Plugins"][listIdx]["Type"].value().asString()=="format") {
2408 if (ls["Plugins"][listIdx]["Chunk"].value().type()==LUA_TFUNCTION) {
2409 addUserChunk(ls["Plugins"][listIdx]["Chunk"].value().asFunction());
2410 }
2411 }
2412
2413 listIdx++;
2414 }
2415 } catch (Diluculum::LuaError &err) {
2416 userScriptError=err.what();
2417 return false;
2418 }
2419 return true;
2420 }
2421
resetSyntaxReaders()2422 void CodeGenerator::resetSyntaxReaders() {
2423 for ( map<string, SyntaxReader*>::iterator it=syntaxReaders.begin(); it!=syntaxReaders.end(); it++ ) {
2424 delete it->second;
2425 }
2426 currentSyntax=NULL;
2427 syntaxReaders.clear();
2428 }
2429
syntaxRequiresTwoPassRun()2430 bool CodeGenerator::syntaxRequiresTwoPassRun() {
2431 if (!currentSyntax) return false;
2432 return currentSyntax->requiresTwoPassRun();
2433 }
2434
clearPersistentSnippets()2435 void CodeGenerator::clearPersistentSnippets(){
2436 if (currentSyntax) {
2437 currentSyntax->clearPersistentSnippets();
2438 }
2439 }
2440
updateKeywordClasses()2441 void CodeGenerator::updateKeywordClasses(){
2442
2443 if (openTags.size()) {
2444 if ( openTags.size() >NUMBER_BUILTIN_STATES ) {
2445 // remove dynamic keyword tag delimiters of the old language definition
2446 vector<string>::iterator keyStyleOpenBegin =
2447 openTags.begin() + NUMBER_BUILTIN_STATES;
2448 vector<string>::iterator keyStyleCloseBegin =
2449 closeTags.begin() + NUMBER_BUILTIN_STATES;
2450 openTags.erase ( keyStyleOpenBegin, openTags.end() );
2451 closeTags.erase ( keyStyleCloseBegin, closeTags.end() );
2452 }
2453 // add new keyword tag delimiters
2454
2455 for ( unsigned int i=0; i< currentSyntax->getKeywordClasses().size(); i++ ) {
2456 openTags.push_back ( getKeywordOpenTag ( i ) );
2457 closeTags.push_back ( getKeywordCloseTag ( i ) );
2458 }
2459 }
2460 }
2461
2462
2463 }
2464