1 // Scintilla source code edit control
2 /** @file LexABAQUS.cxx
3  ** Lexer for ABAQUS. Based on the lexer for APDL by Hadar Raz.
4  ** By Sergio Lucato.
5  ** Sort of completely rewritten by Gertjan Kloosterman
6  **/
7 // The License.txt file describes the conditions under which this software may be distributed.
8 
// Code folding copied and modified from LexBasic.cxx
10 
11 #include <stdlib.h>
12 #include <string.h>
13 #include <stdio.h>
14 #include <stdarg.h>
15 #include <assert.h>
16 #include <ctype.h>
17 
18 #include "ILexer.h"
19 #include "Scintilla.h"
20 #include "SciLexer.h"
21 
22 #include "WordList.h"
23 #include "LexAccessor.h"
24 #include "Accessor.h"
25 #include "StyleContext.h"
26 #include "CharacterSet.h"
27 #include "LexerModule.h"
28 
29 using namespace Scintilla;
30 
// Returns true for the characters the colouriser accepts inside a keyword or
// parameter name: ASCII letters and digits, underscore and the space character.
// Any value of 0x80 or above is rejected before the ctype lookup.
static inline bool IsAKeywordChar(const int ch) {
	if (ch >= 0x80)
		return false;
	return isalnum(ch) || ch == '_' || ch == ' ';
}
34 
// Returns true for the characters the colouriser accepts inside an unquoted
// value: ASCII letters and digits, underscore, dot and minus.
// Any value of 0x80 or above is rejected before the ctype lookup.
static inline bool IsASetChar(const int ch) {
	if (ch >= 0x80)
		return false;
	return isalnum(ch) || ch == '_' || ch == '.' || ch == '-';
}
38 
ColouriseABAQUSDoc(Sci_PositionU startPos,Sci_Position length,int initStyle,WordList * [],Accessor & styler)39 static void ColouriseABAQUSDoc(Sci_PositionU startPos, Sci_Position length, int initStyle, WordList*[] /* *keywordlists[] */,
40                             Accessor &styler) {
41 	enum localState { KW_LINE_KW, KW_LINE_COMMA, KW_LINE_PAR, KW_LINE_EQ, KW_LINE_VAL, \
42 					  DAT_LINE_VAL, DAT_LINE_COMMA,\
43 					  COMMENT_LINE,\
44 					  ST_ERROR, LINE_END } state ;
45 
46 	// Do not leak onto next line
47 	state = LINE_END ;
48 	initStyle = SCE_ABAQUS_DEFAULT;
49 	StyleContext sc(startPos, length, initStyle, styler);
50 
51 	// Things are actually quite simple
52 	// we have commentlines
53 	// keywordlines and datalines
54 	// On a data line there will only be colouring of numbers
55 	// a keyword line is constructed as
56 	// *word,[ paramname[=paramvalue]]*
57 	// if the line ends with a , the keyword line continues onto the new line
58 
59 	for (; sc.More(); sc.Forward()) {
60 		switch ( state ) {
61         case KW_LINE_KW :
62             if ( sc.atLineEnd ) {
63                 // finished the line in keyword state, switch to LINE_END
64                 sc.SetState(SCE_ABAQUS_DEFAULT) ;
65                 state = LINE_END ;
66             } else if ( IsAKeywordChar(sc.ch) ) {
67                 // nothing changes
68                 state = KW_LINE_KW ;
69             } else if ( sc.ch == ',' ) {
70                 // Well well we say a comma, arguments *MUST* follow
71                 sc.SetState(SCE_ABAQUS_OPERATOR) ;
72                 state = KW_LINE_COMMA ;
73             } else {
74                 // Flag an error
75                 sc.SetState(SCE_ABAQUS_PROCESSOR) ;
76                 state = ST_ERROR ;
77             }
78             // Done with processing
79             break ;
80         case KW_LINE_COMMA :
81             // acomma on a keywordline was seen
82             if ( IsAKeywordChar(sc.ch)) {
83                 sc.SetState(SCE_ABAQUS_ARGUMENT) ;
84                 state = KW_LINE_PAR ;
85             } else if ( sc.atLineEnd || (sc.ch == ',') ) {
86                 // we remain in keyword mode
87                 state = KW_LINE_COMMA ;
88             } else if ( sc.ch == ' ' ) {
89                 sc.SetState(SCE_ABAQUS_DEFAULT) ;
90                 state = KW_LINE_COMMA ;
91             } else {
92                 // Anything else constitutes an error
93                 sc.SetState(SCE_ABAQUS_PROCESSOR) ;
94                 state = ST_ERROR ;
95             }
96             break ;
97         case KW_LINE_PAR :
98             if ( sc.atLineEnd ) {
99                 sc.SetState(SCE_ABAQUS_DEFAULT) ;
100                 state = LINE_END ;
101             } else if ( IsAKeywordChar(sc.ch) || (sc.ch == '-') ) {
102                 // remain in this state
103                 state = KW_LINE_PAR ;
104             } else if ( sc.ch == ',' ) {
105                 sc.SetState(SCE_ABAQUS_OPERATOR) ;
106                 state = KW_LINE_COMMA ;
107             } else if ( sc.ch == '=' ) {
108                 sc.SetState(SCE_ABAQUS_OPERATOR) ;
109                 state = KW_LINE_EQ ;
110             } else {
111                 // Anything else constitutes an error
112                 sc.SetState(SCE_ABAQUS_PROCESSOR) ;
113                 state = ST_ERROR ;
114             }
115             break ;
116         case KW_LINE_EQ :
117             if ( sc.ch == ' ' ) {
118                 sc.SetState(SCE_ABAQUS_DEFAULT) ;
119                 // remain in this state
120                 state = KW_LINE_EQ ;
121             } else if ( IsADigit(sc.ch) || (sc.ch == '-') || (sc.ch == '.' && IsADigit(sc.chNext)) ) {
122                 sc.SetState(SCE_ABAQUS_NUMBER) ;
123                 state = KW_LINE_VAL ;
124             } else if ( IsAKeywordChar(sc.ch) ) {
125                 sc.SetState(SCE_ABAQUS_DEFAULT) ;
126                 state = KW_LINE_VAL ;
127             } else if ( (sc.ch == '\'') || (sc.ch == '\"') ) {
128                 sc.SetState(SCE_ABAQUS_STRING) ;
129                 state = KW_LINE_VAL ;
130             } else {
131                 sc.SetState(SCE_ABAQUS_PROCESSOR) ;
132                 state = ST_ERROR ;
133             }
134             break ;
135         case KW_LINE_VAL :
136             if ( sc.atLineEnd ) {
137                 sc.SetState(SCE_ABAQUS_DEFAULT) ;
138                 state = LINE_END ;
139             } else if ( IsASetChar(sc.ch) && (sc.state == SCE_ABAQUS_DEFAULT) ) {
140                 // nothing changes
141                 state = KW_LINE_VAL ;
142             } else if (( (IsADigit(sc.ch) || sc.ch == '.' || (sc.ch == 'e' || sc.ch == 'E') ||
143                     ((sc.ch == '+' || sc.ch == '-') && (sc.chPrev == 'e' || sc.chPrev == 'E')))) &&
144                     (sc.state == SCE_ABAQUS_NUMBER)) {
145                 // remain in number mode
146                 state = KW_LINE_VAL ;
147             } else if (sc.state == SCE_ABAQUS_STRING) {
148                 // accept everything until a closing quote
149                 if ( sc.ch == '\'' || sc.ch == '\"' ) {
150                     sc.SetState(SCE_ABAQUS_DEFAULT) ;
151                     state = KW_LINE_VAL ;
152                 }
153             } else if ( sc.ch == ',' ) {
154                 sc.SetState(SCE_ABAQUS_OPERATOR) ;
155                 state = KW_LINE_COMMA ;
156             } else {
157                 // anything else is an error
158                 sc.SetState(SCE_ABAQUS_PROCESSOR) ;
159                 state = ST_ERROR ;
160             }
161             break ;
162         case DAT_LINE_VAL :
163             if ( sc.atLineEnd ) {
164                 sc.SetState(SCE_ABAQUS_DEFAULT) ;
165                 state = LINE_END ;
166             } else if ( IsASetChar(sc.ch) && (sc.state == SCE_ABAQUS_DEFAULT) ) {
167                 // nothing changes
168                 state = DAT_LINE_VAL ;
169             } else if (( (IsADigit(sc.ch) || sc.ch == '.' || (sc.ch == 'e' || sc.ch == 'E') ||
170                     ((sc.ch == '+' || sc.ch == '-') && (sc.chPrev == 'e' || sc.chPrev == 'E')))) &&
171                     (sc.state == SCE_ABAQUS_NUMBER)) {
172                 // remain in number mode
173                 state = DAT_LINE_VAL ;
174             } else if (sc.state == SCE_ABAQUS_STRING) {
175                 // accept everything until a closing quote
176                 if ( sc.ch == '\'' || sc.ch == '\"' ) {
177                     sc.SetState(SCE_ABAQUS_DEFAULT) ;
178                     state = DAT_LINE_VAL ;
179                 }
180             } else if ( sc.ch == ',' ) {
181                 sc.SetState(SCE_ABAQUS_OPERATOR) ;
182                 state = DAT_LINE_COMMA ;
183             } else {
184                 // anything else is an error
185                 sc.SetState(SCE_ABAQUS_PROCESSOR) ;
186                 state = ST_ERROR ;
187             }
188             break ;
189         case DAT_LINE_COMMA :
190             // a comma on a data line was seen
191             if ( sc.atLineEnd ) {
192                 sc.SetState(SCE_ABAQUS_DEFAULT) ;
193                 state = LINE_END ;
194             } else if ( sc.ch == ' ' ) {
195                 sc.SetState(SCE_ABAQUS_DEFAULT) ;
196                 state = DAT_LINE_COMMA ;
197             } else if (sc.ch == ',')  {
198                 sc.SetState(SCE_ABAQUS_OPERATOR) ;
199                 state = DAT_LINE_COMMA ;
200             } else if ( IsADigit(sc.ch) || (sc.ch == '-')|| (sc.ch == '.' && IsADigit(sc.chNext)) ) {
201                 sc.SetState(SCE_ABAQUS_NUMBER) ;
202                 state = DAT_LINE_VAL ;
203             } else if ( IsAKeywordChar(sc.ch) ) {
204                 sc.SetState(SCE_ABAQUS_DEFAULT) ;
205                 state = DAT_LINE_VAL ;
206             } else if ( (sc.ch == '\'') || (sc.ch == '\"') ) {
207                 sc.SetState(SCE_ABAQUS_STRING) ;
208                 state = DAT_LINE_VAL ;
209             } else {
210                 sc.SetState(SCE_ABAQUS_PROCESSOR) ;
211                 state = ST_ERROR ;
212             }
213             break ;
214         case COMMENT_LINE :
215             if ( sc.atLineEnd ) {
216                 sc.SetState(SCE_ABAQUS_DEFAULT) ;
217                 state = LINE_END ;
218             }
219             break ;
220         case ST_ERROR :
221             if ( sc.atLineEnd ) {
222                 sc.SetState(SCE_ABAQUS_DEFAULT) ;
223                 state = LINE_END ;
224             }
225             break ;
226         case LINE_END :
227             if ( sc.atLineEnd || sc.ch == ' ' ) {
228                 // nothing changes
229                 state = LINE_END ;
230             } else if ( sc.ch == '*' ) {
231                 if ( sc.chNext == '*' ) {
232                     state = COMMENT_LINE ;
233                     sc.SetState(SCE_ABAQUS_COMMENT) ;
234                 } else {
235                     state = KW_LINE_KW ;
236                     sc.SetState(SCE_ABAQUS_STARCOMMAND) ;
237                 }
238             } else {
239                 // it must be a data line, things are as if we are in DAT_LINE_COMMA
240                 if ( sc.ch == ',' ) {
241                     sc.SetState(SCE_ABAQUS_OPERATOR) ;
242                     state = DAT_LINE_COMMA ;
243                 } else if ( IsADigit(sc.ch) || (sc.ch == '-')|| (sc.ch == '.' && IsADigit(sc.chNext)) ) {
244                     sc.SetState(SCE_ABAQUS_NUMBER) ;
245                     state = DAT_LINE_VAL ;
246                 } else if ( IsAKeywordChar(sc.ch) ) {
247                     sc.SetState(SCE_ABAQUS_DEFAULT) ;
248                     state = DAT_LINE_VAL ;
249                 } else if ( (sc.ch == '\'') || (sc.ch == '\"') ) {
250                     sc.SetState(SCE_ABAQUS_STRING) ;
251                     state = DAT_LINE_VAL ;
252                 } else {
253                     sc.SetState(SCE_ABAQUS_PROCESSOR) ;
254                     state = ST_ERROR ;
255                 }
256             }
257             break ;
258 		  }
259    }
260    sc.Complete();
261 }
262 
263 //------------------------------------------------------------------------------
// This was copied and modified from LexBasic.cxx
265 //------------------------------------------------------------------------------
266 
/* Per-character class flags for the 128 ASCII codes.
 * Bits:
 * 1  - whitespace
 * 2  - operator
 * 4  - identifier
 * 8  - decimal digit
 * 16 - hex digit
 * 32 - bin digit
 */
static int character_classification[128] =
{
    /* 0x00 - 0x0F : tab, LF and CR are whitespace */
    0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  1,  0,  0,  1,  0,  0,
    /* 0x10 - 0x1F */
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    /* 0x20 - 0x2F : space and punctuation */
    1,  2,  0,  2,  2,  2,  2,  2,  2,  2,  6,  2,  2,  2,  10, 6,
    /* 0x30 - 0x3F : digits '0'-'9', then punctuation */
    60, 60, 28, 28, 28, 28, 28, 28, 28, 28, 2,  2,  2,  2,  2,  2,
    /* 0x40 - 0x4F : '@', 'A'-'O' */
    2,  20, 20, 20, 20, 20, 20, 4,  4,  4,  4,  4,  4,  4,  4,  4,
    /* 0x50 - 0x5F : 'P'-'Z', brackets, '_' */
    4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  2,  2,  2,  2,  4,
    /* 0x60 - 0x6F : '`', 'a'-'o' */
    2,  20, 20, 20, 20, 20, 20, 4,  4,  4,  4,  4,  4,  4,  4,  4,
    /* 0x70 - 0x7F : 'p'-'z', braces, DEL */
    4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  2,  2,  2,  2,  0
};
286 
IsSpace(int c)287 static bool IsSpace(int c) {
288 	return c < 128 && (character_classification[c] & 1);
289 }
290 
IsIdentifier(int c)291 static bool IsIdentifier(int c) {
292 	return c < 128 && (character_classification[c] & 4);
293 }
294 
// Maps the ASCII upper case letters 'A'..'Z' to their lower case form and
// returns every other value unchanged.
static int LowerCase(int c)
{
	const bool isUpper = (c >= 'A') && (c <= 'Z');
	return isUpper ? c + ('a' - 'A') : c;
}
301 
LineEnd(Sci_Position line,Accessor & styler)302 static Sci_Position LineEnd(Sci_Position line, Accessor &styler)
303 {
304     const Sci_Position docLines = styler.GetLine(styler.Length() - 1);  // Available last line
305     Sci_Position eol_pos ;
306     // if the line is the last line, the eol_pos is styler.Length()
307     // eol will contain a new line, or a virtual new line
308     if ( docLines == line )
309         eol_pos = styler.Length() ;
310     else
311         eol_pos = styler.LineStart(line + 1) - 1;
312     return eol_pos ;
313 }
314 
LineStart(Sci_Position line,Accessor & styler)315 static Sci_Position LineStart(Sci_Position line, Accessor &styler)
316 {
317     return styler.LineStart(line) ;
318 }
319 
320 // LineType
321 //
322 // bits determines the line type
323 // 1  : data line
324 // 2  : only whitespace
325 // 3  : data line with only whitespace
326 // 4  : keyword line
327 // 5  : block open keyword line
328 // 6  : block close keyword line
329 // 7  : keyword line in error
330 // 8  : comment line
LineType(Sci_Position line,Accessor & styler)331 static int LineType(Sci_Position line, Accessor &styler) {
332     Sci_Position pos = LineStart(line, styler) ;
333     Sci_Position eol_pos = LineEnd(line, styler) ;
334 
335     int c ;
336     char ch = ' ';
337 
338     Sci_Position i = pos ;
339     while ( i < eol_pos ) {
340         c = styler.SafeGetCharAt(i);
341         ch = static_cast<char>(LowerCase(c));
342         // We can say something as soon as no whitespace
343         // was encountered
344         if ( !IsSpace(c) )
345             break ;
346         i++ ;
347     }
348 
349     if ( i >= eol_pos ) {
350         // This is a whitespace line, currently
351         // classifies as data line
352         return 3 ;
353     }
354 
355     if ( ch != '*' ) {
356         // This is a data line
357         return 1 ;
358     }
359 
360     if ( i == eol_pos - 1 ) {
361         // Only a single *, error but make keyword line
362         return 4+3 ;
363     }
364 
365     // This means we can have a second character
366     // if that is also a * this means a comment
367     // otherwise it is a keyword.
368     c = styler.SafeGetCharAt(i+1);
369     ch = static_cast<char>(LowerCase(c));
370     if ( ch == '*' ) {
371         return 8 ;
372     }
373 
374     // At this point we know this is a keyword line
375     // the character at position i is a *
376     // it is not a comment line
377     char word[256] ;
378     int  wlen = 0;
379 
380     word[wlen] = '*' ;
381 	wlen++ ;
382 
383     i++ ;
384     while ( (i < eol_pos) && (wlen < 255) ) {
385         c = styler.SafeGetCharAt(i);
386         ch = static_cast<char>(LowerCase(c));
387 
388         if ( (!IsSpace(c)) && (!IsIdentifier(c)) )
389             break ;
390 
391         if ( IsIdentifier(c) ) {
392             word[wlen] = ch ;
393 			wlen++ ;
394 		}
395 
396         i++ ;
397     }
398 
399     word[wlen] = 0 ;
400 
401     // Make a comparison
402 	if ( !strcmp(word, "*step") ||
403          !strcmp(word, "*part") ||
404          !strcmp(word, "*instance") ||
405          !strcmp(word, "*assembly")) {
406        return 4+1 ;
407     }
408 
409 	if ( !strcmp(word, "*endstep") ||
410          !strcmp(word, "*endpart") ||
411          !strcmp(word, "*endinstance") ||
412          !strcmp(word, "*endassembly")) {
413        return 4+2 ;
414     }
415 
416     return 4 ;
417 }
418 
SafeSetLevel(Sci_Position line,int level,Accessor & styler)419 static void SafeSetLevel(Sci_Position line, int level, Accessor &styler)
420 {
421     if ( line < 0 )
422         return ;
423 
424     int mask = ((~SC_FOLDLEVELHEADERFLAG) | (~SC_FOLDLEVELWHITEFLAG));
425 
426     if ( (level & mask) < 0 )
427         return ;
428 
429     if ( styler.LevelAt(line) != level )
430         styler.SetLevel(line, level) ;
431 }
432 
FoldABAQUSDoc(Sci_PositionU startPos,Sci_Position length,int,WordList * [],Accessor & styler)433 static void FoldABAQUSDoc(Sci_PositionU startPos, Sci_Position length, int,
434 WordList *[], Accessor &styler) {
435     Sci_Position startLine = styler.GetLine(startPos) ;
436     Sci_Position endLine   = styler.GetLine(startPos+length-1) ;
437 
438     // bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0;
439     // We want to deal with all the cases
440     // To know the correct indentlevel, we need to look back to the
441     // previous command line indentation level
442 	// order of formatting keyline datalines commentlines
443     Sci_Position beginData    = -1 ;
444     Sci_Position beginComment = -1 ;
445     Sci_Position prvKeyLine   = startLine ;
446     Sci_Position prvKeyLineTp =  0 ;
447 
448     // Scan until we find the previous keyword line
449     // this will give us the level reference that we need
450     while ( prvKeyLine > 0 ) {
451         prvKeyLine-- ;
452         prvKeyLineTp = LineType(prvKeyLine, styler) ;
453         if ( prvKeyLineTp & 4 )
454             break ;
455     }
456 
457     // Determine the base line level of all lines following
458     // the previous keyword
459     // new keyword lines are placed on this level
460     //if ( prvKeyLineTp & 4 ) {
461     int level = styler.LevelAt(prvKeyLine) & ~SC_FOLDLEVELHEADERFLAG ;
462     //}
463 
464     // uncomment line below if weird behaviour continues
465     prvKeyLine = -1 ;
466 
467     // Now start scanning over the lines.
468     for ( Sci_Position line = startLine; line <= endLine; line++ ) {
469         int lineType = LineType(line, styler) ;
470 
471         // Check for comment line
472         if ( lineType == 8 ) {
473             if ( beginComment < 0 ) {
474                 beginComment = line ;
475 			}
476         }
477 
478         // Check for data line
479         if ( (lineType == 1) || (lineType == 3) ) {
480             if ( beginData < 0 ) {
481                 if ( beginComment >= 0 ) {
482                     beginData = beginComment ;
483                 } else {
484                     beginData = line ;
485                 }
486             }
487 			beginComment = -1 ;
488 		}
489 
490         // Check for keywordline.
491         // As soon as a keyword line is encountered, we can set the
492         // levels of everything from the previous keyword line to this one
493         if ( lineType & 4 ) {
494             // this is a keyword, we can now place the previous keyword
495             // all its data lines and the remainder
496 
497             // Write comments and data line
498             if ( beginComment < 0 ) {
499                 beginComment = line ;
500 			}
501 
502             if ( beginData < 0 ) {
503                 beginData = beginComment ;
504 				if ( prvKeyLineTp != 5 )
505 					SafeSetLevel(prvKeyLine, level, styler) ;
506 				else
507 					SafeSetLevel(prvKeyLine, level | SC_FOLDLEVELHEADERFLAG, styler) ;
508             } else {
509                 SafeSetLevel(prvKeyLine, level | SC_FOLDLEVELHEADERFLAG, styler) ;
510             }
511 
512             int datLevel = level + 1 ;
513 			if ( !(prvKeyLineTp & 4) ) {
514 				datLevel = level ;
515 			}
516 
517             for ( Sci_Position ll = beginData; ll < beginComment; ll++ )
518                 SafeSetLevel(ll, datLevel, styler) ;
519 
520             // The keyword we just found is going to be written at another level
521             // if we have a type 5 and type 6
522             if ( prvKeyLineTp == 5 ) {
523                 level += 1 ;
524 			}
525 
526             if ( prvKeyLineTp == 6 ) {
527                 level -= 1 ;
528 				if ( level < 0 ) {
529 					level = 0 ;
530 				}
531             }
532 
533             for ( Sci_Position lll = beginComment; lll < line; lll++ )
534                 SafeSetLevel(lll, level, styler) ;
535 
536             // wrap and reset
537             beginComment = -1 ;
538             beginData    = -1 ;
539             prvKeyLine   = line ;
540             prvKeyLineTp = lineType ;
541         }
542 
543     }
544 
545     if ( beginComment < 0 ) {
546         beginComment = endLine + 1 ;
547     } else {
548         // We need to find out whether this comment block is followed by
549         // a data line or a keyword line
550         const Sci_Position docLines = styler.GetLine(styler.Length() - 1);
551 
552         for ( Sci_Position line = endLine + 1; line <= docLines; line++ ) {
553             Sci_Position lineType = LineType(line, styler) ;
554 
555             if ( lineType != 8 ) {
556 				if ( !(lineType & 4) )  {
557 					beginComment = endLine + 1 ;
558 				}
559                 break ;
560 			}
561         }
562     }
563 
564     if ( beginData < 0 ) {
565         beginData = beginComment ;
566 		if ( prvKeyLineTp != 5 )
567 			SafeSetLevel(prvKeyLine, level, styler) ;
568 		else
569 			SafeSetLevel(prvKeyLine, level | SC_FOLDLEVELHEADERFLAG, styler) ;
570     } else {
571         SafeSetLevel(prvKeyLine, level | SC_FOLDLEVELHEADERFLAG, styler) ;
572     }
573 
574     int datLevel = level + 1 ;
575 	if ( !(prvKeyLineTp & 4) ) {
576 		datLevel = level ;
577 	}
578 
579     for ( Sci_Position ll = beginData; ll < beginComment; ll++ )
580         SafeSetLevel(ll, datLevel, styler) ;
581 
582     if ( prvKeyLineTp == 5 ) {
583         level += 1 ;
584     }
585 
586     if ( prvKeyLineTp == 6 ) {
587         level -= 1 ;
588     }
589     for ( Sci_Position m = beginComment; m <= endLine; m++ )
590         SafeSetLevel(m, level, styler) ;
591 }
592 
// Descriptions of the word lists, passed to the LexerModule below.
// The list is terminated by a null entry.
// NOTE(review): ColouriseABAQUSDoc and FoldABAQUSDoc leave their WordList
// argument unnamed, so the lists do not appear to influence styling here.
static const char * const abaqusWordListDesc[] = {
    "processors",    // 0
    "commands",      // 1
    "slashommands",  // 2
    "starcommands",  // 3
    "arguments",     // 4
    "functions",     // 5
    0                // terminator
};
602 
603 LexerModule lmAbaqus(SCLEX_ABAQUS, ColouriseABAQUSDoc, "abaqus", FoldABAQUSDoc, abaqusWordListDesc);
604