1 // Copyright 2008-2010 Sergiu Dotenco. The License.txt file describes the
2 // conditions under which this software may be distributed.
3 
4 /**
5  * @file LexBibTeX.cxx
6  * @brief General BibTeX coloring scheme.
7  * @author Sergiu Dotenco
8  * @date April 18, 2009
9  */
10 
11 #include <stdlib.h>
12 #include <string.h>
13 
14 #include <cassert>
15 #include <cctype>
16 
17 #include <string>
18 #include <algorithm>
19 #include <functional>
20 
21 #include "ILexer.h"
22 #include "Scintilla.h"
23 #include "SciLexer.h"
24 
25 #include "PropSetSimple.h"
26 #include "WordList.h"
27 #include "LexAccessor.h"
28 #include "Accessor.h"
29 #include "StyleContext.h"
30 #include "CharacterSet.h"
31 #include "LexerModule.h"
32 
33 using namespace Scintilla;
34 
35 namespace {
IsAlphabetic(unsigned int ch)36 	bool IsAlphabetic(unsigned int ch)
37 	{
38 		return IsASCII(ch) && std::isalpha(ch) != 0;
39 	}
IsAlphaNumeric(char ch)40 	bool IsAlphaNumeric(char ch)
41 	{
42 	    return IsASCII(ch) && std::isalnum(ch);
43 	}
44 
EqualCaseInsensitive(const char * a,const char * b)45 	bool EqualCaseInsensitive(const char* a, const char* b)
46 	{
47 		return CompareCaseInsensitive(a, b) == 0;
48 	}
49 
EntryWithoutKey(const char * name)50 	bool EntryWithoutKey(const char* name)
51 	{
52 		return EqualCaseInsensitive(name,"string");
53 	}
54 
GetClosingBrace(char openbrace)55 	char GetClosingBrace(char openbrace)
56 	{
57 		char result = openbrace;
58 
59 		switch (openbrace) {
60 			case '(': result = ')'; break;
61 			case '{': result = '}'; break;
62 		}
63 
64 		return result;
65 	}
66 
IsEntryStart(char prev,char ch)67 	bool IsEntryStart(char prev, char ch)
68 	{
69 		return prev != '\\' && ch == '@';
70 	}
71 
IsEntryStart(const StyleContext & sc)72 	bool IsEntryStart(const StyleContext& sc)
73 	{
74 		return IsEntryStart(sc.chPrev, sc.ch);
75 	}
76 
ColorizeBibTeX(Sci_PositionU start_pos,Sci_Position length,int,WordList * keywordlists[],Accessor & styler)77 	void ColorizeBibTeX(Sci_PositionU start_pos, Sci_Position length, int /*init_style*/, WordList* keywordlists[], Accessor& styler)
78 	{
79 	    WordList &EntryNames = *keywordlists[0];
80 		bool fold_compact = styler.GetPropertyInt("fold.compact", 1) != 0;
81 
82 		std::string buffer;
83 		buffer.reserve(25);
84 
85 		// We always colorize a section from the beginning, so let's
86 		// search for the @ character which isn't escaped, i.e. \@
87 		while (start_pos > 0 && !IsEntryStart(styler.SafeGetCharAt(start_pos - 1),
88 			styler.SafeGetCharAt(start_pos))) {
89 			--start_pos; ++length;
90 		}
91 
92 		styler.StartAt(start_pos);
93 		styler.StartSegment(start_pos);
94 
95 		Sci_Position current_line = styler.GetLine(start_pos);
96 		int prev_level = styler.LevelAt(current_line) & SC_FOLDLEVELNUMBERMASK;
97 		int current_level = prev_level;
98 		int visible_chars = 0;
99 
100 		bool in_comment = false ;
101 		StyleContext sc(start_pos, length, SCE_BIBTEX_DEFAULT, styler);
102 
103 		bool going = sc.More(); // needed because of a fuzzy end of file state
104 		char closing_brace = 0;
105 		bool collect_entry_name = false;
106 
107 		for (; going; sc.Forward()) {
108 			if (!sc.More())
109 				going = false; // we need to go one behind the end of text
110 
111 			if (in_comment) {
112 				if (sc.atLineEnd) {
113 					sc.SetState(SCE_BIBTEX_DEFAULT);
114 					in_comment = false;
115 				}
116 			}
117 			else {
118 				// Found @entry
119 				if (IsEntryStart(sc)) {
120 					sc.SetState(SCE_BIBTEX_UNKNOWN_ENTRY);
121 					sc.Forward();
122 					++current_level;
123 
124 					buffer.clear();
125 					collect_entry_name = true;
126 				}
127 				else if ((sc.state == SCE_BIBTEX_ENTRY || sc.state == SCE_BIBTEX_UNKNOWN_ENTRY)
128 					&& (sc.ch == '{' || sc.ch == '(')) {
129 					// Entry name colorization done
130 					// Found either a { or a ( after entry's name, e.g. @entry(...) @entry{...}
131 					// Closing counterpart needs to be stored.
132 					closing_brace = GetClosingBrace(sc.ch);
133 
134 					sc.SetState(SCE_BIBTEX_DEFAULT); // Don't colorize { (
135 
136 					// @string doesn't have any key
137 					if (EntryWithoutKey(buffer.c_str()))
138 						sc.ForwardSetState(SCE_BIBTEX_PARAMETER);
139 					else
140 						sc.ForwardSetState(SCE_BIBTEX_KEY); // Key/label colorization
141 				}
142 
143 				// Need to handle the case where entry's key is empty
144 				// e.g. @book{,...}
145 				if (sc.state == SCE_BIBTEX_KEY && sc.ch == ',') {
146 					// Key/label colorization done
147 					sc.SetState(SCE_BIBTEX_DEFAULT); // Don't colorize the ,
148 					sc.ForwardSetState(SCE_BIBTEX_PARAMETER); // Parameter colorization
149 				}
150 				else if (sc.state == SCE_BIBTEX_PARAMETER && sc.ch == '=') {
151 					sc.SetState(SCE_BIBTEX_DEFAULT); // Don't colorize the =
152 					sc.ForwardSetState(SCE_BIBTEX_VALUE); // Parameter value colorization
153 
154 					Sci_Position start = sc.currentPos;
155 
156 					// We need to handle multiple situations:
157 					// 1. name"one two {three}"
158 					// 2. name={one {one two {two}} three}
159 					// 3. year=2005
160 
161 					// Skip ", { until we encounter the first alphanumerical character
162 					while (sc.More() && !(IsAlphaNumeric(sc.ch) || sc.ch == '"' || sc.ch == '{'))
163 						sc.Forward();
164 
165 					if (sc.More()) {
166 						// Store " or {
167 						char ch = sc.ch;
168 
169 						// Not interested in alphanumerical characters
170 						if (IsAlphaNumeric(ch))
171 							ch = 0;
172 
173 						int skipped = 0;
174 
175 						if (ch) {
176 							// Skip preceding " or { such as in name={{test}}.
177 							// Remember how many characters have been skipped
178 							// Make sure that empty values, i.e. "" are also handled correctly
179 							while (sc.More() && (sc.ch == ch && (ch != '"' || skipped < 1))) {
180 								sc.Forward();
181 								++skipped;
182 							}
183 						}
184 
185 						// Closing counterpart for " is the same character
186 						if (ch == '{')
187 							ch = '}';
188 
189 						// We have reached the parameter value
190 						// In case the open character was a alnum char, skip until , is found
191 						// otherwise until skipped == 0
192 						while (sc.More() && (skipped > 0 || (!ch && !(sc.ch == ',' || sc.ch == closing_brace)))) {
193 							// Make sure the character isn't escaped
194 							if (sc.chPrev != '\\') {
195 								// Parameter value contains a { which is the 2nd case described above
196 								if (sc.ch == '{')
197 									++skipped; // Remember it
198 								else if (sc.ch == '}')
199 									--skipped;
200 								else if (skipped == 1 && sc.ch == ch && ch == '"') // Don't ignore cases like {"o}
201 									skipped = 0;
202 							}
203 
204 							sc.Forward();
205 						}
206 					}
207 
208 					// Don't colorize the ,
209 					sc.SetState(SCE_BIBTEX_DEFAULT);
210 
211 					// Skip until the , or entry's closing closing_brace is found
212 					// since this parameter might be the last one
213 					while (sc.More() && !(sc.ch == ',' || sc.ch == closing_brace))
214 						sc.Forward();
215 
216 					int state = SCE_BIBTEX_PARAMETER; // The might be more parameters
217 
218 					// We've reached the closing closing_brace for the bib entry
219 					// in case no " or {} has been used to enclose the value,
220 					// as in 3rd case described above
221 					if (sc.ch == closing_brace) {
222 						--current_level;
223 						// Make sure the text between entries is not colored
224 						// using parameter's style
225 						state = SCE_BIBTEX_DEFAULT;
226 					}
227 
228 					Sci_Position end = sc.currentPos;
229 					current_line = styler.GetLine(end);
230 
231 					// We have possibly skipped some lines, so the folding levels
232 					// have to be adjusted separately
233 					for (Sci_Position i = styler.GetLine(start); i <= styler.GetLine(end); ++i)
234 						styler.SetLevel(i, prev_level);
235 
236 					sc.ForwardSetState(state);
237 				}
238 
239 				if (sc.state == SCE_BIBTEX_PARAMETER && sc.ch == closing_brace) {
240 					sc.SetState(SCE_BIBTEX_DEFAULT);
241 					--current_level;
242 				}
243 
244 				// Non escaped % found which represents a comment until the end of the line
245 				if (sc.chPrev != '\\' && sc.ch == '%') {
246 					in_comment = true;
247 					sc.SetState(SCE_BIBTEX_COMMENT);
248 				}
249 			}
250 
251 			if (sc.state == SCE_BIBTEX_UNKNOWN_ENTRY || sc.state == SCE_BIBTEX_ENTRY) {
252 				if (!IsAlphabetic(sc.ch) && collect_entry_name)
253 					collect_entry_name = false;
254 
255 				if (collect_entry_name) {
256 					buffer += static_cast<char>(tolower(sc.ch));
257                     if (EntryNames.InList(buffer.c_str()))
258                         sc.ChangeState(SCE_BIBTEX_ENTRY);
259                     else
260                         sc.ChangeState(SCE_BIBTEX_UNKNOWN_ENTRY);
261 				}
262 			}
263 
264 			if (sc.atLineEnd) {
265 				int level = prev_level;
266 
267 				if (visible_chars == 0 && fold_compact)
268 					level |= SC_FOLDLEVELWHITEFLAG;
269 
270 				if ((current_level > prev_level))
271 					level |= SC_FOLDLEVELHEADERFLAG;
272 				// else if (current_level < prev_level)
273 				//	level |= SC_FOLDLEVELBOXFOOTERFLAG; // Deprecated
274 
275 				if (level != styler.LevelAt(current_line)) {
276 					styler.SetLevel(current_line, level);
277 				}
278 
279 				++current_line;
280 				prev_level = current_level;
281 				visible_chars = 0;
282 			}
283 
284 			if (!isspacechar(sc.ch))
285 				++visible_chars;
286 		}
287 
288 		sc.Complete();
289 
290 		// Fill in the real level of the next line, keeping the current flags as they will be filled in later
291 		int flagsNext = styler.LevelAt(current_line) & ~SC_FOLDLEVELNUMBERMASK;
292 		styler.SetLevel(current_line, prev_level | flagsNext);
293 	}
294 }
// Descriptions of the keyword lists used by this lexer. The single list,
// "Entry Names", is the one ColorizeBibTeX reads as keywordlists[0]; the
// array ends with a 0 entry.
static const char* const BibTeXWordLists[] = { "Entry Names", 0 };
299 
300 
301 LexerModule lmBibTeX(SCLEX_BIBTEX, ColorizeBibTeX, "bib", 0, BibTeXWordLists);
302 
303 // Entry Names
304 //    article, book, booklet, conference, inbook,
305 //    incollection, inproceedings, manual, mastersthesis,
306 //    misc, phdthesis, proceedings, techreport, unpublished,
307 //    string, url
308 
309