1 // Copyright 2008-2010 Sergiu Dotenco. The License.txt file describes the
2 // conditions under which this software may be distributed.
3 
4 /**
5  * @file LexBibTeX.cxx
6  * @brief General BibTeX coloring scheme.
7  * @author Sergiu Dotenco
8  * @date April 18, 2009
9  */
10 
11 #include <stdlib.h>
12 #include <string.h>
13 
14 #include <cassert>
15 #include <cctype>
16 
17 #include <string>
18 #include <algorithm>
19 #include <functional>
20 
21 #include "ILexer.h"
22 #include "Scintilla.h"
23 #include "SciLexer.h"
24 
25 #include "PropSetSimple.h"
26 #include "WordList.h"
27 #include "LexAccessor.h"
28 #include "Accessor.h"
29 #include "StyleContext.h"
30 #include "CharacterSet.h"
31 #include "LexerModule.h"
32 
33 #ifdef SCI_NAMESPACE
34 using namespace Scintilla;
35 #endif
36 
37 namespace {
IsAlphabetic(unsigned int ch)38 	bool IsAlphabetic(unsigned int ch)
39 	{
40 		return IsASCII(ch) && std::isalpha(ch) != 0;
41 	}
IsAlphaNumeric(char ch)42 	bool IsAlphaNumeric(char ch)
43 	{
44 	    return IsASCII(ch) && std::isalnum(ch);
45 	}
46 
EqualCaseInsensitive(const char * a,const char * b)47 	bool EqualCaseInsensitive(const char* a, const char* b)
48 	{
49 		return CompareCaseInsensitive(a, b) == 0;
50 	}
51 
EntryWithoutKey(const char * name)52 	bool EntryWithoutKey(const char* name)
53 	{
54 		return EqualCaseInsensitive(name,"string");
55 	}
56 
/**
 * Maps an opening delimiter to its closing counterpart.
 *
 * @param openbrace The opening character.
 * @return ')' for '(', '}' for '{'; any other character is returned unchanged.
 */
char GetClosingBrace(char openbrace)
{
	if (openbrace == '(')
		return ')';

	if (openbrace == '{')
		return '}';

	// No dedicated counterpart: the character closes itself
	return openbrace;
}
68 
/**
 * Tells whether @a ch starts an entry, i.e. is an '@' that is not
 * preceded by a backslash.
 *
 * @param prev The character preceding @a ch.
 * @param ch   The character to examine.
 */
bool IsEntryStart(char prev, char ch)
{
	if (ch != '@')
		return false;

	// \@ is an escaped at-sign, not an entry
	return prev != '\\';
}
73 
IsEntryStart(const StyleContext & sc)74 	bool IsEntryStart(const StyleContext& sc)
75 	{
76 		return IsEntryStart(sc.chPrev, sc.ch);
77 	}
78 
ColorizeBibTeX(unsigned start_pos,int length,int,WordList * keywordlists[],Accessor & styler)79 	void ColorizeBibTeX(unsigned start_pos, int length, int /*init_style*/, WordList* keywordlists[], Accessor& styler)
80 	{
81 	    WordList &EntryNames = *keywordlists[0];
82 		bool fold_compact = styler.GetPropertyInt("fold.compact", 1) != 0;
83 
84 		std::string buffer;
85 		buffer.reserve(25);
86 
87 		// We always colorize a section from the beginning, so let's
88 		// search for the @ character which isn't escaped, i.e. \@
89 		while (start_pos > 0 && !IsEntryStart(styler.SafeGetCharAt(start_pos - 1),
90 			styler.SafeGetCharAt(start_pos))) {
91 			--start_pos; ++length;
92 		}
93 
94 		styler.StartAt(start_pos);
95 		styler.StartSegment(start_pos);
96 
97 		int current_line = styler.GetLine(start_pos);
98 		int prev_level = styler.LevelAt(current_line) & SC_FOLDLEVELNUMBERMASK;
99 		int current_level = prev_level;
100 		int visible_chars = 0;
101 
102 		bool in_comment = false ;
103 		StyleContext sc(start_pos, length, SCE_BIBTEX_DEFAULT, styler);
104 
105 		bool going = sc.More(); // needed because of a fuzzy end of file state
106 		char closing_brace = 0;
107 		bool collect_entry_name = false;
108 
109 		for (; going; sc.Forward()) {
110 			if (!sc.More())
111 				going = false; // we need to go one behind the end of text
112 
113 			if (in_comment) {
114 				if (sc.atLineEnd) {
115 					sc.SetState(SCE_BIBTEX_DEFAULT);
116 					in_comment = false;
117 				}
118 			}
119 			else {
120 				// Found @entry
121 				if (IsEntryStart(sc)) {
122 					sc.SetState(SCE_BIBTEX_UNKNOWN_ENTRY);
123 					sc.Forward();
124 					++current_level;
125 
126 					buffer.clear();
127 					collect_entry_name = true;
128 				}
129 				else if ((sc.state == SCE_BIBTEX_ENTRY || sc.state == SCE_BIBTEX_UNKNOWN_ENTRY)
130 					&& (sc.ch == '{' || sc.ch == '(')) {
131 					// Entry name colorization done
132 					// Found either a { or a ( after entry's name, e.g. @entry(...) @entry{...}
133 					// Closing counterpart needs to be stored.
134 					closing_brace = GetClosingBrace(sc.ch);
135 
136 					sc.SetState(SCE_BIBTEX_DEFAULT); // Don't colorize { (
137 
138 					// @string doesn't have any key
139 					if (EntryWithoutKey(buffer.c_str()))
140 						sc.ForwardSetState(SCE_BIBTEX_PARAMETER);
141 					else
142 						sc.ForwardSetState(SCE_BIBTEX_KEY); // Key/label colorization
143 				}
144 
145 				// Need to handle the case where entry's key is empty
146 				// e.g. @book{,...}
147 				if (sc.state == SCE_BIBTEX_KEY && sc.ch == ',') {
148 					// Key/label colorization done
149 					sc.SetState(SCE_BIBTEX_DEFAULT); // Don't colorize the ,
150 					sc.ForwardSetState(SCE_BIBTEX_PARAMETER); // Parameter colorization
151 				}
152 				else if (sc.state == SCE_BIBTEX_PARAMETER && sc.ch == '=') {
153 					sc.SetState(SCE_BIBTEX_DEFAULT); // Don't colorize the =
154 					sc.ForwardSetState(SCE_BIBTEX_VALUE); // Parameter value colorization
155 
156 					int start = sc.currentPos;
157 
158 					// We need to handle multiple situations:
159 					// 1. name"one two {three}"
160 					// 2. name={one {one two {two}} three}
161 					// 3. year=2005
162 
163 					// Skip ", { until we encounter the first alphanumerical character
164 					while (sc.More() && !(IsAlphaNumeric(sc.ch) || sc.ch == '"' || sc.ch == '{'))
165 						sc.Forward();
166 
167 					if (sc.More()) {
168 						// Store " or {
169 						char ch = sc.ch;
170 
171 						// Not interested in alphanumerical characters
172 						if (IsAlphaNumeric(ch))
173 							ch = 0;
174 
175 						int skipped = 0;
176 
177 						if (ch) {
178 							// Skip preceding " or { such as in name={{test}}.
179 							// Remember how many characters have been skipped
180 							// Make sure that empty values, i.e. "" are also handled correctly
181 							while (sc.More() && (sc.ch == ch && (ch != '"' || skipped < 1))) {
182 								sc.Forward();
183 								++skipped;
184 							}
185 						}
186 
187 						// Closing counterpart for " is the same character
188 						if (ch == '{')
189 							ch = '}';
190 
191 						// We have reached the parameter value
192 						// In case the open character was a alnum char, skip until , is found
193 						// otherwise until skipped == 0
194 						while (sc.More() && (skipped > 0 || (!ch && !(sc.ch == ',' || sc.ch == closing_brace)))) {
195 							// Make sure the character isn't escaped
196 							if (sc.chPrev != '\\') {
197 								// Parameter value contains a { which is the 2nd case described above
198 								if (sc.ch == '{')
199 									++skipped; // Remember it
200 								else if (sc.ch == '}')
201 									--skipped;
202 								else if (skipped == 1 && sc.ch == ch && ch == '"') // Don't ignore cases like {"o}
203 									skipped = 0;
204 							}
205 
206 							sc.Forward();
207 						}
208 					}
209 
210 					// Don't colorize the ,
211 					sc.SetState(SCE_BIBTEX_DEFAULT);
212 
213 					// Skip until the , or entry's closing closing_brace is found
214 					// since this parameter might be the last one
215 					while (sc.More() && !(sc.ch == ',' || sc.ch == closing_brace))
216 						sc.Forward();
217 
218 					int state = SCE_BIBTEX_PARAMETER; // The might be more parameters
219 
220 					// We've reached the closing closing_brace for the bib entry
221 					// in case no " or {} has been used to enclose the value,
222 					// as in 3rd case described above
223 					if (sc.ch == closing_brace) {
224 						--current_level;
225 						// Make sure the text between entries is not colored
226 						// using parameter's style
227 						state = SCE_BIBTEX_DEFAULT;
228 					}
229 
230 					int end = sc.currentPos;
231 					current_line = styler.GetLine(end);
232 
233 					// We have possibly skipped some lines, so the folding levels
234 					// have to be adjusted separately
235 					for (int i = styler.GetLine(start); i <= styler.GetLine(end); ++i)
236 						styler.SetLevel(i, prev_level);
237 
238 					sc.ForwardSetState(state);
239 				}
240 
241 				if (sc.state == SCE_BIBTEX_PARAMETER && sc.ch == closing_brace) {
242 					sc.SetState(SCE_BIBTEX_DEFAULT);
243 					--current_level;
244 				}
245 
246 				// Non escaped % found which represents a comment until the end of the line
247 				if (sc.chPrev != '\\' && sc.ch == '%') {
248 					in_comment = true;
249 					sc.SetState(SCE_BIBTEX_COMMENT);
250 				}
251 			}
252 
253 			if (sc.state == SCE_BIBTEX_UNKNOWN_ENTRY || sc.state == SCE_BIBTEX_ENTRY) {
254 				if (!IsAlphabetic(sc.ch) && collect_entry_name)
255 					collect_entry_name = false;
256 
257 				if (collect_entry_name) {
258 					buffer += static_cast<char>(tolower(sc.ch));
259                     if (EntryNames.InList(buffer.c_str()))
260                         sc.ChangeState(SCE_BIBTEX_ENTRY);
261                     else
262                         sc.ChangeState(SCE_BIBTEX_UNKNOWN_ENTRY);
263 				}
264 			}
265 
266 			if (sc.atLineEnd) {
267 				int level = prev_level;
268 
269 				if (visible_chars == 0 && fold_compact)
270 					level |= SC_FOLDLEVELWHITEFLAG;
271 
272 				if ((current_level > prev_level))
273 					level |= SC_FOLDLEVELHEADERFLAG;
274 				// else if (current_level < prev_level)
275 				//	level |= SC_FOLDLEVELBOXFOOTERFLAG; // Deprecated
276 
277 				if (level != styler.LevelAt(current_line)) {
278 					styler.SetLevel(current_line, level);
279 				}
280 
281 				++current_line;
282 				prev_level = current_level;
283 				visible_chars = 0;
284 			}
285 
286 			if (!isspacechar(sc.ch))
287 				++visible_chars;
288 		}
289 
290 		sc.Complete();
291 
292 		// Fill in the real level of the next line, keeping the current flags as they will be filled in later
293 		int flagsNext = styler.LevelAt(current_line) & ~SC_FOLDLEVELNUMBERMASK;
294 		styler.SetLevel(current_line, prev_level | flagsNext);
295 	}
296 }
297 static const char * const BibTeXWordLists[] = {
298             "Entry Names",
299             0,
300 };
301 
302 
303 LexerModule lmBibTeX(SCLEX_BIBTEX, ColorizeBibTeX, "bib", 0, BibTeXWordLists);
304 
305 // Entry Names
306 //    article, book, booklet, conference, inbook,
307 //    incollection, inproceedings, manual, mastersthesis,
308 //    misc, phdthesis, proceedings, techreport, unpublished,
309 //    string, url
310 
311