1 // SciTE - Scintilla based Text Editor
2 /** @file EditorConfig.cxx
3  ** Read and interpret settings files in the EditorConfig format.
4  ** http://editorconfig.org/
5  **/
6 // Copyright 2018 by Neil Hodgson <neilh@scintilla.org>
7 // The License.txt file describes the conditions under which this software may be distributed.
8 
#include <cassert>

#include <utility>
#include <string>
#include <string_view>
#include <vector>
#include <map>
#include <algorithm>
#include <memory>
#include <chrono>

#include "GUI.h"

#include "StringHelpers.h"
#include "FilePath.h"
#include "EditorConfig.h"
23 
24 namespace {
25 
26 struct ECForDirectory {
27 	bool isRoot;
28 	std::string directory;
29 	std::vector<std::string> lines;
30 	ECForDirectory();
31 	void ReadOneDirectory(const FilePath &dir);
32 };
33 
34 class EditorConfig : public IEditorConfig {
35 	std::vector<ECForDirectory> config;
36 public:
37 	~EditorConfig() override;
38 	void ReadFromDirectory(const FilePath &dirStart) override;
39 	std::map<std::string, std::string> MapFromAbsolutePath(const FilePath &absolutePath) const override;
40 	void Clear() noexcept override;
41 };
42 
43 const GUI::gui_char editorConfigName[] = GUI_TEXT(".editorconfig");
44 
bool PatternMatch(std::u32string_view pattern, std::u32string_view text);

/**
 * Match the part of a pattern that follows the '{' of a "{alt1,alt2,...}" group.
 * @param pattern Pattern positioned just after the opening '{'.
 * @param text Text still to be matched; may be empty.
 * @return true when some alternative is a prefix of @a text and the pattern after
 *  the closing '}' matches the text that follows that alternative.
 *
 * Alternatives are compared as literal text: nested groups, wildcards and escapes
 * inside the braces are not interpreted. Every alternative is tried in turn so a
 * short alternative does not hide a longer one ("{a,ab}c" matches "abc") and an
 * empty alternative matches no text ("Makefile{,.am}" matches "Makefile").
 */
bool MatchAlternatives(std::u32string_view pattern, std::u32string_view text) {
	const size_t close = pattern.find(U'}');
	if (close == std::u32string_view::npos) {
		// Unterminated group: nothing to choose from so treat as a failed match.
		return false;
	}
	const std::u32string_view alternatives = pattern.substr(0, close);
	const std::u32string_view afterGroup = pattern.substr(close + 1);
	size_t start = 0;
	while (true) {
		const size_t comma = alternatives.find(U',', start);
		const size_t end = (comma == std::u32string_view::npos) ? alternatives.size() : comma;
		const std::u32string_view alternative = alternatives.substr(start, end - start);
		// substr clamps its length so a too-short text simply compares unequal
		if (text.substr(0, alternative.size()) == alternative) {
			if (PatternMatch(afterGroup, text.substr(alternative.size()))) {
				return true;
			}
		}
		if (comma == std::u32string_view::npos) {
			// That was the last alternative
			return false;
		}
		start = comma + 1;
	}
}

/**
 * Determine whether @a text is matched, in its entirety, by @a pattern.
 * @param pattern Glob pattern as UTF-32.
 * @param text Text, normally a relative path using '/' separators, as UTF-32.
 * @return true for a complete match.
 *
 * Understood syntax:
 *   \x          the character x taken literally
 *   *           any run of characters except '/'
 *   **          any run of characters including '/'
 *   ?           any single character
 *   [set]       one character from set; [!set] one character not in set
 *   {a,b,...}   any one of the literal alternatives
 * Ranges such as [a-z] and {num1..num2} are not interpreted.
 * A '[' or '{' in the pattern that faces the same character in the text is
 * taken as a literal character.
 */
bool PatternMatch(std::u32string_view pattern, std::u32string_view text) {
	if (pattern == text) {
		// Covers both being empty as well as patterns without special characters
		return true;
	}
	if (pattern.empty()) {
		return false;
	}

	// Every remaining case consumes the first pattern character
	const char32_t first = pattern.front();
	pattern.remove_prefix(1);

	if (first == U'\\') {
		// Escape: next pattern character must be present and equal the next text character
		if (pattern.empty() || text.empty() || pattern.front() != text.front()) {
			return false;
		}
		return PatternMatch(pattern.substr(1), text.substr(1));
	}

	if (first == U'*') {
		// "**" may run across '/', a single "*" stops there
		const bool crossesDirectories = !pattern.empty() && pattern.front() == U'*';
		if (crossesDirectories) {
			pattern.remove_prefix(1);
		}
		while (!text.empty()) {
			if (PatternMatch(pattern, text)) {
				return true;
			}
			if (!crossesDirectories && text.front() == U'/') {
				// "/" not matched by single "*" and the rest was just found not to match
				return false;
			}
			text.remove_prefix(1);
		}
		assert(text.empty());
		// Wildcard consumed the whole text so whatever pattern remains must match nothing.
		// Terminates as the pattern passed on is shorter than the one received.
		return PatternMatch(pattern, text);
	}

	if (text.empty()) {
		// Out of text: only a group offering an empty alternative can still succeed
		return (first == U'{') && MatchAlternatives(pattern, text);
	}

	if (first == U'?' || first == text.front()) {
		// Any single character, or the same literal character
		return PatternMatch(pattern, text.substr(1));
	}

	if (first == U'[') {
		if (pattern.empty()) {
			return false;
		}
		const bool positive = pattern.front() != U'!';
		if (!positive) {
			pattern.remove_prefix(1);
			if (pattern.empty()) {
				return false;
			}
		}
		// Set runs to the next ']' or, when that is missing, to the end of the pattern
		const size_t close = pattern.find(U']');
		const std::u32string_view members = pattern.substr(0, close);
		pattern = (close == std::u32string_view::npos) ? std::u32string_view() : pattern.substr(close + 1);
		const bool inSet = members.find(text.front()) != std::u32string_view::npos;
		if (inSet != positive) {
			return false;
		}
		return PatternMatch(pattern, text.substr(1));
	}

	if (first == U'{') {
		return MatchAlternatives(pattern, text);
	}

	return false;
}
173 
174 }
175 
ECForDirectory()176 ECForDirectory::ECForDirectory() : isRoot(false) {
177 }
178 
ReadOneDirectory(const FilePath & dir)179 void ECForDirectory::ReadOneDirectory(const FilePath &dir) {
180 	directory = dir.AsUTF8();
181 	directory.append("/");
182 	FilePath fpec(dir, editorConfigName);
183 	std::string configString = fpec.Read();
184 	if (configString.size() > 0) {
185 		const std::string_view svUtf8BOM(UTF8BOM);
186 		if (StartsWith(configString, svUtf8BOM)) {
187 			configString.erase(0, svUtf8BOM.length());
188 		}
189 		// Carriage returns aren't wanted
190 		Remove(configString, std::string("\r"));
191 		std::vector<std::string> configLines = StringSplit(configString, '\n');
192 		for (std::string &line : configLines) {
193 			if (line.empty() || StartsWith(line, "#") || StartsWith(line, ";")) {
194 				// Drop comments
195 			} else if (StartsWith(line, "[")) {
196 				// Pattern
197 				lines.push_back(line);
198 			} else if (Contains(line, '=')) {
199 				LowerCaseAZ(line);
200 				Remove(line, std::string(" "));
201 				lines.push_back(line);
202 				std::vector<std::string> nameVal = StringSplit(line, '=');
203 				if (nameVal.size() == 2) {
204 					if ((nameVal[0] == "root") && nameVal[1] == "true") {
205 						isRoot = true;
206 					}
207 				}
208 			}
209 		}
210 	}
211 }
212 
213 EditorConfig::~EditorConfig() = default;
214 
ReadFromDirectory(const FilePath & dirStart)215 void EditorConfig::ReadFromDirectory(const FilePath &dirStart) {
216 	FilePath dir = dirStart;
217 	while (true) {
218 		ECForDirectory ecfd;
219 		ecfd.ReadOneDirectory(dir);
220 		config.insert(config.begin(), ecfd);
221 		if (ecfd.isRoot || !dir.IsSet() || dir.IsRoot()) {
222 			break;
223 		}
224 		// Up a level
225 		dir = dir.Directory();
226 	}
227 }
228 
MapFromAbsolutePath(const FilePath & absolutePath) const229 std::map<std::string, std::string> EditorConfig::MapFromAbsolutePath(const FilePath &absolutePath) const {
230 	std::map<std::string, std::string> ret;
231 	std::string fullPath = absolutePath.AsUTF8();
232 #ifdef WIN32
233 	// Convert Windows path separators to Unix
234 	std::replace(fullPath.begin(), fullPath.end(), '\\', '/');
235 #endif
236 	for (const ECForDirectory &level : config) {
237 		std::string relPath;
238 		if (level.directory.length() <= fullPath.length()) {
239 			relPath = fullPath.substr(level.directory.length());
240 		}
241 		bool inActiveSection = false;
242 		for (auto line : level.lines) {
243 			if (StartsWith(line, "[")) {
244 				std::string pattern = line.substr(1, line.size() - 2);
245 				if (!FilePath::CaseSensitive()) {
246 					pattern = GUI::LowerCaseUTF8(pattern);
247 					relPath = GUI::LowerCaseUTF8(relPath);
248 				}
249 				if ((pattern.find('/') == std::string::npos) && (relPath.find('/') != std::string::npos)) {
250 					// Simple pattern without directories so make match in any directory
251 					pattern.insert(0, "**/");
252 				}
253 				// Convert to u32string to treat as characters, not bytes
254 				std::u32string patternU32 = UTF32FromUTF8(pattern);
255 				std::u32string relPathU32 = UTF32FromUTF8(relPath);
256 				inActiveSection = PatternMatch(patternU32, relPathU32);
257 				// PatternMatch only works with literal filenames, '?', '*', '**', '[]', '[!]', '{,}', '\x'.
258 				// Other formats not yet handled:
259 				//   {num1..num2}
260 			} else if (inActiveSection && Contains(line, '=')) {
261 				const std::vector<std::string> nameVal = StringSplit(line, '=');
262 				if (nameVal.size() == 2) {
263 					if (nameVal[1] == "unset") {
264 						std::map<std::string, std::string>::iterator it = ret.find(nameVal[0]);
265 						if (it != ret.end())
266 							ret.erase(it);
267 					} else {
268 						ret[nameVal[0]] = nameVal[1];
269 					}
270 				}
271 			}
272 		}
273 	}
274 
275 	// Install defaults for indentation/tab
276 
277 	// if indent_style == "tab" and !indent_size: indent_size = "tab"
278 	if (ret.count("indent_style") && ret["indent_style"] == "tab" && !ret.count("indent_size")) {
279 		ret["indent_size"] = "tab";
280 	}
281 
282 	// if indent_size != "tab" and !tab_width: tab_width = indent_size
283 	if (ret.count("indent_size") && ret["indent_size"] != "tab" && !ret.count("tab_width")) {
284 		ret["tab_width"] = ret["indent_size"];
285 	}
286 
287 	// if indent_size == "tab": indent_size = tab_width
288 	if (ret.count("indent_size") && ret["indent_size"] == "tab" && ret.count("tab_width")) {
289 		ret["indent_size"] = ret["tab_width"];
290 	}
291 
292 	return ret;
293 }
294 
Clear()295 void EditorConfig::Clear() noexcept {
296 	config.clear();
297 }
298 
299 #if defined(TESTING)
300 
TestPatternMatch()301 static void TestPatternMatch() {
302 	// Literals
303 	assert(PatternMatch(U"", U""));
304 	assert(PatternMatch(U"a", U"a"));
305 	assert(PatternMatch(U"a", U"b") == false);
306 	assert(PatternMatch(U"ab", U"ab"));
307 	assert(PatternMatch(U"ab", U"a") == false);
308 	assert(PatternMatch(U"a", U"ab") == false);
309 
310 	// * matches anything except for '/'
311 	assert(PatternMatch(U"*", U""));
312 	assert(PatternMatch(U"*", U"a"));
313 	assert(PatternMatch(U"*", U"ab"));
314 
315 	assert(PatternMatch(U"a*", U"a"));
316 	assert(PatternMatch(U"a*", U"ab"));
317 	assert(PatternMatch(U"a*", U"abc"));
318 	assert(PatternMatch(U"a*", U"bc") == false);
319 
320 	assert(PatternMatch(U"*a", U"a"));
321 	assert(PatternMatch(U"*a", U"za"));
322 	assert(PatternMatch(U"*a", U"yza"));
323 	assert(PatternMatch(U"*a", U"xyz") == false);
324 	assert(PatternMatch(U"a*z", U"a/z") == false);
325 	assert(PatternMatch(U"a*b*c", U"abc"));
326 	assert(PatternMatch(U"a*b*c", U"a1b234c"));
327 
328 	// ? matches one character
329 	assert(PatternMatch(U"?", U"a"));
330 	assert(PatternMatch(U"?", U"") == false);
331 	assert(PatternMatch(U"a?c", U"abc"));
332 
333 	// [set] matches one character from set
334 	assert(PatternMatch(U"a[123]z", U"a2z"));
335 	assert(PatternMatch(U"a[123]z", U"az") == false);
336 	assert(PatternMatch(U"a[123]z", U"a2") == false);
337 
338 	// [!set] matches one character not from set
339 	assert(PatternMatch(U"a[!123]z", U"ayz"));
340 	assert(PatternMatch(U"a[!123]", U"az"));
341 	assert(PatternMatch(U"a[!123]", U"a2") == false);
342 
343 	// ** matches anything including '/'
344 	assert(PatternMatch(U"**a", U"a"));
345 	assert(PatternMatch(U"**a", U"za"));
346 	assert(PatternMatch(U"**a", U"yza"));
347 	assert(PatternMatch(U"**a", U"xyz") == false);
348 	assert(PatternMatch(U"a**z", U"a/z"));
349 	assert(PatternMatch(U"a**z", U"a/b/z"));
350 	assert(PatternMatch(U"a**", U"a/b/z"));
351 
352 	// {alt1,alt2,...} matches any of the alternatives
353 	assert(PatternMatch(U"<{ab}>", U"<ab>"));
354 	assert(PatternMatch(U"<{ab,lm,xyz}>", U"<ab>"));
355 	assert(PatternMatch(U"<{ab,lm,xyz}>", U"<lm>"));
356 	assert(PatternMatch(U"<{ab,lm,xyz}>", U"<xyz>"));
357 	assert(PatternMatch(U"<{ab,lm,xyz}>", U"<rs>") == false);
358 }
359 
360 #endif
361 
Create()362 std::unique_ptr<IEditorConfig> IEditorConfig::Create() {
363 #if defined(TESTING)
364 	TestPatternMatch();
365 #endif
366 	return std::make_unique<EditorConfig>();
367 }
368