1 /* This file is part of the Spring engine (GPL v2 or later), see LICENSE.html */
2 
3 #include "FileFilter.h"
4 
5 
6 #include <boost/regex.hpp>
7 
8 #include <limits.h>
9 #include <ctype.h>
10 #include <sstream>
11 #include <vector>
12 
13 using std::string;
14 using std::vector;
15 
16 
17 class CFileFilter : public IFileFilter
18 {
19 public:
20 	void AddRule(const string& rule);
21 	bool Match(const string& filename) const;
22 
23 private:
24 	string glob_to_regex(const string& glob);
25 
26 	struct Rule {
RuleCFileFilter::Rule27 		Rule() : negate(false) {}
28 		string glob;
29 		boost::regex regex;
30 		bool negate;
31 	};
32 
33 	vector<Rule> rules;
34 };
35 
36 
Create()37 IFileFilter* IFileFilter::Create()
38 {
39 	return new CFileFilter();
40 }
41 
42 
43 /** @brief Add a filtering rule.
44 
45 A rule can be:
46  - An empty line, this is ignored,
47  - A line starting with a '#', this serves as a comment and is ignored,
48  - A path starting with a path separator ('/' or '\'): this is an absolute
49    path and matches only against the entire leading part of the filename
50    passed to Match(): '/foo' matches 'foo' and 'foo/bar', but not 'bar/foo'.
51  - Any other path is a relative path and is matched less strict: as long as
52    there is a consecutive set of path elements matching the rule, there is a
53    match: 'b/c/d' matches 'b/c/d', but also 'a/b/c/d/e'.
54 
55 Note that:
56  - Leading and trailing whitespace is ignored.
57  - Globbing characters '*' and '?' can be used, both do NOT match path
58    separators (like in shell, but unlike fnmatch(), or so I've been told.)
59    e.g. 'foo\\*\\baz' matches 'foo/bar/baz' but not 'foo/ba/r/baz'.
60  - Any path separator matches any other path separator, so there is no need to
61    worry about converting them: 'foo/bar' matches 'foo\\bar' and 'foo:bar' too.
62  - A path can be prefixed with an exclamation mark '!', this negates the
63    pattern. Because the rules are matched in-order, one can use this to exclude
64    a file from a more generic pattern.
65  - By default, no file matches. This can be changed using AddRule("*") ofc.
66 */
AddRule(const string & rule)67 void CFileFilter::AddRule(const string& rule)
68 {
69 	if (rule.empty())
70 		return;
71 
72 	// Split lines if line endings are present.
73 	if (rule.find('\n') != string::npos) {
74 		size_t beg = 0, end = 0;
75 		while ((end = rule.find('\n', beg)) != string::npos) {
76 			//printf("line: %s\n", rule.substr(beg, end - beg).c_str());
77 			AddRule(rule.substr(beg, end - beg));
78 			beg = end + 1;
79 		}
80 		AddRule(rule.substr(beg));
81 		return;
82 	}
83 
84 	// Eat leading whitespace, return if we reach end of string.
85 	size_t p = 0;
86 	while (isspace(rule[p]))
87 		if (++p >= rule.length())
88 			return;
89 
90 	// Nothing to do if the rule is a comment.
91 	if (rule[p] == '#')
92 		return;
93 
94 	// Eat trailing whitespace, return if we meet p.
95 	size_t q = rule.length() - 1;
96 	while (isspace(rule[q])) {
97 		if (--q < p) {
98 			return;
99 		}
100 	}
101 
102 	// Build the rule.
103 	Rule r;
104 	if (rule[p] == '!') {
105 		r.negate = true;
106 		if (++p > q) {
107 			return;
108 		}
109 	}
110 	r.glob = rule.substr(p, 1 + q - p);
111 	r.regex = boost::regex(glob_to_regex(r.glob)
112 		, boost::regex::icase | boost::regex::no_escape_in_lists);
113 	rules.push_back(r);
114 	//printf("added %s%s: %s\n", r.negate ? "!" : "", r.glob.c_str(), r.regex.expression());
115 }
116 
117 
118 /** @brief Checks whether filename matches this filter. */
Match(const string & filename) const119 bool CFileFilter::Match(const string& filename) const
120 {
121 	bool match = false;
122 	for (vector<Rule>::const_iterator it = rules.begin(); it != rules.end(); ++it) {
123 		if (boost::regex_search(filename, it->regex))
124 			match = !it->negate;
125 	}
126 	return match;
127 }
128 
129 
glob_to_regex(const string & glob)130 string CFileFilter::glob_to_regex(const string& glob) // FIXME remove; duplicate in FileSystem::ConvertGlobToRegex
131 {
132 #define PATH_SEPARATORS "/\\:"
133 
134 	std::stringstream regex;
135 	string::const_iterator i = glob.begin();
136 
137 	// If the path starts with a path separator, we take it as an absolute path
138 	// (relative to whatever is passed to Match() later on), so we insert the
139 	// begin anchor.
140 
141 	// Otherwise we 'just' need to make sure the glob matches only full path
142 	// elements, so we require either start of line OR path separator.
143 
144 	if ((i != glob.end() && *i == '/') || *i == '\\') {
145 		regex << '^';
146 		++i;
147 	}
148 	else
149 		regex << "(^|[" PATH_SEPARATORS "])";
150 
151 	for (; i != glob.end(); ++i) {
152 		char c = *i;
153 		switch (c) {
154 			case '*':
155 				// In (shell) globbing the wildcards match anything except path separators.
156 				regex << "[^" PATH_SEPARATORS "]*";
157 				break;
158 			case '?':
159 				regex << "[^" PATH_SEPARATORS "]";
160 				break;
161 			case '/':
162 			case '\\':
163 			case ':':
164 				// Any path separator matches any other path separator.
165 				// (So we don't have to manually convert slashes before search.)
166 				regex << "[" PATH_SEPARATORS "]";
167 				break;
168 			default:
169 				if (!(isalnum(c) || c == '_'))
170 					regex << '\\';
171 				regex << c;
172 				break;
173 		}
174 	}
175 
176 	// Make sure we only match full path elements. (see above)
177 	regex << "([" PATH_SEPARATORS "]|$)";
178 
179 	return regex.str();
180 }
181