1 /* This file is part of the Spring engine (GPL v2 or later), see LICENSE.html */
2
3 #include "FileFilter.h"
4
5
6 #include <boost/regex.hpp>
7
8 #include <limits.h>
9 #include <ctype.h>
10 #include <sstream>
11 #include <vector>
12
13 using std::string;
14 using std::vector;
15
16
17 class CFileFilter : public IFileFilter
18 {
19 public:
20 void AddRule(const string& rule);
21 bool Match(const string& filename) const;
22
23 private:
24 string glob_to_regex(const string& glob);
25
26 struct Rule {
RuleCFileFilter::Rule27 Rule() : negate(false) {}
28 string glob;
29 boost::regex regex;
30 bool negate;
31 };
32
33 vector<Rule> rules;
34 };
35
36
Create()37 IFileFilter* IFileFilter::Create()
38 {
39 return new CFileFilter();
40 }
41
42
43 /** @brief Add a filtering rule.
44
45 A rule can be:
46 - An empty line, this is ignored,
47 - A line starting with a '#', this serves as a comment and is ignored,
48 - A path starting with a path separator ('/' or '\'): this is an absolute
49 path and matches only against the entire leading part of the filename
50 passed to Match(): '/foo' matches 'foo' and 'foo/bar', but not 'bar/foo'.
51 - Any other path is a relative path and is matched less strict: as long as
52 there is a consecutive set of path elements matching the rule, there is a
53 match: 'b/c/d' matches 'b/c/d', but also 'a/b/c/d/e'.
54
55 Note that:
56 - Leading and trailing whitespace is ignored.
57 - Globbing characters '*' and '?' can be used, both do NOT match path
58 separators (like in shell, but unlike fnmatch(), or so I've been told.)
59 e.g. 'foo\\*\\baz' matches 'foo/bar/baz' but not 'foo/ba/r/baz'.
60 - Any path separator matches any other path separator, so there is no need to
61 worry about converting them: 'foo/bar' matches 'foo\\bar' and 'foo:bar' too.
62 - A path can be prefixed with an exclamation mark '!', this negates the
63 pattern. Because the rules are matched in-order, one can use this to exclude
64 a file from a more generic pattern.
65 - By default, no file matches. This can be changed using AddRule("*") ofc.
66 */
AddRule(const string & rule)67 void CFileFilter::AddRule(const string& rule)
68 {
69 if (rule.empty())
70 return;
71
72 // Split lines if line endings are present.
73 if (rule.find('\n') != string::npos) {
74 size_t beg = 0, end = 0;
75 while ((end = rule.find('\n', beg)) != string::npos) {
76 //printf("line: %s\n", rule.substr(beg, end - beg).c_str());
77 AddRule(rule.substr(beg, end - beg));
78 beg = end + 1;
79 }
80 AddRule(rule.substr(beg));
81 return;
82 }
83
84 // Eat leading whitespace, return if we reach end of string.
85 size_t p = 0;
86 while (isspace(rule[p]))
87 if (++p >= rule.length())
88 return;
89
90 // Nothing to do if the rule is a comment.
91 if (rule[p] == '#')
92 return;
93
94 // Eat trailing whitespace, return if we meet p.
95 size_t q = rule.length() - 1;
96 while (isspace(rule[q])) {
97 if (--q < p) {
98 return;
99 }
100 }
101
102 // Build the rule.
103 Rule r;
104 if (rule[p] == '!') {
105 r.negate = true;
106 if (++p > q) {
107 return;
108 }
109 }
110 r.glob = rule.substr(p, 1 + q - p);
111 r.regex = boost::regex(glob_to_regex(r.glob)
112 , boost::regex::icase | boost::regex::no_escape_in_lists);
113 rules.push_back(r);
114 //printf("added %s%s: %s\n", r.negate ? "!" : "", r.glob.c_str(), r.regex.expression());
115 }
116
117
118 /** @brief Checks whether filename matches this filter. */
Match(const string & filename) const119 bool CFileFilter::Match(const string& filename) const
120 {
121 bool match = false;
122 for (vector<Rule>::const_iterator it = rules.begin(); it != rules.end(); ++it) {
123 if (boost::regex_search(filename, it->regex))
124 match = !it->negate;
125 }
126 return match;
127 }
128
129
/** @brief Translate a glob pattern into regular expression source text.

- A leading '/' or '\' anchors the expression at the start of the subject;
  otherwise it may start at the beginning or right after any path separator.
- '*' and '?' become "any run of" / "exactly one" non-separator character.
- '/', '\' and ':' each become a class matching any of the three separators.
- Every other character is matched literally.
- The expression ends with "separator or end of subject", so only whole path
  elements match.

@param glob  the glob pattern; may be empty
@return the regular expression source, to be compiled by the caller
*/
string CFileFilter::glob_to_regex(const string& glob) // FIXME remove; duplicate in FileSystem::ConvertGlobToRegex
{
	// The path separators, spelled the way they go inside a bracket expression.
	static const char* const separators = "/\\:";

	std::stringstream regex;
	string::const_iterator i = glob.begin();

	// If the path starts with a path separator, we take it as an absolute path
	// (relative to whatever is passed to Match() later on), so we insert the
	// begin anchor.
	//
	// Otherwise we 'just' need to make sure the glob matches only full path
	// elements, so we require either start of line OR path separator.
	//
	// The end() check has to guard both comparisons: an empty glob must not be
	// dereferenced.
	if (i != glob.end() && (*i == '/' || *i == '\\')) {
		regex << '^';
		++i;
	} else {
		regex << "(^|[" << separators << "])";
	}

	for (; i != glob.end(); ++i) {
		const char c = *i;
		switch (c) {
			case '*':
				// In (shell) globbing the wildcards match anything except path separators.
				regex << "[^" << separators << "]*";
				break;
			case '?':
				regex << "[^" << separators << "]";
				break;
			case '/':
			case '\\':
			case ':':
				// Any path separator matches any other path separator.
				// (So we don't have to manually convert slashes before search.)
				regex << "[" << separators << "]";
				break;
			case '<':
			case '>':
			case '`':
			case '\'':
				// Do NOT backslash-escape these: Boost.Regex reads a backslash
				// followed by one of them as a word/buffer boundary assertion,
				// not as the literal character. Unescaped they are literals.
				regex << c;
				break;
			default:
				// Escape everything that is not a plain word character, so no
				// regex metacharacter from the glob survives. The cast is
				// needed because isalnum() is undefined for negative values.
				if (!(isalnum(static_cast<unsigned char>(c)) || c == '_'))
					regex << '\\';
				regex << c;
				break;
		}
	}

	// Make sure we only match full path elements. (see above)
	regex << "([" << separators << "]|$)";

	return regex.str();
}
181