1 use failure::Error;
2 use glob::glob;
3
/// Error returned by `PatternWord::expand_glob` when a word containing
/// wildcards does not match any path.
#[derive(Debug, Fail)]
#[fail(display = "no matches")]
pub struct NoMatchesError;
7
/// Shorthand for a `Result` whose error type is `failure::Error`.
type Result<I> = std::result::Result<I, Error>;
9
/// A single fragment of a `PatternWord`: either literal text or a wildcard.
#[derive(Debug, Clone)]
pub enum LiteralOrGlob {
    /// Text that is matched verbatim.
    Literal(String),
    /// The `*` wildcard.
    AnyString,
    /// The `?` wildcard.
    AnyChar,
}
16
/// A word which may include patterns. We don't expand words
/// into a `Vec<String>` directly because patterns have
/// two different meanings: path globbing and matching in `case`.
#[derive(Debug)]
pub struct PatternWord {
    /// The literal and wildcard pieces of the word, in source order.
    fragments: Vec<LiteralOrGlob>,
}
24
25 impl PatternWord {
new(fragments: Vec<LiteralOrGlob>) -> PatternWord26 pub fn new(fragments: Vec<LiteralOrGlob>) -> PatternWord {
27 PatternWord { fragments }
28 }
29
fragments(&self) -> &[LiteralOrGlob]30 pub fn fragments(&self) -> &[LiteralOrGlob] {
31 &self.fragments
32 }
33
34 /// Returns a string. Pattern characters such as `*` are treated as a literal.
into_string(self) -> String35 pub fn into_string(self) -> String {
36 let mut string = String::new();
37 for frag in self.fragments {
38 match frag {
39 LiteralOrGlob::Literal(lit) => string += &lit,
40 LiteralOrGlob::AnyChar => string.push('?'),
41 LiteralOrGlob::AnyString => string.push('*'),
42 }
43 }
44
45 string
46 }
47
48 //// Expand patterns as a file path globbing.
expand_glob(self) -> Result<Vec<String>>49 pub fn expand_glob(self) -> Result<Vec<String>> {
50 let includes_glob = self.fragments.iter().any(|frag| match frag {
51 LiteralOrGlob::AnyString => true,
52 LiteralOrGlob::AnyChar => true,
53 _ => false,
54 });
55
56 let mut expanded_words = Vec::new();
57 if includes_glob {
58 let mut pattern = String::new();
59 for frag in self.fragments {
60 match frag {
61 LiteralOrGlob::Literal(lit) => {
62 pattern += lit.replace("*", "[*]").replace("?", "[?]").as_str();
63 }
64 LiteralOrGlob::AnyChar => {
65 pattern.push('?');
66 }
67 LiteralOrGlob::AnyString => {
68 pattern.push('*');
69 }
70 }
71 }
72
73 let mut paths = Vec::new();
74 for entry in glob(&pattern).expect("failed to glob") {
75 match entry {
76 Ok(path) => {
77 paths.push(path.to_str().unwrap().to_string());
78 }
79 Err(e) => error!("glob error: {:?}", e),
80 }
81 }
82 if paths.is_empty() {
83 return Err(Error::from(NoMatchesError));
84 }
85
86 expanded_words.extend(paths);
87 } else {
88 let mut s = String::new();
89 for frag in self.fragments {
90 if let LiteralOrGlob::Literal(lit) = frag {
91 s += &lit;
92 }
93 }
94
95 expanded_words.push(s);
96 }
97
98 Ok(expanded_words)
99 }
100 }
101
/// A single element of the pattern handed to `regex_match`.
#[derive(Debug, PartialEq)]
pub enum RegexSpan {
    /// Matches exactly this character.
    Literal(char),
    /// `*`. Matches any string, including the empty one.
    AnyString,
    /// `?`. Note that in the shell world, `?` means any single character; it
    /// consumes exactly one character. It is not optional.
    AnyChar,
}
111
/// Returns `slice[start..]`, or an empty slice when `start` lies beyond the
/// end of `slice`.
fn slice_or_empty<T>(slice: &[T], start: usize) -> &[T] {
    slice.get(start..).unwrap_or(&[])
}
119
/// Returns `slice[start..]`, or `""` when `start` lies beyond the end of
/// `slice`.
///
/// `start` is a byte offset; like any `str` slicing this panics if it falls
/// inside a multi-byte character.
fn str_slice_or_empty(slice: &str, start: usize) -> &str {
    if start > slice.len() {
        return "";
    }

    &slice[start..]
}
127
match_one(pat: &RegexSpan, ch: char) -> bool128 fn match_one(pat: &RegexSpan, ch: char) -> bool {
129 trace!("regex: one: pattern = {:?}, ch={:?}", pat, ch);
130
131 match pat {
132 RegexSpan::Literal(span_ch) => *span_ch == ch,
133 RegexSpan::AnyChar => true,
134 _ => false,
135 }
136 }
137
/// The location of a match found by `pattern_word_match`.
#[derive(Debug, PartialEq)]
pub struct MatchResult {
    /// Offset into the searched text at which the match begins.
    start: usize,
    /// Offset of the last matched position; inclusive (matching `abc`
    /// against `"abc"` yields `start: 0, end: 2`).
    end: usize,
}
143
144 /// A regex engine based on @nadrane's work: https://nickdrane.com/build-your-own-regex/
145 /// Returns the index of the matched part or None.
regex_match(pattern: &[RegexSpan], text: &str, index: usize) -> Option<usize>146 fn regex_match(pattern: &[RegexSpan], text: &str, index: usize) -> Option<usize> {
147 trace!(
148 "regex: match: pattern = {:?}, text='{}', index = {}",
149 pattern,
150 text,
151 index
152 );
153
154 match pattern.get(0) {
155 Some(RegexSpan::AnyChar) | Some(RegexSpan::Literal(_)) => {
156 if text.is_empty() {
157 return None;
158 }
159
160 if !match_one(&pattern[0], text.chars().next().unwrap()) {
161 return None;
162 }
163
164 regex_match(
165 slice_or_empty(pattern, 1),
166 str_slice_or_empty(text, 1),
167 index + 1,
168 )
169 }
170 Some(RegexSpan::AnyString) => {
171 if text.is_empty() {
172 if pattern.len() > 1 {
173 // There're some remaining regex spans after this AnyString.
174 return regex_match(slice_or_empty(pattern, 1), text, index);
175 } else {
176 // We've consumed all regex spans.
177 return Some(index);
178 }
179 }
180
181 // A. consume a character by the wildcard.
182 if let Some(index) = regex_match(pattern, str_slice_or_empty(text, 1), index + 1) {
183 return Some(index);
184 }
185
186 // B. skip the wildcard.
187 regex_match(slice_or_empty(pattern, 1), text, index)
188 }
189 None => {
190 // The `pattern` is empty.
191 Some(index)
192 }
193 }
194 }
195
pattern_word_match(pattern: &PatternWord, text: &str) -> Option<MatchResult>196 fn pattern_word_match(pattern: &PatternWord, text: &str) -> Option<MatchResult> {
197 trace!("pattern_word_match: text = '{}'", text);
198 let mut spans = Vec::new();
199 for frag in &pattern.fragments {
200 match frag {
201 LiteralOrGlob::AnyChar => {
202 spans.push(RegexSpan::AnyChar);
203 }
204 LiteralOrGlob::AnyString => {
205 spans.push(RegexSpan::AnyString);
206 }
207 LiteralOrGlob::Literal(s) => {
208 for ch in s.chars() {
209 spans.push(RegexSpan::Literal(ch));
210 }
211 }
212 }
213 }
214
215 for start in 0..text.len() {
216 trace!("regex: from = {}", start);
217 if let Some(end) = regex_match(&spans, &text[start..], start) {
218 return Some(MatchResult {
219 start,
220 end: end.saturating_sub(1),
221 });
222 }
223 }
224
225 None
226 }
227
match_pattern(pattern: &PatternWord, text: &str) -> bool228 pub fn match_pattern(pattern: &PatternWord, text: &str) -> bool {
229 pattern_word_match(pattern, text).is_some()
230 }
231
match_pattern_all(pattern: &PatternWord, text: &str) -> bool232 pub fn match_pattern_all(pattern: &PatternWord, text: &str) -> bool {
233 match pattern_word_match(pattern, text) {
234 Some(MatchResult { start, end }) => start == 0 && end == text.len() - 1,
235 None => false,
236 }
237 }
238
replace_pattern( pattern: &PatternWord, text: &str, replacement: &str, replace_all: bool, ) -> String239 pub fn replace_pattern(
240 pattern: &PatternWord,
241 text: &str,
242 replacement: &str,
243 replace_all: bool,
244 ) -> String {
245 let mut remaining = text;
246 let mut text = String::new();
247 loop {
248 if let Some(m) = pattern_word_match(pattern, remaining) {
249 text += &remaining[..m.start];
250 text += replacement;
251
252 if remaining.len() < m.end + 1 {
253 // Reached to the end of text.
254 remaining = "";
255 break;
256 }
257
258 remaining = &remaining[(m.end + 1)..];
259 } else {
260 // No matches.
261 break;
262 }
263
264 if !replace_all {
265 break;
266 }
267 }
268
269 text += remaining;
270 text
271 }
272
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for a literal fragment.
    fn lit(s: &str) -> LiteralOrGlob {
        LiteralOrGlob::Literal(s.to_owned())
    }

    /// Builds a pattern word out of `fragments`.
    fn word(fragments: Vec<LiteralOrGlob>) -> PatternWord {
        PatternWord::new(fragments)
    }

    /// The expected result of a successful match.
    fn found(start: usize, end: usize) -> Option<MatchResult> {
        Some(MatchResult { start, end })
    }

    #[test]
    fn literal_only() {
        let pat = word(vec![lit("abc")]);

        assert_eq!(pattern_word_match(&pat, "abc"), found(0, 2));
        assert_eq!(pattern_word_match(&pat, "xxabcxx"), found(2, 4));
        assert_eq!(pattern_word_match(&pat, ""), None);
        assert_eq!(pattern_word_match(&pat, "xyz"), None);

        assert_eq!(
            replace_pattern(&pat, "_abc_abc_abc_", "X", false),
            "_X_abc_abc_"
        );
        assert_eq!(
            replace_pattern(&pat, "_abc_abc_abc_", "X", true),
            "_X_X_X_"
        );
    }

    #[test]
    fn wildcard() {
        // "?"
        let any_char = word(vec![LiteralOrGlob::AnyChar]);

        assert_eq!(pattern_word_match(&any_char, ""), None);
        assert_eq!(pattern_word_match(&any_char, "@"), found(0, 0));
        assert_eq!(replace_pattern(&any_char, "aaa", "X", false), "Xaa");
        assert_eq!(replace_pattern(&any_char, "aaa", "X", true), "XXX");

        // "*"
        let any_string = word(vec![LiteralOrGlob::AnyString]);

        assert_eq!(pattern_word_match(&any_string, ""), None);
        assert_eq!(pattern_word_match(&any_string, "x"), found(0, 0));
        assert_eq!(pattern_word_match(&any_string, "xyz"), found(0, 2));

        // "1?34"
        let one_any_char = word(vec![lit("1"), LiteralOrGlob::AnyChar, lit("34")]);

        assert_eq!(pattern_word_match(&one_any_char, "abc1234"), found(3, 6));
        assert_eq!(
            replace_pattern(&one_any_char, "_1A34_1B34_1C34_", "X", false),
            "_X_1B34_1C34_"
        );
        assert_eq!(
            replace_pattern(&one_any_char, "_1A34_1B34_1C34_", "X", true),
            "_X_X_X_"
        );

        // "1*4"
        let one_any_string = word(vec![lit("1"), LiteralOrGlob::AnyString, lit("4")]);

        assert_eq!(pattern_word_match(&one_any_string, "##1234##"), found(2, 5));
    }

    #[test]
    fn complex_pattern() {
        // "1?3*78*9"
        let pat = word(vec![
            lit("1"),
            LiteralOrGlob::AnyChar,
            lit("3"),
            LiteralOrGlob::AnyString,
            lit("7"),
            lit("8"),
            LiteralOrGlob::AnyString,
            lit("9"),
        ]);

        assert_eq!(pattern_word_match(&pat, "__123456789__"), found(2, 10));
        assert_eq!(pattern_word_match(&pat, "__12x3456789__"), None);
    }
}
406