1 use failure::Error;
2 use glob::glob;
3 
/// Error returned by glob expansion when the pattern does not match any path.
#[derive(Debug, Fail)]
#[fail(display = "no matches")]
pub struct NoMatchesError;
7 
/// Shorthand for a `Result` whose error type is `failure::Error`.
type Result<I> = std::result::Result<I, Error>;
9 
/// A fragment of a word: either literal text or a pattern character.
#[derive(Debug, Clone)]
pub enum LiteralOrGlob {
    /// Literal text. A `*` or `?` inside it is not a wildcard.
    Literal(String),
    /// The `*` pattern character.
    AnyString,
    /// The `?` pattern character.
    AnyChar,
}
16 
/// A word which includes patterns. We don't expand words
/// into a `Vec<String>` directly since the patterns have
/// two different meanings: path glob and match in `case`.
#[derive(Debug)]
pub struct PatternWord {
    /// The literal and pattern fragments of the word, in order.
    fragments: Vec<LiteralOrGlob>,
}
24 
25 impl PatternWord {
new(fragments: Vec<LiteralOrGlob>) -> PatternWord26     pub fn new(fragments: Vec<LiteralOrGlob>) -> PatternWord {
27         PatternWord { fragments }
28     }
29 
fragments(&self) -> &[LiteralOrGlob]30     pub fn fragments(&self) -> &[LiteralOrGlob] {
31         &self.fragments
32     }
33 
34     /// Returns a string. Pattern characters such as `*` are treated as a literal.
into_string(self) -> String35     pub fn into_string(self) -> String {
36         let mut string = String::new();
37         for frag in self.fragments {
38             match frag {
39                 LiteralOrGlob::Literal(lit) => string += &lit,
40                 LiteralOrGlob::AnyChar => string.push('?'),
41                 LiteralOrGlob::AnyString => string.push('*'),
42             }
43         }
44 
45         string
46     }
47 
48     //// Expand patterns as a file path globbing.
expand_glob(self) -> Result<Vec<String>>49     pub fn expand_glob(self) -> Result<Vec<String>> {
50         let includes_glob = self.fragments.iter().any(|frag| match frag {
51             LiteralOrGlob::AnyString => true,
52             LiteralOrGlob::AnyChar => true,
53             _ => false,
54         });
55 
56         let mut expanded_words = Vec::new();
57         if includes_glob {
58             let mut pattern = String::new();
59             for frag in self.fragments {
60                 match frag {
61                     LiteralOrGlob::Literal(lit) => {
62                         pattern += lit.replace("*", "[*]").replace("?", "[?]").as_str();
63                     }
64                     LiteralOrGlob::AnyChar => {
65                         pattern.push('?');
66                     }
67                     LiteralOrGlob::AnyString => {
68                         pattern.push('*');
69                     }
70                 }
71             }
72 
73             let mut paths = Vec::new();
74             for entry in glob(&pattern).expect("failed to glob") {
75                 match entry {
76                     Ok(path) => {
77                         paths.push(path.to_str().unwrap().to_string());
78                     }
79                     Err(e) => error!("glob error: {:?}", e),
80                 }
81             }
82             if paths.is_empty() {
83                 return Err(Error::from(NoMatchesError));
84             }
85 
86             expanded_words.extend(paths);
87         } else {
88             let mut s = String::new();
89             for frag in self.fragments {
90                 if let LiteralOrGlob::Literal(lit) = frag {
91                     s += &lit;
92                 }
93             }
94 
95             expanded_words.push(s);
96         }
97 
98         Ok(expanded_words)
99     }
100 }
101 
/// A single element of the span list the matcher works on.
#[derive(Debug, PartialEq)]
pub enum RegexSpan {
    /// Matches exactly this character.
    Literal(char),
    /// Zero or arbitrary-length any characters. `*`.
    AnyString,
    /// `?`. Note that in the shell world, `?` means an any character; it
    /// consumes exactly one character. Not optional.
    AnyChar,
}
111 
/// Returns the part of `slice` beginning at `start`, or an empty slice when
/// `start` lies beyond the end of `slice`.
fn slice_or_empty<T>(slice: &[T], start: usize) -> &[T] {
    slice.get(start..).unwrap_or(&[])
}
119 
/// Returns the part of `slice` beginning at byte offset `start`, or an empty
/// string when `start` lies beyond the end of `slice`.
///
/// `start` is used to slice the string directly, so it has to fall on a
/// character boundary.
fn str_slice_or_empty(slice: &str, start: usize) -> &str {
    if start > slice.len() {
        return "";
    }

    &slice[start..]
}
127 
match_one(pat: &RegexSpan, ch: char) -> bool128 fn match_one(pat: &RegexSpan, ch: char) -> bool {
129     trace!("regex: one: pattern = {:?}, ch={:?}", pat, ch);
130 
131     match pat {
132         RegexSpan::Literal(span_ch) => *span_ch == ch,
133         RegexSpan::AnyChar => true,
134         _ => false,
135     }
136 }
137 
/// The location of a match within the searched text.
#[derive(Debug, PartialEq)]
pub struct MatchResult {
    /// Offset at which the match begins.
    start: usize,
    /// Offset of the last matched position (inclusive).
    end: usize,
}
143 
144 /// A regex engine based on @nadrane's work: https://nickdrane.com/build-your-own-regex/
145 /// Returns the index of the matched part or None.
regex_match(pattern: &[RegexSpan], text: &str, index: usize) -> Option<usize>146 fn regex_match(pattern: &[RegexSpan], text: &str, index: usize) -> Option<usize> {
147     trace!(
148         "regex: match: pattern = {:?}, text='{}', index = {}",
149         pattern,
150         text,
151         index
152     );
153 
154     match pattern.get(0) {
155         Some(RegexSpan::AnyChar) | Some(RegexSpan::Literal(_)) => {
156             if text.is_empty() {
157                 return None;
158             }
159 
160             if !match_one(&pattern[0], text.chars().next().unwrap()) {
161                 return None;
162             }
163 
164             regex_match(
165                 slice_or_empty(pattern, 1),
166                 str_slice_or_empty(text, 1),
167                 index + 1,
168             )
169         }
170         Some(RegexSpan::AnyString) => {
171             if text.is_empty() {
172                 if pattern.len() > 1 {
173                     // There're some remaining regex spans after this AnyString.
174                     return regex_match(slice_or_empty(pattern, 1), text, index);
175                 } else {
176                     // We've consumed all regex spans.
177                     return Some(index);
178                 }
179             }
180 
181             // A. consume a character by the wildcard.
182             if let Some(index) = regex_match(pattern, str_slice_or_empty(text, 1), index + 1) {
183                 return Some(index);
184             }
185 
186             // B. skip the wildcard.
187             regex_match(slice_or_empty(pattern, 1), text, index)
188         }
189         None => {
190             // The `pattern` is empty.
191             Some(index)
192         }
193     }
194 }
195 
pattern_word_match(pattern: &PatternWord, text: &str) -> Option<MatchResult>196 fn pattern_word_match(pattern: &PatternWord, text: &str) -> Option<MatchResult> {
197     trace!("pattern_word_match: text = '{}'", text);
198     let mut spans = Vec::new();
199     for frag in &pattern.fragments {
200         match frag {
201             LiteralOrGlob::AnyChar => {
202                 spans.push(RegexSpan::AnyChar);
203             }
204             LiteralOrGlob::AnyString => {
205                 spans.push(RegexSpan::AnyString);
206             }
207             LiteralOrGlob::Literal(s) => {
208                 for ch in s.chars() {
209                     spans.push(RegexSpan::Literal(ch));
210                 }
211             }
212         }
213     }
214 
215     for start in 0..text.len() {
216         trace!("regex: from = {}", start);
217         if let Some(end) = regex_match(&spans, &text[start..], start) {
218             return Some(MatchResult {
219                 start,
220                 end: end.saturating_sub(1),
221             });
222         }
223     }
224 
225     None
226 }
227 
match_pattern(pattern: &PatternWord, text: &str) -> bool228 pub fn match_pattern(pattern: &PatternWord, text: &str) -> bool {
229     pattern_word_match(pattern, text).is_some()
230 }
231 
match_pattern_all(pattern: &PatternWord, text: &str) -> bool232 pub fn match_pattern_all(pattern: &PatternWord, text: &str) -> bool {
233     match pattern_word_match(pattern, text) {
234         Some(MatchResult { start, end }) => start == 0 && end == text.len() - 1,
235         None => false,
236     }
237 }
238 
replace_pattern( pattern: &PatternWord, text: &str, replacement: &str, replace_all: bool, ) -> String239 pub fn replace_pattern(
240     pattern: &PatternWord,
241     text: &str,
242     replacement: &str,
243     replace_all: bool,
244 ) -> String {
245     let mut remaining = text;
246     let mut text = String::new();
247     loop {
248         if let Some(m) = pattern_word_match(pattern, remaining) {
249             text += &remaining[..m.start];
250             text += replacement;
251 
252             if remaining.len() < m.end + 1 {
253                 // Reached to the end of text.
254                 remaining = "";
255                 break;
256             }
257 
258             remaining = &remaining[(m.end + 1)..];
259         } else {
260             // No matches.
261             break;
262         }
263 
264         if !replace_all {
265             break;
266         }
267     }
268 
269     text += remaining;
270     text
271 }
272 
// Unit tests for the pattern matcher and `replace_pattern`.
#[cfg(test)]
mod tests {
    use super::*;

    // A pattern made of a single literal fragment.
    #[test]
    fn literal_only() {
        let pat = PatternWord {
            fragments: vec![LiteralOrGlob::Literal("abc".to_owned())],
        };

        // `end` is the offset of the last matched character (inclusive).
        assert_eq!(
            pattern_word_match(&pat, "abc"),
            Some(MatchResult { start: 0, end: 2 })
        );

        // The match may begin anywhere in the text.
        assert_eq!(
            pattern_word_match(&pat, "xxabcxx"),
            Some(MatchResult { start: 2, end: 4 })
        );

        assert_eq!(pattern_word_match(&pat, ""), None,);

        assert_eq!(pattern_word_match(&pat, "xyz"), None);

        // Without `replace_all` only the first occurrence is replaced.
        assert_eq!(
            replace_pattern(&pat, "_abc_abc_abc_", "X", false),
            "_X_abc_abc_".to_owned()
        );

        assert_eq!(
            replace_pattern(&pat, "_abc_abc_abc_", "X", true),
            "_X_X_X_".to_owned()
        );
    }

    // Patterns containing `?` and `*`.
    #[test]
    fn wildcard() {
        // "?"
        let pat = PatternWord {
            fragments: vec![LiteralOrGlob::AnyChar],
        };

        // `?` needs exactly one character, so it cannot match an empty text.
        assert_eq!(pattern_word_match(&pat, ""), None);

        assert_eq!(
            pattern_word_match(&pat, "@"),
            Some(MatchResult { start: 0, end: 0 })
        );

        assert_eq!(replace_pattern(&pat, "aaa", "X", false), "Xaa".to_owned());

        assert_eq!(replace_pattern(&pat, "aaa", "X", true), "XXX".to_owned());

        // "*"
        let pat = PatternWord {
            fragments: vec![LiteralOrGlob::AnyString],
        };

        // No starting position is tried for an empty text, so even `*`
        // yields no match.
        assert_eq!(pattern_word_match(&pat, ""), None,);

        assert_eq!(
            pattern_word_match(&pat, "x"),
            Some(MatchResult { start: 0, end: 0 })
        );

        // `*` consumes as many characters as possible.
        assert_eq!(
            pattern_word_match(&pat, "xyz"),
            Some(MatchResult { start: 0, end: 2 })
        );

        // "1?34"
        let pat = PatternWord {
            fragments: vec![
                LiteralOrGlob::Literal("1".to_owned()),
                LiteralOrGlob::AnyChar,
                LiteralOrGlob::Literal("34".to_owned()),
            ],
        };

        assert_eq!(
            pattern_word_match(&pat, "abc1234"),
            Some(MatchResult { start: 3, end: 6 })
        );

        assert_eq!(
            replace_pattern(&pat, "_1A34_1B34_1C34_", "X", false),
            "_X_1B34_1C34_".to_owned()
        );

        assert_eq!(
            replace_pattern(&pat, "_1A34_1B34_1C34_", "X", true),
            "_X_X_X_".to_owned()
        );

        // "1*4"
        let pat = PatternWord {
            fragments: vec![
                LiteralOrGlob::Literal("1".to_owned()),
                LiteralOrGlob::AnyString,
                LiteralOrGlob::Literal("4".to_owned()),
            ],
        };

        assert_eq!(
            pattern_word_match(&pat, "##1234##"),
            Some(MatchResult { start: 2, end: 5 })
        );
    }

    // A pattern mixing literals, `?` and several `*`.
    #[test]
    fn complex_pattern() {
        // "1?3*78*9"
        let pat = PatternWord {
            fragments: vec![
                LiteralOrGlob::Literal("1".to_owned()),
                LiteralOrGlob::AnyChar,
                LiteralOrGlob::Literal("3".to_owned()),
                LiteralOrGlob::AnyString,
                LiteralOrGlob::Literal("7".to_owned()),
                LiteralOrGlob::Literal("8".to_owned()),
                LiteralOrGlob::AnyString,
                LiteralOrGlob::Literal("9".to_owned()),
            ],
        };

        assert_eq!(
            pattern_word_match(&pat, "__123456789__"),
            Some(MatchResult { start: 2, end: 10 })
        );

        // `?` consumes exactly one character, so "2x" cannot sit between
        // "1" and "3".
        assert_eq!(pattern_word_match(&pat, "__12x3456789__"), None);
    }
}
406