1 use std::str;
2 
3 use find_byte::find_byte;
4 
5 use re_bytes;
6 use re_unicode;
7 
expand_str( caps: &re_unicode::Captures, mut replacement: &str, dst: &mut String, )8 pub fn expand_str(
9     caps: &re_unicode::Captures,
10     mut replacement: &str,
11     dst: &mut String,
12 ) {
13     while !replacement.is_empty() {
14         match find_byte(b'$', replacement.as_bytes()) {
15             None => break,
16             Some(i) => {
17                 dst.push_str(&replacement[..i]);
18                 replacement = &replacement[i..];
19             }
20         }
21         if replacement.as_bytes().get(1).map_or(false, |&b| b == b'$') {
22             dst.push_str("$");
23             replacement = &replacement[2..];
24             continue;
25         }
26         debug_assert!(!replacement.is_empty());
27         let cap_ref = match find_cap_ref(replacement.as_bytes()) {
28             Some(cap_ref) => cap_ref,
29             None => {
30                 dst.push_str("$");
31                 replacement = &replacement[1..];
32                 continue;
33             }
34         };
35         replacement = &replacement[cap_ref.end..];
36         match cap_ref.cap {
37             Ref::Number(i) => {
38                 dst.push_str(caps.get(i).map(|m| m.as_str()).unwrap_or(""));
39             }
40             Ref::Named(name) => {
41                 dst.push_str(
42                     caps.name(name).map(|m| m.as_str()).unwrap_or(""),
43                 );
44             }
45         }
46     }
47     dst.push_str(replacement);
48 }
49 
expand_bytes( caps: &re_bytes::Captures, mut replacement: &[u8], dst: &mut Vec<u8>, )50 pub fn expand_bytes(
51     caps: &re_bytes::Captures,
52     mut replacement: &[u8],
53     dst: &mut Vec<u8>,
54 ) {
55     while !replacement.is_empty() {
56         match find_byte(b'$', replacement) {
57             None => break,
58             Some(i) => {
59                 dst.extend(&replacement[..i]);
60                 replacement = &replacement[i..];
61             }
62         }
63         if replacement.get(1).map_or(false, |&b| b == b'$') {
64             dst.push(b'$');
65             replacement = &replacement[2..];
66             continue;
67         }
68         debug_assert!(!replacement.is_empty());
69         let cap_ref = match find_cap_ref(replacement) {
70             Some(cap_ref) => cap_ref,
71             None => {
72                 dst.push(b'$');
73                 replacement = &replacement[1..];
74                 continue;
75             }
76         };
77         replacement = &replacement[cap_ref.end..];
78         match cap_ref.cap {
79             Ref::Number(i) => {
80                 dst.extend(caps.get(i).map(|m| m.as_bytes()).unwrap_or(b""));
81             }
82             Ref::Named(name) => {
83                 dst.extend(
84                     caps.name(name).map(|m| m.as_bytes()).unwrap_or(b""),
85                 );
86             }
87         }
88     }
89     dst.extend(replacement);
90 }
91 
92 /// `CaptureRef` represents a reference to a capture group inside some text.
93 /// The reference is either a capture group name or a number.
94 ///
95 /// It is also tagged with the position in the text following the
96 /// capture reference.
97 #[derive(Clone, Copy, Debug, Eq, PartialEq)]
98 struct CaptureRef<'a> {
99     cap: Ref<'a>,
100     end: usize,
101 }
102 
/// A reference to a capture group in some text.
///
/// e.g., `$2`, `$foo`, `${foo}`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum Ref<'a> {
    /// A reference by group name; the name borrows from the parsed text.
    Named(&'a str),
    /// A reference by group index.
    Number(usize),
}
111 
112 impl<'a> From<&'a str> for Ref<'a> {
from(x: &'a str) -> Ref<'a>113     fn from(x: &'a str) -> Ref<'a> {
114         Ref::Named(x)
115     }
116 }
117 
118 impl From<usize> for Ref<'static> {
from(x: usize) -> Ref<'static>119     fn from(x: usize) -> Ref<'static> {
120         Ref::Number(x)
121     }
122 }
123 
124 /// Parses a possible reference to a capture group name in the given text,
125 /// starting at the beginning of `replacement`.
126 ///
127 /// If no such valid reference could be found, None is returned.
find_cap_ref(replacement: &[u8]) -> Option<CaptureRef>128 fn find_cap_ref(replacement: &[u8]) -> Option<CaptureRef> {
129     let mut i = 0;
130     let rep: &[u8] = replacement.as_ref();
131     if rep.len() <= 1 || rep[0] != b'$' {
132         return None;
133     }
134     i += 1;
135     if rep[i] == b'{' {
136         return find_cap_ref_braced(rep, i + 1);
137     }
138     let mut cap_end = i;
139     while rep.get(cap_end).map_or(false, is_valid_cap_letter) {
140         cap_end += 1;
141     }
142     if cap_end == i {
143         return None;
144     }
145     // We just verified that the range 0..cap_end is valid ASCII, so it must
146     // therefore be valid UTF-8. If we really cared, we could avoid this UTF-8
147     // check with either unsafe or by parsing the number straight from &[u8].
148     let cap =
149         str::from_utf8(&rep[i..cap_end]).expect("valid UTF-8 capture name");
150     Some(CaptureRef {
151         cap: match cap.parse::<u32>() {
152             Ok(i) => Ref::Number(i as usize),
153             Err(_) => Ref::Named(cap),
154         },
155         end: cap_end,
156     })
157 }
158 
find_cap_ref_braced(rep: &[u8], mut i: usize) -> Option<CaptureRef>159 fn find_cap_ref_braced(rep: &[u8], mut i: usize) -> Option<CaptureRef> {
160     let start = i;
161     while rep.get(i).map_or(false, |&b| b != b'}') {
162         i += 1;
163     }
164     if !rep.get(i).map_or(false, |&b| b == b'}') {
165         return None;
166     }
167     // When looking at braced names, we don't put any restrictions on the name,
168     // so it's possible it could be invalid UTF-8. But a capture group name
169     // can never be invalid UTF-8, so if we have invalid UTF-8, then we can
170     // safely return None.
171     let cap = match str::from_utf8(&rep[start..i]) {
172         Err(_) => return None,
173         Ok(cap) => cap,
174     };
175     Some(CaptureRef {
176         cap: match cap.parse::<u32>() {
177             Ok(i) => Ref::Number(i as usize),
178             Err(_) => Ref::Named(cap),
179         },
180         end: i + 1,
181     })
182 }
183 
/// Returns true if and only if the given byte is allowed in a capture name.
///
/// The allowed bytes are the ASCII letters, the ASCII digits and `_`.
fn is_valid_cap_letter(b: &u8) -> bool {
    b.is_ascii_alphanumeric() || *b == b'_'
}
191 
#[cfg(test)]
mod tests {
    use super::{find_cap_ref, CaptureRef};

    // Generates one `#[test]` per invocation. The two-argument form asserts
    // that `find_cap_ref` rejects the text; the three-argument form asserts
    // that it returns exactly the given `CaptureRef`.
    macro_rules! find {
        ($name:ident, $text:expr) => {
            #[test]
            fn $name() {
                assert_eq!(None, find_cap_ref($text.as_bytes()));
            }
        };
        ($name:ident, $text:expr, $capref:expr) => {
            #[test]
            fn $name() {
                assert_eq!(Some($capref), find_cap_ref($text.as_bytes()));
            }
        };
    }

    // Builds the expected `CaptureRef` from a name (string literal) or an
    // index (integer literal) plus the expected `end` offset.
    macro_rules! c {
        ($name_or_number:expr, $pos:expr) => {
            CaptureRef { cap: $name_or_number.into(), end: $pos }
        };
    }

    find!(find_cap_ref1, "$foo", c!("foo", 4));
    find!(find_cap_ref2, "${foo}", c!("foo", 6));
    find!(find_cap_ref3, "$0", c!(0, 2));
    find!(find_cap_ref4, "$5", c!(5, 2));
    find!(find_cap_ref5, "$10", c!(10, 3));
    // See https://github.com/rust-lang/regex/pull/585
    // for more on characters following numbers
    find!(find_cap_ref6, "$42a", c!("42a", 4));
    find!(find_cap_ref7, "${42}a", c!(42, 5));
    find!(find_cap_ref8, "${42");
    find!(find_cap_ref9, "${42 ");
    find!(find_cap_ref10, " $0 ");
    find!(find_cap_ref11, "$");
    find!(find_cap_ref12, " ");
    find!(find_cap_ref13, "");
    find!(find_cap_ref14, "$1-$2", c!(1, 2));
    find!(find_cap_ref15, "$1_$2", c!("1_", 3));
    find!(find_cap_ref16, "$x-$y", c!("x", 2));
    find!(find_cap_ref17, "$x_$y", c!("x_", 3));
    find!(find_cap_ref18, "${#}", c!("#", 4));
    find!(find_cap_ref19, "${Z[}", c!("Z[", 5));
    // A second `$` is not a capture-name byte, so `$$` is not a reference.
    find!(find_cap_ref20, "$$");
    // Braced names are unrestricted, so even the empty name is accepted.
    find!(find_cap_ref21, "${}", c!("", 3));
}
239