1 use std::str;
2
3 use memchr::memchr;
4
5 use re_bytes;
6 use re_unicode;
7
expand_str( caps: &re_unicode::Captures, mut replacement: &str, dst: &mut String, )8 pub fn expand_str(
9 caps: &re_unicode::Captures,
10 mut replacement: &str,
11 dst: &mut String,
12 ) {
13 while !replacement.is_empty() {
14 match memchr(b'$', replacement.as_bytes()) {
15 None => break,
16 Some(i) => {
17 dst.push_str(&replacement[..i]);
18 replacement = &replacement[i..];
19 }
20 }
21 if replacement.as_bytes().get(1).map_or(false, |&b| b == b'$') {
22 dst.push_str("$");
23 replacement = &replacement[2..];
24 continue;
25 }
26 debug_assert!(!replacement.is_empty());
27 let cap_ref = match find_cap_ref(replacement) {
28 Some(cap_ref) => cap_ref,
29 None => {
30 dst.push_str("$");
31 replacement = &replacement[1..];
32 continue;
33 }
34 };
35 replacement = &replacement[cap_ref.end..];
36 match cap_ref.cap {
37 Ref::Number(i) => {
38 dst.push_str(
39 caps.get(i).map(|m| m.as_str()).unwrap_or(""));
40 }
41 Ref::Named(name) => {
42 dst.push_str(
43 caps.name(name).map(|m| m.as_str()).unwrap_or(""));
44 }
45 }
46 }
47 dst.push_str(replacement);
48 }
49
expand_bytes( caps: &re_bytes::Captures, mut replacement: &[u8], dst: &mut Vec<u8>, )50 pub fn expand_bytes(
51 caps: &re_bytes::Captures,
52 mut replacement: &[u8],
53 dst: &mut Vec<u8>,
54 ) {
55 while !replacement.is_empty() {
56 match memchr(b'$', replacement) {
57 None => break,
58 Some(i) => {
59 dst.extend(&replacement[..i]);
60 replacement = &replacement[i..];
61 }
62 }
63 if replacement.get(1).map_or(false, |&b| b == b'$') {
64 dst.push(b'$');
65 replacement = &replacement[2..];
66 continue;
67 }
68 debug_assert!(!replacement.is_empty());
69 let cap_ref = match find_cap_ref(replacement) {
70 Some(cap_ref) => cap_ref,
71 None => {
72 dst.push(b'$');
73 replacement = &replacement[1..];
74 continue;
75 }
76 };
77 replacement = &replacement[cap_ref.end..];
78 match cap_ref.cap {
79 Ref::Number(i) => {
80 dst.extend(
81 caps.get(i).map(|m| m.as_bytes()).unwrap_or(b""));
82 }
83 Ref::Named(name) => {
84 dst.extend(
85 caps.name(name).map(|m| m.as_bytes()).unwrap_or(b""));
86 }
87 }
88 }
89 dst.extend(replacement);
90 }
91
/// `CaptureRef` represents a reference to a capture group inside some text.
/// The reference is either a capture group name or a number.
///
/// It is also tagged with the position in the text immediately following the
/// capture reference.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
struct CaptureRef<'a> {
    /// Which capture group is being referred to (by name or by number).
    cap: Ref<'a>,
    /// Byte offset just past the end of the reference, including the closing
    /// `}` when the braced form was used.
    end: usize,
}
102
/// Identifies the capture group that a reference in replacement text points
/// at.
///
/// Examples of references: `$2`, `$foo`, `${foo}`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum Ref<'a> {
    /// The group is identified by its name.
    Named(&'a str),
    /// The group is identified by its index.
    Number(usize),
}

/// A string slice converts into a by-name reference.
impl<'a> From<&'a str> for Ref<'a> {
    fn from(name: &'a str) -> Self {
        Ref::Named(name)
    }
}

/// An index converts into a by-number reference.
impl From<usize> for Ref<'static> {
    fn from(index: usize) -> Self {
        Ref::Number(index)
    }
}
123
124 /// Parses a possible reference to a capture group name in the given text,
125 /// starting at the beginning of `replacement`.
126 ///
127 /// If no such valid reference could be found, None is returned.
find_cap_ref<T: ?Sized + AsRef<[u8]>>( replacement: &T, ) -> Option<CaptureRef>128 fn find_cap_ref<T: ?Sized + AsRef<[u8]>>(
129 replacement: &T,
130 ) -> Option<CaptureRef> {
131 let mut i = 0;
132 let rep: &[u8] = replacement.as_ref();
133 if rep.len() <= 1 || rep[0] != b'$' {
134 return None;
135 }
136 let mut brace = false;
137 i += 1;
138 if rep[i] == b'{' {
139 brace = true;
140 i += 1;
141 }
142 let mut cap_end = i;
143 while rep.get(cap_end).map_or(false, is_valid_cap_letter) {
144 cap_end += 1;
145 }
146 if cap_end == i {
147 return None;
148 }
149 // We just verified that the range 0..cap_end is valid ASCII, so it must
150 // therefore be valid UTF-8. If we really cared, we could avoid this UTF-8
151 // check with either unsafe or by parsing the number straight from &[u8].
152 let cap = str::from_utf8(&rep[i..cap_end])
153 .expect("valid UTF-8 capture name");
154 if brace {
155 if !rep.get(cap_end).map_or(false, |&b| b == b'}') {
156 return None;
157 }
158 cap_end += 1;
159 }
160 Some(CaptureRef {
161 cap: match cap.parse::<u32>() {
162 Ok(i) => Ref::Number(i as usize),
163 Err(_) => Ref::Named(cap),
164 },
165 end: cap_end,
166 })
167 }
168
/// Returns true if and only if the given byte is allowed in a capture name.
///
/// The allowed bytes are the ASCII digits, the ASCII letters and `_`.
///
/// The parameter stays a reference so that this function can be passed
/// directly to combinators that yield `&u8`, such as `Option::map_or` on the
/// result of `slice::get`.
fn is_valid_cap_letter(b: &u8) -> bool {
    b.is_ascii_alphanumeric() || *b == b'_'
}
176
// Unit tests for `find_cap_ref`.
#[cfg(test)]
mod tests {
    use super::{CaptureRef, find_cap_ref};

    // Generates one `#[test]` function named `$name`.
    //
    // With two arguments, the test asserts that `find_cap_ref($text)` returns
    // `None`. With three arguments, it asserts that the result is
    // `Some($capref)`.
    macro_rules! find {
        ($name:ident, $text:expr) => {
            #[test]
            fn $name() {
                assert_eq!(None, find_cap_ref($text));
            }
        };
        ($name:ident, $text:expr, $capref:expr) => {
            #[test]
            fn $name() {
                assert_eq!(Some($capref), find_cap_ref($text));
            }
        };
    }

    // Builds the expected `CaptureRef`. The first argument is converted with
    // `.into()`, so a string literal yields a named reference and an integer
    // literal yields a numbered one. The second argument is the expected
    // `end` offset.
    macro_rules! c {
        ($name_or_number:expr, $pos:expr) => {
            CaptureRef { cap: $name_or_number.into(), end: $pos }
        };
    }

    // Valid references, bare and braced.
    find!(find_cap_ref1, "$foo", c!("foo", 4));
    find!(find_cap_ref2, "${foo}", c!("foo", 6));
    find!(find_cap_ref3, "$0", c!(0, 2));
    find!(find_cap_ref4, "$5", c!(5, 2));
    find!(find_cap_ref5, "$10", c!(10, 3));
    // `42a` does not parse as a number, so the whole run is taken as a name.
    find!(find_cap_ref6, "$42a", c!("42a", 4));
    // Braces delimit the reference, so the trailing `a` is not part of it.
    find!(find_cap_ref7, "${42}a", c!(42, 5));
    // An opening brace without a matching `}` is not a reference.
    find!(find_cap_ref8, "${42");
    find!(find_cap_ref9, "${42 ");
    // The reference must start at the very beginning of the text.
    find!(find_cap_ref10, " $0 ");
    // Degenerate inputs.
    find!(find_cap_ref11, "$");
    find!(find_cap_ref12, " ");
    find!(find_cap_ref13, "");
}
216