1 use super::{Capturing, FlatToken, ForceCollect, Parser, ReplaceRange, TokenCursor, TrailingToken};
2 use rustc_ast::token::{self, DelimToken, Token, TokenKind};
3 use rustc_ast::tokenstream::{AttrAnnotatedTokenStream, AttributesData, CreateTokenStream};
4 use rustc_ast::tokenstream::{AttrAnnotatedTokenTree, DelimSpan, LazyTokenStream, Spacing};
5 use rustc_ast::{self as ast};
6 use rustc_ast::{AstLike, AttrVec, Attribute};
7 use rustc_errors::PResult;
8 use rustc_span::{sym, Span, DUMMY_SP};
9
10 use std::convert::TryInto;
11 use std::ops::Range;
12
/// A wrapper type to ensure that the parser handles outer attributes correctly.
/// When we parse outer attributes, we need to ensure that we capture tokens
/// for the attribute target. This allows us to perform cfg-expansion on
/// a token stream before we invoke a derive proc-macro.
///
/// This wrapper prevents direct access to the underlying `Vec<ast::Attribute>`.
/// Parsing code can only get access to the underlying attributes
/// by passing an `AttrWrapper` to `collect_tokens_trailing_token`.
/// This makes it difficult to accidentally construct an AST node
/// (which stores a `Vec<ast::Attribute>`) without first collecting tokens.
///
/// This struct has its own module, to ensure that the parser code
/// cannot directly access the `attrs` field.
#[derive(Debug, Clone)]
pub struct AttrWrapper {
    // The wrapped outer attributes. Deliberately private: see the type-level docs.
    attrs: AttrVec,
    // The start of the outer attributes in the token cursor.
    // This allows us to create a `ReplaceRange` for the entire attribute
    // target, including outer attributes.
    // `AttrWrapper::empty` stores `usize::MAX` here; the value is only read
    // by `collect_tokens_trailing_token` when `attrs` is non-empty.
    start_pos: usize,
}
34
35 // This struct is passed around very frequently,
36 // so make sure it doesn't accidentally get larger
37 #[cfg(target_arch = "x86_64")]
38 rustc_data_structures::static_assert_size!(AttrWrapper, 16);
39
40 impl AttrWrapper {
new(attrs: AttrVec, start_pos: usize) -> AttrWrapper41 pub(super) fn new(attrs: AttrVec, start_pos: usize) -> AttrWrapper {
42 AttrWrapper { attrs, start_pos }
43 }
empty() -> AttrWrapper44 pub fn empty() -> AttrWrapper {
45 AttrWrapper { attrs: AttrVec::new(), start_pos: usize::MAX }
46 }
47 // FIXME: Delay span bug here?
take_for_recovery(self) -> AttrVec48 pub(crate) fn take_for_recovery(self) -> AttrVec {
49 self.attrs
50 }
51
52 // FIXME: require passing an NT to prevent misuse of this method
prepend_to_nt_inner(self, attrs: &mut Vec<Attribute>)53 pub(crate) fn prepend_to_nt_inner(self, attrs: &mut Vec<Attribute>) {
54 let mut self_attrs: Vec<_> = self.attrs.into();
55 std::mem::swap(attrs, &mut self_attrs);
56 attrs.extend(self_attrs);
57 }
58
is_empty(&self) -> bool59 pub fn is_empty(&self) -> bool {
60 self.attrs.is_empty()
61 }
62
maybe_needs_tokens(&self) -> bool63 pub fn maybe_needs_tokens(&self) -> bool {
64 crate::parser::attr::maybe_needs_tokens(&self.attrs)
65 }
66 }
67
68 /// Returns `true` if `attrs` contains a `cfg` or `cfg_attr` attribute
has_cfg_or_cfg_attr(attrs: &[Attribute]) -> bool69 fn has_cfg_or_cfg_attr(attrs: &[Attribute]) -> bool {
70 // NOTE: Builtin attributes like `cfg` and `cfg_attr` cannot be renamed via imports.
71 // Therefore, the absence of a literal `cfg` or `cfg_attr` guarantees that
72 // we don't need to do any eager expansion.
73 attrs.iter().any(|attr| {
74 attr.ident().map_or(false, |ident| ident.name == sym::cfg || ident.name == sym::cfg_attr)
75 })
76 }
77
// Produces a `TokenStream` on-demand. Using `cursor_snapshot`
// and `num_calls`, we can reconstruct the `TokenStream` seen
// by the callback. This allows us to avoid producing a `TokenStream`
// if it is never needed - for example, a captured `macro_rules!`
// argument that is never passed to a proc macro.
// In practice token stream creation happens rarely compared to
// calls to `collect_tokens` (see some statistics in #78736),
// so we are doing as little up-front work as possible.
//
// This also makes `Parser` very cheap to clone, since
// there is no intermediate collection buffer to clone.
#[derive(Clone)]
struct LazyTokenStreamImpl {
    // The parser's current token (and its spacing) at the moment capturing began.
    // It is always the first token of the reconstructed stream.
    start_token: (Token, Spacing),
    // A clone of the token cursor taken when capturing began. It is cloned again
    // and advanced each time the stream is materialized.
    cursor_snapshot: TokenCursor,
    // Total number of tokens in the reconstructed stream, counting `start_token`.
    num_calls: usize,
    // Forwarded to `make_token_stream`: when set, only the first half of the
    // final (two-character) token is kept in the resulting stream.
    break_last_token: bool,
    // Replacements to apply to the flattened tokens before building the stream.
    // Range positions are relative to the start of this capture.
    replace_ranges: Box<[ReplaceRange]>,
}

// Guard against this struct accidentally growing on 64-bit x86_64.
#[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))]
rustc_data_structures::static_assert_size!(LazyTokenStreamImpl, 144);
100
101 impl CreateTokenStream for LazyTokenStreamImpl {
create_token_stream(&self) -> AttrAnnotatedTokenStream102 fn create_token_stream(&self) -> AttrAnnotatedTokenStream {
103 // The token produced by the final call to `next` or `next_desugared`
104 // was not actually consumed by the callback. The combination
105 // of chaining the initial token and using `take` produces the desired
106 // result - we produce an empty `TokenStream` if no calls were made,
107 // and omit the final token otherwise.
108 let mut cursor_snapshot = self.cursor_snapshot.clone();
109 let tokens =
110 std::iter::once((FlatToken::Token(self.start_token.0.clone()), self.start_token.1))
111 .chain((0..self.num_calls).map(|_| {
112 let token = if cursor_snapshot.desugar_doc_comments {
113 cursor_snapshot.next_desugared()
114 } else {
115 cursor_snapshot.next()
116 };
117 (FlatToken::Token(token.0), token.1)
118 }))
119 .take(self.num_calls);
120
121 if !self.replace_ranges.is_empty() {
122 let mut tokens: Vec<_> = tokens.collect();
123 let mut replace_ranges = self.replace_ranges.clone();
124 replace_ranges.sort_by_key(|(range, _)| range.start);
125
126 #[cfg(debug_assertions)]
127 {
128 for [(range, tokens), (next_range, next_tokens)] in replace_ranges.array_windows() {
129 assert!(
130 range.end <= next_range.start || range.end >= next_range.end,
131 "Replace ranges should either be disjoint or nested: ({:?}, {:?}) ({:?}, {:?})",
132 range,
133 tokens,
134 next_range,
135 next_tokens,
136 );
137 }
138 }
139
140 // Process the replace ranges, starting from the highest start
141 // position and working our way back. If have tokens like:
142 //
143 // `#[cfg(FALSE)]` struct Foo { #[cfg(FALSE)] field: bool }`
144 //
145 // Then we will generate replace ranges for both
146 // the `#[cfg(FALSE)] field: bool` and the entire
147 // `#[cfg(FALSE)]` struct Foo { #[cfg(FALSE)] field: bool }`
148 //
149 // By starting processing from the replace range with the greatest
150 // start position, we ensure that any replace range which encloses
151 // another replace range will capture the *replaced* tokens for the inner
152 // range, not the original tokens.
153 for (range, new_tokens) in replace_ranges.iter().rev() {
154 assert!(!range.is_empty(), "Cannot replace an empty range: {:?}", range);
155 // Replace ranges are only allowed to decrease the number of tokens.
156 assert!(
157 range.len() >= new_tokens.len(),
158 "Range {:?} has greater len than {:?}",
159 range,
160 new_tokens
161 );
162
163 // Replace any removed tokens with `FlatToken::Empty`.
164 // This keeps the total length of `tokens` constant throughout the
165 // replacement process, allowing us to use all of the `ReplaceRanges` entries
166 // without adjusting indices.
167 let filler = std::iter::repeat((FlatToken::Empty, Spacing::Alone))
168 .take(range.len() - new_tokens.len());
169
170 tokens.splice(
171 (range.start as usize)..(range.end as usize),
172 new_tokens.clone().into_iter().chain(filler),
173 );
174 }
175 make_token_stream(tokens.into_iter(), self.break_last_token)
176 } else {
177 make_token_stream(tokens, self.break_last_token)
178 }
179 }
180 }
181
impl<'a> Parser<'a> {
    /// Records all tokens consumed by the provided callback,
    /// including the current token. These tokens are collected
    /// into a `LazyTokenStream`, and returned along with the result
    /// of the callback.
    ///
    /// `attrs` holds the already-parsed outer attributes; they are unwrapped
    /// and handed to `f` as a `Vec<ast::Attribute>`. Alongside the parsed node,
    /// `f` returns a `TrailingToken` saying whether the parser's current token
    /// (a `;`, or a `,` if present) should be included in the captured range.
    ///
    /// If `f` returns an error, the previous capturing state is restored and
    /// the error is propagated.
    ///
    /// Note: If your callback consumes an opening delimiter
    /// (including the case where you call `collect_tokens`
    /// when the current token is an opening delimiter),
    /// you must also consume the corresponding closing delimiter.
    ///
    /// That is, you can consume
    /// `something ([{ }])` or `([{}])`, but not `([{}]`
    ///
    /// This restriction shouldn't be an issue in practice,
    /// since this function is used to record the tokens for
    /// a parsed AST item, which always has matching delimiters.
    pub fn collect_tokens_trailing_token<R: AstLike>(
        &mut self,
        attrs: AttrWrapper,
        force_collect: ForceCollect,
        f: impl FnOnce(&mut Self, Vec<ast::Attribute>) -> PResult<'a, (R, TrailingToken)>,
    ) -> PResult<'a, R> {
        // We only bail out when nothing could possibly observe the collected tokens:
        // 1. We cannot be force collecting tokens (since force-collecting requires tokens
        //    by definition)
        if matches!(force_collect, ForceCollect::No)
            // None of our outer attributes can require tokens (e.g. a proc-macro)
            && !attrs.maybe_needs_tokens()
            // If our target supports custom inner attributes, then we cannot bail
            // out early, since we may need to capture tokens for a custom inner attribute
            // invocation.
            && !R::SUPPORTS_CUSTOM_INNER_ATTRS
            // Never bail out early in `capture_cfg` mode, since there might be `#[cfg]`
            // or `#[cfg_attr]` attributes.
            && !self.capture_cfg
        {
            return Ok(f(self, attrs.attrs.into())?.0);
        }

        // Remember where capturing starts: the current token and a copy of the cursor.
        let start_token = (self.token.clone(), self.token_spacing);
        let cursor_snapshot = self.token_cursor.clone();

        let has_outer_attrs = !attrs.attrs.is_empty();
        // Mark that we are capturing while `f` runs, remembering the previous state
        // so that it can be restored afterwards (this function can be re-entered from `f`).
        let prev_capturing = std::mem::replace(&mut self.capture_state.capturing, Capturing::Yes);
        let replace_ranges_start = self.capture_state.replace_ranges.len();

        let ret = f(self, attrs.attrs.into());

        // Restore the capturing state *before* propagating any error from `f`.
        self.capture_state.capturing = prev_capturing;

        let (mut ret, trailing) = ret?;

        // When we're not in `capture-cfg` mode, then bail out early if:
        // 1. Our target doesn't support tokens at all (e.g we're parsing an `NtIdent`)
        //    so there's nothing for us to do.
        // 2. Our target already has tokens set (e.g. we've parsed something
        //    like `#[my_attr] $item`). The actual parsing code takes care of prepending
        //    any attributes to the nonterminal, so we don't need to modify the
        //    already captured tokens.
        // Note that this check is independent of `force_collect` - if we already
        // have tokens, or can't even store them, then there's never a need to
        // force collection of new tokens.
        if !self.capture_cfg && matches!(ret.tokens_mut(), None | Some(Some(_))) {
            return Ok(ret);
        }

        // This is very similar to the bail out check at the start of this function.
        // Now that we've parsed an AST node, we have more information available.
        if matches!(force_collect, ForceCollect::No)
            // We now have inner attributes available, so this check is more precise
            // than `attrs.maybe_needs_tokens()` at the start of the function.
            // As a result, we don't need to check `R::SUPPORTS_CUSTOM_INNER_ATTRS`
            && !crate::parser::attr::maybe_needs_tokens(ret.attrs())
            // Subtle: We call `has_cfg_or_cfg_attr` with the attrs from `ret`.
            // This ensures that we consider inner attributes (e.g. `#![cfg]`),
            // which require us to have tokens available.
            // The check at the beginning of this function is coarser: there we
            // never bail out at all when `capture_cfg` is set, because the inner
            // attributes are not known yet.
            // We only care about `#[cfg]` or `#[cfg_attr]` in `capture_cfg`
            // mode - during normal parsing, we don't need any special capturing
            // for those attributes, since they're builtin.
            && !(self.capture_cfg && has_cfg_or_cfg_attr(ret.attrs()))
        {
            return Ok(ret);
        }

        let mut inner_attr_replace_ranges = Vec::new();
        // Take the captured ranges for any inner attributes that we parsed.
        for inner_attr in ret.attrs().iter().filter(|a| a.style == ast::AttrStyle::Inner) {
            if let Some(attr_range) = self.capture_state.inner_attr_ranges.remove(&inner_attr.id) {
                inner_attr_replace_ranges.push(attr_range);
            } else {
                self.sess
                    .span_diagnostic
                    .delay_span_bug(inner_attr.span, "Missing token range for attribute");
            }
        }

        let replace_ranges_end = self.capture_state.replace_ranges.len();

        // Cursor positions (counted in `next` calls) bounding the captured tokens.
        let cursor_snapshot_next_calls = cursor_snapshot.num_next_calls;
        let mut end_pos = self.token_cursor.num_next_calls;

        // Capture a trailing token if requested by the callback 'f'
        match trailing {
            TrailingToken::None => {}
            TrailingToken::Semi => {
                assert_eq!(self.token.kind, token::Semi);
                end_pos += 1;
            }
            TrailingToken::MaybeComma => {
                if self.token.kind == token::Comma {
                    end_pos += 1;
                }
            }
        }

        // If we 'broke' the last token (e.g. breaking a '>>' token to two '>' tokens),
        // then extend the range of captured tokens to include it, since the parser
        // was not actually bumped past it. When the `LazyTokenStream` gets converted
        // into a `AttrAnnotatedTokenStream`, we will create the proper token.
        if self.token_cursor.break_last_token {
            assert_eq!(
                trailing,
                TrailingToken::None,
                "Cannot set `break_last_token` and have trailing token"
            );
            end_pos += 1;
        }

        let num_calls = end_pos - cursor_snapshot_next_calls;

        // If we have no attributes (and are not in `capture_cfg` mode),
        // then we will never need to use any replace ranges.
        let replace_ranges: Box<[ReplaceRange]> = if ret.attrs().is_empty() && !self.capture_cfg {
            Box::new([])
        } else {
            // Grab any replace ranges that occur *inside* the current AST node.
            // We will perform the actual replacement when we convert the `LazyTokenStream`
            // to a `AttrAnnotatedTokenStream`
            let start_calls: u32 = cursor_snapshot_next_calls.try_into().unwrap();
            self.capture_state.replace_ranges[replace_ranges_start..replace_ranges_end]
                .iter()
                .cloned()
                .chain(inner_attr_replace_ranges.clone().into_iter())
                // Rebase each range so that it is relative to the start of this capture.
                .map(|(range, tokens)| {
                    ((range.start - start_calls)..(range.end - start_calls), tokens)
                })
                .collect()
        };

        let tokens = LazyTokenStream::new(LazyTokenStreamImpl {
            start_token,
            num_calls,
            cursor_snapshot,
            break_last_token: self.token_cursor.break_last_token,
            replace_ranges,
        });

        // If we support tokens at all
        if let Some(target_tokens) = ret.tokens_mut() {
            if let Some(target_tokens) = target_tokens {
                // Pre-existing tokens are only tolerated outside of `capture_cfg` mode.
                assert!(
                    !self.capture_cfg,
                    "Encountered existing tokens with capture_cfg set: {:?}",
                    target_tokens
                );
            } else {
                // Store our newly captured tokens into the AST node
                *target_tokens = Some(tokens.clone());
            };
        }

        let final_attrs = ret.attrs();

        // If `capture_cfg` is set and we're inside a recursive call to
        // `collect_tokens_trailing_token`, then we need to register a replace range
        // if we have `#[cfg]` or `#[cfg_attr]`. This allows us to run eager cfg-expansion
        // on the captured token stream.
        if self.capture_cfg
            && matches!(self.capture_state.capturing, Capturing::Yes)
            && has_cfg_or_cfg_attr(&final_attrs)
        {
            let attr_data = AttributesData { attrs: final_attrs.to_vec().into(), tokens };

            // Replace the entire AST node that we just parsed, including attributes,
            // with a `FlatToken::AttrTarget`. If this AST node is inside an item
            // that has `#[derive]`, then this will allow us to cfg-expand this
            // AST node.
            // With outer attributes, the range starts at the first attribute rather
            // than at the node itself.
            let start_pos =
                if has_outer_attrs { attrs.start_pos } else { cursor_snapshot_next_calls };
            let new_tokens = vec![(FlatToken::AttrTarget(attr_data), Spacing::Alone)];

            assert!(
                !self.token_cursor.break_last_token,
                "Should not have unglued last token with cfg attr"
            );
            let range: Range<u32> = (start_pos.try_into().unwrap())..(end_pos.try_into().unwrap());
            self.capture_state.replace_ranges.push((range, new_tokens));
            self.capture_state.replace_ranges.extend(inner_attr_replace_ranges);
        }

        // Only clear our `replace_ranges` when we're finished capturing entirely.
        if matches!(self.capture_state.capturing, Capturing::No) {
            self.capture_state.replace_ranges.clear();
            // We don't clear `inner_attr_ranges`, as doing so repeatedly
            // had a measurable performance impact. Most inner attributes that
            // we insert will get removed - when we drop the parser, we'll free
            // up the memory used by any attributes that we didn't remove from the map.
        }
        Ok(ret)
    }
}
397
398 /// Converts a flattened iterator of tokens (including open and close delimiter tokens)
399 /// into a `TokenStream`, creating a `TokenTree::Delimited` for each matching pair
400 /// of open and close delims.
401 // FIXME(#67062): Currently, we don't parse `None`-delimited groups correctly,
402 // which can cause us to end up with mismatched `None` delimiters in our
403 // captured tokens. This function contains several hacks to work around this -
404 // essentially, we throw away mismatched `None` delimiters when we encounter them.
405 // Once we properly parse `None` delimiters, they can be captured just like any
406 // other tokens, and these hacks can be removed.
make_token_stream( mut iter: impl Iterator<Item = (FlatToken, Spacing)>, break_last_token: bool, ) -> AttrAnnotatedTokenStream407 fn make_token_stream(
408 mut iter: impl Iterator<Item = (FlatToken, Spacing)>,
409 break_last_token: bool,
410 ) -> AttrAnnotatedTokenStream {
411 #[derive(Debug)]
412 struct FrameData {
413 open: Span,
414 open_delim: DelimToken,
415 inner: Vec<(AttrAnnotatedTokenTree, Spacing)>,
416 }
417 let mut stack =
418 vec![FrameData { open: DUMMY_SP, open_delim: DelimToken::NoDelim, inner: vec![] }];
419 let mut token_and_spacing = iter.next();
420 while let Some((token, spacing)) = token_and_spacing {
421 match token {
422 FlatToken::Token(Token { kind: TokenKind::OpenDelim(delim), span }) => {
423 stack.push(FrameData { open: span, open_delim: delim, inner: vec![] });
424 }
425 FlatToken::Token(Token { kind: TokenKind::CloseDelim(delim), span }) => {
426 // HACK: If we enconter a mismatched `None` delimiter at the top
427 // level, just ignore it.
428 if matches!(delim, DelimToken::NoDelim)
429 && (stack.len() == 1
430 || !matches!(stack.last_mut().unwrap().open_delim, DelimToken::NoDelim))
431 {
432 token_and_spacing = iter.next();
433 continue;
434 }
435 let frame_data = stack
436 .pop()
437 .unwrap_or_else(|| panic!("Token stack was empty for token: {:?}", token));
438
439 // HACK: If our current frame has a mismatched opening `None` delimiter,
440 // merge our current frame with the one above it. That is, transform
441 // `[ { < first second } third ]` into `[ { first second } third ]`
442 if !matches!(delim, DelimToken::NoDelim)
443 && matches!(frame_data.open_delim, DelimToken::NoDelim)
444 {
445 stack.last_mut().unwrap().inner.extend(frame_data.inner);
446 // Process our closing delimiter again, this time at the previous
447 // frame in the stack
448 token_and_spacing = Some((token, spacing));
449 continue;
450 }
451
452 assert_eq!(
453 frame_data.open_delim, delim,
454 "Mismatched open/close delims: open={:?} close={:?}",
455 frame_data.open, span
456 );
457 let dspan = DelimSpan::from_pair(frame_data.open, span);
458 let stream = AttrAnnotatedTokenStream::new(frame_data.inner);
459 let delimited = AttrAnnotatedTokenTree::Delimited(dspan, delim, stream);
460 stack
461 .last_mut()
462 .unwrap_or_else(|| {
463 panic!("Bottom token frame is missing for token: {:?}", token)
464 })
465 .inner
466 .push((delimited, Spacing::Alone));
467 }
468 FlatToken::Token(token) => stack
469 .last_mut()
470 .expect("Bottom token frame is missing!")
471 .inner
472 .push((AttrAnnotatedTokenTree::Token(token), spacing)),
473 FlatToken::AttrTarget(data) => stack
474 .last_mut()
475 .expect("Bottom token frame is missing!")
476 .inner
477 .push((AttrAnnotatedTokenTree::Attributes(data), spacing)),
478 FlatToken::Empty => {}
479 }
480 token_and_spacing = iter.next();
481 }
482 // HACK: If we don't have a closing `None` delimiter for our last
483 // frame, merge the frame with the top-level frame. That is,
484 // turn `< first second` into `first second`
485 if stack.len() == 2 && stack[1].open_delim == DelimToken::NoDelim {
486 let temp_buf = stack.pop().unwrap();
487 stack.last_mut().unwrap().inner.extend(temp_buf.inner);
488 }
489 let mut final_buf = stack.pop().expect("Missing final buf!");
490 if break_last_token {
491 let (last_token, spacing) = final_buf.inner.pop().unwrap();
492 if let AttrAnnotatedTokenTree::Token(last_token) = last_token {
493 let unglued_first = last_token.kind.break_two_token_op().unwrap().0;
494
495 // A 'unglued' token is always two ASCII characters
496 let mut first_span = last_token.span.shrink_to_lo();
497 first_span = first_span.with_hi(first_span.lo() + rustc_span::BytePos(1));
498
499 final_buf.inner.push((
500 AttrAnnotatedTokenTree::Token(Token::new(unglued_first, first_span)),
501 spacing,
502 ));
503 } else {
504 panic!("Unexpected last token {:?}", last_token)
505 }
506 }
507 assert!(stack.is_empty(), "Stack should be empty: final_buf={:?} stack={:?}", final_buf, stack);
508 AttrAnnotatedTokenStream::new(final_buf.inner)
509 }
510