1 // Formatting and tools for comments.
2
3 use std::{self, borrow::Cow, iter};
4
5 use itertools::{multipeek, MultiPeek};
6 use rustc_span::Span;
7
8 use crate::config::Config;
9 use crate::rewrite::RewriteContext;
10 use crate::shape::{Indent, Shape};
11 use crate::string::{rewrite_string, StringFormat};
12 use crate::utils::{
13 count_newlines, first_line_width, last_line_width, trim_left_preserve_layout,
14 trimmed_last_line_width, unicode_str_width,
15 };
16 use crate::{ErrorKind, FormattingError};
17
/// Returns `true` if `comment` is a `//` line comment whose third character is neither
/// alphanumeric nor whitespace (for example `//@` or `//#`).
///
/// A bare `//` (nothing after the two slashes) is not considered custom.
fn is_custom_comment(comment: &str) -> bool {
    match comment.strip_prefix("//") {
        // The character right after the two slashes decides whether the opener is custom.
        Some(rest) => rest.chars().next().map_or(false, |marker| {
            !(marker.is_alphanumeric() || marker.is_whitespace())
        }),
        None => false,
    }
}
27
/// The comment flavors distinguished by the formatter.
///
/// The exact delimiters emitted for each flavor are defined by the `opener`, `closer` and
/// `line_start` methods.
#[derive(Copy, Clone, PartialEq, Eq)]
pub(crate) enum CommentStyle<'a> {
    /// Line comment opened with `//`.
    DoubleSlash,
    /// Line comment opened with `///`.
    TripleSlash,
    /// Line comment opened with `//!`.
    Doc,
    /// Block comment opened with `/*`.
    SingleBullet,
    /// Block comment opened with `/**`.
    DoubleBullet,
    /// Block comment opened with `/*!`.
    Exclamation,
    /// Line comment with a custom opener; the payload is the opener string itself.
    Custom(&'a str),
}
38
/// Extracts the opener of a custom comment: the first line of `s` up to and including its
/// first space, or the whole first line when it contains no space.
///
/// Returns `""` when `s` has no lines.
fn custom_opener(s: &str) -> &str {
    let first_line = match s.lines().next() {
        Some(line) => line,
        None => return "",
    };
    match first_line.find(' ') {
        Some(space_index) => &first_line[..=space_index],
        None => first_line,
    }
}
46
47 impl<'a> CommentStyle<'a> {
48 /// Returns `true` if the commenting style covers a line only.
is_line_comment(&self) -> bool49 pub(crate) fn is_line_comment(&self) -> bool {
50 match *self {
51 CommentStyle::DoubleSlash
52 | CommentStyle::TripleSlash
53 | CommentStyle::Doc
54 | CommentStyle::Custom(_) => true,
55 _ => false,
56 }
57 }
58
59 /// Returns `true` if the commenting style can span over multiple lines.
is_block_comment(&self) -> bool60 pub(crate) fn is_block_comment(&self) -> bool {
61 match *self {
62 CommentStyle::SingleBullet | CommentStyle::DoubleBullet | CommentStyle::Exclamation => {
63 true
64 }
65 _ => false,
66 }
67 }
68
69 /// Returns `true` if the commenting style is for documentation.
is_doc_comment(&self) -> bool70 pub(crate) fn is_doc_comment(&self) -> bool {
71 matches!(*self, CommentStyle::TripleSlash | CommentStyle::Doc)
72 }
73
opener(&self) -> &'a str74 pub(crate) fn opener(&self) -> &'a str {
75 match *self {
76 CommentStyle::DoubleSlash => "// ",
77 CommentStyle::TripleSlash => "/// ",
78 CommentStyle::Doc => "//! ",
79 CommentStyle::SingleBullet => "/* ",
80 CommentStyle::DoubleBullet => "/** ",
81 CommentStyle::Exclamation => "/*! ",
82 CommentStyle::Custom(opener) => opener,
83 }
84 }
85
closer(&self) -> &'a str86 pub(crate) fn closer(&self) -> &'a str {
87 match *self {
88 CommentStyle::DoubleSlash
89 | CommentStyle::TripleSlash
90 | CommentStyle::Custom(..)
91 | CommentStyle::Doc => "",
92 CommentStyle::SingleBullet | CommentStyle::DoubleBullet | CommentStyle::Exclamation => {
93 " */"
94 }
95 }
96 }
97
line_start(&self) -> &'a str98 pub(crate) fn line_start(&self) -> &'a str {
99 match *self {
100 CommentStyle::DoubleSlash => "// ",
101 CommentStyle::TripleSlash => "/// ",
102 CommentStyle::Doc => "//! ",
103 CommentStyle::SingleBullet | CommentStyle::DoubleBullet | CommentStyle::Exclamation => {
104 " * "
105 }
106 CommentStyle::Custom(opener) => opener,
107 }
108 }
109
to_str_tuplet(&self) -> (&'a str, &'a str, &'a str)110 pub(crate) fn to_str_tuplet(&self) -> (&'a str, &'a str, &'a str) {
111 (self.opener(), self.closer(), self.line_start())
112 }
113 }
114
comment_style(orig: &str, normalize_comments: bool) -> CommentStyle<'_>115 pub(crate) fn comment_style(orig: &str, normalize_comments: bool) -> CommentStyle<'_> {
116 if !normalize_comments {
117 if orig.starts_with("/**") && !orig.starts_with("/**/") {
118 CommentStyle::DoubleBullet
119 } else if orig.starts_with("/*!") {
120 CommentStyle::Exclamation
121 } else if orig.starts_with("/*") {
122 CommentStyle::SingleBullet
123 } else if orig.starts_with("///") && orig.chars().nth(3).map_or(true, |c| c != '/') {
124 CommentStyle::TripleSlash
125 } else if orig.starts_with("//!") {
126 CommentStyle::Doc
127 } else if is_custom_comment(orig) {
128 CommentStyle::Custom(custom_opener(orig))
129 } else {
130 CommentStyle::DoubleSlash
131 }
132 } else if (orig.starts_with("///") && orig.chars().nth(3).map_or(true, |c| c != '/'))
133 || (orig.starts_with("/**") && !orig.starts_with("/**/"))
134 {
135 CommentStyle::TripleSlash
136 } else if orig.starts_with("//!") || orig.starts_with("/*!") {
137 CommentStyle::Doc
138 } else if is_custom_comment(orig) {
139 CommentStyle::Custom(custom_opener(orig))
140 } else {
141 CommentStyle::DoubleSlash
142 }
143 }
144
/// Returns `true` if `s`, ignoring trailing whitespace, ends with the block-comment closer `*/`.
pub(crate) fn is_last_comment_block(s: &str) -> bool {
    let without_trailing_ws = s.trim_end();
    without_trailing_ws.strip_suffix("*/").is_some()
}
149
/// Combine `prev_str` and `next_str` into a single `String`. `span` may contain
/// comments between two strings. If there are such comments, then that will be
/// recovered. If `allow_extend` is true and there is no comment between the two
/// strings, then they will be put on a single line as long as doing so does not
/// exceed max width.
///
/// Returns `None` when the comment found in `span` cannot be rewritten.
pub(crate) fn combine_strs_with_missing_comments(
    context: &RewriteContext<'_>,
    prev_str: &str,
    next_str: &str,
    span: Span,
    shape: Shape,
    allow_extend: bool,
) -> Option<String> {
    trace!(
        "combine_strs_with_missing_comments `{}` `{}` {:?} {:?}",
        prev_str,
        next_str,
        span,
        shape
    );

    let mut result =
        String::with_capacity(prev_str.len() + next_str.len() + shape.indent.width() + 128);
    result.push_str(prev_str);
    // Joining on one line is only considered when neither side is itself multi-line.
    let mut allow_one_line = !prev_str.contains('\n') && !next_str.contains('\n');
    // No separator is needed when either side is empty or `prev_str` ends with a blank line.
    let first_sep =
        if prev_str.is_empty() || next_str.is_empty() || trimmed_last_line_width(prev_str) == 0 {
            ""
        } else {
            " "
        };
    // Width of `prev_str`'s last line + separator + `next_str`'s first line.
    let mut one_line_width =
        last_line_width(prev_str) + first_line_width(next_str) + first_sep.len();

    let config = context.config;
    let indent = shape.indent;
    let missing_comment = rewrite_missing_comment(span, shape, context)?;

    // Fast path: nothing to recover between the two strings.
    if missing_comment.is_empty() {
        if allow_extend && one_line_width <= shape.width {
            result.push_str(first_sep);
        } else if !prev_str.is_empty() {
            result.push_str(&indent.to_string_with_newline(config))
        }
        result.push_str(next_str);
        return Some(result);
    }

    // We have a missing comment between the first expression and the second expression.

    // Peek the original source code and find out whether there is a newline between the first
    // expression and the second expression or the missing comment. We will preserve the original
    // layout whenever possible.
    let original_snippet = context.snippet(span);
    let prefer_same_line = if let Some(pos) = original_snippet.find('/') {
        !original_snippet[..pos].contains('\n')
    } else {
        !original_snippet.contains('\n')
    };

    // The separator is recomputed below now that a comment sits between the two strings.
    one_line_width -= first_sep.len();
    let first_sep = if prev_str.is_empty() || missing_comment.is_empty() {
        Cow::from("")
    } else {
        // Shadows the outer width on purpose: only `prev_str` + " " + comment matters here.
        let one_line_width = last_line_width(prev_str) + first_line_width(&missing_comment) + 1;
        if prefer_same_line && one_line_width <= shape.width {
            Cow::from(" ")
        } else {
            indent.to_string_with_newline(config)
        }
    };
    result.push_str(&first_sep);
    result.push_str(&missing_comment);

    let second_sep = if missing_comment.is_empty() || next_str.is_empty() {
        Cow::from("")
    } else if missing_comment.starts_with("//") {
        // A line comment runs to the end of the line, so `next_str` must go on a new line.
        indent.to_string_with_newline(config)
    } else {
        // Block comment: account for its width and both separators before deciding.
        one_line_width += missing_comment.len() + first_sep.len() + 1;
        allow_one_line &= !missing_comment.starts_with("//") && !missing_comment.contains('\n');
        if prefer_same_line && allow_one_line && one_line_width <= shape.width {
            Cow::from(" ")
        } else {
            indent.to_string_with_newline(config)
        }
    };
    result.push_str(&second_sep);
    result.push_str(next_str);

    Some(result)
}
242
/// Rewrites `orig` as a doc comment.
///
/// Delegates to `identify_comment` with `block_style` set to `false` and `is_doc_comment` set
/// to `true`.
pub(crate) fn rewrite_doc_comment(orig: &str, shape: Shape, config: &Config) -> Option<String> {
    identify_comment(orig, false, shape, config, true)
}
246
/// Rewrites the comment(s) in `orig` so that they fit in `shape`.
///
/// Delegates to `identify_comment` with `is_doc_comment` set to `false`; `block_style` is
/// forwarded unchanged.
pub(crate) fn rewrite_comment(
    orig: &str,
    block_style: bool,
    shape: Shape,
    config: &Config,
) -> Option<String> {
    identify_comment(orig, block_style, shape, config, false)
}
255
/// Rewrites `orig`, which may hold several consecutive comments, one group at a time.
///
/// The style of the leading comment is detected with `comment_style`, the run of lines that
/// belongs to it (the "first group") is rewritten, and whatever follows is handled by a
/// recursive call whose output is appended on a new line at `shape.indent`.
///
/// Returns `None` if rewriting any group fails.
fn identify_comment(
    orig: &str,
    block_style: bool,
    shape: Shape,
    config: &Config,
    is_doc_comment: bool,
) -> Option<String> {
    let style = comment_style(orig, false);

    // Computes the byte length of line taking into account a newline if the line is part of a
    // paragraph.
    fn compute_len(orig: &str, line: &str) -> usize {
        if orig.len() > line.len() {
            // `orig` continues past `line`: count the line terminator (`\r\n` or `\n`) too.
            if orig.as_bytes()[line.len()] == b'\r' {
                line.len() + 2
            } else {
                line.len() + 1
            }
        } else {
            line.len()
        }
    }

    // Get the first group of line comments having the same commenting style.
    //
    // Returns a tuple with:
    // - a boolean indicating if there is a blank line
    // - a number indicating the size of the first group of comments
    fn consume_same_line_comments(
        style: CommentStyle<'_>,
        orig: &str,
        line_start: &str,
    ) -> (bool, usize) {
        let mut first_group_ending = 0;
        let mut hbl = false;

        for line in orig.lines() {
            let trimmed_line = line.trim_start();
            if trimmed_line.is_empty() {
                // A blank line terminates the group and is reported to the caller.
                hbl = true;
                break;
            } else if trimmed_line.starts_with(line_start)
                || comment_style(trimmed_line, false) == style
            {
                first_group_ending += compute_len(&orig[first_group_ending..], line);
            } else {
                break;
            }
        }
        (hbl, first_group_ending)
    }

    let (has_bare_lines, first_group_ending) = match style {
        CommentStyle::DoubleSlash | CommentStyle::TripleSlash | CommentStyle::Doc => {
            let line_start = style.line_start().trim_start();
            consume_same_line_comments(style, orig, line_start)
        }
        CommentStyle::Custom(opener) => {
            let trimmed_opener = opener.trim_end();
            consume_same_line_comments(style, orig, trimmed_opener)
        }
        // for a block comment, search for the closing symbol
        CommentStyle::DoubleBullet | CommentStyle::SingleBullet | CommentStyle::Exclamation => {
            let closer = style.closer().trim_start();
            // Number of closers in `orig`; decremented for every line that ends with one.
            let mut count = orig.matches(closer).count();
            let mut closing_symbol_offset = 0;
            let mut hbl = false;
            let mut first = true;
            for line in orig.lines() {
                closing_symbol_offset += compute_len(&orig[closing_symbol_offset..], line);
                let mut trimmed_line = line.trim_start();
                // A line that starts with neither `*`, `//` nor `/*` is a "bare" line.
                if !trimmed_line.starts_with('*')
                    && !trimmed_line.starts_with("//")
                    && !trimmed_line.starts_with("/*")
                {
                    hbl = true;
                }

                // Remove opener from consideration when searching for closer
                if first {
                    let opener = style.opener().trim_end();
                    trimmed_line = &trimmed_line[opener.len()..];
                    first = false;
                }
                if trimmed_line.ends_with(closer) {
                    count -= 1;
                    if count == 0 {
                        break;
                    }
                }
            }
            (hbl, closing_symbol_offset)
        }
    };

    let (first_group, rest) = orig.split_at(first_group_ending);
    // Pick the lightest rewrite that the configuration allows.
    let rewritten_first_group =
        if !config.normalize_comments() && has_bare_lines && style.is_block_comment() {
            trim_left_preserve_layout(first_group, shape.indent, config)?
        } else if !config.normalize_comments()
            && !config.wrap_comments()
            && !config.format_code_in_doc_comments()
        {
            light_rewrite_comment(first_group, shape.indent, config, is_doc_comment)
        } else {
            rewrite_comment_inner(
                first_group,
                block_style,
                style,
                shape,
                config,
                is_doc_comment || style.is_doc_comment(),
            )?
        };
    if rest.is_empty() {
        Some(rewritten_first_group)
    } else {
        // Rewrite the remaining comments recursively and glue them after the first group.
        identify_comment(
            rest.trim_start(),
            block_style,
            shape,
            config,
            is_doc_comment,
        )
        .map(|rest_str| {
            format!(
                "{}\n{}{}{}",
                rewritten_first_group,
                // insert back the blank line
                if has_bare_lines && style.is_line_comment() {
                    "\n"
                } else {
                    ""
                },
                shape.indent.to_string(config),
                rest_str
            )
        })
    }
}
396
/// Whether a fenced code block in a comment holds Rust code, judged from the attributes that
/// follow its opening fence.
enum CodeBlockAttribute {
    Rust,
    NotRust,
}

impl CodeBlockAttribute {
    /// Parses a comma separated attribute list.
    ///
    /// Yields `Rust` only when every attribute (after trimming whitespace) is one of the
    /// recognized Rust attributes; any other attribute makes the block `NotRust`.
    /// See <https://doc.rust-lang.org/rustdoc/print.html#attributes>
    fn new(attributes: &str) -> CodeBlockAttribute {
        let only_rust_attributes = attributes.split(',').map(str::trim).all(|attribute| {
            matches!(
                attribute,
                "" | "rust"
                    | "should_panic"
                    | "no_run"
                    | "edition2015"
                    | "edition2018"
                    | "edition2021"
            )
        });

        if only_rust_attributes {
            CodeBlockAttribute::Rust
        } else {
            CodeBlockAttribute::NotRust
        }
    }
}
419
420 /// Block that is formatted as an item.
421 ///
422 /// An item starts with either a star `*` or a dash `-`. Different level of indentation are
423 /// handled by shrinking the shape accordingly.
424 struct ItemizedBlock {
425 /// the lines that are identified as part of an itemized block
426 lines: Vec<String>,
427 /// the number of whitespaces up to the item sigil
428 indent: usize,
429 /// the string that marks the start of an item
430 opener: String,
431 /// sequence of whitespaces to prefix new lines that are part of the item
432 line_start: String,
433 }
434
435 impl ItemizedBlock {
436 /// Returns `true` if the line is formatted as an item
is_itemized_line(line: &str) -> bool437 fn is_itemized_line(line: &str) -> bool {
438 let trimmed = line.trim_start();
439 trimmed.starts_with("* ") || trimmed.starts_with("- ")
440 }
441
442 /// Creates a new ItemizedBlock described with the given line.
443 /// The `is_itemized_line` needs to be called first.
new(line: &str) -> ItemizedBlock444 fn new(line: &str) -> ItemizedBlock {
445 let space_to_sigil = line.chars().take_while(|c| c.is_whitespace()).count();
446 let indent = space_to_sigil + 2;
447 ItemizedBlock {
448 lines: vec![line[indent..].to_string()],
449 indent,
450 opener: line[..indent].to_string(),
451 line_start: " ".repeat(indent),
452 }
453 }
454
455 /// Returns a `StringFormat` used for formatting the content of an item.
create_string_format<'a>(&'a self, fmt: &'a StringFormat<'_>) -> StringFormat<'a>456 fn create_string_format<'a>(&'a self, fmt: &'a StringFormat<'_>) -> StringFormat<'a> {
457 StringFormat {
458 opener: "",
459 closer: "",
460 line_start: "",
461 line_end: "",
462 shape: Shape::legacy(fmt.shape.width.saturating_sub(self.indent), Indent::empty()),
463 trim_end: true,
464 config: fmt.config,
465 }
466 }
467
468 /// Returns `true` if the line is part of the current itemized block.
469 /// If it is, then it is added to the internal lines list.
add_line(&mut self, line: &str) -> bool470 fn add_line(&mut self, line: &str) -> bool {
471 if !ItemizedBlock::is_itemized_line(line)
472 && self.indent <= line.chars().take_while(|c| c.is_whitespace()).count()
473 {
474 self.lines.push(line.to_string());
475 return true;
476 }
477 false
478 }
479
480 /// Returns the block as a string, with each line trimmed at the start.
trimmed_block_as_string(&self) -> String481 fn trimmed_block_as_string(&self) -> String {
482 self.lines
483 .iter()
484 .map(|line| format!("{} ", line.trim_start()))
485 .collect::<String>()
486 }
487
488 /// Returns the block as a string under its original form.
original_block_as_string(&self) -> String489 fn original_block_as_string(&self) -> String {
490 self.lines.join("\n")
491 }
492 }
493
/// State carried while a comment is rewritten line by line (see `handle_line` and `finish`).
struct CommentRewrite<'a> {
    /// The rewritten comment built so far; starts out holding just the opener.
    result: String,
    /// Lines collected since the opening fence of the code block currently being read.
    code_block_buffer: String,
    /// Whether the previously wrapped line was broken into several lines.
    is_prev_line_multi_line: bool,
    /// `Some` while inside a fenced code block; tells whether the block holds Rust code.
    code_block_attr: Option<CodeBlockAttribute>,
    /// `Some` while the lines of an itemized block are being collected.
    item_block: Option<ItemizedBlock>,
    /// `indent_str` followed by `line_start`; emitted between the lines of the comment.
    comment_line_separator: String,
    /// Newline plus indentation, as produced by `Indent::to_string_with_newline`.
    indent_str: String,
    /// Shape width minus the lengths of the opener and the closer (1 if that underflows).
    max_width: usize,
    /// Indentation of the shape the comment is rewritten into.
    fmt_indent: Indent,
    /// Format handed to `rewrite_string` when wrapping a line.
    fmt: StringFormat<'a>,

    /// Opener of the output comment style.
    opener: String,
    /// Closer of the output comment style; empty for line comments.
    closer: String,
    /// Prefix of every continuation line of the output comment style.
    line_start: String,
}
510
511 impl<'a> CommentRewrite<'a> {
new( orig: &'a str, block_style: bool, shape: Shape, config: &'a Config, ) -> CommentRewrite<'a>512 fn new(
513 orig: &'a str,
514 block_style: bool,
515 shape: Shape,
516 config: &'a Config,
517 ) -> CommentRewrite<'a> {
518 let (opener, closer, line_start) = if block_style {
519 CommentStyle::SingleBullet.to_str_tuplet()
520 } else {
521 comment_style(orig, config.normalize_comments()).to_str_tuplet()
522 };
523
524 let max_width = shape
525 .width
526 .checked_sub(closer.len() + opener.len())
527 .unwrap_or(1);
528 let indent_str = shape.indent.to_string_with_newline(config).to_string();
529
530 let mut cr = CommentRewrite {
531 result: String::with_capacity(orig.len() * 2),
532 code_block_buffer: String::with_capacity(128),
533 is_prev_line_multi_line: false,
534 code_block_attr: None,
535 item_block: None,
536 comment_line_separator: format!("{}{}", indent_str, line_start),
537 max_width,
538 indent_str,
539 fmt_indent: shape.indent,
540
541 fmt: StringFormat {
542 opener: "",
543 closer: "",
544 line_start,
545 line_end: "",
546 shape: Shape::legacy(max_width, shape.indent),
547 trim_end: true,
548 config,
549 },
550
551 opener: opener.to_owned(),
552 closer: closer.to_owned(),
553 line_start: line_start.to_owned(),
554 };
555 cr.result.push_str(opener);
556 cr
557 }
558
/// Joins the lines of `s` with `sep`.
///
/// In front of an empty line the separator is stripped of its trailing whitespace, so blank
/// lines do not end up with trailing spaces.
fn join_block(s: &str, sep: &str) -> String {
    let mut joined = String::with_capacity(s.len() + 128);
    let mut lines = s.lines();

    if let Some(first_line) = lines.next() {
        joined.push_str(first_line);
    }
    for line in lines {
        joined.push_str(if line.is_empty() { sep.trim_end() } else { sep });
        joined.push_str(line);
    }

    joined
}
572
/// Flushes any pending code block or itemized block, appends the closer and returns the
/// rewritten comment.
fn finish(mut self) -> String {
    if !self.code_block_buffer.is_empty() {
        // There is a code block that is not properly enclosed by backticks.
        // We will leave them untouched.
        self.result.push_str(&self.comment_line_separator);
        self.result.push_str(&Self::join_block(
            &trim_custom_comment_prefix(&self.code_block_buffer),
            &self.comment_line_separator,
        ));
    }

    if let Some(ref ib) = self.item_block {
        // the last few lines are part of an itemized block
        self.fmt.shape = Shape::legacy(self.max_width, self.fmt_indent);
        let item_fmt = ib.create_string_format(&self.fmt);
        self.result.push_str(&self.comment_line_separator);
        self.result.push_str(&ib.opener);
        match rewrite_string(
            &ib.trimmed_block_as_string(),
            &item_fmt,
            self.max_width.saturating_sub(ib.indent),
        ) {
            // Wrapped item: continuation lines are aligned under the item's content.
            Some(s) => self.result.push_str(&Self::join_block(
                &s,
                &format!("{}{}", self.comment_line_separator, ib.line_start),
            )),
            // Wrapping failed: emit the item's lines as they were collected.
            None => self.result.push_str(&Self::join_block(
                &ib.original_block_as_string(),
                &self.comment_line_separator,
            )),
        };
    }

    self.result.push_str(&self.closer);
    // Only reachable when the closer is empty and nothing followed the opener.
    if self.result.ends_with(&self.opener) && self.opener.ends_with(' ') {
        // Trailing space.
        self.result.pop();
    }

    self.result
}
614
/// Processes `line`, the `i`-th line of the comment `orig` with its comment markers already
/// stripped, and appends its rewritten form to `self.result`.
///
/// `has_leading_whitespace` tells whether a space should separate the comment marker from
/// the line's text.
///
/// Returns `true` when the caller should stop feeding lines (the last line is blank), and
/// `false` otherwise.
fn handle_line(
    &mut self,
    orig: &'a str,
    i: usize,
    line: &'a str,
    has_leading_whitespace: bool,
) -> bool {
    let is_last = i == count_newlines(orig);

    if let Some(ref mut ib) = self.item_block {
        // Still collecting an itemized block: either the line joins it ...
        if ib.add_line(line) {
            return false;
        }
        // ... or the block is complete and gets written out before `line` is handled.
        self.is_prev_line_multi_line = false;
        self.fmt.shape = Shape::legacy(self.max_width, self.fmt_indent);
        let item_fmt = ib.create_string_format(&self.fmt);
        self.result.push_str(&self.comment_line_separator);
        self.result.push_str(&ib.opener);
        match rewrite_string(
            &ib.trimmed_block_as_string(),
            &item_fmt,
            self.max_width.saturating_sub(ib.indent),
        ) {
            Some(s) => self.result.push_str(&Self::join_block(
                &s,
                &format!("{}{}", self.comment_line_separator, ib.line_start),
            )),
            None => self.result.push_str(&Self::join_block(
                &ib.original_block_as_string(),
                &self.comment_line_separator,
            )),
        };
    } else if self.code_block_attr.is_some() {
        // Inside a fenced code block.
        if line.starts_with("```") {
            // Closing fence: emit the buffered code, formatted if it is Rust and the
            // configuration asks for it.
            let code_block = match self.code_block_attr.as_ref().unwrap() {
                CodeBlockAttribute::Rust
                    if self.fmt.config.format_code_in_doc_comments()
                        && !self.code_block_buffer.is_empty() =>
                {
                    let mut config = self.fmt.config.clone();
                    config.set().wrap_comments(false);
                    if let Some(s) =
                        crate::format_code_block(&self.code_block_buffer, &config, false)
                    {
                        trim_custom_comment_prefix(&s.snippet)
                    } else {
                        // Formatting failed: fall back to the code as written.
                        trim_custom_comment_prefix(&self.code_block_buffer)
                    }
                }
                _ => trim_custom_comment_prefix(&self.code_block_buffer),
            };
            if !code_block.is_empty() {
                self.result.push_str(&self.comment_line_separator);
                self.result
                    .push_str(&Self::join_block(&code_block, &self.comment_line_separator));
            }
            self.code_block_buffer.clear();
            self.result.push_str(&self.comment_line_separator);
            self.result.push_str(line);
            self.code_block_attr = None;
        } else {
            // Buffer the code line; `#`-hidden lines are disguised as comments.
            self.code_block_buffer
                .push_str(&hide_sharp_behind_comment(line));
            self.code_block_buffer.push('\n');
        }
        return false;
    }

    self.code_block_attr = None;
    self.item_block = None;
    if let Some(stripped) = line.strip_prefix("```") {
        // Opening fence: remember what kind of code block starts here.
        self.code_block_attr = Some(CodeBlockAttribute::new(stripped))
    } else if self.fmt.config.wrap_comments() && ItemizedBlock::is_itemized_line(line) {
        // Start collecting an itemized block; it is written out once it is complete.
        let ib = ItemizedBlock::new(line);
        self.item_block = Some(ib);
        return false;
    }

    if self.result == self.opener {
        // Nothing but the opener has been written so far.
        let force_leading_whitespace = &self.opener == "/* " && count_newlines(orig) == 0;
        if !has_leading_whitespace && !force_leading_whitespace && self.result.ends_with(' ') {
            self.result.pop();
        }
        if line.is_empty() {
            return false;
        }
    } else if self.is_prev_line_multi_line && !line.is_empty() {
        // Continue on the last line produced by the previous wrap.
        self.result.push(' ')
    } else if is_last && line.is_empty() {
        // trailing blank lines are unwanted
        if !self.closer.is_empty() {
            self.result.push_str(&self.indent_str);
        }
        return true;
    } else {
        self.result.push_str(&self.comment_line_separator);
        if !has_leading_whitespace && self.result.ends_with(' ') {
            self.result.pop();
        }
    }

    if self.fmt.config.wrap_comments()
        && unicode_str_width(line) > self.fmt.shape.width
        && !has_url(line)
    {
        // The line is too wide and may be wrapped.
        match rewrite_string(line, &self.fmt, self.max_width) {
            Some(ref s) => {
                self.is_prev_line_multi_line = s.contains('\n');
                self.result.push_str(s);
            }
            None if self.is_prev_line_multi_line => {
                // We failed to put the current `line` next to the previous `line`.
                // Remove the trailing space, then start rewrite on the next line.
                self.result.pop();
                self.result.push_str(&self.comment_line_separator);
                self.fmt.shape = Shape::legacy(self.max_width, self.fmt_indent);
                match rewrite_string(line, &self.fmt, self.max_width) {
                    Some(ref s) => {
                        self.is_prev_line_multi_line = s.contains('\n');
                        self.result.push_str(s);
                    }
                    None => {
                        self.is_prev_line_multi_line = false;
                        self.result.push_str(line);
                    }
                }
            }
            None => {
                // Wrapping failed: keep the line as it is.
                self.is_prev_line_multi_line = false;
                self.result.push_str(line);
            }
        }

        // After a multi-line wrap the next line may continue on the last output line, so
        // the shape is narrowed by the width already used there.
        self.fmt.shape = if self.is_prev_line_multi_line {
            // 1 = " "
            let offset = 1 + last_line_width(&self.result) - self.line_start.len();
            Shape {
                width: self.max_width.saturating_sub(offset),
                indent: self.fmt_indent,
                offset: self.fmt.shape.offset + offset,
            }
        } else {
            Shape::legacy(self.max_width, self.fmt_indent)
        };
    } else {
        if line.is_empty() && self.result.ends_with(' ') && !is_last {
            // Remove space if this is an empty comment or a doc comment.
            self.result.pop();
        }
        self.result.push_str(line);
        self.fmt.shape = Shape::legacy(self.max_width, self.fmt_indent);
        self.is_prev_line_multi_line = false;
    }

    false
}
771 }
772
rewrite_comment_inner( orig: &str, block_style: bool, style: CommentStyle<'_>, shape: Shape, config: &Config, is_doc_comment: bool, ) -> Option<String>773 fn rewrite_comment_inner(
774 orig: &str,
775 block_style: bool,
776 style: CommentStyle<'_>,
777 shape: Shape,
778 config: &Config,
779 is_doc_comment: bool,
780 ) -> Option<String> {
781 let mut rewriter = CommentRewrite::new(orig, block_style, shape, config);
782
783 let line_breaks = count_newlines(orig.trim_end());
784 let lines = orig
785 .lines()
786 .enumerate()
787 .map(|(i, mut line)| {
788 line = trim_end_unless_two_whitespaces(line.trim_start(), is_doc_comment);
789 // Drop old closer.
790 if i == line_breaks && line.ends_with("*/") && !line.starts_with("//") {
791 line = line[..(line.len() - 2)].trim_end();
792 }
793
794 line
795 })
796 .map(|s| left_trim_comment_line(s, &style))
797 .map(|(line, has_leading_whitespace)| {
798 if orig.starts_with("/*") && line_breaks == 0 {
799 (
800 line.trim_start(),
801 has_leading_whitespace || config.normalize_comments(),
802 )
803 } else {
804 (line, has_leading_whitespace || config.normalize_comments())
805 }
806 });
807
808 for (i, (line, has_leading_whitespace)) in lines.enumerate() {
809 if rewriter.handle_line(orig, i, line, has_leading_whitespace) {
810 break;
811 }
812 }
813
814 Some(rewriter.finish())
815 }
816
/// Marker prepended to `#`-hidden code-block lines; `trim_custom_comment_prefix` removes it
/// again.
const RUSTFMT_CUSTOM_COMMENT_PREFIX: &str = "//#### ";

/// Prefixes `s` with `RUSTFMT_CUSTOM_COMMENT_PREFIX` when, ignoring surrounding whitespace, it
/// is `#` alone or starts with `# `. Any other line is returned unchanged (borrowed).
fn hide_sharp_behind_comment(s: &str) -> Cow<'_, str> {
    let trimmed = s.trim();
    let is_hidden_line = trimmed == "#" || trimmed.starts_with("# ");
    if is_hidden_line {
        Cow::Owned([RUSTFMT_CUSTOM_COMMENT_PREFIX, s].concat())
    } else {
        Cow::Borrowed(s)
    }
}
827
trim_custom_comment_prefix(s: &str) -> String828 fn trim_custom_comment_prefix(s: &str) -> String {
829 s.lines()
830 .map(|line| {
831 let left_trimmed = line.trim_start();
832 if left_trimmed.starts_with(RUSTFMT_CUSTOM_COMMENT_PREFIX) {
833 left_trimmed.trim_start_matches(RUSTFMT_CUSTOM_COMMENT_PREFIX)
834 } else {
835 line
836 }
837 })
838 .collect::<Vec<_>>()
839 .join("\n")
840 }
841
/// Returns `true` if the given string MAY include URLs or alike.
///
/// Only the presence of a known scheme prefix is checked, so false positives are possible.
fn has_url(s: &str) -> bool {
    const URL_SCHEMES: [&str; 4] = ["https://", "http://", "ftp://", "file://"];
    URL_SCHEMES.iter().any(|scheme| s.contains(*scheme))
}
847
848 /// Given the span, rewrite the missing comment inside it if available.
849 /// Note that the given span must only include comments (or leading/trailing whitespaces).
rewrite_missing_comment( span: Span, shape: Shape, context: &RewriteContext<'_>, ) -> Option<String>850 pub(crate) fn rewrite_missing_comment(
851 span: Span,
852 shape: Shape,
853 context: &RewriteContext<'_>,
854 ) -> Option<String> {
855 let missing_snippet = context.snippet(span);
856 let trimmed_snippet = missing_snippet.trim();
857 // check the span starts with a comment
858 let pos = trimmed_snippet.find('/');
859 if !trimmed_snippet.is_empty() && pos.is_some() {
860 rewrite_comment(trimmed_snippet, false, shape, context.config)
861 } else {
862 Some(String::new())
863 }
864 }
865
866 /// Recover the missing comments in the specified span, if available.
867 /// The layout of the comments will be preserved as long as it does not break the code
868 /// and its total width does not exceed the max width.
recover_missing_comment_in_span( span: Span, shape: Shape, context: &RewriteContext<'_>, used_width: usize, ) -> Option<String>869 pub(crate) fn recover_missing_comment_in_span(
870 span: Span,
871 shape: Shape,
872 context: &RewriteContext<'_>,
873 used_width: usize,
874 ) -> Option<String> {
875 let missing_comment = rewrite_missing_comment(span, shape, context)?;
876 if missing_comment.is_empty() {
877 Some(String::new())
878 } else {
879 let missing_snippet = context.snippet(span);
880 let pos = missing_snippet.find('/')?;
881 // 1 = ` `
882 let total_width = missing_comment.len() + used_width + 1;
883 let force_new_line_before_comment =
884 missing_snippet[..pos].contains('\n') || total_width > context.config.max_width();
885 let sep = if force_new_line_before_comment {
886 shape.indent.to_string_with_newline(context.config)
887 } else {
888 Cow::from(" ")
889 };
890 Some(format!("{}{}", sep, missing_comment))
891 }
892 }
893
/// Trim trailing whitespaces unless they consist of two or more whitespaces.
///
/// In a doc comment (`is_doc_comment == true`), a line ending with two spaces is returned
/// untouched because that is markdown's hard line break syntax. In every other case,
/// including a doc-comment line with a single trailing space, all trailing whitespace is
/// removed.
fn trim_end_unless_two_whitespaces(s: &str, is_doc_comment: bool) -> &str {
    // Two spaces, not one: a single trailing space has no meaning in markdown.
    if is_doc_comment && s.ends_with("  ") {
        s
    } else {
        s.trim_end()
    }
}
902
903 /// Trims whitespace and aligns to indent, but otherwise does not change comments.
light_rewrite_comment( orig: &str, offset: Indent, config: &Config, is_doc_comment: bool, ) -> String904 fn light_rewrite_comment(
905 orig: &str,
906 offset: Indent,
907 config: &Config,
908 is_doc_comment: bool,
909 ) -> String {
910 let lines: Vec<&str> = orig
911 .lines()
912 .map(|l| {
913 // This is basically just l.trim(), but in the case that a line starts
914 // with `*` we want to leave one space before it, so it aligns with the
915 // `*` in `/*`.
916 let first_non_whitespace = l.find(|c| !char::is_whitespace(c));
917 let left_trimmed = if let Some(fnw) = first_non_whitespace {
918 if l.as_bytes()[fnw] == b'*' && fnw > 0 {
919 &l[fnw - 1..]
920 } else {
921 &l[fnw..]
922 }
923 } else {
924 ""
925 };
926 // Preserve markdown's double-space line break syntax in doc comment.
927 trim_end_unless_two_whitespaces(left_trimmed, is_doc_comment)
928 })
929 .collect();
930 lines.join(&format!("\n{}", offset.to_string(config)))
931 }
932
933 /// Trims comment characters and possibly a single space from the left of a string.
934 /// Does not trim all whitespace. If a single space is trimmed from the left of the string,
935 /// this function returns true.
left_trim_comment_line<'a>(line: &'a str, style: &CommentStyle<'_>) -> (&'a str, bool)936 fn left_trim_comment_line<'a>(line: &'a str, style: &CommentStyle<'_>) -> (&'a str, bool) {
937 if line.starts_with("//! ")
938 || line.starts_with("/// ")
939 || line.starts_with("/*! ")
940 || line.starts_with("/** ")
941 {
942 (&line[4..], true)
943 } else if let CommentStyle::Custom(opener) = *style {
944 if let Some(stripped) = line.strip_prefix(opener) {
945 (stripped, true)
946 } else {
947 (&line[opener.trim_end().len()..], false)
948 }
949 } else if line.starts_with("/* ")
950 || line.starts_with("// ")
951 || line.starts_with("//!")
952 || line.starts_with("///")
953 || line.starts_with("** ")
954 || line.starts_with("/*!")
955 || (line.starts_with("/**") && !line.starts_with("/**/"))
956 {
957 (&line[3..], line.chars().nth(2).unwrap() == ' ')
958 } else if line.starts_with("/*")
959 || line.starts_with("* ")
960 || line.starts_with("//")
961 || line.starts_with("**")
962 {
963 (&line[2..], line.chars().nth(1).unwrap() == ' ')
964 } else if let Some(stripped) = line.strip_prefix('*') {
965 (stripped, false)
966 } else {
967 (line, line.starts_with(' '))
968 }
969 }
970
/// Searching for a pattern while ignoring the occurrences that sit inside comments.
pub(crate) trait FindUncommented {
    /// Returns the byte index of the first occurrence of `pat` outside of comments, if any.
    fn find_uncommented(&self, pat: &str) -> Option<usize>;
    /// Returns the byte index of the last occurrence of `pat` outside of comments, if any.
    fn find_last_uncommented(&self, pat: &str) -> Option<usize>;
}
975
976 impl FindUncommented for str {
find_uncommented(&self, pat: &str) -> Option<usize>977 fn find_uncommented(&self, pat: &str) -> Option<usize> {
978 let mut needle_iter = pat.chars();
979 for (kind, (i, b)) in CharClasses::new(self.char_indices()) {
980 match needle_iter.next() {
981 None => {
982 return Some(i - pat.len());
983 }
984 Some(c) => match kind {
985 FullCodeCharKind::Normal | FullCodeCharKind::InString if b == c => {}
986 _ => {
987 needle_iter = pat.chars();
988 }
989 },
990 }
991 }
992
993 // Handle case where the pattern is a suffix of the search string
994 match needle_iter.next() {
995 Some(_) => None,
996 None => Some(self.len() - pat.len()),
997 }
998 }
999
find_last_uncommented(&self, pat: &str) -> Option<usize>1000 fn find_last_uncommented(&self, pat: &str) -> Option<usize> {
1001 if let Some(left) = self.find_uncommented(pat) {
1002 let mut result = left;
1003 // add 1 to use find_last_uncommented for &str after pat
1004 while let Some(next) = self[(result + 1)..].find_last_uncommented(pat) {
1005 result += next + 1;
1006 }
1007 Some(result)
1008 } else {
1009 None
1010 }
1011 }
1012 }
1013
1014 // Returns the first byte position after the first comment. The given string
1015 // is expected to be prefixed by a comment, including delimiters.
1016 // Good: `/* /* inner */ outer */ code();`
1017 // Bad: `code(); // hello\n world!`
find_comment_end(s: &str) -> Option<usize>1018 pub(crate) fn find_comment_end(s: &str) -> Option<usize> {
1019 let mut iter = CharClasses::new(s.char_indices());
1020 for (kind, (i, _c)) in &mut iter {
1021 if kind == FullCodeCharKind::Normal || kind == FullCodeCharKind::InString {
1022 return Some(i);
1023 }
1024 }
1025
1026 // Handle case where the comment ends at the end of `s`.
1027 if iter.status == CharClassesStatus::Normal {
1028 Some(s.len())
1029 } else {
1030 None
1031 }
1032 }
1033
1034 /// Returns `true` if text contains any comment.
contains_comment(text: &str) -> bool1035 pub(crate) fn contains_comment(text: &str) -> bool {
1036 CharClasses::new(text.chars()).any(|(kind, _)| kind.is_comment())
1037 }
1038
/// Iterator adapter that pairs every item of the underlying character stream with the
/// `FullCodeCharKind` it was classified as (code, string content, comment, ...).
pub(crate) struct CharClasses<T>
where
    T: Iterator,
    T::Item: RichChar,
{
    // The wrapped stream; `MultiPeek` lets the classifier look at upcoming characters.
    base: MultiPeek<T>,
    // Classifier state after the most recently returned item.
    status: CharClassesStatus,
}
1047
/// An item that carries a character, possibly along with extra data such as its position.
pub(crate) trait RichChar {
    /// Returns the character carried by this item.
    fn get_char(&self) -> char;
}

/// A plain character is its own payload.
impl RichChar for char {
    fn get_char(&self) -> char {
        let &chr = self;
        chr
    }
}

/// An `(index, character)` pair, as produced by `str::char_indices`.
impl RichChar for (usize, char) {
    fn get_char(&self) -> char {
        let (_, chr) = *self;
        chr
    }
}
1063
/// Internal state of the `CharClasses` classifier.
#[derive(PartialEq, Eq, Debug, Clone, Copy)]
enum CharClassesStatus {
    /// Outside of any string literal, character literal or comment
    Normal,
    /// Character is within a string
    LitString,
    /// The previous character was a backslash inside a string
    LitStringEscape,
    /// Character is within a raw string, the integer being the number of '#' of its
    /// delimiter
    LitRawString(u32),
    /// Between the 'r' and the opening '"' of a raw string, the integer being the number
    /// of '#' consumed so far
    RawStringPrefix(u32),
    /// After the closing '"' of a raw string, the integer being the number of '#' that
    /// still have to be consumed
    RawStringSuffix(u32),
    /// Character is within a character literal
    LitChar,
    /// The previous character was a backslash inside a character literal
    LitCharEscape,
    /// Character inside a block comment, with the integer indicating the nesting deepness of the
    /// comment
    BlockComment(u32),
    /// Character inside a block-commented string, with the integer indicating the nesting deepness
    /// of the comment
    StringInBlockComment(u32),
    /// Status when the '/' has been consumed, but not yet the '*', deepness is
    /// the new deepness (after the comment opening).
    BlockCommentOpening(u32),
    /// Status when the '*' has been consumed, but not yet the '/', deepness is
    /// the new deepness (after the comment closing).
    BlockCommentClosing(u32),
    /// Character is within a line comment
    LineComment,
}
1091
/// Coarse classification of a piece of source text: functional code or comment.
#[derive(PartialEq, Eq, Debug, Clone, Copy)]
pub(crate) enum CodeCharKind {
    Normal,
    Comment,
}

/// Fine-grained classification of source text that, besides telling code from comments,
/// marks where comments and multiline strings open and close. This makes it easier to
/// chunk code from tagged characters.
#[derive(PartialEq, Eq, Debug, Clone, Copy)]
pub(crate) enum FullCodeCharKind {
    Normal,
    /// The first character of a comment, there is only one for a comment (always '/')
    StartComment,
    /// Any character inside a comment including the second character of comment
    /// marks ("//", "/*")
    InComment,
    /// Last character of a comment, '\n' for a line comment, '/' for a block comment.
    EndComment,
    /// Start of a multiline string inside a comment
    StartStringCommented,
    /// End of a multiline string inside a comment
    EndStringCommented,
    /// Inside a commented string
    InStringCommented,
    /// Start of a multiline string
    StartString,
    /// End of a multiline string
    EndString,
    /// Inside a string.
    InString,
}

impl FullCodeCharKind {
    /// Returns true for every kind that belongs to a comment, including the comment's
    /// first and last character and strings nested in it.
    pub(crate) fn is_comment(self) -> bool {
        matches!(
            self,
            FullCodeCharKind::StartComment
                | FullCodeCharKind::InComment
                | FullCodeCharKind::EndComment
                | FullCodeCharKind::StartStringCommented
                | FullCodeCharKind::InStringCommented
                | FullCodeCharKind::EndStringCommented
        )
    }

    /// Returns true if the character is inside a comment, which excludes the comment's
    /// first and last character.
    pub(crate) fn inside_comment(self) -> bool {
        matches!(
            self,
            FullCodeCharKind::InComment
                | FullCodeCharKind::StartStringCommented
                | FullCodeCharKind::InStringCommented
                | FullCodeCharKind::EndStringCommented
        )
    }

    /// Returns true for the start or the inside of a string that is not in a comment.
    pub(crate) fn is_string(self) -> bool {
        matches!(
            self,
            FullCodeCharKind::InString | FullCodeCharKind::StartString
        )
    }

    /// Returns true for the start or the inside of a commented string.
    pub(crate) fn is_commented_string(self) -> bool {
        matches!(
            self,
            FullCodeCharKind::InStringCommented | FullCodeCharKind::StartStringCommented
        )
    }

    /// Collapses the kind into the two-valued `CodeCharKind`.
    fn to_codecharkind(self) -> CodeCharKind {
        match self.is_comment() {
            true => CodeCharKind::Comment,
            false => CodeCharKind::Normal,
        }
    }
}
1168
1169 impl<T> CharClasses<T>
1170 where
1171 T: Iterator,
1172 T::Item: RichChar,
1173 {
new(base: T) -> CharClasses<T>1174 pub(crate) fn new(base: T) -> CharClasses<T> {
1175 CharClasses {
1176 base: multipeek(base),
1177 status: CharClassesStatus::Normal,
1178 }
1179 }
1180 }
1181
is_raw_string_suffix<T>(iter: &mut MultiPeek<T>, count: u32) -> bool where T: Iterator, T::Item: RichChar,1182 fn is_raw_string_suffix<T>(iter: &mut MultiPeek<T>, count: u32) -> bool
1183 where
1184 T: Iterator,
1185 T::Item: RichChar,
1186 {
1187 for _ in 0..count {
1188 match iter.peek() {
1189 Some(c) if c.get_char() == '#' => continue,
1190 _ => return false,
1191 }
1192 }
1193 true
1194 }
1195
/// The classifier itself: a state machine that consumes one item per call, updates
/// `self.status` and yields the item together with the kind it was classified as.
impl<T> Iterator for CharClasses<T>
where
    T: Iterator,
    T::Item: RichChar,
{
    type Item = (FullCodeCharKind, T::Item);

    /// Returns the next item of the underlying stream paired with its classification, or
    /// `None` once the stream is exhausted.
    fn next(&mut self) -> Option<(FullCodeCharKind, T::Item)> {
        let item = self.base.next()?;
        let chr = item.get_char();
        // Kind reported for this item unless a branch below overrides it or returns early.
        let mut char_kind = FullCodeCharKind::Normal;
        self.status = match self.status {
            CharClassesStatus::LitRawString(sharps) => {
                char_kind = FullCodeCharKind::InString;
                match chr {
                    '"' => {
                        if sharps == 0 {
                            // The closing quote of a raw string without '#' is reported as
                            // normal code.
                            char_kind = FullCodeCharKind::Normal;
                            CharClassesStatus::Normal
                        } else if is_raw_string_suffix(&mut self.base, sharps) {
                            // The quote is followed by enough '#': the string is closing.
                            CharClassesStatus::RawStringSuffix(sharps)
                        } else {
                            // A quote that is part of the raw string's content.
                            CharClassesStatus::LitRawString(sharps)
                        }
                    }
                    _ => CharClassesStatus::LitRawString(sharps),
                }
            }
            CharClassesStatus::RawStringPrefix(sharps) => {
                char_kind = FullCodeCharKind::InString;
                match chr {
                    // Count the '#' of the opening delimiter.
                    '#' => CharClassesStatus::RawStringPrefix(sharps + 1),
                    '"' => CharClassesStatus::LitRawString(sharps),
                    _ => CharClassesStatus::Normal, // Unreachable.
                }
            }
            CharClassesStatus::RawStringSuffix(sharps) => {
                match chr {
                    '#' => {
                        if sharps == 1 {
                            // Last '#' of the closing delimiter; it is reported as normal
                            // code since `char_kind` is left untouched.
                            CharClassesStatus::Normal
                        } else {
                            char_kind = FullCodeCharKind::InString;
                            CharClassesStatus::RawStringSuffix(sharps - 1)
                        }
                    }
                    _ => CharClassesStatus::Normal, // Unreachable
                }
            }
            CharClassesStatus::LitString => {
                char_kind = FullCodeCharKind::InString;
                match chr {
                    '"' => CharClassesStatus::Normal,
                    '\\' => CharClassesStatus::LitStringEscape,
                    _ => CharClassesStatus::LitString,
                }
            }
            CharClassesStatus::LitStringEscape => {
                // Whatever follows a backslash is string content, even a quote.
                char_kind = FullCodeCharKind::InString;
                CharClassesStatus::LitString
            }
            // Characters of a character literal keep the default `Normal` kind.
            CharClassesStatus::LitChar => match chr {
                '\\' => CharClassesStatus::LitCharEscape,
                '\'' => CharClassesStatus::Normal,
                _ => CharClassesStatus::LitChar,
            },
            CharClassesStatus::LitCharEscape => CharClassesStatus::LitChar,
            CharClassesStatus::Normal => match chr {
                // An 'r' directly followed by '#' or '"' is taken as the start of a raw
                // string.
                'r' => match self.base.peek().map(RichChar::get_char) {
                    Some('#') | Some('"') => {
                        char_kind = FullCodeCharKind::InString;
                        CharClassesStatus::RawStringPrefix(0)
                    }
                    _ => CharClassesStatus::Normal,
                },
                '"' => {
                    char_kind = FullCodeCharKind::InString;
                    CharClassesStatus::LitString
                }
                '\'' => {
                    // HACK: Work around mut borrow.
                    // A quote followed by a backslash opens an escaped character literal.
                    match self.base.peek() {
                        Some(next) if next.get_char() == '\\' => {
                            self.status = CharClassesStatus::LitChar;
                            return Some((char_kind, item));
                        }
                        _ => (),
                    }

                    // NOTE(review): this is the second `peek` in a row without a `next` in
                    // between; with `MultiPeek` it presumably inspects the character after
                    // the one checked above, i.e. the closing quote of a one-character
                    // literal (as opposed to a lifetime) -- confirm against itertools.
                    match self.base.peek() {
                        Some(next) if next.get_char() == '\'' => CharClassesStatus::LitChar,
                        _ => CharClassesStatus::Normal,
                    }
                }
                // A '/' only starts a comment when followed by '*' or '/'.
                '/' => match self.base.peek() {
                    Some(next) if next.get_char() == '*' => {
                        self.status = CharClassesStatus::BlockCommentOpening(1);
                        return Some((FullCodeCharKind::StartComment, item));
                    }
                    Some(next) if next.get_char() == '/' => {
                        self.status = CharClassesStatus::LineComment;
                        return Some((FullCodeCharKind::StartComment, item));
                    }
                    _ => CharClassesStatus::Normal,
                },
                _ => CharClassesStatus::Normal,
            },
            CharClassesStatus::StringInBlockComment(deepness) => {
                char_kind = FullCodeCharKind::InStringCommented;
                if chr == '"' {
                    CharClassesStatus::BlockComment(deepness)
                } else if chr == '*' && self.base.peek().map(RichChar::get_char) == Some('/') {
                    // A "*/" closes the comment even while inside a commented string.
                    char_kind = FullCodeCharKind::InComment;
                    CharClassesStatus::BlockCommentClosing(deepness - 1)
                } else {
                    CharClassesStatus::StringInBlockComment(deepness)
                }
            }
            CharClassesStatus::BlockComment(deepness) => {
                assert_ne!(deepness, 0);
                char_kind = FullCodeCharKind::InComment;
                match self.base.peek() {
                    // "*/": one nesting level is closed.
                    Some(next) if next.get_char() == '/' && chr == '*' => {
                        CharClassesStatus::BlockCommentClosing(deepness - 1)
                    }
                    // "/*": a nested block comment is opened.
                    Some(next) if next.get_char() == '*' && chr == '/' => {
                        CharClassesStatus::BlockCommentOpening(deepness + 1)
                    }
                    _ if chr == '"' => CharClassesStatus::StringInBlockComment(deepness),
                    _ => self.status,
                }
            }
            CharClassesStatus::BlockCommentOpening(deepness) => {
                // The '*' of "/*"; the '/' was handled by the previous call.
                assert_eq!(chr, '*');
                self.status = CharClassesStatus::BlockComment(deepness);
                return Some((FullCodeCharKind::InComment, item));
            }
            CharClassesStatus::BlockCommentClosing(deepness) => {
                // The '/' of "*/"; the '*' was handled by the previous call.
                assert_eq!(chr, '/');
                if deepness == 0 {
                    // The outermost comment is closed.
                    self.status = CharClassesStatus::Normal;
                    return Some((FullCodeCharKind::EndComment, item));
                } else {
                    // Only a nested comment is closed; we are still inside a comment.
                    self.status = CharClassesStatus::BlockComment(deepness);
                    return Some((FullCodeCharKind::InComment, item));
                }
            }
            CharClassesStatus::LineComment => match chr {
                // The newline ends the line comment and is reported as its last character.
                '\n' => {
                    self.status = CharClassesStatus::Normal;
                    return Some((FullCodeCharKind::EndComment, item));
                }
                _ => {
                    self.status = CharClassesStatus::LineComment;
                    return Some((FullCodeCharKind::InComment, item));
                }
            },
        };
        Some((char_kind, item))
    }
}
1357
1358 /// An iterator over the lines of a string, paired with the char kind at the
1359 /// end of the line.
1360 pub(crate) struct LineClasses<'a> {
1361 base: iter::Peekable<CharClasses<std::str::Chars<'a>>>,
1362 kind: FullCodeCharKind,
1363 }
1364
1365 impl<'a> LineClasses<'a> {
new(s: &'a str) -> Self1366 pub(crate) fn new(s: &'a str) -> Self {
1367 LineClasses {
1368 base: CharClasses::new(s.chars()).peekable(),
1369 kind: FullCodeCharKind::Normal,
1370 }
1371 }
1372 }
1373
1374 impl<'a> Iterator for LineClasses<'a> {
1375 type Item = (FullCodeCharKind, String);
1376
next(&mut self) -> Option<Self::Item>1377 fn next(&mut self) -> Option<Self::Item> {
1378 self.base.peek()?;
1379
1380 let mut line = String::new();
1381
1382 let start_kind = match self.base.peek() {
1383 Some((kind, _)) => *kind,
1384 None => unreachable!(),
1385 };
1386
1387 for (kind, c) in self.base.by_ref() {
1388 // needed to set the kind of the ending character on the last line
1389 self.kind = kind;
1390 if c == '\n' {
1391 self.kind = match (start_kind, kind) {
1392 (FullCodeCharKind::Normal, FullCodeCharKind::InString) => {
1393 FullCodeCharKind::StartString
1394 }
1395 (FullCodeCharKind::InString, FullCodeCharKind::Normal) => {
1396 FullCodeCharKind::EndString
1397 }
1398 (FullCodeCharKind::InComment, FullCodeCharKind::InStringCommented) => {
1399 FullCodeCharKind::StartStringCommented
1400 }
1401 (FullCodeCharKind::InStringCommented, FullCodeCharKind::InComment) => {
1402 FullCodeCharKind::EndStringCommented
1403 }
1404 _ => kind,
1405 };
1406 break;
1407 }
1408 line.push(c);
1409 }
1410
1411 // Workaround for CRLF newline.
1412 if line.ends_with('\r') {
1413 line.pop();
1414 }
1415
1416 Some((self.kind, line))
1417 }
1418 }
1419
1420 /// Iterator over functional and commented parts of a string. Any part of a string is either
1421 /// functional code, either *one* block comment, either *one* line comment. Whitespace between
1422 /// comments is functional code. Line comments contain their ending newlines.
1423 struct UngroupedCommentCodeSlices<'a> {
1424 slice: &'a str,
1425 iter: iter::Peekable<CharClasses<std::str::CharIndices<'a>>>,
1426 }
1427
1428 impl<'a> UngroupedCommentCodeSlices<'a> {
new(code: &'a str) -> UngroupedCommentCodeSlices<'a>1429 fn new(code: &'a str) -> UngroupedCommentCodeSlices<'a> {
1430 UngroupedCommentCodeSlices {
1431 slice: code,
1432 iter: CharClasses::new(code.char_indices()).peekable(),
1433 }
1434 }
1435 }
1436
1437 impl<'a> Iterator for UngroupedCommentCodeSlices<'a> {
1438 type Item = (CodeCharKind, usize, &'a str);
1439
next(&mut self) -> Option<Self::Item>1440 fn next(&mut self) -> Option<Self::Item> {
1441 let (kind, (start_idx, _)) = self.iter.next()?;
1442 match kind {
1443 FullCodeCharKind::Normal | FullCodeCharKind::InString => {
1444 // Consume all the Normal code
1445 while let Some(&(char_kind, _)) = self.iter.peek() {
1446 if char_kind.is_comment() {
1447 break;
1448 }
1449 let _ = self.iter.next();
1450 }
1451 }
1452 FullCodeCharKind::StartComment => {
1453 // Consume the whole comment
1454 loop {
1455 match self.iter.next() {
1456 Some((kind, ..)) if kind.inside_comment() => continue,
1457 _ => break,
1458 }
1459 }
1460 }
1461 _ => panic!(),
1462 }
1463 let slice = match self.iter.peek() {
1464 Some(&(_, (end_idx, _))) => &self.slice[start_idx..end_idx],
1465 None => &self.slice[start_idx..],
1466 };
1467 Some((
1468 if kind.is_comment() {
1469 CodeCharKind::Comment
1470 } else {
1471 CodeCharKind::Normal
1472 },
1473 start_idx,
1474 slice,
1475 ))
1476 }
1477 }
1478
/// Iterator over an alternating sequence of functional and commented parts of
/// a string. The first item is always a, possibly zero length, subslice of
/// functional text. Line style comments contain their ending newlines.
pub(crate) struct CommentCodeSlices<'a> {
    // The complete text being split.
    slice: &'a str,
    // Kind of the slice returned by the previous call to `next`.
    last_slice_kind: CodeCharKind,
    // Byte offset in `slice` at which the previously returned slice ended.
    last_slice_end: usize,
}

impl<'a> CommentCodeSlices<'a> {
    /// Creates an iterator over `slice`.
    pub(crate) fn new(slice: &'a str) -> CommentCodeSlices<'a> {
        CommentCodeSlices {
            slice,
            // `next` yields the opposite of the previous kind, so pretending that a
            // comment came before makes the first yielded slice a functional one.
            last_slice_kind: CodeCharKind::Comment,
            last_slice_end: 0,
        }
    }
}

impl<'a> Iterator for CommentCodeSlices<'a> {
    type Item = (CodeCharKind, usize, &'a str);

    /// Yields the kind of the next slice, its starting byte offset and its text, or `None`
    /// once the whole input has been returned.
    fn next(&mut self) -> Option<Self::Item> {
        if self.last_slice_end == self.slice.len() {
            return None;
        }

        let mut sub_slice_end = self.last_slice_end;
        // Offset (relative to `subslice`) of the first character of the current run of
        // "connector" whitespace, if the scan is inside such a run.
        let mut first_whitespace = None;
        let subslice = &self.slice[self.last_slice_end..];
        let mut iter = CharClasses::new(subslice.char_indices());

        for (kind, (i, c)) in &mut iter {
            // While a comment slice that starts with "//" is being built, spaces and tabs
            // do not end the slice by themselves; this is what lets line comments that are
            // only separated by such whitespace end up in one slice.
            let is_comment_connector = self.last_slice_kind == CodeCharKind::Normal
                && &subslice[..2] == "//"
                && [' ', '\t'].contains(&c);

            if is_comment_connector && first_whitespace.is_none() {
                first_whitespace = Some(i);
            }

            // A character of the same kind as the previous slice means the slice being
            // built is over.
            if kind.to_codecharkind() == self.last_slice_kind && !is_comment_connector {
                // If connector whitespace directly precedes this character, the slice ends
                // before that whitespace.
                let last_index = match first_whitespace {
                    Some(j) => j,
                    None => i,
                };
                sub_slice_end = self.last_slice_end + last_index;
                break;
            }

            if !is_comment_connector {
                first_whitespace = None;
            }
        }

        // Nothing left in the scanner and no end found above: the slice runs to the end of
        // the input, again minus any trailing connector whitespace.
        if let (None, true) = (iter.next(), sub_slice_end == self.last_slice_end) {
            // This was the last subslice.
            sub_slice_end = match first_whitespace {
                Some(i) => self.last_slice_end + i,
                None => self.slice.len(),
            };
        }

        // Kinds strictly alternate between calls.
        let kind = match self.last_slice_kind {
            CodeCharKind::Comment => CodeCharKind::Normal,
            CodeCharKind::Normal => CodeCharKind::Comment,
        };
        let res = (
            kind,
            self.last_slice_end,
            &self.slice[self.last_slice_end..sub_slice_end],
        );
        self.last_slice_end = sub_slice_end;
        self.last_slice_kind = kind;

        Some(res)
    }
}
1557
1558 /// Checks is `new` didn't miss any comment from `span`, if it removed any, return previous text
1559 /// (if it fits in the width/offset, else return `None`), else return `new`
recover_comment_removed( new: String, span: Span, context: &RewriteContext<'_>, ) -> Option<String>1560 pub(crate) fn recover_comment_removed(
1561 new: String,
1562 span: Span,
1563 context: &RewriteContext<'_>,
1564 ) -> Option<String> {
1565 let snippet = context.snippet(span);
1566 if snippet != new && changed_comment_content(snippet, &new) {
1567 // We missed some comments. Warn and keep the original text.
1568 if context.config.error_on_unformatted() {
1569 context.report.append(
1570 context.parse_sess.span_to_filename(span),
1571 vec![FormattingError::from_span(
1572 span,
1573 context.parse_sess,
1574 ErrorKind::LostComment,
1575 )],
1576 );
1577 }
1578 Some(snippet.to_owned())
1579 } else {
1580 Some(new)
1581 }
1582 }
1583
filter_normal_code(code: &str) -> String1584 pub(crate) fn filter_normal_code(code: &str) -> String {
1585 let mut buffer = String::with_capacity(code.len());
1586 LineClasses::new(code).for_each(|(kind, line)| match kind {
1587 FullCodeCharKind::Normal
1588 | FullCodeCharKind::StartString
1589 | FullCodeCharKind::InString
1590 | FullCodeCharKind::EndString => {
1591 buffer.push_str(&line);
1592 buffer.push('\n');
1593 }
1594 _ => (),
1595 });
1596 if !code.ends_with('\n') && buffer.ends_with('\n') {
1597 buffer.pop();
1598 }
1599 buffer
1600 }
1601
1602 /// Returns `true` if the two strings of code have the same payload of comments.
1603 /// The payload of comments is everything in the string except:
1604 /// - actual code (not comments),
1605 /// - comment start/end marks,
1606 /// - whitespace,
1607 /// - '*' at the beginning of lines in block comments.
changed_comment_content(orig: &str, new: &str) -> bool1608 fn changed_comment_content(orig: &str, new: &str) -> bool {
1609 // Cannot write this as a fn since we cannot return types containing closures.
1610 let code_comment_content = |code| {
1611 let slices = UngroupedCommentCodeSlices::new(code);
1612 slices
1613 .filter(|&(ref kind, _, _)| *kind == CodeCharKind::Comment)
1614 .flat_map(|(_, _, s)| CommentReducer::new(s))
1615 };
1616 let res = code_comment_content(orig).ne(code_comment_content(new));
1617 debug!(
1618 "comment::changed_comment_content: {}\norig: '{}'\nnew: '{}'\nraw_old: {}\nraw_new: {}",
1619 res,
1620 orig,
1621 new,
1622 code_comment_content(orig).collect::<String>(),
1623 code_comment_content(new).collect::<String>()
1624 );
1625 res
1626 }
1627
/// Iterator over the 'payload' characters of a comment.
/// It skips whitespace, comment start/end marks, and '*' at the beginning of lines.
/// The comment must be one comment, ie not more than one start mark (no multiple line comments,
/// for example).
struct CommentReducer<'a> {
    // Whether the comment is a block comment (starts with "/*").
    is_block: bool,
    // Set once the first newline has been seen; see the note in `next`.
    at_start_line: bool,
    // The comment's characters without its start/end marks.
    iter: std::str::Chars<'a>,
}

impl<'a> CommentReducer<'a> {
    /// Creates a reducer for `comment`, which must start with a comment opener.
    fn new(comment: &'a str) -> CommentReducer<'a> {
        let is_block = comment.starts_with("/*");
        let comment = remove_comment_header(comment);
        CommentReducer {
            is_block,
            // There are no supplementary '*' on the first line.
            at_start_line: false,
            iter: comment.chars(),
        }
    }
}

impl<'a> Iterator for CommentReducer<'a> {
    type Item = char;

    /// Returns the next non-whitespace payload character.
    fn next(&mut self) -> Option<Self::Item> {
        loop {
            let mut c = self.iter.next()?;
            if self.is_block && self.at_start_line {
                while c.is_whitespace() {
                    c = self.iter.next()?;
                }
                // Ignore leading '*'.
                // NOTE(review): `at_start_line` is never cleared, so after the first
                // newline of a block comment this also drops a '*' that comes later on a
                // line. Comparisons of two reduced comments rely on the current behavior,
                // so it is deliberately left unchanged here.
                if c == '*' {
                    c = self.iter.next()?;
                }
            } else if c == '\n' {
                self.at_start_line = true;
            }
            if !c.is_whitespace() {
                return Some(c);
            }
        }
    }
}

/// Strips the comment marks from `comment`: the opener (`///`, `//!`, `//`, `/**`, `/*!`
/// or `/*`) and, for block comments, the closing `*/`.
///
/// A block comment without a closing `*/` (unterminated input) only loses its opener;
/// its last two bytes are kept instead of being cut off blindly.
///
/// Panics if `comment` does not start with a comment opener.
fn remove_comment_header(comment: &str) -> &str {
    if comment.starts_with("///") || comment.starts_with("//!") {
        &comment[3..]
    } else if let Some(stripped) = comment.strip_prefix("//") {
        stripped
    } else {
        assert!(
            comment.starts_with("/*"),
            "string '{}' is not a comment",
            comment
        );
        // `/**/` is an empty plain block comment, not the opener of a doc comment.
        let is_doc_block = (comment.starts_with("/**") && !comment.starts_with("/**/"))
            || comment.starts_with("/*!");
        let body = &comment[if is_doc_block { 3 } else { 2 }..];
        body.strip_suffix("*/").unwrap_or(body)
    }
}
1693
// Unit tests for the comment classification and slicing helpers of this file.
#[cfg(test)]
mod test {
    use super::*;
    use crate::shape::{Indent, Shape};

    // `CharClasses` tags an empty line comment and the blank line following it.
    #[test]
    fn char_classes() {
        let mut iter = CharClasses::new("//\n\n".chars());

        assert_eq!((FullCodeCharKind::StartComment, '/'), iter.next().unwrap());
        assert_eq!((FullCodeCharKind::InComment, '/'), iter.next().unwrap());
        assert_eq!((FullCodeCharKind::EndComment, '\n'), iter.next().unwrap());
        assert_eq!((FullCodeCharKind::Normal, '\n'), iter.next().unwrap());
        assert_eq!(None, iter.next());
    }

    // A block comment in the middle of code yields code / comment / code.
    #[test]
    fn comment_code_slices() {
        let input = "code(); /* test */ 1 + 1";
        let mut iter = CommentCodeSlices::new(input);

        assert_eq!((CodeCharKind::Normal, 0, "code(); "), iter.next().unwrap());
        assert_eq!(
            (CodeCharKind::Comment, 8, "/* test */"),
            iter.next().unwrap()
        );
        assert_eq!((CodeCharKind::Normal, 18, " 1 + 1"), iter.next().unwrap());
        assert_eq!(None, iter.next());
    }

    // Input starting with a comment still yields an (empty) code slice first.
    #[test]
    fn comment_code_slices_two() {
        let input = "// comment\n test();";
        let mut iter = CommentCodeSlices::new(input);

        assert_eq!((CodeCharKind::Normal, 0, ""), iter.next().unwrap());
        assert_eq!(
            (CodeCharKind::Comment, 0, "// comment\n"),
            iter.next().unwrap()
        );
        assert_eq!(
            (CodeCharKind::Normal, 11, " test();"),
            iter.next().unwrap()
        );
        assert_eq!(None, iter.next());
    }

    // Line comments separated only by whitespace are grouped into one comment slice.
    // NOTE(review): the expected offset 29 of the last slice implies a 27-byte comment
    // slice starting at 2, which is longer than the literal as it reads here; verify the
    // exact whitespace inside these string literals.
    #[test]
    fn comment_code_slices_three() {
        let input = "1 // comment\n // comment2\n\n";
        let mut iter = CommentCodeSlices::new(input);

        assert_eq!((CodeCharKind::Normal, 0, "1 "), iter.next().unwrap());
        assert_eq!(
            (CodeCharKind::Comment, 2, "// comment\n // comment2\n"),
            iter.next().unwrap()
        );
        assert_eq!((CodeCharKind::Normal, 29, "\n"), iter.next().unwrap());
        assert_eq!(None, iter.next());
    }

    // Exercises `rewrite_comment` with comment wrapping, with and without normalization.
    #[test]
    #[rustfmt::skip]
    fn format_doc_comments() {
        let mut wrap_normalize_config: crate::config::Config = Default::default();
        wrap_normalize_config.set().wrap_comments(true);
        wrap_normalize_config.set().normalize_comments(true);

        let mut wrap_config: crate::config::Config = Default::default();
        wrap_config.set().wrap_comments(true);

        let comment = rewrite_comment(" //test",
                                      true,
                                      Shape::legacy(100, Indent::new(0, 100)),
                                      &wrap_normalize_config).unwrap();
        assert_eq!("/* test */", comment);

        let comment = rewrite_comment("// comment on a",
                                      false,
                                      Shape::legacy(10, Indent::empty()),
                                      &wrap_normalize_config).unwrap();
        assert_eq!("// comment\n// on a", comment);

        let comment = rewrite_comment("// A multi line comment\n // between args.",
                                      false,
                                      Shape::legacy(60, Indent::new(0, 12)),
                                      &wrap_normalize_config).unwrap();
        assert_eq!("// A multi line comment\n // between args.", comment);

        let input = "// comment";
        let expected =
            "/* comment */";
        let comment = rewrite_comment(input,
                                      true,
                                      Shape::legacy(9, Indent::new(0, 69)),
                                      &wrap_normalize_config).unwrap();
        assert_eq!(expected, comment);

        let comment = rewrite_comment("/* trimmed */",
                                      true,
                                      Shape::legacy(100, Indent::new(0, 100)),
                                      &wrap_normalize_config).unwrap();
        assert_eq!("/* trimmed */", comment);

        // Check that different comment style are properly recognised.
        let comment = rewrite_comment(r#"/// test1
/// test2
/*
* test3
*/"#,
                                      false,
                                      Shape::legacy(100, Indent::new(0, 0)),
                                      &wrap_normalize_config).unwrap();
        assert_eq!("/// test1\n/// test2\n// test3", comment);

        // Check that the blank line marks the end of a commented paragraph.
        let comment = rewrite_comment(r#"// test1

// test2"#,
                                      false,
                                      Shape::legacy(100, Indent::new(0, 0)),
                                      &wrap_normalize_config).unwrap();
        assert_eq!("// test1\n\n// test2", comment);

        // Check that the blank line marks the end of a custom-commented paragraph.
        let comment = rewrite_comment(r#"//@ test1

//@ test2"#,
                                      false,
                                      Shape::legacy(100, Indent::new(0, 0)),
                                      &wrap_normalize_config).unwrap();
        assert_eq!("//@ test1\n\n//@ test2", comment);

        // Check that bare lines are just indented but otherwise left unchanged.
        let comment = rewrite_comment(r#"// test1
/*
a bare line!

another bare line!
*/"#,
                                      false,
                                      Shape::legacy(100, Indent::new(0, 0)),
                                      &wrap_config).unwrap();
        assert_eq!("// test1\n/*\n a bare line!\n\n another bare line!\n*/", comment);
    }

    // Returns the characters of `text` that `CharClasses` tags as normal code or string
    // content. This was probably meant to be a non-test function, but only the tests use
    // it; keep it around as long as it helps testing.
    fn uncommented(text: &str) -> String {
        CharClasses::new(text.chars())
            .filter_map(|(s, c)| match s {
                FullCodeCharKind::Normal | FullCodeCharKind::InString => Some(c),
                _ => None,
            })
            .collect()
    }

    // Comments are dropped, including nested block comments; strings are kept verbatim.
    #[test]
    fn test_uncommented() {
        assert_eq!(&uncommented("abc/*...*/"), "abc");
        assert_eq!(
            &uncommented("// .... /* \n../* /* *** / */ */a/* // */c\n"),
            "..ac\n"
        );
        assert_eq!(&uncommented("abc \" /* */\" qsdf"), "abc \" /* */\" qsdf");
    }

    // Comment marks inside a string literal do not count as a comment.
    #[test]
    fn test_contains_comment() {
        assert_eq!(contains_comment("abc"), false);
        assert_eq!(contains_comment("abc // qsdf"), true);
        assert_eq!(contains_comment("abc /* kqsdf"), true);
        assert_eq!(contains_comment("abc \" /* */\" qsdf"), false);
    }

    // Expected values are byte offsets into the haystack.
    #[test]
    fn test_find_uncommented() {
        fn check(haystack: &str, needle: &str, expected: Option<usize>) {
            assert_eq!(expected, haystack.find_uncommented(needle));
        }

        check("/*/ */test", "test", Some(6));
        check("//test\ntest", "test", Some(7));
        check("/* comment only */", "whatever", None);
        check(
            "/* comment */ some text /* more commentary */ result",
            "result",
            Some(46),
        );
        check("sup // sup", "p", Some(2));
        check("sup", "x", None);
        check(r#"π? /**/ π is nice!"#, r#"π is nice"#, Some(9));
        check("/*sup yo? \n sup*/ sup", "p", Some(20));
        check("hel/*lohello*/lo", "hello", None);
        check("acb", "ab", None);
        check(",/*A*/ ", ",", Some(0));
        check("abc", "abc", Some(0));
        check("/* abc */", "abc", None);
        check("/**/abc/* */", "abc", Some(4));
        check("\"/* abc */\"", "abc", Some(4));
        check("\"/* abc", "abc", Some(4));
    }

    // Code without comments passes through unchanged; a comment line is removed entirely.
    #[test]
    fn test_filter_normal_code() {
        let s = r#"
fn main() {
println!("hello, world");
}
"#;
        assert_eq!(s, filter_normal_code(s));
        let s_with_comment = r#"
fn main() {
// hello, world
println!("hello, world");
}
"#;
        assert_eq!(s, filter_normal_code(s_with_comment));
    }
}
1914