1 /**
2 * @file output.cpp
3 * Does all the output & comment formatting.
4 *
5 * @author Ben Gardner
6 * @author Guy Maurel October 2015, 2021
7 * @license GPL v2+
8 */
9
10 #include "output.h"
11
12 #include "align_tab_column.h"
13 #include "braces.h"
14 #include "indent.h"
15 #include "prototypes.h"
16 #include "tokenize.h"
17 #include "unc_ctype.h"
18 #include "unicode.h"
19
20 #include <ctime>
21 #include <map>
22 #include <regex>
23 #include <set>
24
25
26 constexpr static auto LCURRENT = LOUTPUT;
27
28 using namespace uncrustify;
29
30
//! Bookkeeping that is passed around while one comment is written out.
struct cmt_reflow
{
   chunk_t  *pc         = nullptr; //! NOTE(review): presumably the comment chunk being output - confirm
   size_t   column      = 0;       //! Column of the comment start
   size_t   brace_col   = 0;       //! Brace column (for indenting with tabs)
   size_t   base_col    = 0;       //! Base column (for indenting with tabs)
   size_t   word_count  = 0;       //! number of words on this line
   size_t   xtra_indent = 0;       //! extra indent of non-first lines (0 or 1)
   unc_text cont_text;             //! fixed text to output at the start of a line (0 to 3 chars)
   bool     reflow      = false;   //! reflow the current line
};
42
43
44 /**
45 * A multiline comment
46 * The only trick here is that we have to trim out whitespace characters
47 * to get the comment to line up.
48 */
49 static void output_comment_multi(chunk_t *pc);
50
51
52 static bool kw_fcn_filename(chunk_t *cmt, unc_text &out_txt);
53
54
55 static bool kw_fcn_class(chunk_t *cmt, unc_text &out_txt);
56
57
58 static bool kw_fcn_message(chunk_t *cmt, unc_text &out_txt);
59
60
61 static bool kw_fcn_category(chunk_t *cmt, unc_text &out_txt);
62
63
64 static bool kw_fcn_scope(chunk_t *cmt, unc_text &out_txt);
65
66
67 static bool kw_fcn_function(chunk_t *cmt, unc_text &out_txt);
68
69
70 /**
71 * Adds the javadoc-style @param and @return stuff, based on the params and
72 * return value for pc.
73 * If the arg list is '()' or '(void)', then no @params are added.
74 * Likewise, if the return value is 'void', then no @return is added.
75 */
76 static bool kw_fcn_javaparam(chunk_t *cmt, unc_text &out_txt);
77
78
79 static bool kw_fcn_fclass(chunk_t *cmt, unc_text &out_txt);
80
81
82 static bool kw_fcn_year(chunk_t *cmt, unc_text &out_txt);
83
84
85 /**
86 * Output a multiline comment without any reformatting other than shifting
87 * it left or right to get the column right.
88 *
89 * Trims trailing whitespaces.
90 */
91 static void output_comment_multi_simple(chunk_t *pc);
92
93
94 /**
95 * This renders the #if condition to a string buffer.
96 *
97 * @param[out] dst unc_text buffer to be filled
98 * @param[in] ifdef if conditional as chunk list
99 */
100 static void generate_if_conditional_as_text(unc_text &dst, chunk_t *ifdef);
101
102
103 /**
104 * Do keyword substitution on a comment.
105 * NOTE: it is assumed that a comment will contain at most one of each type
106 * of keyword.
107 */
108 static void do_kw_subst(chunk_t *pc);
109
110
111 //! All output text is sent here, one char at a time.
112 static void add_char(UINT32 ch, bool is_literal = false);
113
114
115 static void add_text(const char *ascii_text);
116
117
118 static void add_text(const unc_text &text, bool is_ignored, bool is_literal);
119
120
121 /**
122 * Count the number of characters to the end of the next chunk of text.
123 * If it exceeds the limit, return true.
124 */
125 static bool next_word_exceeds_limit(const unc_text &text, size_t idx);
126
127
128 /**
129 * Output a comment to the column using indent_with_tabs and
130 * indent_cmt_with_tabs as the rules.
131 * base_col is the indent of the first line of the comment.
132 * On the first line, column == base_col.
133 * On subsequent lines, column >= base_col.
134 *
135 * @param brace_col the brace-level indent of the comment
136 * @param base_col the indent of the start of the comment (multiline)
137 * @param column the column that we should end up in
138 */
139 static void cmt_output_indent(size_t brace_col, size_t base_col, size_t column);
140
141
142 /**
143 * Checks for and updates the lead chars.
144 *
145 * @param line the comment line
146 *
147 * @return 0: not present, >0: number of chars that are part of the lead
148 */
149 static size_t cmt_parse_lead(const unc_text &line, bool is_last);
150
151
152 /**
153 * Scans a multiline comment to determine the following:
154 * - the extra indent of the non-first line (0 or 1)
155 * - the continuation text ('' or '* ')
156 *
157 * The decision is based on:
158 * - cmt_indent_multi
159 * - cmt_star_cont
160 * - cmt_multi_first_len_minimum
161 * - the first line length
162 * - the second line leader length
163 * - the last line length (without leading space/tab)
164 *
165 * If the first and last line are the same length and don't contain any alnum
166 * chars and (the first line len > 2 or the second leader is the same as the
167 * first line length), then the indent is 0.
168 *
169 * If the leader on the second line is 1 wide or missing, then the indent is 1.
170 *
171 * Otherwise, the indent is 0.
172 *
 * @param cmt  The comment reflow state; cmt.xtra_indent is set to 0 or 1
 * @param str  The comment string
178 */
179 static void calculate_comment_body_indent(cmt_reflow &cmt, const unc_text &str);
180
181
182 static int next_up(const unc_text &text, size_t idx, const unc_text &tag);
183
184
185 /**
186 * Outputs the C comment at pc.
187 * C comment combining is done here
188 *
189 * @return the last chunk output'd
190 */
191 static chunk_t *output_comment_c(chunk_t *pc);
192
193
194 /**
195 * Outputs the CPP comment at pc.
196 * CPP comment combining is done here
197 *
198 * @return the last chunk output'd
199 */
200 static chunk_t *output_comment_cpp(chunk_t *pc);
201
202
203 static void cmt_trim_whitespace(unc_text &line, bool in_preproc);
204
205
206 /**
207 * Outputs a comment. The initial opening '//' may be included in the text.
208 * Subsequent openings (if combining comments), should not be included.
209 * The closing (for C/D comments) should not be included.
210 *
211 * TODO:
212 * If reflowing text, the comment should be added one word (or line) at a time.
213 * A newline should only be sent if a blank line is encountered or if the next
214 * line is indented beyond the current line (optional?).
215 * If the last char on a line is a ':' or '.', then the next line won't be
216 * combined.
217 */
218 static void add_comment_text(const unc_text &text, cmt_reflow &cmt, bool esc_close, size_t continuation_indent = 0);
219
220
221 static void output_cmt_start(cmt_reflow &cmt, chunk_t *pc);
222
223
224 /**
225 * Checks to see if the current comment can be combined with the next comment.
226 * The two can be combined if:
227 * 1. They are the same type
 * 2. There is exactly one newline between them
229 * 3. They are indented to the same level
230 */
231 static bool can_combine_comment(chunk_t *pc, cmt_reflow &cmt);
232
233
/**
 * Logs the current continuation text at level LCONTTEXT, tagged with the
 * calling function and line.
 * The macro reads 'cmt.cont_text', so it can only be used where a variable
 * named 'cmt' with a 'cont_text' member is in scope.
 */
#define LOG_CONTTEXT() \
   LOG_FMT(LCONTTEXT, "%s(%d): set cont_text to '%s'\n", __func__, __LINE__, cmt.cont_text.c_str())
236
237
add_spaces()238 static void add_spaces()
239 {
240 while (cpd.spaces > 0)
241 {
242 write_char(' ');
243 cpd.spaces--;
244 }
245 }
246
247
add_char(UINT32 ch,bool is_literal)248 static void add_char(UINT32 ch, bool is_literal)
249 {
250 // If we did a '\r' and it isn't followed by a '\n', then output a newline
251 if ( (cpd.last_char == '\r')
252 && (ch != '\n'))
253 {
254 write_string(cpd.newline);
255 cpd.column = 1;
256 cpd.did_newline = 1;
257 cpd.spaces = 0;
258 }
259
260 // convert a newline into the LF/CRLF/CR sequence
261 if (ch == '\n')
262 {
263 add_spaces();
264 write_string(cpd.newline);
265 cpd.column = 1;
266 cpd.did_newline = 1;
267 cpd.spaces = 0;
268 }
269 else if (ch == '\r') // do not output the CARRIAGERETURN
270 {
271 // do not output '\r'
272 cpd.column = 1;
273 cpd.did_newline = 1;
274 cpd.spaces = 0;
275 }
276 else if ( (ch == '\t')
277 && cpd.output_tab_as_space)
278 {
279 size_t endcol = next_tab_column(cpd.column);
280
281 while (cpd.column < endcol)
282 {
283 add_char(' ');
284 }
285 return;
286 }
287 else
288 {
289 // explicitly disallow a tab after a space
290 if ( !is_literal
291 && ch == '\t'
292 && cpd.last_char == ' ')
293 {
294 log_rule_B("indent_with_tabs");
295
296 if (options::indent_with_tabs() == 0)
297 {
298 size_t endcol = next_tab_column(cpd.column);
299
300 while (cpd.column < endcol)
301 {
302 add_char(' ');
303 }
304 return;
305 }
306 }
307
308 if ( (ch == ' ')
309 && !cpd.output_trailspace)
310 {
311 cpd.spaces++;
312 cpd.column++;
313 }
314 else
315 {
316 add_spaces();
317 write_char(ch);
318
319 if (ch == '\t')
320 {
321 cpd.column = next_tab_column(cpd.column);
322 }
323 else
324 {
325 cpd.column++;
326 }
327 }
328 }
329 cpd.last_char = ch;
330 } // add_char
331
332
add_text(const char * ascii_text)333 static void add_text(const char *ascii_text)
334 {
335 char ch;
336
337 while ((ch = *ascii_text) != 0)
338 {
339 ascii_text++;
340 add_char(ch);
341 }
342 }
343
344
add_text(const unc_text & text,bool is_ignored=false,bool is_literal=false)345 static void add_text(const unc_text &text, bool is_ignored = false, bool is_literal = false)
346 {
347 for (size_t idx = 0; idx < text.size(); idx++)
348 {
349 int ch = text[idx];
350
351 if (is_ignored)
352 {
353 write_char(ch);
354 }
355 else
356 {
357 add_char(ch, is_literal);
358 }
359 }
360 }
361
362
next_word_exceeds_limit(const unc_text & text,size_t idx)363 static bool next_word_exceeds_limit(const unc_text &text, size_t idx)
364 {
365 LOG_FMT(LCONTTEXT, "%s(%d): idx is %zu\n",
366 __func__, __LINE__, idx);
367 size_t length = 0;
368
369 // Count any whitespace
370 while ( (idx < text.size())
371 && unc_isspace(text[idx]))
372 {
373 idx++;
374 length++;
375 }
376
377 // Count non-whitespace
378 while ( (idx < text.size())
379 && !unc_isspace(text[idx]))
380 {
381 idx++;
382 length++;
383 }
384 return((cpd.column + length - 1) > options::cmt_width());
385 }
386
387
388 /**
389 * Advance to a specific column
390 * cpd.column is the current column
391 *
392 * @param column The column to advance to
393 */
output_to_column(size_t column,bool allow_tabs)394 static void output_to_column(size_t column, bool allow_tabs)
395 {
396 cpd.did_newline = 0;
397
398 if (allow_tabs)
399 {
400 // tab out as far as possible and then use spaces
401 size_t next_column = next_tab_column(cpd.column);
402
403 while (next_column <= column)
404 {
405 add_text("\t");
406 next_column = next_tab_column(cpd.column);
407 }
408 }
409
410 // space out the final bit
411 while (cpd.column < column)
412 {
413 add_text(" ");
414 }
415 }
416
417
cmt_output_indent(size_t brace_col,size_t base_col,size_t column)418 static void cmt_output_indent(size_t brace_col, size_t base_col, size_t column)
419 {
420 log_rule_B("indent_cmt_with_tabs");
421 log_rule_B("indent_with_tabs");
422 size_t iwt = options::indent_cmt_with_tabs() ? 2 :
423 (options::indent_with_tabs() ? 1 : 0);
424
425 size_t tab_col = (iwt == 0) ? 0 : ((iwt == 1) ? brace_col : base_col);
426
427 // LOG_FMT(LSYS, "%s(brace=%zd base=%zd col=%zd iwt=%zd) tab=%zd cur=%zd\n",
428 // __func__, brace_col, base_col, column, iwt, tab_col, cpd.column);
429
430 cpd.did_newline = 0;
431
432 if ( iwt == 2
433 || ( cpd.column == 1
434 && iwt == 1))
435 {
436 // tab out as far as possible and then use spaces
437 while (next_tab_column(cpd.column) <= tab_col)
438 {
439 add_text("\t");
440 }
441 }
442
443 // space out the rest
444 while (cpd.column < column)
445 {
446 add_text(" ");
447 }
448 } // cmt_output_indent
449
450
output_parsed(FILE * pfile,bool withOptions)451 void output_parsed(FILE *pfile, bool withOptions)
452 {
453 const char *eol_marker = get_eol_marker();
454
455 if (withOptions)
456 {
457 save_option_file(pfile, false, true);
458 }
459 fprintf(pfile, "# -=====-%s", eol_marker);
460 fprintf(pfile, "# number of loops = %d\n", cpd.changes);
461 fprintf(pfile, "# -=====-%s", eol_marker);
462 fprintf(pfile, "# language = %s\n", language_name_from_flags(cpd.lang_flags));
463 fprintf(pfile, "# -=====-%s", eol_marker);
464 // MAXLENGTHOFTHENAME must be consider at the format line at the file
465 // output.cpp, line 427: fprintf(pfile, "# Line Tag Parent...
466 // and 430: ... fprintf(pfile, "%s# %3zu>%19.19s[%19.19s] ...
467 // here xx xx xx xx
468 #ifdef WIN32
469 fprintf(pfile, "# Line Tag Parent_type Type of the parent Columns Br/Lvl/pp Nl Text");
470 #else // not WIN32
471 fprintf(pfile, "# Line Tag Parent_type Type of the parent Columns Br/Lvl/pp Flag Nl Text");
472 #endif // ifdef WIN32
473
474 for (chunk_t *pc = chunk_get_head(); pc != nullptr; pc = chunk_get_next(pc))
475 {
476 #ifdef WIN32
477 fprintf(pfile, "%s# %3d>%19.19s|%19.19s|%19.19s[%3d/%3d/%3d/%3d][%d/%d/%d][%d-%d]",
478 eol_marker, (int)pc->orig_line, get_token_name(pc->type),
479 get_token_name(get_chunk_parent_type(pc)), get_token_name(get_type_of_the_parent(pc)),
480 (int)pc->column, (int)pc->orig_col, (int)pc->orig_col_end, (int)pc->orig_prev_sp,
481 (int)pc->brace_level, (int)pc->level, (int)pc->pp_level, (int)pc->nl_count, pc->after_tab);
482 #else // not WIN32
483 fprintf(pfile, "%s# %3zu>%19.19s|%19.19s|%19.19s[%3zu/%3zu/%3zu/%3d][%zu/%zu/%zu]",
484 eol_marker, pc->orig_line, get_token_name(pc->type),
485 get_token_name(get_chunk_parent_type(pc)), get_token_name(get_type_of_the_parent(pc)),
486 pc->column, pc->orig_col, pc->orig_col_end, pc->orig_prev_sp,
487 pc->brace_level, pc->level, pc->pp_level);
488 fprintf(pfile, "[%11llx]",
489 static_cast<pcf_flags_t::int_t>(pc->flags));
490 fprintf(pfile, "[%zu-%d]",
491 pc->nl_count, pc->after_tab);
492 #endif // ifdef WIN32
493
494 if ( pc->type != CT_NEWLINE
495 && (pc->len() != 0))
496 {
497 for (size_t cnt = 0; cnt < pc->column; cnt++)
498 {
499 fprintf(pfile, " ");
500 }
501
502 if (pc->type != CT_NL_CONT)
503 {
504 fprintf(pfile, "%s", pc->text());
505 }
506 else
507 {
508 fprintf(pfile, "\\");
509 }
510 }
511 }
512
513 fprintf(pfile, "%s# -=====-%s", eol_marker, eol_marker);
514 fflush(pfile);
515 } // output_parsed
516
517
output_parsed_csv(FILE * pfile)518 void output_parsed_csv(FILE *pfile)
519 {
520 const char *eol_marker = get_eol_marker();
521
522 fprintf(pfile, "number of loops,%d,\n", cpd.changes);
523 fprintf(pfile, "language,%s,\n", language_name_from_flags(cpd.lang_flags));
524 fprintf(pfile, "Line,Tag,Parent_type,Type of the parent,Column,Orig Col Strt,"
525 "Orig Col End,Orig Sp Before,Br,Lvl,pp,Flags,Nl Before,Nl After,Text,");
526
527 for (chunk_t *pc = chunk_get_head(); pc != nullptr; pc = chunk_get_next(pc))
528 {
529 fprintf(pfile, "%s%zu,%s,%s,%s,%zu,%zu,%zu,%d,%zu,%zu,%zu,",
530 eol_marker, pc->orig_line, get_token_name(pc->type),
531 get_token_name(get_chunk_parent_type(pc)), get_token_name(get_type_of_the_parent(pc)),
532 pc->column, pc->orig_col, pc->orig_col_end, pc->orig_prev_sp,
533 pc->brace_level, pc->level, pc->pp_level);
534
535 auto pcf_flag_str = pcf_flags_str(pcf_flag_e(pc->flags));
536 #ifdef WIN32
537 auto pcf_flag_str_start = pcf_flag_str.find("[") + 1;
538 #else // not WIN32
539 auto pcf_flag_str_start = pcf_flag_str.find(":") + 1;
540 #endif // ifdef WIN32
541 auto pcf_flag_str_end = pcf_flag_str.find("]");
542 auto pcf_names = pcf_flag_str.substr(pcf_flag_str_start,
543 pcf_flag_str_end - pcf_flag_str_start);
544 fprintf(pfile, "\"%s\",", pcf_names.c_str());
545 fprintf(pfile, "%zu,%d,",
546 pc->nl_count, pc->after_tab);
547
548 if ( pc->type != CT_NEWLINE
549 && (pc->len() != 0))
550 {
551 fprintf(pfile, "\"");
552
553 for (size_t cnt = 0; cnt < pc->column; cnt++)
554 {
555 fprintf(pfile, " ");
556 }
557
558 if (pc->type != CT_NL_CONT)
559 {
560 for (auto *ch = pc->text(); *ch != '\0'; ++ch)
561 {
562 fprintf(pfile, "%c", *ch);
563
564 if (*ch == '"')
565 {
566 // need to escape the double-quote for csv-format
567 fprintf(pfile, "\"");
568 }
569 }
570 }
571 else
572 {
573 fprintf(pfile, "\\");
574 }
575 fprintf(pfile, "\"");
576 }
577 }
578
579 fflush(pfile);
580 } // output_parsed_csv
581
582
output_text(FILE * pfile)583 void output_text(FILE *pfile)
584 {
585 bool tracking = cpd.html_file != nullptr; // special for debugging
586
587 cpd.fout = pfile;
588 cpd.did_newline = 1;
589 cpd.column = 1;
590
591 if (cpd.bom)
592 {
593 write_bom();
594 }
595 chunk_t *pc;
596
597 if (cpd.frag_cols > 0)
598 {
599 size_t indent = cpd.frag_cols - 1;
600
601 // loop over the whole chunk list
602 for (pc = chunk_get_head(); pc != nullptr; pc = chunk_get_next(pc))
603 {
604 pc->column += indent;
605 pc->column_indent += indent;
606 }
607
608 cpd.frag_cols = 0;
609 }
610
611 if (tracking)
612 {
613 add_text("<html>\n");
614 add_text("<head>\n");
615 add_text(" <meta http-equiv=\"content-type\" content=\"text/html; charset=utf-8\"/>\n");
616 add_text(" <title>Uncrustify: where do the Spaces options work</title>\n");
617 add_text("</head>\n");
618 add_text("<body lang=\"en-US\">\n");
619 add_text("<p>\n");
620 add_text("</p>\n");
621 add_text("<pre>\n");
622 }
623 bool write_in_tracking = false;
624
625 // loop over the whole chunk list
626 for (pc = chunk_get_head(); pc != nullptr; pc = chunk_get_next(pc))
627 {
628 char copy[1000];
629 LOG_FMT(LCONTTEXT, "%s(%d): text() is '%s', type is %s, orig_line is %zu, column is %zu, nl is %zu\n",
630 __func__, __LINE__, pc->elided_text(copy), get_token_name(pc->type), pc->orig_line, pc->column, pc->nl_count);
631 log_rule_B("cmt_convert_tab_to_spaces");
632 cpd.output_tab_as_space = ( options::cmt_convert_tab_to_spaces()
633 && chunk_is_comment(pc));
634
635 if (chunk_is_token(pc, CT_NEWLINE))
636 {
637 for (size_t cnt = 0; cnt < pc->nl_count; cnt++)
638 {
639 if ( cnt > 0
640 && pc->nl_column > 1)
641 {
642 log_rule_B("indent_with_tabs");
643 output_to_column(pc->nl_column, (options::indent_with_tabs() == 2));
644 }
645 add_char('\n');
646 }
647
648 cpd.did_newline = 1;
649 cpd.column = 1;
650 LOG_FMT(LOUTIND, " xx\n");
651 }
652 else if (chunk_is_token(pc, CT_NL_CONT))
653 {
654 // FIXME: this really shouldn't be done here!
655 if (!pc->flags.test(PCF_WAS_ALIGNED))
656 {
657 // Add or remove space before a backslash-newline at the end of a line.
658 log_rule_B("sp_before_nl_cont");
659
660 if (options::sp_before_nl_cont() & IARF_REMOVE)
661 {
662 log_rule_B("sp_before_nl_cont");
663 pc->column = cpd.column + (options::sp_before_nl_cont() == IARF_FORCE);
664 }
665 else
666 {
667 // Try to keep the same relative spacing
668 chunk_t *prev = chunk_get_prev(pc);
669
670 if (chunk_is_token(prev, CT_PP_IGNORE))
671 {
672 /*
673 * Want to completely leave alone PP_IGNORE'd blocks because
674 * they likely have special column aligned newline
675 * continuations (common in multiline macros)
676 */
677 pc->column = pc->orig_col;
678 }
679 else
680 {
681 // Try to keep the same relative spacing
682 while ( prev != nullptr
683 && prev->orig_col == 0
684 && prev->nl_count == 0)
685 {
686 prev = chunk_get_prev(prev);
687 }
688
689 if ( prev != nullptr
690 && prev->nl_count == 0)
691 {
692 int orig_sp = (pc->orig_col - prev->orig_col_end);
693
694 if ((int)(cpd.column + orig_sp) < 0)
695 {
696 #ifdef WIN32
697 fprintf(stderr, "FATAL: negative value.\n pc->orig_col is %d, prev->orig_col_end is %d\n",
698 (int)pc->orig_col, (int)prev->orig_col_end);
699 #else // not WIN32
700 fprintf(stderr, "FATAL: negative value.\n pc->orig_col is %zu, prev->orig_col_end is %zu\n",
701 pc->orig_col, prev->orig_col_end);
702 #endif // ifdef WIN32
703 log_flush(true);
704 exit(EX_SOFTWARE);
705 }
706 pc->column = cpd.column + orig_sp;
707
708 // Add or remove space before a backslash-newline at the end of a line.
709 log_rule_B("sp_before_nl_cont");
710
711 if ( (options::sp_before_nl_cont() != IARF_IGNORE)
712 && (pc->column < (cpd.column + 1)))
713 {
714 pc->column = cpd.column + 1;
715 }
716 }
717 }
718 }
719 output_to_column(pc->column, false);
720 }
721 else
722 {
723 log_rule_B("indent_with_tabs");
724 output_to_column(pc->column, (options::indent_with_tabs() == 2));
725 }
726 add_char('\\');
727 add_char('\n');
728 cpd.did_newline = 1;
729 cpd.column = 1;
730 LOG_FMT(LOUTIND, " \\xx\n");
731 }
732 else if (chunk_is_token(pc, CT_COMMENT_MULTI))
733 {
734 log_rule_B("cmt_indent_multi");
735
736 if (options::cmt_indent_multi())
737 {
738 output_comment_multi(pc);
739 }
740 else
741 {
742 output_comment_multi_simple(pc);
743 }
744 }
745 else if (chunk_is_token(pc, CT_COMMENT_CPP))
746 {
747 bool tmp = cpd.output_trailspace;
748 /*
749 * keep trailing spaces if they are still present in a chunk;
750 * note that tokenize() already strips spaces in comments,
751 * so if they made it up to here, they are to stay
752 */
753 cpd.output_trailspace = true;
754 pc = output_comment_cpp(pc);
755 cpd.output_trailspace = tmp;
756 }
757 else if (chunk_is_token(pc, CT_COMMENT))
758 {
759 pc = output_comment_c(pc);
760 }
761 else if ( chunk_is_token(pc, CT_JUNK)
762 || chunk_is_token(pc, CT_IGNORED))
763 {
764 LOG_FMT(LOUTIND, "%s(%d): orig_line is %zu, orig_col is %zu,\npc->text() >%s<, pc->str.size() is %zu\n",
765 __func__, __LINE__, pc->orig_line, pc->orig_col, pc->text(), pc->str.size());
766 // do not adjust the column for junk
767 add_text(pc->str, true);
768 }
769 else if (pc->len() == 0)
770 {
771 // don't do anything for non-visible stuff
772 LOG_FMT(LOUTIND, "%s(%d): orig_line is %zu, column is %zu, non-visible stuff: type is %s\n",
773 __func__, __LINE__, pc->orig_line, pc->column, get_token_name(pc->type));
774 }
775 else
776 {
777 bool allow_tabs;
778 cpd.output_trailspace = (chunk_is_token(pc, CT_STRING_MULTI));
779
780 // indent to the 'level' first
781 if (cpd.did_newline)
782 {
783 log_rule_B("indent_with_tabs");
784
785 if (options::indent_with_tabs() == 1)
786 {
787 size_t lvlcol;
788
789 /*
790 * FIXME: it would be better to properly set column_indent in
791 * indent_text(), but this hack for '}' and '#' seems to work.
792 */
793 if ( chunk_is_token(pc, CT_BRACE_CLOSE)
794 || chunk_is_token(pc, CT_CASE_COLON)
795 || chunk_is_token(pc, CT_PREPROC))
796 {
797 lvlcol = pc->column;
798 }
799 else
800 {
801 lvlcol = pc->column_indent;
802
803 if (lvlcol > pc->column)
804 {
805 lvlcol = pc->column;
806 }
807 }
808
809 if (lvlcol > 1)
810 {
811 output_to_column(lvlcol, true);
812 }
813 }
814 log_rule_B("indent_with_tabs");
815 allow_tabs = (options::indent_with_tabs() == 2)
816 || ( chunk_is_comment(pc)
817 && options::indent_with_tabs() != 0);
818
819 LOG_FMT(LOUTIND, "%s(%d): orig_line is %zu, column is %zu, column_indent is %zu, cpd.column is %zu\n",
820 __func__, __LINE__, pc->orig_line, pc->column, pc->column_indent, cpd.column);
821 }
822 else
823 {
824 /*
825 * Reformatting multi-line comments can screw up the column.
826 * Make sure we don't mess up the spacing on this line.
827 * This has to be done here because comments are not formatted
828 * until the output phase.
829 */
830 if (pc->column < cpd.column)
831 {
832 reindent_line(pc, cpd.column);
833 }
834 // not the first item on a line
835 chunk_t *prev = chunk_get_prev(pc);
836 log_rule_B("align_with_tabs");
837 allow_tabs = ( options::align_with_tabs()
838 && pc->flags.test(PCF_WAS_ALIGNED)
839 && ((prev->column + prev->len() + 1) != pc->column));
840
841 log_rule_B("align_keep_tabs");
842
843 if (options::align_keep_tabs())
844 {
845 allow_tabs |= pc->after_tab;
846 }
847 LOG_FMT(LOUTIND, "%s(%d): at column %zu(%s)\n",
848 __func__, __LINE__, pc->column, (allow_tabs ? "true" : "FALSE"));
849 }
850 output_to_column(pc->column, allow_tabs);
851
852 if (write_in_tracking)
853 {
854 if (chunk_is_token(pc, CT_ANGLE_OPEN))
855 {
856 add_text("<", false, false);
857 }
858 else if (chunk_is_token(pc, CT_ANGLE_CLOSE))
859 {
860 add_text(">", false, false);
861 }
862 else
863 {
864 add_text(pc->str, false, chunk_is_token(pc, CT_STRING));
865 }
866 write_in_tracking = false;
867 }
868 else
869 {
870 add_text(pc->str, false, chunk_is_token(pc, CT_STRING));
871 }
872
873 if (chunk_is_token(pc, CT_PP_DEFINE)) // Issue #876
874 {
875 // If true, a <TAB> is inserted after #define.
876 log_rule_B("force_tab_after_define");
877
878 if (options::force_tab_after_define())
879 {
880 add_char('\t');
881 }
882 }
883 cpd.did_newline = chunk_is_newline(pc);
884 cpd.output_trailspace = false;
885 }
886
887 if (pc->tracking != nullptr)
888 {
889 LOG_FMT(LGUY, " Tracking info are: \n");
890 LOG_FMT(LGUY, " number of track(s) %zu\n", pc->tracking->size());
891 add_text("<a title=\"");
892 char tempText[80];
893
894 for (size_t track = 0; track < pc->tracking->size(); track++)
895 {
896 track_list *A = pc->tracking;
897 Track_nr B = A->at(track);
898 size_t Bfirst = B.first;
899 char *Bsecond = B.second;
900
901 sprintf(tempText, "%zu", Bfirst);
902 add_text(tempText);
903 add_text(",");
904
905 if (track == pc->tracking->size() - 1)
906 {
907 sprintf(tempText, "%s", Bsecond);
908 add_text(tempText);
909 }
910 LOG_FMT(LGUY, " %zu, tracking number is %zu\n", track, Bfirst);
911 LOG_FMT(LGUY, " %zu, rule is %s\n", track, Bsecond);
912 }
913
914 add_text("\"><font color=\"red\">M</font></a>");
915 write_in_tracking = true;
916 }
917 }
918
919 if (tracking)
920 {
921 add_text("</pre>\n");
922 add_text("</body>\n");
923 add_text("</html>\n");
924 }
925 } // output_text
926
927
dump_step(const char * filename,const char * step_description)928 void dump_step(const char *filename, const char *step_description)
929 {
930 static int file_num = 0;
931 char buffer[256];
932 FILE *dump_file;
933
934 if ( filename == nullptr
935 || strlen(filename) == 0)
936 {
937 return;
938 }
939
940 // On the first call, also save the options in use
941 if (file_num == 0)
942 {
943 snprintf(buffer, 256, "New dump file: %s_%03d.log - Options in use", filename, file_num);
944 log_rule_B(buffer);
945
946 snprintf(buffer, 256, "%s_%03d.log", filename, file_num);
947 ++file_num;
948
949 dump_file = fopen(buffer, "wb");
950
951 if (dump_file != nullptr)
952 {
953 save_option_file(dump_file, false, true);
954 fclose(dump_file);
955 }
956 }
957 snprintf(buffer, 256, "New dump file: %s_%03d.log - %s", filename, file_num, step_description);
958 log_rule_B(buffer);
959
960 snprintf(buffer, 256, "%s_%03d.log", filename, file_num);
961 ++file_num;
962
963 dump_file = fopen(buffer, "wb");
964
965 if (dump_file != nullptr)
966 {
967 fprintf(dump_file, "STEP: %s\n--------------\n", step_description);
968 output_parsed(dump_file, false);
969 fclose(dump_file);
970 }
971 } // dump_step
972
973
cmt_parse_lead(const unc_text & line,bool is_last)974 static size_t cmt_parse_lead(const unc_text &line, bool is_last)
975 {
976 size_t len = 0;
977
978 while ( len < 32
979 && len < line.size()) // TODO what is the meaning of 32?
980 {
981 if ( len > 0
982 && line[len] == '/')
983 {
984 // ignore combined comments
985 size_t tmp = len + 1;
986
987 while ( tmp < line.size()
988 && unc_isspace(line[tmp]))
989 {
990 tmp++;
991 }
992
993 if ( tmp < line.size()
994 && line[tmp] == '/')
995 {
996 return(1);
997 }
998 break;
999 }
1000 else if (strchr("*|\\#+", line[len]) == nullptr)
1001 {
1002 break; // none of the characters '*|\#+' found in line
1003 }
1004 len++;
1005 }
1006
1007 if (len > 30) // TODO: what is the meaning of 30?
1008 {
1009 return(1);
1010 }
1011
1012 if ( len > 0
1013 && ( len >= line.size()
1014 || unc_isspace(line[len])))
1015 {
1016 return(len);
1017 }
1018
1019 if ( len == 1
1020 && line[0] == '*')
1021 {
1022 return(len);
1023 }
1024
1025 if ( is_last
1026 && len > 0)
1027 {
1028 return(len);
1029 }
1030 return(0);
1031 } // cmt_parse_lead
1032
1033
/**
 * Skips whitespace within a single line, walking either forward or backward
 * from the given index. Newline characters ('\n', '\r') are not skipped.
 *
 * @param str      the input string containing the comment text
 * @param idx      the starting index
 * @param forward  if true, walks toward the end of the string;
 *                 if false, walks toward the start of the string
 *
 * @return the first index at which a non-whitespace character or a newline
 *         character is found; str.size() (forward) or -1 (reverse) if the
 *         string is exhausted first
 */
template<typename String>
static int eat_line_whitespace(const String &str,
                               int idx, bool
                               forward = true)
{
   const int step = forward ? 1 : -1;

   for ( ; ; idx += step)
   {
      // NOTE: keep both comparisons parenthesized; uncrustify itself used to
      // mis-format the unparenthesized form as a template.
      const bool in_range = forward ? (idx < int(str.size())) : (idx >= 0);

      if (!in_range)
      {
         break;
      }
      const auto ch = str[idx];

      if (  ch == '\n'
         || ch == '\r'
         || !unc_isspace(ch))
      {
         break;
      }
   }

   return(idx);
} // eat_line_whitespace
1072
1073
/**
 * Tells whether the position idx is the first real text of its comment line,
 * i.e. whether everything between the start of the line and idx consists of
 * whitespace and/or '*' characters only.
 *
 * @param str  the input string containing the comment text
 * @param idx  the index to check; if it points at an '@', the check starts
 *             with the character before it
 *
 * @return true/false
 */
template<typename String>
static bool javaparam_tag_is_start_of_line(const String &str, int idx)
{
   int pos = (str[idx] == '@') ? (idx - 1) : idx;

   pos = eat_line_whitespace(str, pos, false);

   // walk backward over any mix of stars and whitespace
   while (  pos >= 0
         && str[pos] == '*')
   {
      pos = eat_line_whitespace(str, pos - 1, false);
   }

   // only the start of the string or a line break may precede the tag
   return(  pos < 0
         || str[pos] == '\n'
         || str[pos] == '\r');
} // javaparam_tag_is_start_of_line
1110
1111
1112 /**
1113 * Attempts to match a doxygen/javadoc-style comment tag
1114 * @param str the input string containing the comment text
1115 * @param idx the starting index
1116 * @return the index of the character immediately following the matched tag,
1117 * or -1 if no match is found
1118 */
match_doxygen_javadoc_tag(const std::wstring & str,size_t idx)1119 static int match_doxygen_javadoc_tag(const std::wstring &str, size_t idx)
1120 {
1121 std::wsmatch match;
1122
1123 if (str[idx] == L'@')
1124 {
1125 std::wregex criteria(L"(@(?:author|"
1126 L"deprecated|"
1127 L"exception|"
1128 L"param(?:\\s*\\[\\s*(?:in\\s*,\\s*out|in|out)\\s*\\])?|"
1129 L"return|"
1130 L"see|"
1131 L"since|"
1132 L"throws|"
1133 L"version)\\b)");
1134
1135 if ( std::regex_search(str.cbegin() + idx, str.cend(), match, criteria)
1136 && match[1].matched
1137 && match.position(1) == std::wsmatch::difference_type(0))
1138 {
1139 std::set<std::wstring> block_tags =
1140 {
1141 L"@author",
1142 L"@deprecated",
1143 L"@exception",
1144 L"@param",
1145 L"@param[in]",
1146 L"@param[in,out]",
1147 L"@param[out]",
1148 L"@return",
1149 L"@see",
1150 L"@since",
1151 L"@throws",
1152 L"@version"
1153 };
1154 std::wstring result(match[1]);
1155 result.erase(std::remove_if(result.begin(), result.end(), ::isspace), result.end());
1156 auto &&it_block_tag = block_tags.find(result);
1157
1158 if ( it_block_tag != block_tags.end()
1159 && javaparam_tag_is_start_of_line(str, idx))
1160 {
1161 return(int(idx + match[1].length()));
1162 }
1163 }
1164 }
1165 return(-1);
1166 } // match_javadoc_block_tag
1167
1168
calculate_doxygen_javadoc_indent_alignment(const std::wstring & str,size_t & doxygen_javadoc_param_name_indent,size_t & doxygen_javadoc_continuation_indent)1169 static void calculate_doxygen_javadoc_indent_alignment(const std::wstring &str,
1170 size_t &doxygen_javadoc_param_name_indent,
1171 size_t &doxygen_javadoc_continuation_indent)
1172 {
1173 log_rule_B("cmt_align_doxygen_javadoc_tags");
1174
1175 doxygen_javadoc_continuation_indent = 0;
1176 doxygen_javadoc_param_name_indent = 0;
1177
1178 if (!options::cmt_align_doxygen_javadoc_tags())
1179 {
1180 return;
1181 }
1182
1183 for (size_t idx = 0; idx < str.size(); ++idx)
1184 {
1185 int start_idx = idx;
1186 int end_idx = match_doxygen_javadoc_tag(str, start_idx);
1187
1188 if (end_idx > start_idx)
1189 {
1190 size_t block_tag_width = 1 + std::count_if(str.begin() + start_idx,
1191 str.begin() + end_idx,
1192 [](wchar_t ch) {
1193 return(!unc_isspace(ch));
1194 });
1195
1196 if (block_tag_width > doxygen_javadoc_param_name_indent)
1197 {
1198 doxygen_javadoc_param_name_indent = block_tag_width;
1199 }
1200 idx = eat_line_whitespace(str, end_idx);
1201
1202 size_t param_name_width = 0;
1203
1204 if (str.find(L"@param", start_idx) == size_t(start_idx))
1205 {
1206 param_name_width = 1;
1207
1208 while (true)
1209 {
1210 while ( !unc_isspace(str[idx])
1211 && str[idx] != ',')
1212 {
1213 ++param_name_width;
1214 ++idx;
1215 }
1216 idx = eat_line_whitespace(str, idx);
1217
1218 if (str[idx] != ',')
1219 {
1220 break;
1221 }
1222 param_name_width += 2;
1223 idx = eat_line_whitespace(str, idx + 1);
1224 }
1225 }
1226
1227 if (param_name_width > doxygen_javadoc_continuation_indent)
1228 {
1229 doxygen_javadoc_continuation_indent = param_name_width;
1230 }
1231 }
1232 }
1233
1234 if (doxygen_javadoc_param_name_indent > 0)
1235 {
1236 log_rule_B("cmt_sp_before_doxygen_javadoc_tags");
1237
1238 doxygen_javadoc_param_name_indent += options::cmt_sp_before_doxygen_javadoc_tags();
1239 doxygen_javadoc_continuation_indent += doxygen_javadoc_param_name_indent;
1240 }
1241 } // calculate_doxygen_javadoc_indent_alignment
1242
1243
calculate_comment_body_indent(cmt_reflow & cmt,const unc_text & str)1244 static void calculate_comment_body_indent(cmt_reflow &cmt, const unc_text &str)
1245 {
1246 cmt.xtra_indent = 0;
1247
1248 log_rule_B("cmt_indent_multi");
1249
1250 if (!options::cmt_indent_multi())
1251 {
1252 return;
1253 }
1254 size_t idx = 0;
1255 size_t len = str.size();
1256 size_t last_len = 0;
1257
1258 log_rule_B("cmt_multi_check_last");
1259
1260 if (options::cmt_multi_check_last())
1261 {
1262 // find the last line length
1263 for (idx = len - 1; idx > 0; idx--)
1264 {
1265 if ( str[idx] == '\n'
1266 || str[idx] == '\r')
1267 {
1268 idx++;
1269
1270 while ( idx < len
1271 && ( str[idx] == ' '
1272 || str[idx] == '\t'))
1273 {
1274 idx++;
1275 }
1276 last_len = len - idx;
1277 break;
1278 }
1279 }
1280 }
1281 // find the first line length
1282 size_t first_len = 0;
1283
1284 for (idx = 0; idx < len; idx++)
1285 {
1286 if ( str[idx] == '\n'
1287 || str[idx] == '\r')
1288 {
1289 first_len = idx;
1290
1291 while ( str[first_len - 1] == ' '
1292 || str[first_len - 1] == '\t')
1293 {
1294 if (first_len == 0)
1295 {
1296 fprintf(stderr, "%s(%d): first_len is ZERO, cannot be decremented.\n",
1297 __func__, __LINE__);
1298 log_flush(true);
1299 exit(EX_SOFTWARE);
1300 }
1301 first_len--;
1302 }
1303
1304 // handle DOS endings
1305 if ( str[idx] == '\r'
1306 && str[idx + 1] == '\n')
1307 {
1308 idx++;
1309 }
1310 idx++;
1311 break;
1312 }
1313 }
1314
1315 // Scan the second line
1316 size_t width = 0;
1317
1318 for ( ; idx < len - 1; idx++)
1319 {
1320 if ( str[idx] == ' '
1321 || str[idx] == '\t')
1322 {
1323 if (width > 0)
1324 {
1325 break;
1326 }
1327 continue;
1328 }
1329
1330 if ( str[idx] == '\n'
1331 || str[idx] == '\r')
1332 {
1333 break; // Done with second line
1334 }
1335
1336 // Count the leading chars
1337 if ( str[idx] == '*'
1338 || str[idx] == '|'
1339 || str[idx] == '\\'
1340 || str[idx] == '#'
1341 || str[idx] == '+')
1342 {
1343 width++;
1344 }
1345 else
1346 {
1347 if ( width != 1
1348 || str[idx - 1] != '*')
1349 {
1350 width = 0;
1351 }
1352 break;
1353 }
1354 }
1355
1356 // LOG_FMT(LSYS, "%s: first=%d last=%d width=%d\n", __func__, first_len, last_len, width);
1357
1358 /*
1359 * If the first and last line are the same length and don't contain any
1360 * alphanumeric chars and (the first line len > cmt_multi_first_len_minimum
1361 * or the second leader is the same as the first line length), then the
1362 * indent is 0.
1363 */
1364 log_rule_B("cmt_multi_first_len_minimum");
1365
1366 if ( first_len == last_len
1367 && ( first_len > options::cmt_multi_first_len_minimum()
1368 || first_len == width))
1369 {
1370 return;
1371 }
1372 cmt.xtra_indent = (width == 2) ? 0 : 1;
1373 } // calculate_comment_body_indent
1374
1375
1376 // TODO: can we use search_next_chunk here?
get_next_function(chunk_t * pc)1377 static chunk_t *get_next_function(chunk_t *pc)
1378 {
1379 while ((pc = chunk_get_next(pc)) != nullptr)
1380 {
1381 if ( chunk_is_token(pc, CT_FUNC_DEF)
1382 || chunk_is_token(pc, CT_FUNC_PROTO)
1383 || chunk_is_token(pc, CT_FUNC_CLASS_DEF)
1384 || chunk_is_token(pc, CT_FUNC_CLASS_PROTO)
1385 || chunk_is_token(pc, CT_OC_MSG_DECL))
1386 {
1387 return(pc);
1388 }
1389 }
1390 return(nullptr);
1391 }
1392
1393
get_next_class(chunk_t * pc)1394 static chunk_t *get_next_class(chunk_t *pc)
1395 {
1396 return(chunk_get_next(chunk_search_next_cat(pc, CT_CLASS)));
1397 }
1398
1399
get_prev_category(chunk_t * pc)1400 static chunk_t *get_prev_category(chunk_t *pc)
1401 {
1402 return(chunk_search_prev_cat(pc, CT_OC_CATEGORY));
1403 }
1404
1405
get_next_scope(chunk_t * pc)1406 static chunk_t *get_next_scope(chunk_t *pc)
1407 {
1408 return(chunk_search_next_cat(pc, CT_OC_SCOPE));
1409 }
1410
1411
get_prev_oc_class(chunk_t * pc)1412 static chunk_t *get_prev_oc_class(chunk_t *pc)
1413 {
1414 return(chunk_search_prev_cat(pc, CT_OC_CLASS));
1415 }
1416
1417
next_up(const unc_text & text,size_t idx,const unc_text & tag)1418 static int next_up(const unc_text &text, size_t idx, const unc_text &tag)
1419 {
1420 size_t offs = 0;
1421
1422 while ( idx < text.size()
1423 && unc_isspace(text[idx]))
1424 {
1425 idx++;
1426 offs++;
1427 }
1428
1429 if (text.startswith(tag, idx))
1430 {
1431 return(offs);
1432 }
1433 return(-1);
1434 }
1435
1436
add_comment_text(const unc_text & text,cmt_reflow & cmt,bool esc_close,size_t continuation_indent)1437 static void add_comment_text(const unc_text &text,
1438 cmt_reflow &cmt,
1439 bool esc_close,
1440 size_t continuation_indent)
1441 {
1442 bool was_star = false;
1443 bool was_slash = false;
1444 bool in_word = false;
1445 size_t len = text.size();
1446 size_t ch_cnt = 0; // chars since newline
1447
1448 // If the '//' is included write it first else we may wrap an empty line
1449 size_t idx = 0;
1450
1451 if (text.startswith("//"))
1452 {
1453 add_text("//");
1454 idx += 2;
1455
1456 while (unc_isspace(text[idx]))
1457 {
1458 add_char(text[idx++]);
1459 }
1460 }
1461
1462 for ( ; idx < len; idx++) // TODO: avoid modifying idx in loop
1463 {
1464 // Split the comment
1465 if (text[idx] == '\n')
1466 {
1467 in_word = false;
1468 add_char('\n');
1469 cmt_output_indent(cmt.brace_col, cmt.base_col, cmt.column);
1470
1471 if (cmt.xtra_indent > 0)
1472 {
1473 add_char(' ');
1474 }
1475 // hack to get escaped newlines to align and not duplicate the leading '//'
1476 int tmp = next_up(text, idx + 1, "//");
1477
1478 if (tmp < 0)
1479 {
1480 add_text(cmt.cont_text);
1481 }
1482 else
1483 {
1484 idx += tmp;
1485 }
1486 ch_cnt = 0;
1487 }
1488 else if ( cmt.reflow
1489 && text[idx] == ' '
1490 && options::cmt_width() > 0
1491 && ( cpd.column > options::cmt_width()
1492 || ( ch_cnt > 1
1493 && next_word_exceeds_limit(text, idx))))
1494 {
1495 log_rule_B("cmt_width");
1496 in_word = false;
1497 add_char('\n');
1498 cmt_output_indent(cmt.brace_col, cmt.base_col, cmt.column);
1499
1500 if (cmt.xtra_indent > 0)
1501 {
1502 add_char(' ');
1503 }
1504 // The number of spaces to insert after the star on subsequent comment lines.
1505 log_rule_B("cmt_sp_after_star_cont");
1506
1507 /**
1508 * calculate the output column
1509 */
1510 size_t column = options::cmt_sp_after_star_cont();
1511
1512 if ( text[idx + 1] == 42 // this is star *
1513 && text[idx + 2] == 47) // this is /
1514 {
1515 LOG_FMT(LCONTTEXT, "%s(%d): we have a comment end\n",
1516 __func__, __LINE__);
1517
1518 column += cmt.column;
1519 }
1520 else
1521 {
1522 add_text(cmt.cont_text);
1523
1524 if (continuation_indent > 0)
1525 {
1526 if (options::cmt_align_doxygen_javadoc_tags())
1527 {
1528 log_rule_B("cmt_align_doxygen_javadoc_tags");
1529 }
1530 else if (options::cmt_reflow_indent_to_paragraph_start())
1531 {
1532 log_rule_B("cmt_reflow_indent_to_paragraph_start");
1533 }
1534 column += continuation_indent;
1535
1536 log_rule_B("cmt_sp_after_star_cont");
1537
1538 if (column >= options::cmt_sp_after_star_cont())
1539 {
1540 column -= options::cmt_sp_after_star_cont();
1541 }
1542 }
1543 /**
1544 * count the number trailing spaces in the comment continuation text
1545 */
1546 size_t num_trailing_sp = 0;
1547
1548 while ( num_trailing_sp < cmt.cont_text.size()
1549 && unc_isspace(cmt.cont_text[cmt.cont_text.size() - 1 - num_trailing_sp]))
1550 {
1551 ++num_trailing_sp;
1552 }
1553 column += cpd.column;
1554
1555 if (column >= num_trailing_sp)
1556 {
1557 column -= num_trailing_sp;
1558 }
1559 }
1560 output_to_column(column,
1561 false);
1562 ch_cnt = 0;
1563 }
1564 else
1565 {
1566 // Escape a C closure in a CPP comment
1567 if ( esc_close
1568 && ( ( was_star
1569 && text[idx] == '/')
1570 || ( was_slash
1571 && text[idx] == '*')))
1572 {
1573 add_char(' ');
1574 }
1575
1576 if ( !in_word
1577 && !unc_isspace(text[idx]))
1578 {
1579 cmt.word_count++;
1580 }
1581 in_word = !unc_isspace(text[idx]);
1582
1583 add_char(text[idx]);
1584 was_star = (text[idx] == '*');
1585 was_slash = (text[idx] == '/');
1586 ch_cnt++;
1587 }
1588 }
1589 } // add_comment_text
1590
1591
output_cmt_start(cmt_reflow & cmt,chunk_t * pc)1592 static void output_cmt_start(cmt_reflow &cmt, chunk_t *pc)
1593 {
1594 cmt.pc = pc;
1595 cmt.column = pc->column;
1596 cmt.brace_col = pc->column_indent;
1597 cmt.base_col = pc->column_indent;
1598 cmt.word_count = 0;
1599 cmt.xtra_indent = 0;
1600 cmt.cont_text.clear();
1601 cmt.reflow = false;
1602
1603 // Issue #2752
1604 log_rule_B("cmt_insert_file_header");
1605 log_rule_B("cmt_insert_file_footer");
1606 log_rule_B("cmt_insert_func_header)");
1607 log_rule_B("cmt_insert_class_header");
1608 log_rule_B("cmt_insert_oc_msg_header");
1609
1610 if ( options::cmt_insert_file_header().size() > 0
1611 || options::cmt_insert_file_footer().size() > 0
1612 || options::cmt_insert_func_header().size() > 0
1613 || options::cmt_insert_class_header().size() > 0
1614 || options::cmt_insert_oc_msg_header().size() > 0)
1615 {
1616 LOG_FMT(LCONTTEXT, "%s(%d): cmt_insert_file\n", __func__, __LINE__);
1617 do_kw_subst(pc);
1618 }
1619 else
1620 {
1621 LOG_FMT(LCONTTEXT, "%s(%d): no cmt_insert_file\n", __func__, __LINE__);
1622 }
1623
1624 if (cmt.brace_col == 0)
1625 {
1626 log_rule_B("output_tab_size");
1627 cmt.brace_col = 1 + (pc->brace_level * options::output_tab_size());
1628 }
1629 // LOG_FMT(LSYS, "%s: line %zd, brace=%zd base=%zd col=%zd orig=%zd aligned=%x\n",
1630 // __func__, pc->orig_line, cmt.brace_col, cmt.base_col, cmt.column, pc->orig_col,
1631 // pc->flags & (PCF_WAS_ALIGNED | PCF_RIGHT_COMMENT));
1632
1633 if ( get_chunk_parent_type(pc) == CT_COMMENT_START
1634 || get_chunk_parent_type(pc) == CT_COMMENT_WHOLE)
1635 {
1636 log_rule_B("indent_col1_comment");
1637
1638 if ( !options::indent_col1_comment()
1639 && pc->orig_col == 1
1640 && !pc->flags.test(PCF_INSERTED))
1641 {
1642 cmt.column = 1;
1643 cmt.base_col = 1;
1644 cmt.brace_col = 1;
1645 }
1646 }
1647 // tab aligning code
1648 log_rule_B("indent_cmt_with_tabs");
1649
1650 if ( options::indent_cmt_with_tabs()
1651 && ( get_chunk_parent_type(pc) == CT_COMMENT_END
1652 || get_chunk_parent_type(pc) == CT_COMMENT_WHOLE))
1653 {
1654 cmt.column = align_tab_column(cmt.column - 1);
1655 // LOG_FMT(LSYS, "%s: line %d, orig:%d new:%d\n",
1656 // __func__, pc->orig_line, pc->column, cmt.column);
1657 pc->column = cmt.column;
1658 }
1659 cmt.base_col = cmt.column;
1660
1661 // LOG_FMT(LSYS, "%s: -- brace=%d base=%d col=%d\n",
1662 // __func__, cmt.brace_col, cmt.base_col, cmt.column);
1663
1664 // Bump out to the column
1665 cmt_output_indent(cmt.brace_col, cmt.base_col, cmt.column);
1666 } // output_cmt_start
1667
1668
can_combine_comment(chunk_t * pc,cmt_reflow & cmt)1669 static bool can_combine_comment(chunk_t *pc, cmt_reflow &cmt)
1670 {
1671 // We can't combine if there is something other than a newline next
1672 if (get_chunk_parent_type(pc) == CT_COMMENT_START)
1673 {
1674 return(false);
1675 }
1676 // next is a newline for sure, make sure it is a single newline
1677 chunk_t *next = chunk_get_next(pc);
1678
1679 if ( next != nullptr
1680 && next->nl_count == 1)
1681 {
1682 // Make sure the comment is the same type at the same column
1683 next = chunk_get_next(next);
1684
1685 if ( chunk_is_token(next, pc->type)
1686 && ( ( next->column == 1
1687 && pc->column == 1)
1688 || ( next->column == cmt.base_col
1689 && pc->column == cmt.base_col)
1690 || ( next->column > cmt.base_col
1691 && get_chunk_parent_type(pc) == CT_COMMENT_END)))
1692 {
1693 return(true);
1694 }
1695 }
1696 return(false);
1697 } // can_combine_comment
1698
1699
output_comment_c(chunk_t * first)1700 static chunk_t *output_comment_c(chunk_t *first)
1701 {
1702 cmt_reflow cmt;
1703
1704 output_cmt_start(cmt, first);
1705 log_rule_B("cmt_reflow_mode");
1706 cmt.reflow = (options::cmt_reflow_mode() != 1);
1707
1708 // See if we can combine this comment with the next comment
1709 log_rule_B("cmt_c_group");
1710
1711 if ( !options::cmt_c_group()
1712 || !can_combine_comment(first, cmt))
1713 {
1714 // Just add the single comment
1715 log_rule_B("cmt_star_cont");
1716 cmt.cont_text = options::cmt_star_cont() ? " * " : " ";
1717 LOG_CONTTEXT();
1718
1719 log_rule_B("cmt_trailing_single_line_c_to_cpp");
1720
1721 if (options::cmt_trailing_single_line_c_to_cpp() && chunk_is_last_on_line(*first))
1722 {
1723 add_text("//");
1724
1725 unc_text tmp;
1726 tmp.set(first->str, 2, first->len() - 4);
1727 cmt_trim_whitespace(tmp, false);
1728 add_comment_text(tmp, cmt, false);
1729 }
1730 else
1731 {
1732 add_comment_text(first->str, cmt, false);
1733 }
1734 return(first);
1735 }
1736 log_rule_B("cmt_star_cont");
1737 cmt.cont_text = options::cmt_star_cont() ? " *" : " ";
1738 LOG_CONTTEXT();
1739
1740 add_text("/*");
1741
1742 log_rule_B("cmt_c_nl_start");
1743
1744 if (options::cmt_c_nl_start())
1745 {
1746 add_comment_text("\n", cmt, false);
1747 }
1748 chunk_t *pc = first;
1749 unc_text tmp;
1750
1751 while (can_combine_comment(pc, cmt))
1752 {
1753 LOG_FMT(LCONTTEXT, "%s(%d): text() is '%s'\n",
1754 __func__, __LINE__, pc->text());
1755 tmp.set(pc->str, 2, pc->len() - 4);
1756
1757 if ( cpd.last_char == '*'
1758 && ( tmp[0] == '/'
1759 || tmp[0] != ' ')) // Issue #1908
1760 {
1761 LOG_FMT(LCONTTEXT, "%s(%d): add_text a " "\n", __func__, __LINE__);
1762 add_text(" ");
1763 }
1764 // In case of reflow, original comment could contain trailing spaces before closing the comment, we don't need them after reflow
1765 LOG_FMT(LCONTTEXT, "%s(%d): trim\n", __func__, __LINE__);
1766 cmt_trim_whitespace(tmp, false);
1767 LOG_FMT(LCONTTEXT, "%s(%d): add_comment_text(tmp is '%s')\n",
1768 __func__, __LINE__, tmp.c_str());
1769 add_comment_text(tmp, cmt, false);
1770 LOG_FMT(LCONTTEXT, "%s(%d): add_comment_text(newline)\n",
1771 __func__, __LINE__);
1772 add_comment_text("\n", cmt, false);
1773 pc = chunk_get_next(pc);
1774 pc = chunk_get_next(pc);
1775 }
1776 tmp.set(pc->str, 2, pc->len() - 4);
1777
1778 if ( cpd.last_char == '*'
1779 && tmp[0] == '/')
1780 {
1781 add_text(" ");
1782 }
1783 // In case of reflow, original comment could contain trailing spaces before closing the comment, we don't need them after reflow
1784 cmt_trim_whitespace(tmp, false);
1785 add_comment_text(tmp, cmt, false);
1786
1787 log_rule_B("cmt_c_nl_end");
1788
1789 if (options::cmt_c_nl_end())
1790 {
1791 cmt.cont_text = " ";
1792 LOG_CONTTEXT();
1793 add_comment_text("\n", cmt, false);
1794 }
1795 add_comment_text("*/", cmt, false);
1796 return(pc);
1797 } // output_comment_c
1798
1799
output_comment_cpp(chunk_t * first)1800 static chunk_t *output_comment_cpp(chunk_t *first)
1801 {
1802 cmt_reflow cmt;
1803
1804 output_cmt_start(cmt, first);
1805 log_rule_B("cmt_reflow_mode");
1806 cmt.reflow = (options::cmt_reflow_mode() != 1);
1807
1808 unc_text leadin = "//"; // default setting to keep previous behaviour
1809
1810 // If true, space is added with sp_cmt_cpp_start will be added after doxygen
1811 // sequences like '///', '///<', '//!' and '//!<'.
1812 log_rule_B("sp_cmt_cpp_doxygen");
1813
1814 if (options::sp_cmt_cpp_doxygen()) // special treatment for doxygen style comments (treat as unity)
1815 {
1816 const char *sComment = first->text();
1817 bool grouping = (sComment[2] == '@');
1818 size_t brace = 3;
1819
1820 if ( sComment[2] == '/'
1821 || sComment[2] == '!') // doxygen style found!
1822 {
1823 leadin += sComment[2]; // at least one additional char (either "///" or "//!")
1824
1825 if (sComment[3] == '<') // and a further one (either "///<" or "//!<")
1826 {
1827 leadin += '<';
1828 }
1829 else
1830 {
1831 grouping = (sComment[3] == '@'); // or a further one (grouping)
1832 brace = 4;
1833 }
1834 }
1835
1836 if ( grouping
1837 && ( sComment[brace] == '{'
1838 || sComment[brace] == '}'))
1839 {
1840 leadin += '@';
1841 leadin += sComment[brace];
1842 }
1843 }
1844 // Special treatment for Qt translator or meta-data comments (treat as unity)
1845 // If true, space is added with sp_cmt_cpp_start will be added after Qt
1846 // translator or meta-data comments like '//:', '//=', and '//~'.
1847 log_rule_B("sp_cmt_cpp_qttr");
1848
1849 if (options::sp_cmt_cpp_qttr())
1850 {
1851 const int c = first->str[2];
1852
1853 if ( c == ':'
1854 || c == '='
1855 || c == '~')
1856 {
1857 leadin += c;
1858 }
1859 }
1860 // CPP comments can't be grouped unless they are converted to C comments
1861 log_rule_B("cmt_cpp_to_c");
1862
1863 if (!options::cmt_cpp_to_c())
1864 {
1865 auto const *cmt_text = first->str.c_str() + 2;
1866 // Add or remove space after the opening of a C++ comment,
1867 // i.e. '// A' vs. '//A'.
1868 auto *sp_cmt = &options::sp_cmt_cpp_start;
1869
1870 cmt.cont_text = leadin;
1871
1872 // Get start of comment text
1873 while ( *cmt_text != '\0'
1874 && unc_isspace(*cmt_text))
1875 {
1876 ++cmt_text;
1877 }
1878
1879 // Determine if we are dealing with a region marker
1880 if ( ( !first->prev
1881 || first->prev->orig_line != first->orig_line)
1882 && ( strncmp(cmt_text, "BEGIN", 5) == 0
1883 || strncmp(cmt_text, "END", 3) == 0))
1884 {
1885 // If sp_cmt_cpp_region is not ignore, use that instead of
1886 // sp_cmt_cpp_start
1887 if (options::sp_cmt_cpp_region() != IARF_IGNORE)
1888 {
1889 sp_cmt = &options::sp_cmt_cpp_region;
1890 }
1891 }
1892 // Add or remove space after the opening of a C++ comment,
1893 // i.e. '// A' vs. '//A'.
1894 log_rule_B(sp_cmt->name());
1895
1896 if ((*sp_cmt)() != IARF_REMOVE)
1897 {
1898 cmt.cont_text += ' ';
1899 }
1900 LOG_CONTTEXT();
1901
1902 // Add or remove space after the opening of a C++ comment,
1903 // i.e. '// A' vs. '//A'.
1904 log_rule_B(sp_cmt->name());
1905
1906 if ((*sp_cmt)() == IARF_IGNORE)
1907 {
1908 add_comment_text(first->str, cmt, false);
1909 }
1910 else
1911 {
1912 size_t iLISz = leadin.size();
1913 unc_text tmp(first->str, 0, iLISz);
1914 add_comment_text(tmp, cmt, false);
1915
1916 tmp.set(first->str, iLISz, first->len() - iLISz);
1917
1918 // Add or remove space after the opening of a C++ comment,
1919 // i.e. '// A' vs. '//A'.
1920 log_rule_B("sp_cmt_cpp_start");
1921
1922 if ((*sp_cmt)() & IARF_REMOVE)
1923 {
1924 while ( (tmp.size() > 0)
1925 && unc_isspace(tmp[0]))
1926 {
1927 tmp.pop_front();
1928 }
1929 }
1930
1931 if (tmp.size() > 0)
1932 {
1933 // Add or remove space after the opening of a C++ comment,
1934 // i.e. '// A' vs. '//A'.
1935 log_rule_B("sp_cmt_cpp_start");
1936
1937 if ((*sp_cmt)() & IARF_ADD)
1938 {
1939 if ( !unc_isspace(tmp[0])
1940 && (tmp[0] != '/'))
1941 {
1942 add_comment_text(" ", cmt, false);
1943 }
1944 }
1945 add_comment_text(tmp, cmt, false);
1946 }
1947 }
1948 return(first);
1949 }
1950 // We are going to convert the CPP comments to C comments
1951 log_rule_B("cmt_star_cont");
1952 cmt.cont_text = options::cmt_star_cont() ? " * " : " ";
1953 LOG_CONTTEXT();
1954
1955 unc_text tmp;
1956
1957 // See if we can combine this comment with the next comment
1958 log_rule_B("cmt_cpp_group");
1959
1960 if ( !options::cmt_cpp_group()
1961 || !can_combine_comment(first, cmt))
1962 {
1963 // nothing to group: just output a single line
1964 add_text("/*");
1965
1966 // patch # 32, 2012-03-23
1967 // Add or remove space after the opening of a C++ comment,
1968 // i.e. '// A' vs. '//A'.
1969 log_rule_B("sp_cmt_cpp_start");
1970
1971 if ( !unc_isspace(first->str[2])
1972 && (options::sp_cmt_cpp_start() & IARF_ADD))
1973 {
1974 add_char(' ');
1975 }
1976 tmp.set(first->str, 2, first->len() - 2);
1977 add_comment_text(tmp, cmt, true);
1978 add_text(" */");
1979 return(first);
1980 }
1981 add_text("/*");
1982
1983 log_rule_B("cmt_cpp_nl_start");
1984
1985 if (options::cmt_cpp_nl_start())
1986 {
1987 add_comment_text("\n", cmt, false);
1988 }
1989 else
1990 {
1991 add_text(" ");
1992 }
1993 chunk_t *pc = first;
1994 int offs;
1995
1996 while (can_combine_comment(pc, cmt))
1997 {
1998 offs = unc_isspace(pc->str[2]) ? 1 : 0;
1999 tmp.set(pc->str, 2 + offs, pc->len() - (2 + offs));
2000
2001 if ( cpd.last_char == '*'
2002 && tmp[0] == '/')
2003 {
2004 add_text(" ");
2005 }
2006 add_comment_text(tmp, cmt, true);
2007 add_comment_text("\n", cmt, false);
2008 pc = chunk_get_next(chunk_get_next(pc));
2009 }
2010 offs = unc_isspace(pc->str[2]) ? 1 : 0;
2011 tmp.set(pc->str, 2 + offs, pc->len() - (2 + offs));
2012 add_comment_text(tmp, cmt, true);
2013
2014 log_rule_B("cmt_cpp_nl_end");
2015
2016 if (options::cmt_cpp_nl_end())
2017 {
2018 cmt.cont_text = "";
2019 LOG_CONTTEXT();
2020 add_comment_text("\n", cmt, false);
2021 }
2022 add_comment_text(" */", cmt, false);
2023 return(pc);
2024 } // output_comment_cpp
2025
2026
cmt_trim_whitespace(unc_text & line,bool in_preproc)2027 static void cmt_trim_whitespace(unc_text &line, bool in_preproc)
2028 {
2029 // Remove trailing whitespace on the line
2030 while ( line.size() > 0
2031 && ( line.back() == ' '
2032 || line.back() == '\t'))
2033 {
2034 line.pop_back();
2035 }
2036
2037 // Shift back to the comment text, ...
2038 if ( in_preproc // if in a preproc ...
2039 && line.size() > 1 // with a line that holds ...
2040 && line.back() == '\\') // a backslash-newline ...
2041 {
2042 bool do_space = false;
2043
2044 // If there was any space before the backslash, change it to 1 space
2045 line.pop_back();
2046
2047 while ( line.size() > 0
2048 && ( line.back() == ' '
2049 || line.back() == '\t'))
2050 {
2051 do_space = true;
2052 line.pop_back();
2053 }
2054
2055 if (do_space)
2056 {
2057 line.append(' ');
2058 }
2059 line.append('\\');
2060 }
2061 } // cmt_trim_whitespace
2062
2063
2064 /**
2065 * Return an indexed-map of reflow fold end of line/beginning of line regex pairs read
2066 * from file
2067 */
get_reflow_fold_regex_map()2068 static std::map<std::size_t, std::pair<std::wregex, std::wregex> > get_reflow_fold_regex_map()
2069 {
2070 /**
2071 * TODO: should the following be static to prevent initializing it multiple times?
2072 */
2073 static std::map<std::size_t, std::pair<std::wregex, std::wregex> > regex_map;
2074
2075 if (regex_map.empty())
2076 {
2077 if (!options::cmt_reflow_fold_regex_file().empty())
2078 {
2079 std::wstring raw_wstring(cpd.reflow_fold_regex.raw.begin(),
2080 cpd.reflow_fold_regex.raw.end());
2081
2082 std::wregex criteria(L"\\s*(?:(?:(beg_of_next)|(end_of_prev))_line_regex)"
2083 "\\s*\\[\\s*([0-9]+)\\s*\\]\\s*=\\s*\"(.*)\"\\s*"
2084 "(?=\\r\\n|\\r|\\n|$)");
2085 std::wsregex_iterator it_regex(raw_wstring.cbegin(), raw_wstring.cend(), criteria);
2086 std::wsregex_iterator it_regex_end = std::wsregex_iterator();
2087
2088 while (it_regex != it_regex_end)
2089 {
2090 std::wsmatch match = *it_regex;
2091
2092 if ( (( match[1].matched
2093 || match[2].matched))
2094 && match[3].matched
2095 && match[4].matched)
2096 {
2097 auto &&index = std::stoi(match[3].str());
2098 std::wregex *p_wregex = match[1].matched ? ®ex_map[index].second
2099 : ®ex_map[index].first;
2100 *p_wregex = match[4].str();
2101 }
2102 ++it_regex;
2103 }
2104 }
2105 else
2106 {
2107 regex_map.emplace(0L, std::make_pair(L"[\\w,\\]\\)]$", L"^[\\w,\\[\\(]"));
2108 regex_map.emplace(1L, std::make_pair(L"\\.$", L"^[A-Z]"));
2109 }
2110 }
2111 return(regex_map);
2112 } // get_reflow_fold_regex_map
2113
2114
output_comment_multi(chunk_t * pc)2115 static void output_comment_multi(chunk_t *pc)
2116 {
2117 if (pc == nullptr)
2118 {
2119 return;
2120 }
2121 cmt_reflow cmt;
2122
2123 char copy[1000];
2124
2125 LOG_FMT(LCONTTEXT, "%s(%d): text() is '%s', type is %s, orig_col is %zu, column is %zu\n",
2126 __func__, __LINE__, pc->elided_text(copy), get_token_name(pc->type), pc->orig_col, pc->column);
2127
2128 output_cmt_start(cmt, pc);
2129 log_rule_B("cmt_reflow_mode");
2130 cmt.reflow = (options::cmt_reflow_mode() != 1);
2131
2132 size_t cmt_col = cmt.base_col;
2133 int col_diff = pc->orig_col - cmt.base_col;
2134
2135 calculate_comment_body_indent(cmt, pc->str);
2136
2137 log_rule_B("cmt_indent_multi");
2138 log_rule_B("cmt_star_cont");
2139 cmt.cont_text = !options::cmt_indent_multi() ? "" :
2140 (options::cmt_star_cont() ? "* " : " ");
2141 LOG_CONTTEXT();
2142
2143 std::wstring pc_wstring(pc->str.get().cbegin(),
2144 pc->str.get().cend());
2145
2146 size_t doxygen_javadoc_param_name_indent = 0;
2147 size_t doxygen_javadoc_continuation_indent = 0;
2148 size_t reflow_paragraph_continuation_indent = 0;
2149
2150 calculate_doxygen_javadoc_indent_alignment(pc_wstring,
2151 doxygen_javadoc_param_name_indent,
2152 doxygen_javadoc_continuation_indent);
2153
2154 size_t line_count = 0;
2155 size_t ccol = pc->column; // the col of subsequent comment lines
2156 size_t cmt_idx = 0;
2157 bool nl_end = false;
2158 bool doxygen_javadoc_indent_align = false;
2159 unc_text line;
2160
2161 /*
2162 * Get a map of regex pairs that define expressions to match at both the end
2163 * of the previous line and the beginning of the next line
2164 */
2165 auto &&cmt_reflow_regex_map = get_reflow_fold_regex_map();
2166
2167 line.clear();
2168 LOG_FMT(LCONTTEXT, "%s(%d): pc->len() is %zu\n",
2169 __func__, __LINE__, pc->len());
2170 //LOG_FMT(LCONTTEXT, "%s(%d): pc->str is %s\n",
2171 // __func__, __LINE__, pc->str.c_str());
2172
2173 /**
2174 * check for enable/disable processing comment strings that may
2175 * both be embedded within the same multi-line comment
2176 */
2177 auto disable_processing_cmt_idx = find_disable_processing_comment_marker(pc->str);
2178 auto enable_processing_cmt_idx = find_enable_processing_comment_marker(pc->str);
2179
2180 while (cmt_idx < pc->len())
2181 {
2182 int ch = pc->str[cmt_idx];
2183 cmt_idx++;
2184
2185 if ( cmt_idx > std::size_t(disable_processing_cmt_idx)
2186 && enable_processing_cmt_idx > disable_processing_cmt_idx)
2187 {
2188 auto length = enable_processing_cmt_idx - disable_processing_cmt_idx;
2189 unc_text verbatim_text(pc->str,
2190 disable_processing_cmt_idx,
2191 length);
2192
2193 add_text(verbatim_text);
2194
2195 cmt_idx = enable_processing_cmt_idx;
2196
2197 /**
2198 * check for additional enable/disable processing comment strings that may
2199 * both be embedded within the same multi-line comment
2200 */
2201 disable_processing_cmt_idx = find_disable_processing_comment_marker(pc->str,
2202 enable_processing_cmt_idx);
2203 enable_processing_cmt_idx = find_enable_processing_comment_marker(pc->str,
2204 enable_processing_cmt_idx);
2205
2206 /**
2207 * it's probably necessary to reset the line count to prevent line
2208 * continuation characters from being added to the end of the current line
2209 */
2210 line_count = 0;
2211 }
2212
2213 // handle the CRLF and CR endings. convert both to LF
2214 if (ch == '\r')
2215 {
2216 ch = '\n';
2217
2218 if ( cmt_idx < pc->len()
2219 && pc->str[cmt_idx] == '\n')
2220 {
2221 cmt_idx++;
2222 }
2223 }
2224
2225 // Find the start column
2226 if (line.size() == 0)
2227 {
2228 nl_end = false;
2229
2230 if (ch == ' ')
2231 {
2232 ccol++;
2233 continue;
2234 }
2235 else if (ch == '\t')
2236 {
2237 log_rule_B("input_tab_size");
2238 ccol = calc_next_tab_column(ccol, options::input_tab_size());
2239 continue;
2240 }
2241 else
2242 {
2243 LOG_FMT(LCONTTEXT, "%s(%d):ch is %d, %c\n", __func__, __LINE__, ch, char(ch));
2244 }
2245 }
2246
2247 if ( ch == '@'
2248 && options::cmt_align_doxygen_javadoc_tags())
2249 {
2250 int start_idx = cmt_idx - 1;
2251 int end_idx = match_doxygen_javadoc_tag(pc_wstring, start_idx);
2252
2253 if (end_idx > start_idx)
2254 {
2255 doxygen_javadoc_indent_align = true;
2256
2257 std::string match(pc->str.get().cbegin() + start_idx,
2258 pc->str.get().cbegin() + end_idx);
2259
2260 match.erase(std::remove_if(match.begin(),
2261 match.end(),
2262 ::isspace),
2263 match.end());
2264
2265 /**
2266 * remove whitespace before the '@'
2267 */
2268 int line_size_before_indent = line.size();
2269
2270 while ( line_size_before_indent > 0
2271 && unc_isspace(line.back()))
2272 {
2273 line.pop_back();
2274 --line_size_before_indent;
2275 }
2276 log_rule_B("cmt_sp_before_doxygen_javadoc_tags");
2277
2278 int indent = options::cmt_sp_before_doxygen_javadoc_tags();
2279
2280 while (indent-- > 0)
2281 {
2282 line.append(' ');
2283 }
2284 cmt_idx += (end_idx - start_idx);
2285 line.append(match.c_str());
2286
2287 bool is_exception_tag = match.find("@exception") != std::string::npos;
2288 bool is_param_tag = match.find("@param") != std::string::npos;
2289 bool is_throws_tag = match.find("@throws") != std::string::npos;
2290
2291 if ( is_exception_tag
2292 || is_param_tag
2293 || is_throws_tag)
2294 {
2295 indent = int(doxygen_javadoc_param_name_indent) - int(line.size());
2296
2297 while (indent-- > -line_size_before_indent)
2298 {
2299 line.append(' ');
2300 }
2301
2302 while (true)
2303 {
2304 cmt_idx = eat_line_whitespace(pc->str,
2305 cmt_idx);
2306
2307 while ( cmt_idx < pc->len()
2308 && !unc_isspace(pc->str[cmt_idx])
2309 && pc->str[cmt_idx] != ',')
2310 {
2311 line.append(pc->str[cmt_idx++]);
2312 }
2313
2314 if (!is_param_tag)
2315 {
2316 break;
2317 }
2318 /**
2319 * check for the possibility that comma-separated parameter names are present
2320 */
2321 cmt_idx = eat_line_whitespace(pc->str,
2322 cmt_idx);
2323
2324 if (pc->str[cmt_idx] != ',')
2325 {
2326 break;
2327 }
2328 ++cmt_idx;
2329 line.append(", ");
2330 }
2331 }
2332 cmt_idx = eat_line_whitespace(pc->str,
2333 cmt_idx);
2334 indent = int(doxygen_javadoc_continuation_indent) - int(line.size());
2335
2336 while (indent-- > -line_size_before_indent)
2337 {
2338 line.append(' ');
2339 }
2340
2341 while ( cmt_idx < pc->len()
2342 && !unc_isspace(pc->str[cmt_idx]))
2343 {
2344 line.append(pc->str[cmt_idx++]);
2345 }
2346 continue;
2347 }
2348 }
2349 /*
2350 * Now see if we need/must fold the next line with the current to enable
2351 * full reflow
2352 */
2353 log_rule_B("cmt_reflow_mode");
2354
2355 if ( options::cmt_reflow_mode() == 2
2356 && ch == '\n'
2357 && cmt_idx < pc->len())
2358 {
2359 int next_nonempty_line = -1;
2360 int prev_nonempty_line = -1;
2361 size_t nwidx = line.size();
2362
2363 // strip trailing whitespace from the line collected so far
2364 while (nwidx > 0)
2365 {
2366 nwidx--;
2367
2368 if ( prev_nonempty_line < 0
2369 && !unc_isspace(line[nwidx])
2370 && line[nwidx] != '*' // block comment: skip '*' at end of line
2371 && (pc->flags.test(PCF_IN_PREPROC)
2372 ? ( line[nwidx] != '\\'
2373 || ( line[nwidx + 1] != '\r'
2374 && line[nwidx + 1] != '\n'))
2375 : true))
2376 {
2377 prev_nonempty_line = nwidx; // last non-whitespace char in the previous line
2378 }
2379 }
2380
2381 for (size_t nxt_idx = cmt_idx;
2382 ( nxt_idx < pc->len()
2383 && pc->str[nxt_idx] != '\r'
2384 && pc->str[nxt_idx] != '\n');
2385 nxt_idx++)
2386 {
2387 if ( next_nonempty_line < 0
2388 && !unc_isspace(pc->str[nxt_idx])
2389 && pc->str[nxt_idx] != '*'
2390 && (pc->flags.test(PCF_IN_PREPROC)
2391 ? ( pc->str[nxt_idx] != '\\'
2392 || ( pc->str[nxt_idx + 1] != '\r'
2393 && pc->str[nxt_idx + 1] != '\n'))
2394 : true))
2395 {
2396 next_nonempty_line = nxt_idx; // first non-whitespace char in the next line
2397 }
2398 }
2399
2400 if ( options::cmt_reflow_indent_to_paragraph_start()
2401 && next_nonempty_line >= 0
2402 && ( prev_nonempty_line <= 0
2403 || doxygen_javadoc_indent_align))
2404 {
2405 log_rule_B("cmt_reflow_indent_to_paragraph_start");
2406
2407 int cmt_star_indent = 0;
2408
2409 while ( next_nonempty_line > cmt_star_indent
2410 && pc->str[next_nonempty_line - cmt_star_indent - 1] != '*')
2411 {
2412 ++cmt_star_indent;
2413 }
2414 reflow_paragraph_continuation_indent = size_t(cmt_star_indent);
2415 }
2416
2417 /*
2418 * see if we should fold up; usually that'd be a YES, but there are a few
2419 * situations where folding/reflowing by merging lines is frowned upon:
2420 *
2421 * - ASCII art in the comments (most often, these are drawings done in +-\/|.,*)
2422 *
2423 * - Doxygen/JavaDoc/etc. parameters: these often start with \ or @, at least
2424 * something clearly non-alphanumeric (you see where we're going with this?)
2425 *
2426 * - bullet lists that are closely spaced: bullets are always non-alphanumeric
2427 * characters, such as '-' or '+' (or, oh horror, '*' - that's bloody ambiguous
2428 * to parse :-( ... with or without '*' comment start prefix, that's the
2429 * question, then.)
2430 *
2431 * - semi-HTML formatted code, e.g. <pre>...</pre> comment sections (NDoc, etc.)
2432 *
2433 * - New lines which form a new paragraph without there having been added an
2434 * extra empty line between the last sentence and the new one.
2435 * A bit like this, really; so it is opportune to check if the last line ended
2436 * in a terminal (that would be the set '.:;!?') and the new line starts with
2437 * a capital.
2438 * Though new lines starting with comment delimiters, such as '(', should be
2439 * pulled up.
2440 *
2441 * So it bores down to this: the only folding (& reflowing) that's going to happen
2442 * is when the next line starts with an alphanumeric character AND the last
2443 * line didn't end with an non-alphanumeric character, except: ',' AND the next
2444 * line didn't start with a '*' all of a sudden while the previous one didn't
2445 * (the ambiguous '*'-for-bullet case!)
2446 */
2447 if ( prev_nonempty_line >= 0
2448 && next_nonempty_line >= int(cmt_idx))
2449 {
2450 std::wstring prev_line(line.get().cbegin(),
2451 line.get().cend());
2452 std::wstring next_line(pc->str.get().cbegin() + next_nonempty_line,
2453 pc->str.get().cend());
2454
2455 for (auto &&cmt_reflow_regex_map_entry : cmt_reflow_regex_map)
2456 {
2457 auto &&cmt_reflow_regex_pair = cmt_reflow_regex_map_entry.second;
2458 auto &&end_of_prev_line_regex = cmt_reflow_regex_pair.first;
2459 auto &&beg_of_next_line_regex = cmt_reflow_regex_pair.second;
2460 std::wsmatch match[2];
2461
2462 if ( std::regex_search(prev_line, match[0], end_of_prev_line_regex)
2463 && match[0].position(0) + match[0].length(0) == std::wsmatch::difference_type(line.size())
2464 && std::regex_search(next_line, match[1], beg_of_next_line_regex)
2465 && match[1].position(0) == 0)
2466 {
2467 // rewind the line to the last non-alpha:
2468 line.resize(prev_nonempty_line + 1);
2469
2470 // roll the current line forward to the first non-alpha:
2471 cmt_idx = next_nonempty_line;
2472 // override the NL and make it a single whitespace:
2473 ch = ' ';
2474
2475 break;
2476 }
2477 }
2478 }
2479 }
2480
2481 if (ch == '\n')
2482 {
2483 LOG_FMT(LCONTTEXT, "%s(%d):ch is newline\n", __func__, __LINE__);
2484 }
2485 else
2486 {
2487 LOG_FMT(LCONTTEXT, "%s(%d):ch is %d, %c\n", __func__, __LINE__, ch, char(ch));
2488 }
2489 line.append(ch);
2490
2491 // If we just hit an end of line OR we just hit end-of-comment...
2492 if ( ch == '\n'
2493 || cmt_idx == pc->len())
2494 {
2495 if (ch == '\n')
2496 {
2497 LOG_FMT(LCONTTEXT, "%s(%d):ch is newline\n", __func__, __LINE__);
2498 }
2499 else
2500 {
2501 LOG_FMT(LCONTTEXT, "%s(%d):ch is %d, %c\n", __func__, __LINE__, ch, char(ch));
2502 }
2503 line_count++;
2504 LOG_FMT(LCONTTEXT, "%s(%d):line_count is %zu\n", __func__, __LINE__, line_count);
2505
2506 // strip trailing tabs and spaces before the newline
2507 if (ch == '\n')
2508 {
2509 nl_end = true;
2510 line.pop_back();
2511 cmt_trim_whitespace(line, pc->flags.test(PCF_IN_PREPROC));
2512 }
2513
2514 if (line_count == 1)
2515 {
2516 // this is the first line - add unchanged
2517 add_comment_text(line, cmt, false);
2518
2519 if (nl_end)
2520 {
2521 add_char('\n');
2522 }
2523 }
2524 else
2525 {
2526 /*
2527 * This is not the first line, so we need to indent to the
2528 * correct column. Each line is indented 0 or more spaces.
2529 */
2530 // Ensure ccol is not negative
2531 if (static_cast<int>(ccol) >= col_diff)
2532 {
2533 ccol -= col_diff;
2534 }
2535
2536 if (ccol < (cmt_col + 3))
2537 {
2538 ccol = cmt_col + 3;
2539 }
2540
2541 if (line.size() == 0)
2542 {
2543 // Empty line - just a '\n'
2544 log_rule_B("cmt_star_cont");
2545
2546 if (options::cmt_star_cont())
2547 {
2548 // The number of spaces to insert at the start of subsequent comment lines.
2549 log_rule_B("cmt_sp_before_star_cont");
2550 cmt.column = cmt_col + options::cmt_sp_before_star_cont();
2551 cmt_output_indent(cmt.brace_col, cmt.base_col, cmt.column);
2552
2553 if (cmt.xtra_indent > 0)
2554 {
2555 add_char(' ');
2556 }
2557 // multiline comments can have empty lines with some spaces in them for alignment
2558 // while adding * symbol and aligning them we don't want to keep these trailing spaces
2559 unc_text tmp = unc_text(cmt.cont_text);
2560 cmt_trim_whitespace(tmp, false);
2561 add_text(tmp);
2562 }
2563 add_char('\n');
2564 }
2565 else
2566 {
2567 /*
2568 * If this doesn't start with a '*' or '|'.
2569 * '\name' is a common parameter documentation thing.
2570 */
2571 log_rule_B("cmt_indent_multi");
2572
2573 if ( options::cmt_indent_multi()
2574 && line[0] != '*'
2575 && line[0] != '|'
2576 && line[0] != '#'
2577 && ( line[0] != '\\'
2578 || unc_isalpha(line[1]))
2579 && line[0] != '+')
2580 {
2581 // The number of spaces to insert at the start of subsequent comment lines.
2582 log_rule_B("cmt_sp_before_star_cont");
2583 size_t start_col = cmt_col + options::cmt_sp_before_star_cont();
2584
2585 log_rule_B("cmt_star_cont");
2586
2587 if (options::cmt_star_cont())
2588 {
2589 cmt.column = start_col;
2590 cmt_output_indent(cmt.brace_col, cmt.base_col, cmt.column);
2591
2592 if (cmt.xtra_indent > 0)
2593 {
2594 add_char(' ');
2595 }
2596 add_text(cmt.cont_text);
2597 // The number of spaces to insert after the star on subsequent comment lines.
2598 log_rule_B("cmt_sp_after_star_cont");
2599 output_to_column(ccol + options::cmt_sp_after_star_cont(), false);
2600 }
2601 else
2602 {
2603 cmt.column = ccol;
2604 cmt_output_indent(cmt.brace_col, cmt.base_col, cmt.column);
2605 }
2606 }
2607 else
2608 {
2609 // The number of spaces to insert at the start of subsequent comment lines.
2610 log_rule_B("cmt_sp_before_star_cont");
2611 cmt.column = cmt_col + options::cmt_sp_before_star_cont();
2612 cmt_output_indent(cmt.brace_col, cmt.base_col, cmt.column);
2613
2614 if (cmt.xtra_indent > 0)
2615 {
2616 add_char(' ');
2617 }
2618 size_t idx;
2619
2620 // Checks for and updates the lead chars.
2621 // @return 0=not present, >0=number of chars that are part of the lead
2622 idx = cmt_parse_lead(line, (cmt_idx == pc->len()));
2623
2624 if (idx > 0)
2625 {
2626 // >0=number of chars that are part of the lead
2627 cmt.cont_text.set(line, 0, idx);
2628 LOG_CONTTEXT();
2629
2630 if ( (line.size() >= 2)
2631 && (line[0] == '*')
2632 && unc_isalnum(line[1]))
2633 {
2634 line.insert(1, ' ');
2635 }
2636 }
2637 else
2638 {
2639 // bug #653
2640 if (language_is_set(LANG_D))
2641 {
2642 // 0=no lead char present
2643 add_text(cmt.cont_text);
2644 }
2645 }
2646 }
2647 size_t continuation_indent = 0;
2648
2649 if (doxygen_javadoc_indent_align)
2650 {
2651 continuation_indent = doxygen_javadoc_continuation_indent;
2652 }
2653 else if (reflow_paragraph_continuation_indent > 0)
2654 {
2655 continuation_indent = reflow_paragraph_continuation_indent;
2656 }
2657 add_comment_text(line,
2658 cmt,
2659 false,
2660 continuation_indent);
2661
2662 if (nl_end)
2663 {
2664 add_text("\n");
2665 }
2666 }
2667 }
2668 line.clear();
2669 doxygen_javadoc_indent_align = false;
2670 ccol = 1;
2671 }
2672 }
2673 } // output_comment_multi
2674
2675
kw_fcn_filename(chunk_t * cmt,unc_text & out_txt)2676 static bool kw_fcn_filename(chunk_t *cmt, unc_text &out_txt)
2677 {
2678 UNUSED(cmt);
2679 out_txt.append(path_basename(cpd.filename.c_str()));
2680 return(true);
2681 }
2682
2683
kw_fcn_class(chunk_t * cmt,unc_text & out_txt)2684 static bool kw_fcn_class(chunk_t *cmt, unc_text &out_txt)
2685 {
2686 chunk_t *tmp = nullptr;
2687
2688 if (language_is_set(LANG_CPP | LANG_OC))
2689 {
2690 chunk_t *fcn = get_next_function(cmt);
2691
2692 if (chunk_is_token(fcn, CT_OC_MSG_DECL))
2693 {
2694 tmp = get_prev_oc_class(cmt);
2695 }
2696 else
2697 {
2698 tmp = get_next_class(cmt);
2699 }
2700 }
2701 else if (language_is_set(LANG_OC))
2702 {
2703 tmp = get_prev_oc_class(cmt);
2704 }
2705
2706 if (tmp == nullptr)
2707 {
2708 tmp = get_next_class(cmt);
2709 }
2710
2711 if (tmp != nullptr)
2712 {
2713 out_txt.append(tmp->str);
2714
2715 while ((tmp = chunk_get_next(tmp)) != nullptr)
2716 {
2717 if (tmp->type != CT_DC_MEMBER)
2718 {
2719 break;
2720 }
2721 tmp = chunk_get_next(tmp);
2722
2723 if (tmp != nullptr)
2724 {
2725 out_txt.append("::");
2726 out_txt.append(tmp->str);
2727 }
2728 }
2729 return(true);
2730 }
2731 return(false);
2732 } // kw_fcn_class
2733
2734
kw_fcn_message(chunk_t * cmt,unc_text & out_txt)2735 static bool kw_fcn_message(chunk_t *cmt, unc_text &out_txt)
2736 {
2737 chunk_t *fcn = get_next_function(cmt);
2738
2739 if (!fcn)
2740 {
2741 return(false);
2742 }
2743 out_txt.append(fcn->str);
2744
2745 chunk_t *tmp = chunk_get_next_ncnnl(fcn);
2746 chunk_t *word = nullptr;
2747
2748 while (tmp != nullptr)
2749 {
2750 if ( chunk_is_token(tmp, CT_BRACE_OPEN)
2751 || chunk_is_token(tmp, CT_SEMICOLON))
2752 {
2753 break;
2754 }
2755
2756 if (chunk_is_token(tmp, CT_OC_COLON))
2757 {
2758 if (word != nullptr)
2759 {
2760 out_txt.append(word->str);
2761 word = nullptr;
2762 }
2763 out_txt.append(":");
2764 }
2765
2766 if (chunk_is_token(tmp, CT_WORD))
2767 {
2768 word = tmp;
2769 }
2770 tmp = chunk_get_next_ncnnl(tmp);
2771 }
2772 return(true);
2773 } // kw_fcn_message
2774
2775
kw_fcn_category(chunk_t * cmt,unc_text & out_txt)2776 static bool kw_fcn_category(chunk_t *cmt, unc_text &out_txt)
2777 {
2778 chunk_t *category = get_prev_category(cmt);
2779
2780 if (category)
2781 {
2782 out_txt.append('(');
2783 out_txt.append(category->str);
2784 out_txt.append(')');
2785 }
2786 return(true);
2787 } // kw_fcn_category
2788
2789
kw_fcn_scope(chunk_t * cmt,unc_text & out_txt)2790 static bool kw_fcn_scope(chunk_t *cmt, unc_text &out_txt)
2791 {
2792 chunk_t *scope = get_next_scope(cmt);
2793
2794 if (scope)
2795 {
2796 out_txt.append(scope->str);
2797 return(true);
2798 }
2799 return(false);
2800 } // kw_fcn_scope
2801
2802
kw_fcn_function(chunk_t * cmt,unc_text & out_txt)2803 static bool kw_fcn_function(chunk_t *cmt, unc_text &out_txt)
2804 {
2805 chunk_t *fcn = get_next_function(cmt);
2806
2807 if (fcn)
2808 {
2809 if (get_chunk_parent_type(fcn) == CT_OPERATOR)
2810 {
2811 out_txt.append("operator ");
2812 }
2813
2814 if ( fcn->prev != nullptr
2815 && fcn->prev->type == CT_DESTRUCTOR)
2816 {
2817 out_txt.append('~');
2818 }
2819 out_txt.append(fcn->str);
2820 return(true);
2821 }
2822 return(false);
2823 }
2824
2825
/**
 * Keyword handler for "$(javaparam)".
 * Builds javadoc-style "@param <name> TODO" lines for the next function and
 * a trailing "@return TODO" line, separated by '\n'.
 *
 * @param cmt      comment chunk being expanded
 * @param out_txt  receives the generated lines
 *
 * @return false if no function follows the comment, true otherwise
 *         (also when the function parens cannot be found; nothing is
 *         appended in that case)
 */
static bool kw_fcn_javaparam(chunk_t *cmt, unc_text &out_txt)
{
   chunk_t *fcn = get_next_function(cmt);

   if (!fcn)
   {
      return(false);
   }
   chunk_t *fpo;                // function paren open  (nullptr for Objective-C messages)
   chunk_t *fpc;                // function paren close (nullptr for Objective-C messages)
   bool    has_param = true;
   bool    need_nl   = false;   // true once a line was written, so the next one needs a leading '\n'

   if (chunk_is_token(fcn, CT_OC_MSG_DECL))
   {
      // Objective-C message declaration: walk up to the first '{' or ';'.
      // Within this loop has_param means "the previous chunk was a closing
      // paren", in which case the current chunk is emitted as parameter name.
      chunk_t *tmp = chunk_get_next_ncnnl(fcn);
      has_param = false;

      while (tmp != nullptr)
      {
         if (  chunk_is_token(tmp, CT_BRACE_OPEN)
            || chunk_is_token(tmp, CT_SEMICOLON))
         {
            break;
         }

         if (has_param)
         {
            if (need_nl)
            {
               out_txt.append("\n");
            }
            need_nl = true;
            out_txt.append("@param");
            out_txt.append(" ");
            out_txt.append(tmp->str);
            out_txt.append(" TODO");
         }
         has_param = false;

         if (chunk_is_token(tmp, CT_PAREN_CLOSE))
         {
            has_param = true;
         }
         tmp = chunk_get_next_ncnnl(tmp);
      }
      // With both pointers null the '()' check below compares
      // chunk_get_next_ncnnl(nullptr) against nullptr.
      // NOTE(review): presumably that call yields nullptr, which clears
      // has_param and skips the paren based scan - confirm
      fpo = fpc = nullptr;
   }
   else
   {
      // locate the function parens at the level of the function chunk;
      // without them there is nothing to document
      fpo = chunk_get_next_type(fcn, CT_FPAREN_OPEN, fcn->level);

      if (fpo == nullptr)
      {
         return(true);
      }
      fpc = chunk_get_next_type(fpo, CT_FPAREN_CLOSE, fcn->level);

      if (fpc == nullptr)
      {
         return(true);
      }
   }
   chunk_t *tmp;

   // Check for 'foo()' and 'foo(void)'
   if (chunk_get_next_ncnnl(fpo) == fpc)
   {
      has_param = false;
   }
   else
   {
      tmp = chunk_get_next_ncnnl(fpo);

      // a single chunk between the parens that reads "void"
      if (  (tmp == chunk_get_prev_ncnnl(fpc))
         && chunk_is_str(tmp, "void", 4))
      {
         has_param = false;
      }
   }

   if (has_param)
   {
      // Scan the chunks after the open paren. 'prev' remembers the last
      // CT_WORD seen; at every comma and at the close paren one "@param"
      // line is written, with the remembered word as its name if there is one.
      chunk_t *prev = nullptr;
      tmp = fpo;

      while ((tmp = chunk_get_next(tmp)) != nullptr)
      {
         if (  chunk_is_token(tmp, CT_COMMA)
            || tmp == fpc)
         {
            if (need_nl)
            {
               out_txt.append("\n");
            }
            need_nl = true;
            out_txt.append("@param");

            if (prev != nullptr)
            {
               out_txt.append(" ");
               out_txt.append(prev->str);
               out_txt.append(" TODO");
            }
            prev = nullptr;

            if (tmp == fpc)
            {
               break;
            }
         }

         if (chunk_is_token(tmp, CT_WORD))
         {
            prev = tmp;
         }
      }
   }
   // Do the return stuff: look at the chunk in front of the function
   tmp = chunk_get_prev_ncnnl(fcn);

   // For Objective-C we need to go to the previous chunk
   if (  tmp != nullptr
      && get_chunk_parent_type(tmp) == CT_OC_MSG_DECL
      && chunk_is_token(tmp, CT_PAREN_CLOSE))
   {
      tmp = chunk_get_prev_ncnnl(tmp);
   }

   // anything but "void" in front of the function gets a "@return" line
   if (  tmp != nullptr
      && !chunk_is_str(tmp, "void", 4))
   {
      if (need_nl)
      {
         out_txt.append("\n");
      }
      out_txt.append("@return TODO");
   }
   return(true);
} // kw_fcn_javaparam
2966
2967
kw_fcn_fclass(chunk_t * cmt,unc_text & out_txt)2968 static bool kw_fcn_fclass(chunk_t *cmt, unc_text &out_txt)
2969 {
2970 chunk_t *fcn = get_next_function(cmt);
2971
2972 if (!fcn)
2973 {
2974 return(false);
2975 }
2976
2977 if (fcn->flags.test(PCF_IN_CLASS))
2978 {
2979 // if inside a class, we need to find to the class name
2980 chunk_t *tmp = chunk_get_prev_type(fcn, CT_BRACE_OPEN, fcn->level - 1);
2981 tmp = chunk_get_prev_type(tmp, CT_CLASS, tmp->level);
2982 tmp = chunk_get_next_ncnnl(tmp);
2983
2984 while (chunk_is_token(chunk_get_next_ncnnl(tmp), CT_DC_MEMBER))
2985 {
2986 tmp = chunk_get_next_ncnnl(tmp);
2987 tmp = chunk_get_next_ncnnl(tmp);
2988 }
2989
2990 if (tmp != nullptr)
2991 {
2992 out_txt.append(tmp->str);
2993 return(true);
2994 }
2995 }
2996 else
2997 {
2998 // if outside a class, we expect "CLASS::METHOD(...)"
2999 chunk_t *tmp = chunk_get_prev_ncnnl(fcn);
3000
3001 if (chunk_is_token(tmp, CT_OPERATOR))
3002 {
3003 tmp = chunk_get_prev_ncnnl(tmp);
3004 }
3005
3006 if ( tmp != nullptr
3007 && ( chunk_is_token(tmp, CT_DC_MEMBER)
3008 || chunk_is_token(tmp, CT_MEMBER)))
3009 {
3010 tmp = chunk_get_prev_ncnnl(tmp);
3011 out_txt.append(tmp->str);
3012 return(true);
3013 }
3014 }
3015 return(false);
3016 } // kw_fcn_fclass
3017
3018
kw_fcn_year(chunk_t * cmt,unc_text & out_txt)3019 static bool kw_fcn_year(chunk_t *cmt, unc_text &out_txt)
3020 {
3021 UNUSED(cmt);
3022 time_t now = time(nullptr);
3023
3024 out_txt.append(std::to_string(1900 + localtime(&now)->tm_year));
3025 return(true);
3026 }
3027
3028
3029 struct kw_subst_t
3030 {
3031 const char *tag;
3032 bool (*func)(chunk_t *cmt, unc_text &out_txt);
3033 };
3034
3035
/**
 * Keyword tags that do_kw_subst() looks for in a comment, each paired with
 * the handler that produces the replacement text.
 * The entries are processed in the order listed here.
 */
static const kw_subst_t kw_subst_table[] =
{
   { "$(filename)", kw_fcn_filename },    // path_basename() of cpd.filename
   { "$(class)", kw_fcn_class },          // class name, including "::" parts
   { "$(message)", kw_fcn_message },      // function name plus "word:" parts
   { "$(category)", kw_fcn_category },    // "(<category>)" or nothing
   { "$(scope)", kw_fcn_scope },          // text of the next scope chunk
   { "$(function)", kw_fcn_function },    // function name, with "operator "/'~' prefix
   { "$(javaparam)", kw_fcn_javaparam },  // "@param ..." / "@return ..." lines
   { "$(fclass)", kw_fcn_fclass },        // class the next function belongs to
   { "$(year)", kw_fcn_year },            // current year
};
3048
3049
do_kw_subst(chunk_t * pc)3050 static void do_kw_subst(chunk_t *pc)
3051 {
3052 for (const auto &kw : kw_subst_table)
3053 {
3054 int idx = pc->str.find(kw.tag);
3055
3056 if (idx < 0)
3057 {
3058 continue;
3059 }
3060 unc_text tmp_txt;
3061 tmp_txt.clear();
3062
3063 if (kw.func(pc, tmp_txt))
3064 {
3065 // if the replacement contains '\n' we need to fix the lead
3066 if (tmp_txt.find("\n") >= 0)
3067 {
3068 size_t nl_idx = pc->str.rfind("\n", idx);
3069
3070 if (nl_idx > 0)
3071 {
3072 // idx and nl_idx are both positive
3073 unc_text nl_txt;
3074 nl_txt.append("\n");
3075 nl_idx++;
3076
3077 while ( (nl_idx < static_cast<size_t>(idx))
3078 && !unc_isalnum(pc->str[nl_idx]))
3079 {
3080 nl_txt.append(pc->str[nl_idx++]);
3081 }
3082 tmp_txt.replace("\n", nl_txt);
3083 }
3084 }
3085 pc->str.replace(kw.tag, tmp_txt);
3086 }
3087 }
3088 } // do_kw_subst
3089
3090
/**
 * Outputs a multi-line comment line by line without reformatting its text.
 * Only the indentation of each line is recomputed and trailing whitespace in
 * front of a newline is removed. Text between a disable-processing marker and
 * the following enable-processing marker is copied verbatim.
 *
 * @param pc  the comment chunk to output; nothing is done for nullptr
 */
static void output_comment_multi_simple(chunk_t *pc)
{
   if (pc == nullptr)
   {
      return;
   }
   cmt_reflow cmt;

   LOG_FMT(LCONTTEXT, "%s(%d): text() is '%s', type is %s, orig_col is %zu, column is %zu\n",
           __func__, __LINE__, pc->text(), get_token_name(pc->type), pc->orig_col, pc->column);

   output_cmt_start(cmt, pc);

   // The multiline comment is saved inside one chunk. If the comment is
   // shifted all lines of the comment need to be shifted by the same amount.
   // Save the difference of initial and current position to apply it on every
   // line_column
   const int col_diff = [pc]()
                        {
                           int diff = 0;

                           // only a comment that starts its line contributes a shift
                           if (chunk_is_newline(chunk_get_prev(pc)))
                           {
                              // The comment should be indented correctly
                              diff = pc->column - pc->orig_col;
                           }
                           return(diff);
                        }();

   /**
    * check for enable/disable processing comment strings that may
    * both be embedded within the same multi-line comment
    */
   auto disable_processing_cmt_idx = find_disable_processing_comment_marker(pc->str);
   auto enable_processing_cmt_idx  = find_enable_processing_comment_marker(pc->str);

   unc_text line;                       // text of the line collected so far
   size_t   line_count  = 0;            // number of lines finished so far
   size_t   line_column = pc->column;   // column at which the current line's text starts
   size_t   cmt_idx     = 0;            // read position within pc->str

   while (cmt_idx < pc->len())
   {
      int ch = pc->str[cmt_idx];
      cmt_idx++;

      // Once the read position has passed the disable marker and an enable
      // marker follows it, copy everything between the two markers unchanged.
      // NOTE(review): presumably the marker functions return a negative index
      // when nothing is found, which makes the first comparison fail - confirm
      if (  cmt_idx > std::size_t(disable_processing_cmt_idx)
         && enable_processing_cmt_idx > disable_processing_cmt_idx)
      {
         auto     length = enable_processing_cmt_idx - disable_processing_cmt_idx;
         unc_text verbatim_text(pc->str,
                                disable_processing_cmt_idx,
                                length);

         add_text(verbatim_text);

         // continue reading at the enable marker
         cmt_idx = enable_processing_cmt_idx;

         /**
          * check for additional enable/disable processing comment strings that may
          * both be embedded within the same multi-line comment
          */
         disable_processing_cmt_idx = find_disable_processing_comment_marker(pc->str,
                                                                             enable_processing_cmt_idx);
         enable_processing_cmt_idx = find_enable_processing_comment_marker(pc->str,
                                                                           enable_processing_cmt_idx);

         // whatever was collected for the current line is dropped here
         line.clear();

         continue;
      }
      // 1: step through leading tabs and spaces to find the start column
      log_rule_B("cmt_convert_tab_to_spaces");

      // leading whitespace is only consumed while the line is still empty and
      // either the column is left of the base column or tabs get converted
      if (  line.size() == 0
         && (  line_column < cmt.base_col
            || options::cmt_convert_tab_to_spaces()))
      {
         if (ch == ' ')
         {
            line_column++;
            continue;
         }
         else if (ch == '\t')
         {
            log_rule_B("input_tab_size");
            line_column = calc_next_tab_column(line_column, options::input_tab_size());
            continue;
         }
         else
         {
            LOG_FMT(LCONTTEXT, "%s(%d):ch is %d, %c\n", __func__, __LINE__, ch, char(ch));
         }
      }

      // 2: add chars to line, handle the CRLF and CR endings (convert both to LF)
      if (ch == '\r')
      {
         ch = '\n';

         // swallow the LF of a CRLF pair
         if (  (cmt_idx < pc->len())
            && (pc->str[cmt_idx] == '\n'))
         {
            cmt_idx++;
         }
      }
      LOG_FMT(LCONTTEXT, "%s(%d):Line is %s\n", __func__, __LINE__, line.c_str());
      line.append(ch);
      LOG_FMT(LCONTTEXT, "%s(%d):Line is %s\n", __func__, __LINE__, line.c_str());

      // If we just hit an end of line OR we just hit end-of-comment...
      if (  ch == '\n'
         || cmt_idx == pc->len())
      {
         line_count++;
         LOG_FMT(LCONTTEXT, "%s(%d):line_count is %zu\n", __func__, __LINE__, line_count);

         // strip trailing tabs and spaces before the newline
         if (ch == '\n')
         {
            // take the newline off, trim, then put it back
            line.pop_back();

            // Say we aren't in a preproc to prevent changing any bs-nl
            cmt_trim_whitespace(line, false);

            line.append('\n');
         }

         if (line.size() > 0)
         {
            // unless line contains only a single newline char, indent if the
            // line consists of either:
            if (  line.size() > 1     // more than a single newline char or
               || ch != '\n')         // (end-of-comment) a single non newline char
            {
               // the first line keeps the column of the chunk itself
               if (line_count > 1)
               {
                  // apply comment column shift without underflowing
                  line_column = (  col_diff < 0
                                && (cast_abs(line_column, col_diff) > line_column))
                                ? 0 : line_column + col_diff;
               }
               cmt.column = line_column;
               cmt_output_indent(cmt.brace_col, cmt.base_col, cmt.column);
            }
            add_text(line);

            line.clear();
         }
         // the next line starts counting its leading whitespace at column 1
         line_column = 1;
      }
   }
} // output_comment_multi_simple
3244
3245
generate_if_conditional_as_text(unc_text & dst,chunk_t * ifdef)3246 static void generate_if_conditional_as_text(unc_text &dst, chunk_t *ifdef)
3247 {
3248 int column = -1;
3249
3250 dst.clear();
3251
3252 for (chunk_t *pc = ifdef; pc != nullptr; pc = chunk_get_next(pc))
3253 {
3254 if (column == -1)
3255 {
3256 column = pc->column;
3257 }
3258
3259 if ( chunk_is_token(pc, CT_NEWLINE)
3260 || chunk_is_token(pc, CT_COMMENT_MULTI)
3261 || chunk_is_token(pc, CT_COMMENT_CPP))
3262 {
3263 break;
3264 }
3265 else if (chunk_is_token(pc, CT_NL_CONT))
3266 {
3267 dst += ' ';
3268 column = -1;
3269 }
3270 else if ( chunk_is_token(pc, CT_COMMENT)
3271 || chunk_is_token(pc, CT_COMMENT_EMBED))
3272 {
3273 }
3274 else // if (chunk_is_token(pc, CT_JUNK)) || else
3275 {
3276 for (int spacing = pc->column - column; spacing > 0; spacing--)
3277 {
3278 dst += ' ';
3279 column++;
3280 }
3281
3282 dst.append(pc->str);
3283 column += pc->len();
3284 }
3285 }
3286 } // generate_if_conditional_as_text
3287
3288
add_long_preprocessor_conditional_block_comment(void)3289 void add_long_preprocessor_conditional_block_comment(void)
3290 {
3291 chunk_t *pp_start = nullptr;
3292 chunk_t *pp_end = nullptr;
3293
3294 for (chunk_t *pc = chunk_get_head(); pc; pc = chunk_get_next_ncnnl(pc))
3295 {
3296 // just track the preproc level:
3297 if (chunk_is_token(pc, CT_PREPROC))
3298 {
3299 pp_end = pp_start = pc;
3300 }
3301
3302 if ( pc->type != CT_PP_IF
3303 || !pp_start)
3304 {
3305 continue;
3306 }
3307 #if 0
3308 if (pc->flags.test(PCF_IN_PREPROC))
3309 {
3310 continue;
3311 }
3312 #endif
3313
3314 chunk_t *br_close;
3315 chunk_t *br_open = pc;
3316 size_t nl_count = 0;
3317
3318 chunk_t *tmp = pc;
3319
3320 while ((tmp = chunk_get_next(tmp)) != nullptr)
3321 {
3322 // just track the preproc level:
3323 if (chunk_is_token(tmp, CT_PREPROC))
3324 {
3325 pp_end = tmp;
3326 }
3327
3328 if (chunk_is_newline(tmp))
3329 {
3330 nl_count += tmp->nl_count;
3331 }
3332 else if ( pp_end->pp_level == pp_start->pp_level
3333 && ( chunk_is_token(tmp, CT_PP_ENDIF)
3334 || ((chunk_is_token(br_open, CT_PP_IF)) ? (chunk_is_token(tmp, CT_PP_ELSE)) : 0)))
3335 {
3336 br_close = tmp;
3337
3338 LOG_FMT(LPPIF, "found #if / %s section on lines %zu and %zu, nl_count=%zu\n",
3339 (chunk_is_token(tmp, CT_PP_ENDIF) ? "#endif" : "#else"),
3340 br_open->orig_line, br_close->orig_line, nl_count);
3341
3342 // Found the matching #else or #endif - make sure a newline is next
3343 tmp = chunk_get_next(tmp);
3344
3345 LOG_FMT(LPPIF, "next item type %d (is %s)\n",
3346 (tmp ? tmp->type : -1), (tmp ? chunk_is_newline(tmp) ? "newline"
3347 : chunk_is_comment(tmp) ? "comment" : "other" : "---"));
3348
3349 if ( tmp == nullptr
3350 || chunk_is_token(tmp, CT_NEWLINE)) // chunk_is_newline(tmp))
3351 {
3352 size_t nl_min;
3353
3354 if (chunk_is_token(br_close, CT_PP_ENDIF))
3355 {
3356 log_rule_B("mod_add_long_ifdef_endif_comment");
3357 nl_min = options::mod_add_long_ifdef_endif_comment();
3358 }
3359 else
3360 {
3361 log_rule_B("mod_add_long_ifdef_else_comment");
3362 nl_min = options::mod_add_long_ifdef_else_comment();
3363 }
3364 const char *txt = !tmp ? "EOF" : ((chunk_is_token(tmp, CT_PP_ENDIF)) ? "#endif" : "#else");
3365 LOG_FMT(LPPIF, "#if / %s section candidate for augmenting when over NL threshold %zu != 0 (nl_count=%zu)\n",
3366 txt, nl_min, nl_count);
3367
3368 if ( nl_min > 0
3369 && nl_count > nl_min) // nl_count is 1 too large at all times as #if line was counted too
3370 {
3371 // determine the added comment style
3372 c_token_t style = (language_is_set(LANG_CPP)) ?
3373 CT_COMMENT_CPP : CT_COMMENT;
3374
3375 unc_text str;
3376 generate_if_conditional_as_text(str, br_open);
3377
3378 LOG_FMT(LPPIF, "#if / %s section over threshold %zu (nl_count=%zu) --> insert comment after the %s: %s\n",
3379 txt, nl_min, nl_count, txt, str.c_str());
3380
3381 // Add a comment after the close brace
3382 insert_comment_after(br_close, style, str);
3383 }
3384 }
3385
3386 // checks both the #else and #endif for a given level, only then look further in the main loop
3387 if (chunk_is_token(br_close, CT_PP_ENDIF))
3388 {
3389 break;
3390 }
3391 }
3392 }
3393 }
3394 } // add_long_preprocessor_conditional_block_comment
3395