1 /**
2  * @file output.cpp
3  * Does all the output & comment formatting.
4  *
5  * @author  Ben Gardner
6  * @author  Guy Maurel October 2015, 2021
7  * @license GPL v2+
8  */
9 
10 #include "output.h"
11 
12 #include "align_tab_column.h"
13 #include "braces.h"
14 #include "indent.h"
15 #include "prototypes.h"
16 #include "tokenize.h"
17 #include "unc_ctype.h"
18 #include "unicode.h"
19 
20 #include <ctime>
21 #include <map>
22 #include <regex>
23 #include <set>
24 
25 
26 constexpr static auto LCURRENT = LOUTPUT;
27 
28 using namespace uncrustify;
29 
30 
31 struct cmt_reflow
32 {
33    chunk_t  *pc         = nullptr;
34    size_t   column      = 0;   //! Column of the comment start
35    size_t   brace_col   = 0;   //! Brace column (for indenting with tabs)
36    size_t   base_col    = 0;   //! Base column (for indenting with tabs)
37    size_t   word_count  = 0;   //! number of words on this line
38    size_t   xtra_indent = 0;   //! extra indent of non-first lines (0 or 1)
39    unc_text cont_text;         //! fixed text to output at the start of a line (0 to 3 chars)
40    bool     reflow = false;    //! reflow the current line
41 };
42 
43 
44 /**
45  * A multiline comment
46  * The only trick here is that we have to trim out whitespace characters
47  * to get the comment to line up.
48  */
49 static void output_comment_multi(chunk_t *pc);
50 
51 
52 static bool kw_fcn_filename(chunk_t *cmt, unc_text &out_txt);
53 
54 
55 static bool kw_fcn_class(chunk_t *cmt, unc_text &out_txt);
56 
57 
58 static bool kw_fcn_message(chunk_t *cmt, unc_text &out_txt);
59 
60 
61 static bool kw_fcn_category(chunk_t *cmt, unc_text &out_txt);
62 
63 
64 static bool kw_fcn_scope(chunk_t *cmt, unc_text &out_txt);
65 
66 
67 static bool kw_fcn_function(chunk_t *cmt, unc_text &out_txt);
68 
69 
70 /**
71  * Adds the javadoc-style @param and @return stuff, based on the params and
72  * return value for pc.
73  * If the arg list is '()' or '(void)', then no @params are added.
74  * Likewise, if the return value is 'void', then no @return is added.
75  */
76 static bool kw_fcn_javaparam(chunk_t *cmt, unc_text &out_txt);
77 
78 
79 static bool kw_fcn_fclass(chunk_t *cmt, unc_text &out_txt);
80 
81 
82 static bool kw_fcn_year(chunk_t *cmt, unc_text &out_txt);
83 
84 
85 /**
86  * Output a multiline comment without any reformatting other than shifting
87  * it left or right to get the column right.
88  *
89  * Trims trailing whitespaces.
90  */
91 static void output_comment_multi_simple(chunk_t *pc);
92 
93 
94 /**
95  * This renders the #if condition to a string buffer.
96  *
97  * @param[out] dst    unc_text buffer to be filled
98  * @param[in]  ifdef  if conditional as chunk list
99  */
100 static void generate_if_conditional_as_text(unc_text &dst, chunk_t *ifdef);
101 
102 
103 /**
104  * Do keyword substitution on a comment.
105  * NOTE: it is assumed that a comment will contain at most one of each type
106  * of keyword.
107  */
108 static void do_kw_subst(chunk_t *pc);
109 
110 
111 //! All output text is sent here, one char at a time.
112 static void add_char(UINT32 ch, bool is_literal = false);
113 
114 
115 static void add_text(const char *ascii_text);
116 
117 
118 static void add_text(const unc_text &text, bool is_ignored, bool is_literal);
119 
120 
121 /**
122  * Count the number of characters to the end of the next chunk of text.
123  * If it exceeds the limit, return true.
124  */
125 static bool next_word_exceeds_limit(const unc_text &text, size_t idx);
126 
127 
128 /**
129  * Output a comment to the column using indent_with_tabs and
130  * indent_cmt_with_tabs as the rules.
131  * base_col is the indent of the first line of the comment.
132  * On the first line, column == base_col.
133  * On subsequent lines, column >= base_col.
134  *
135  * @param brace_col  the brace-level indent of the comment
136  * @param base_col   the indent of the start of the comment (multiline)
137  * @param column     the column that we should end up in
138  */
139 static void cmt_output_indent(size_t brace_col, size_t base_col, size_t column);
140 
141 
142 /**
143  * Checks for and updates the lead chars.
144  *
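 * @param line     the comment line
 * @param is_last  when true, a non-empty lead is accepted even if it is not
 *                 followed by whitespace (presumably set for the last line
 *                 of the comment)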
146  *
147  * @return 0: not present, >0: number of chars that are part of the lead
148  */
149 static size_t cmt_parse_lead(const unc_text &line, bool is_last);
150 
151 
152 /**
153  * Scans a multiline comment to determine the following:
154  *  - the extra indent of the non-first line (0 or 1)
155  *  - the continuation text ('' or '* ')
156  *
157  * The decision is based on:
158  *  - cmt_indent_multi
159  *  - cmt_star_cont
160  *  - cmt_multi_first_len_minimum
161  *  - the first line length
162  *  - the second line leader length
163  *  - the last line length (without leading space/tab)
164  *
165  * If the first and last line are the same length and don't contain any alnum
166  * chars and (the first line len > 2 or the second leader is the same as the
167  * first line length), then the indent is 0.
168  *
169  * If the leader on the second line is 1 wide or missing, then the indent is 1.
170  *
171  * Otherwise, the indent is 0.
172  *
 * @param[in,out] cmt  Comment state; cmt.xtra_indent is set to 0 or 1
 * @param[in]     str  The comment string
178  */
179 static void calculate_comment_body_indent(cmt_reflow &cmt, const unc_text &str);
180 
181 
182 static int next_up(const unc_text &text, size_t idx, const unc_text &tag);
183 
184 
185 /**
186  * Outputs the C comment at pc.
187  * C comment combining is done here
188  *
189  * @return the last chunk output'd
190  */
191 static chunk_t *output_comment_c(chunk_t *pc);
192 
193 
194 /**
195  * Outputs the CPP comment at pc.
196  * CPP comment combining is done here
197  *
198  * @return the last chunk output'd
199  */
200 static chunk_t *output_comment_cpp(chunk_t *pc);
201 
202 
203 static void cmt_trim_whitespace(unc_text &line, bool in_preproc);
204 
205 
206 /**
207  * Outputs a comment. The initial opening '//' may be included in the text.
208  * Subsequent openings (if combining comments), should not be included.
209  * The closing (for C/D comments) should not be included.
210  *
211  * TODO:
212  * If reflowing text, the comment should be added one word (or line) at a time.
213  * A newline should only be sent if a blank line is encountered or if the next
214  * line is indented beyond the current line (optional?).
215  * If the last char on a line is a ':' or '.', then the next line won't be
216  * combined.
217  */
218 static void add_comment_text(const unc_text &text, cmt_reflow &cmt, bool esc_close, size_t continuation_indent = 0);
219 
220 
221 static void output_cmt_start(cmt_reflow &cmt, chunk_t *pc);
222 
223 
224 /**
225  * Checks to see if the current comment can be combined with the next comment.
226  * The two can be combined if:
227  *  1. They are the same type
 *  2. There is exactly one newline between them
229  *  3. They are indented to the same level
230  */
231 static bool can_combine_comment(chunk_t *pc, cmt_reflow &cmt);
232 
233 
//! Logs the current continuation text.
//! Expects a variable named 'cmt' with a 'cont_text' member in the calling scope.
#define LOG_CONTTEXT()                                \
   LOG_FMT(LCONTTEXT,                                 \
           "%s(%d): set cont_text to '%s'\n",         \
           __func__, __LINE__, cmt.cont_text.c_str())
236 
237 
add_spaces()238 static void add_spaces()
239 {
240    while (cpd.spaces > 0)
241    {
242       write_char(' ');
243       cpd.spaces--;
244    }
245 }
246 
247 
add_char(UINT32 ch,bool is_literal)248 static void add_char(UINT32 ch, bool is_literal)
249 {
250    // If we did a '\r' and it isn't followed by a '\n', then output a newline
251    if (  (cpd.last_char == '\r')
252       && (ch != '\n'))
253    {
254       write_string(cpd.newline);
255       cpd.column      = 1;
256       cpd.did_newline = 1;
257       cpd.spaces      = 0;
258    }
259 
260    // convert a newline into the LF/CRLF/CR sequence
261    if (ch == '\n')
262    {
263       add_spaces();
264       write_string(cpd.newline);
265       cpd.column      = 1;
266       cpd.did_newline = 1;
267       cpd.spaces      = 0;
268    }
269    else if (ch == '\r') // do not output the CARRIAGERETURN
270    {
271       // do not output '\r'
272       cpd.column      = 1;
273       cpd.did_newline = 1;
274       cpd.spaces      = 0;
275    }
276    else if (  (ch == '\t')
277            && cpd.output_tab_as_space)
278    {
279       size_t endcol = next_tab_column(cpd.column);
280 
281       while (cpd.column < endcol)
282       {
283          add_char(' ');
284       }
285       return;
286    }
287    else
288    {
289       // explicitly disallow a tab after a space
290       if (  !is_literal
291          && ch == '\t'
292          && cpd.last_char == ' ')
293       {
294          log_rule_B("indent_with_tabs");
295 
296          if (options::indent_with_tabs() == 0)
297          {
298             size_t endcol = next_tab_column(cpd.column);
299 
300             while (cpd.column < endcol)
301             {
302                add_char(' ');
303             }
304             return;
305          }
306       }
307 
308       if (  (ch == ' ')
309          && !cpd.output_trailspace)
310       {
311          cpd.spaces++;
312          cpd.column++;
313       }
314       else
315       {
316          add_spaces();
317          write_char(ch);
318 
319          if (ch == '\t')
320          {
321             cpd.column = next_tab_column(cpd.column);
322          }
323          else
324          {
325             cpd.column++;
326          }
327       }
328    }
329    cpd.last_char = ch;
330 } // add_char
331 
332 
add_text(const char * ascii_text)333 static void add_text(const char *ascii_text)
334 {
335    char ch;
336 
337    while ((ch = *ascii_text) != 0)
338    {
339       ascii_text++;
340       add_char(ch);
341    }
342 }
343 
344 
add_text(const unc_text & text,bool is_ignored=false,bool is_literal=false)345 static void add_text(const unc_text &text, bool is_ignored = false, bool is_literal = false)
346 {
347    for (size_t idx = 0; idx < text.size(); idx++)
348    {
349       int ch = text[idx];
350 
351       if (is_ignored)
352       {
353          write_char(ch);
354       }
355       else
356       {
357          add_char(ch, is_literal);
358       }
359    }
360 }
361 
362 
next_word_exceeds_limit(const unc_text & text,size_t idx)363 static bool next_word_exceeds_limit(const unc_text &text, size_t idx)
364 {
365    LOG_FMT(LCONTTEXT, "%s(%d): idx is %zu\n",
366            __func__, __LINE__, idx);
367    size_t length = 0;
368 
369    // Count any whitespace
370    while (  (idx < text.size())
371          && unc_isspace(text[idx]))
372    {
373       idx++;
374       length++;
375    }
376 
377    // Count non-whitespace
378    while (  (idx < text.size())
379          && !unc_isspace(text[idx]))
380    {
381       idx++;
382       length++;
383    }
384    return((cpd.column + length - 1) > options::cmt_width());
385 }
386 
387 
388 /**
389  * Advance to a specific column
390  * cpd.column is the current column
391  *
392  * @param column  The column to advance to
393  */
output_to_column(size_t column,bool allow_tabs)394 static void output_to_column(size_t column, bool allow_tabs)
395 {
396    cpd.did_newline = 0;
397 
398    if (allow_tabs)
399    {
400       // tab out as far as possible and then use spaces
401       size_t next_column = next_tab_column(cpd.column);
402 
403       while (next_column <= column)
404       {
405          add_text("\t");
406          next_column = next_tab_column(cpd.column);
407       }
408    }
409 
410    // space out the final bit
411    while (cpd.column < column)
412    {
413       add_text(" ");
414    }
415 }
416 
417 
cmt_output_indent(size_t brace_col,size_t base_col,size_t column)418 static void cmt_output_indent(size_t brace_col, size_t base_col, size_t column)
419 {
420    log_rule_B("indent_cmt_with_tabs");
421    log_rule_B("indent_with_tabs");
422    size_t iwt = options::indent_cmt_with_tabs() ? 2 :
423                 (options::indent_with_tabs() ? 1 : 0);
424 
425    size_t tab_col = (iwt == 0) ? 0 : ((iwt == 1) ? brace_col : base_col);
426 
427    // LOG_FMT(LSYS, "%s(brace=%zd base=%zd col=%zd iwt=%zd) tab=%zd cur=%zd\n",
428    //        __func__, brace_col, base_col, column, iwt, tab_col, cpd.column);
429 
430    cpd.did_newline = 0;
431 
432    if (  iwt == 2
433       || (  cpd.column == 1
434          && iwt == 1))
435    {
436       // tab out as far as possible and then use spaces
437       while (next_tab_column(cpd.column) <= tab_col)
438       {
439          add_text("\t");
440       }
441    }
442 
443    // space out the rest
444    while (cpd.column < column)
445    {
446       add_text(" ");
447    }
448 } // cmt_output_indent
449 
450 
output_parsed(FILE * pfile,bool withOptions)451 void output_parsed(FILE *pfile, bool withOptions)
452 {
453    const char *eol_marker = get_eol_marker();
454 
455    if (withOptions)
456    {
457       save_option_file(pfile, false, true);
458    }
459    fprintf(pfile, "# -=====-%s", eol_marker);
460    fprintf(pfile, "# number of loops               = %d\n", cpd.changes);
461    fprintf(pfile, "# -=====-%s", eol_marker);
462    fprintf(pfile, "# language                      = %s\n", language_name_from_flags(cpd.lang_flags));
463    fprintf(pfile, "# -=====-%s", eol_marker);
464    // MAXLENGTHOFTHENAME must be consider at the format line at the file
465    // output.cpp, line 427: fprintf(pfile, "# Line              Tag                Parent...
466    // and              430: ... fprintf(pfile, "%s# %3zu>%19.19s[%19.19s] ...
467    // here                                                xx xx   xx xx
468 #ifdef WIN32
469    fprintf(pfile, "# Line                Tag         Parent_type  Type of the parent         Columns Br/Lvl/pp     Nl  Text");
470 #else // not WIN32
471    fprintf(pfile, "# Line                Tag         Parent_type  Type of the parent         Columns Br/Lvl/pp      Flag   Nl  Text");
472 #endif // ifdef WIN32
473 
474    for (chunk_t *pc = chunk_get_head(); pc != nullptr; pc = chunk_get_next(pc))
475    {
476 #ifdef WIN32
477       fprintf(pfile, "%s# %3d>%19.19s|%19.19s|%19.19s[%3d/%3d/%3d/%3d][%d/%d/%d][%d-%d]",
478               eol_marker, (int)pc->orig_line, get_token_name(pc->type),
479               get_token_name(get_chunk_parent_type(pc)), get_token_name(get_type_of_the_parent(pc)),
480               (int)pc->column, (int)pc->orig_col, (int)pc->orig_col_end, (int)pc->orig_prev_sp,
481               (int)pc->brace_level, (int)pc->level, (int)pc->pp_level, (int)pc->nl_count, pc->after_tab);
482 #else // not WIN32
483       fprintf(pfile, "%s# %3zu>%19.19s|%19.19s|%19.19s[%3zu/%3zu/%3zu/%3d][%zu/%zu/%zu]",
484               eol_marker, pc->orig_line, get_token_name(pc->type),
485               get_token_name(get_chunk_parent_type(pc)), get_token_name(get_type_of_the_parent(pc)),
486               pc->column, pc->orig_col, pc->orig_col_end, pc->orig_prev_sp,
487               pc->brace_level, pc->level, pc->pp_level);
488       fprintf(pfile, "[%11llx]",
489               static_cast<pcf_flags_t::int_t>(pc->flags));
490       fprintf(pfile, "[%zu-%d]",
491               pc->nl_count, pc->after_tab);
492 #endif // ifdef WIN32
493 
494       if (  pc->type != CT_NEWLINE
495          && (pc->len() != 0))
496       {
497          for (size_t cnt = 0; cnt < pc->column; cnt++)
498          {
499             fprintf(pfile, " ");
500          }
501 
502          if (pc->type != CT_NL_CONT)
503          {
504             fprintf(pfile, "%s", pc->text());
505          }
506          else
507          {
508             fprintf(pfile, "\\");
509          }
510       }
511    }
512 
513    fprintf(pfile, "%s# -=====-%s", eol_marker, eol_marker);
514    fflush(pfile);
515 } // output_parsed
516 
517 
output_parsed_csv(FILE * pfile)518 void output_parsed_csv(FILE *pfile)
519 {
520    const char *eol_marker = get_eol_marker();
521 
522    fprintf(pfile, "number of loops,%d,\n", cpd.changes);
523    fprintf(pfile, "language,%s,\n", language_name_from_flags(cpd.lang_flags));
524    fprintf(pfile, "Line,Tag,Parent_type,Type of the parent,Column,Orig Col Strt,"
525            "Orig Col End,Orig Sp Before,Br,Lvl,pp,Flags,Nl Before,Nl After,Text,");
526 
527    for (chunk_t *pc = chunk_get_head(); pc != nullptr; pc = chunk_get_next(pc))
528    {
529       fprintf(pfile, "%s%zu,%s,%s,%s,%zu,%zu,%zu,%d,%zu,%zu,%zu,",
530               eol_marker, pc->orig_line, get_token_name(pc->type),
531               get_token_name(get_chunk_parent_type(pc)), get_token_name(get_type_of_the_parent(pc)),
532               pc->column, pc->orig_col, pc->orig_col_end, pc->orig_prev_sp,
533               pc->brace_level, pc->level, pc->pp_level);
534 
535       auto pcf_flag_str = pcf_flags_str(pcf_flag_e(pc->flags));
536 #ifdef WIN32
537       auto pcf_flag_str_start = pcf_flag_str.find("[") + 1;
538 #else // not WIN32
539       auto pcf_flag_str_start = pcf_flag_str.find(":") + 1;
540 #endif // ifdef WIN32
541       auto pcf_flag_str_end = pcf_flag_str.find("]");
542       auto pcf_names        = pcf_flag_str.substr(pcf_flag_str_start,
543                                                   pcf_flag_str_end - pcf_flag_str_start);
544       fprintf(pfile, "\"%s\",", pcf_names.c_str());
545       fprintf(pfile, "%zu,%d,",
546               pc->nl_count, pc->after_tab);
547 
548       if (  pc->type != CT_NEWLINE
549          && (pc->len() != 0))
550       {
551          fprintf(pfile, "\"");
552 
553          for (size_t cnt = 0; cnt < pc->column; cnt++)
554          {
555             fprintf(pfile, " ");
556          }
557 
558          if (pc->type != CT_NL_CONT)
559          {
560             for (auto *ch = pc->text(); *ch != '\0'; ++ch)
561             {
562                fprintf(pfile, "%c", *ch);
563 
564                if (*ch == '"')
565                {
566                   // need to escape the double-quote for csv-format
567                   fprintf(pfile, "\"");
568                }
569             }
570          }
571          else
572          {
573             fprintf(pfile, "\\");
574          }
575          fprintf(pfile, "\"");
576       }
577    }
578 
579    fflush(pfile);
580 } // output_parsed_csv
581 
582 
output_text(FILE * pfile)583 void output_text(FILE *pfile)
584 {
585    bool tracking = cpd.html_file != nullptr;                 // special for debugging
586 
587    cpd.fout        = pfile;
588    cpd.did_newline = 1;
589    cpd.column      = 1;
590 
591    if (cpd.bom)
592    {
593       write_bom();
594    }
595    chunk_t *pc;
596 
597    if (cpd.frag_cols > 0)
598    {
599       size_t indent = cpd.frag_cols - 1;
600 
601       // loop over the whole chunk list
602       for (pc = chunk_get_head(); pc != nullptr; pc = chunk_get_next(pc))
603       {
604          pc->column        += indent;
605          pc->column_indent += indent;
606       }
607 
608       cpd.frag_cols = 0;
609    }
610 
611    if (tracking)
612    {
613       add_text("<html>\n");
614       add_text("<head>\n");
615       add_text("   <meta http-equiv=\"content-type\" content=\"text/html; charset=utf-8\"/>\n");
616       add_text("   <title>Uncrustify: where do the Spaces options work</title>\n");
617       add_text("</head>\n");
618       add_text("<body lang=\"en-US\">\n");
619       add_text("<p>\n");
620       add_text("</p>\n");
621       add_text("<pre>\n");
622    }
623    bool write_in_tracking = false;
624 
625    // loop over the whole chunk list
626    for (pc = chunk_get_head(); pc != nullptr; pc = chunk_get_next(pc))
627    {
628       char copy[1000];
629       LOG_FMT(LCONTTEXT, "%s(%d): text() is '%s', type is %s, orig_line is %zu, column is %zu, nl is %zu\n",
630               __func__, __LINE__, pc->elided_text(copy), get_token_name(pc->type), pc->orig_line, pc->column, pc->nl_count);
631       log_rule_B("cmt_convert_tab_to_spaces");
632       cpd.output_tab_as_space = (  options::cmt_convert_tab_to_spaces()
633                                 && chunk_is_comment(pc));
634 
635       if (chunk_is_token(pc, CT_NEWLINE))
636       {
637          for (size_t cnt = 0; cnt < pc->nl_count; cnt++)
638          {
639             if (  cnt > 0
640                && pc->nl_column > 1)
641             {
642                log_rule_B("indent_with_tabs");
643                output_to_column(pc->nl_column, (options::indent_with_tabs() == 2));
644             }
645             add_char('\n');
646          }
647 
648          cpd.did_newline = 1;
649          cpd.column      = 1;
650          LOG_FMT(LOUTIND, " xx\n");
651       }
652       else if (chunk_is_token(pc, CT_NL_CONT))
653       {
654          // FIXME: this really shouldn't be done here!
655          if (!pc->flags.test(PCF_WAS_ALIGNED))
656          {
657             // Add or remove space before a backslash-newline at the end of a line.
658             log_rule_B("sp_before_nl_cont");
659 
660             if (options::sp_before_nl_cont() & IARF_REMOVE)
661             {
662                log_rule_B("sp_before_nl_cont");
663                pc->column = cpd.column + (options::sp_before_nl_cont() == IARF_FORCE);
664             }
665             else
666             {
667                // Try to keep the same relative spacing
668                chunk_t *prev = chunk_get_prev(pc);
669 
670                if (chunk_is_token(prev, CT_PP_IGNORE))
671                {
672                   /*
673                    * Want to completely leave alone PP_IGNORE'd blocks because
674                    * they likely have special column aligned newline
675                    * continuations (common in multiline macros)
676                    */
677                   pc->column = pc->orig_col;
678                }
679                else
680                {
681                   // Try to keep the same relative spacing
682                   while (  prev != nullptr
683                         && prev->orig_col == 0
684                         && prev->nl_count == 0)
685                   {
686                      prev = chunk_get_prev(prev);
687                   }
688 
689                   if (  prev != nullptr
690                      && prev->nl_count == 0)
691                   {
692                      int orig_sp = (pc->orig_col - prev->orig_col_end);
693 
694                      if ((int)(cpd.column + orig_sp) < 0)
695                      {
696 #ifdef WIN32
697                         fprintf(stderr, "FATAL: negative value.\n   pc->orig_col is %d, prev->orig_col_end is %d\n",
698                                 (int)pc->orig_col, (int)prev->orig_col_end);
699 #else // not WIN32
700                         fprintf(stderr, "FATAL: negative value.\n   pc->orig_col is %zu, prev->orig_col_end is %zu\n",
701                                 pc->orig_col, prev->orig_col_end);
702 #endif // ifdef WIN32
703                         log_flush(true);
704                         exit(EX_SOFTWARE);
705                      }
706                      pc->column = cpd.column + orig_sp;
707 
708                      // Add or remove space before a backslash-newline at the end of a line.
709                      log_rule_B("sp_before_nl_cont");
710 
711                      if (  (options::sp_before_nl_cont() != IARF_IGNORE)
712                         && (pc->column < (cpd.column + 1)))
713                      {
714                         pc->column = cpd.column + 1;
715                      }
716                   }
717                }
718             }
719             output_to_column(pc->column, false);
720          }
721          else
722          {
723             log_rule_B("indent_with_tabs");
724             output_to_column(pc->column, (options::indent_with_tabs() == 2));
725          }
726          add_char('\\');
727          add_char('\n');
728          cpd.did_newline = 1;
729          cpd.column      = 1;
730          LOG_FMT(LOUTIND, " \\xx\n");
731       }
732       else if (chunk_is_token(pc, CT_COMMENT_MULTI))
733       {
734          log_rule_B("cmt_indent_multi");
735 
736          if (options::cmt_indent_multi())
737          {
738             output_comment_multi(pc);
739          }
740          else
741          {
742             output_comment_multi_simple(pc);
743          }
744       }
745       else if (chunk_is_token(pc, CT_COMMENT_CPP))
746       {
747          bool tmp = cpd.output_trailspace;
748          /*
749           * keep trailing spaces if they are still present in a chunk;
750           * note that tokenize() already strips spaces in comments,
751           * so if they made it up to here, they are to stay
752           */
753          cpd.output_trailspace = true;
754          pc                    = output_comment_cpp(pc);
755          cpd.output_trailspace = tmp;
756       }
757       else if (chunk_is_token(pc, CT_COMMENT))
758       {
759          pc = output_comment_c(pc);
760       }
761       else if (  chunk_is_token(pc, CT_JUNK)
762               || chunk_is_token(pc, CT_IGNORED))
763       {
764          LOG_FMT(LOUTIND, "%s(%d): orig_line is %zu, orig_col is %zu,\npc->text() >%s<, pc->str.size() is %zu\n",
765                  __func__, __LINE__, pc->orig_line, pc->orig_col, pc->text(), pc->str.size());
766          // do not adjust the column for junk
767          add_text(pc->str, true);
768       }
769       else if (pc->len() == 0)
770       {
771          // don't do anything for non-visible stuff
772          LOG_FMT(LOUTIND, "%s(%d): orig_line is %zu, column is %zu, non-visible stuff: type is %s\n",
773                  __func__, __LINE__, pc->orig_line, pc->column, get_token_name(pc->type));
774       }
775       else
776       {
777          bool allow_tabs;
778          cpd.output_trailspace = (chunk_is_token(pc, CT_STRING_MULTI));
779 
780          // indent to the 'level' first
781          if (cpd.did_newline)
782          {
783             log_rule_B("indent_with_tabs");
784 
785             if (options::indent_with_tabs() == 1)
786             {
787                size_t lvlcol;
788 
789                /*
790                 * FIXME: it would be better to properly set column_indent in
791                 * indent_text(), but this hack for '}' and '#' seems to work.
792                 */
793                if (  chunk_is_token(pc, CT_BRACE_CLOSE)
794                   || chunk_is_token(pc, CT_CASE_COLON)
795                   || chunk_is_token(pc, CT_PREPROC))
796                {
797                   lvlcol = pc->column;
798                }
799                else
800                {
801                   lvlcol = pc->column_indent;
802 
803                   if (lvlcol > pc->column)
804                   {
805                      lvlcol = pc->column;
806                   }
807                }
808 
809                if (lvlcol > 1)
810                {
811                   output_to_column(lvlcol, true);
812                }
813             }
814             log_rule_B("indent_with_tabs");
815             allow_tabs = (options::indent_with_tabs() == 2)
816                          || (  chunk_is_comment(pc)
817                             && options::indent_with_tabs() != 0);
818 
819             LOG_FMT(LOUTIND, "%s(%d): orig_line is %zu, column is %zu, column_indent is %zu, cpd.column is %zu\n",
820                     __func__, __LINE__, pc->orig_line, pc->column, pc->column_indent, cpd.column);
821          }
822          else
823          {
824             /*
825              * Reformatting multi-line comments can screw up the column.
826              * Make sure we don't mess up the spacing on this line.
827              * This has to be done here because comments are not formatted
828              * until the output phase.
829              */
830             if (pc->column < cpd.column)
831             {
832                reindent_line(pc, cpd.column);
833             }
834             // not the first item on a line
835             chunk_t *prev = chunk_get_prev(pc);
836             log_rule_B("align_with_tabs");
837             allow_tabs = (  options::align_with_tabs()
838                          && pc->flags.test(PCF_WAS_ALIGNED)
839                          && ((prev->column + prev->len() + 1) != pc->column));
840 
841             log_rule_B("align_keep_tabs");
842 
843             if (options::align_keep_tabs())
844             {
845                allow_tabs |= pc->after_tab;
846             }
847             LOG_FMT(LOUTIND, "%s(%d): at column %zu(%s)\n",
848                     __func__, __LINE__, pc->column, (allow_tabs ? "true" : "FALSE"));
849          }
850          output_to_column(pc->column, allow_tabs);
851 
852          if (write_in_tracking)
853          {
854             if (chunk_is_token(pc, CT_ANGLE_OPEN))
855             {
856                add_text("&lt;", false, false);
857             }
858             else if (chunk_is_token(pc, CT_ANGLE_CLOSE))
859             {
860                add_text("&gt;", false, false);
861             }
862             else
863             {
864                add_text(pc->str, false, chunk_is_token(pc, CT_STRING));
865             }
866             write_in_tracking = false;
867          }
868          else
869          {
870             add_text(pc->str, false, chunk_is_token(pc, CT_STRING));
871          }
872 
873          if (chunk_is_token(pc, CT_PP_DEFINE))  // Issue #876
874          {
875             // If true, a <TAB> is inserted after #define.
876             log_rule_B("force_tab_after_define");
877 
878             if (options::force_tab_after_define())
879             {
880                add_char('\t');
881             }
882          }
883          cpd.did_newline       = chunk_is_newline(pc);
884          cpd.output_trailspace = false;
885       }
886 
887       if (pc->tracking != nullptr)
888       {
889          LOG_FMT(LGUY, " Tracking info are: \n");
890          LOG_FMT(LGUY, "  number of track(s) %zu\n", pc->tracking->size());
891          add_text("<a title=\"");
892          char tempText[80];
893 
894          for (size_t track = 0; track < pc->tracking->size(); track++)
895          {
896             track_list *A       = pc->tracking;
897             Track_nr   B        = A->at(track);
898             size_t     Bfirst   = B.first;
899             char       *Bsecond = B.second;
900 
901             sprintf(tempText, "%zu", Bfirst);
902             add_text(tempText);
903             add_text(",");
904 
905             if (track == pc->tracking->size() - 1)
906             {
907                sprintf(tempText, "%s", Bsecond);
908                add_text(tempText);
909             }
910             LOG_FMT(LGUY, "  %zu, tracking number is %zu\n", track, Bfirst);
911             LOG_FMT(LGUY, "  %zu, rule            is %s\n", track, Bsecond);
912          }
913 
914          add_text("\"><font color=\"red\">M</font></a>");
915          write_in_tracking = true;
916       }
917    }
918 
919    if (tracking)
920    {
921       add_text("</pre>\n");
922       add_text("</body>\n");
923       add_text("</html>\n");
924    }
925 } // output_text
926 
927 
dump_step(const char * filename,const char * step_description)928 void dump_step(const char *filename, const char *step_description)
929 {
930    static int file_num = 0;
931    char       buffer[256];
932    FILE       *dump_file;
933 
934    if (  filename == nullptr
935       || strlen(filename) == 0)
936    {
937       return;
938    }
939 
940    // On the first call, also save the options in use
941    if (file_num == 0)
942    {
943       snprintf(buffer, 256, "New dump file: %s_%03d.log - Options in use", filename, file_num);
944       log_rule_B(buffer);
945 
946       snprintf(buffer, 256, "%s_%03d.log", filename, file_num);
947       ++file_num;
948 
949       dump_file = fopen(buffer, "wb");
950 
951       if (dump_file != nullptr)
952       {
953          save_option_file(dump_file, false, true);
954          fclose(dump_file);
955       }
956    }
957    snprintf(buffer, 256, "New dump file: %s_%03d.log - %s", filename, file_num, step_description);
958    log_rule_B(buffer);
959 
960    snprintf(buffer, 256, "%s_%03d.log", filename, file_num);
961    ++file_num;
962 
963    dump_file = fopen(buffer, "wb");
964 
965    if (dump_file != nullptr)
966    {
967       fprintf(dump_file, "STEP: %s\n--------------\n", step_description);
968       output_parsed(dump_file, false);
969       fclose(dump_file);
970    }
971 } // dump_step
972 
973 
cmt_parse_lead(const unc_text & line,bool is_last)974 static size_t cmt_parse_lead(const unc_text &line, bool is_last)
975 {
976    size_t len = 0;
977 
978    while (  len < 32
979          && len < line.size()) // TODO what is the meaning of 32?
980    {
981       if (  len > 0
982          && line[len] == '/')
983       {
984          // ignore combined comments
985          size_t tmp = len + 1;
986 
987          while (  tmp < line.size()
988                && unc_isspace(line[tmp]))
989          {
990             tmp++;
991          }
992 
993          if (  tmp < line.size()
994             && line[tmp] == '/')
995          {
996             return(1);
997          }
998          break;
999       }
1000       else if (strchr("*|\\#+", line[len]) == nullptr)
1001       {
1002          break;  // none of the characters '*|\#+' found in line
1003       }
1004       len++;
1005    }
1006 
1007    if (len > 30)  // TODO: what is the meaning of 30?
1008    {
1009       return(1);
1010    }
1011 
1012    if (  len > 0
1013       && (  len >= line.size()
1014          || unc_isspace(line[len])))
1015    {
1016       return(len);
1017    }
1018 
1019    if (  len == 1
1020       && line[0] == '*')
1021    {
1022       return(len);
1023    }
1024 
1025    if (  is_last
1026       && len > 0)
1027    {
1028       return(len);
1029    }
1030    return(0);
1031 } // cmt_parse_lead
1032 
1033 
/**
 * Skips whitespace on the current line, walking either forward or backward
 * from the given index. Newline characters ('\n', '\r') are never skipped.
 *
 * @param  str     the input string containing the comment text
 * @param  idx     the index to start at
 * @param  forward true (default) to walk towards the end of the string,
 *                 false to walk towards its beginning
 * @return         the index of the first character that is not whitespace or
 *                 that is a newline; str.size() (forward) or -1 (backward)
 *                 if the walk leaves the string first
 */
template<typename String>
static int eat_line_whitespace(const String &str, int idx, bool forward = true)
{
   const int step = forward ? 1 : -1;

   for ( ; ; idx += step)
   {
      // Only the bound in the direction of travel is checked.
      // The parentheses are deliberate: without them uncrustify's own
      // formatter mistakes the expression for a template.
      const bool in_range = forward ? (idx < int(str.size())) : (idx >= 0);

      if (  !in_range
         || str[idx] == '\n'
         || str[idx] == '\r'
         || !unc_isspace(str[idx]))
      {
         return(idx);
      }
   }
} // eat_line_whitespace
1072 
1073 
/**
 * Tells whether the text at idx is the first "real" text on its comment
 * line, i.e. whether everything between the start of the line and idx
 * consists of whitespace and/or '*' characters only.
 *
 * @param  str the input string containing the comment text
 * @param  idx the index to look back from; if it is the '@' of a tag, the
 *             scan starts with the character before it
 * @return     true if only whitespace and '*' precede idx on its line
 */
template<typename String>
static bool javaparam_tag_is_start_of_line(const String &str, int idx)
{
   int pos = eat_line_whitespace(str,
                                 (str[idx] == '@') ? idx - 1 : idx,
                                 false);

   // step backwards over any mix of '*' and whitespace
   while (  pos >= 0
         && str[pos] == '*')
   {
      pos = eat_line_whitespace(str, pos - 1, false);
   }
   // reaching the start of the string or of the line means nothing else
   // came before; any other character disqualifies
   return(  pos < 0
         || str[pos] == '\n'
         || str[pos] == '\r');
} // javaparam_tag_is_start_of_line
1110 
1111 
1112 /**
1113  * Attempts to match a doxygen/javadoc-style comment tag
1114  * @param  str the input string containing the comment text
1115  * @param  idx the starting index
1116  * @return     the index of the character immediately following the matched tag,
1117  *             or -1 if no match is found
1118  */
match_doxygen_javadoc_tag(const std::wstring & str,size_t idx)1119 static int match_doxygen_javadoc_tag(const std::wstring &str, size_t idx)
1120 {
1121    std::wsmatch match;
1122 
1123    if (str[idx] == L'@')
1124    {
1125       std::wregex criteria(L"(@(?:author|"
1126                            L"deprecated|"
1127                            L"exception|"
1128                            L"param(?:\\s*\\[\\s*(?:in\\s*,\\s*out|in|out)\\s*\\])?|"
1129                            L"return|"
1130                            L"see|"
1131                            L"since|"
1132                            L"throws|"
1133                            L"version)\\b)");
1134 
1135       if (  std::regex_search(str.cbegin() + idx, str.cend(), match, criteria)
1136          && match[1].matched
1137          && match.position(1) == std::wsmatch::difference_type(0))
1138       {
1139          std::set<std::wstring> block_tags =
1140          {
1141             L"@author",
1142             L"@deprecated",
1143             L"@exception",
1144             L"@param",
1145             L"@param[in]",
1146             L"@param[in,out]",
1147             L"@param[out]",
1148             L"@return",
1149             L"@see",
1150             L"@since",
1151             L"@throws",
1152             L"@version"
1153          };
1154          std::wstring           result(match[1]);
1155          result.erase(std::remove_if(result.begin(), result.end(), ::isspace), result.end());
1156          auto                   &&it_block_tag = block_tags.find(result);
1157 
1158          if (  it_block_tag != block_tags.end()
1159             && javaparam_tag_is_start_of_line(str, idx))
1160          {
1161             return(int(idx + match[1].length()));
1162          }
1163       }
1164    }
1165    return(-1);
1166 } // match_javadoc_block_tag
1167 
1168 
calculate_doxygen_javadoc_indent_alignment(const std::wstring & str,size_t & doxygen_javadoc_param_name_indent,size_t & doxygen_javadoc_continuation_indent)1169 static void calculate_doxygen_javadoc_indent_alignment(const std::wstring &str,
1170                                                        size_t             &doxygen_javadoc_param_name_indent,
1171                                                        size_t             &doxygen_javadoc_continuation_indent)
1172 {
1173    log_rule_B("cmt_align_doxygen_javadoc_tags");
1174 
1175    doxygen_javadoc_continuation_indent = 0;
1176    doxygen_javadoc_param_name_indent   = 0;
1177 
1178    if (!options::cmt_align_doxygen_javadoc_tags())
1179    {
1180       return;
1181    }
1182 
1183    for (size_t idx = 0; idx < str.size(); ++idx)
1184    {
1185       int start_idx = idx;
1186       int end_idx   = match_doxygen_javadoc_tag(str, start_idx);
1187 
1188       if (end_idx > start_idx)
1189       {
1190          size_t block_tag_width = 1 + std::count_if(str.begin() + start_idx,
1191                                                     str.begin() + end_idx,
1192                                                     [](wchar_t ch) {
1193             return(!unc_isspace(ch));
1194          });
1195 
1196          if (block_tag_width > doxygen_javadoc_param_name_indent)
1197          {
1198             doxygen_javadoc_param_name_indent = block_tag_width;
1199          }
1200          idx = eat_line_whitespace(str, end_idx);
1201 
1202          size_t param_name_width = 0;
1203 
1204          if (str.find(L"@param", start_idx) == size_t(start_idx))
1205          {
1206             param_name_width = 1;
1207 
1208             while (true)
1209             {
1210                while (  !unc_isspace(str[idx])
1211                      && str[idx] != ',')
1212                {
1213                   ++param_name_width;
1214                   ++idx;
1215                }
1216                idx = eat_line_whitespace(str, idx);
1217 
1218                if (str[idx] != ',')
1219                {
1220                   break;
1221                }
1222                param_name_width += 2;
1223                idx               = eat_line_whitespace(str, idx + 1);
1224             }
1225          }
1226 
1227          if (param_name_width > doxygen_javadoc_continuation_indent)
1228          {
1229             doxygen_javadoc_continuation_indent = param_name_width;
1230          }
1231       }
1232    }
1233 
1234    if (doxygen_javadoc_param_name_indent > 0)
1235    {
1236       log_rule_B("cmt_sp_before_doxygen_javadoc_tags");
1237 
1238       doxygen_javadoc_param_name_indent   += options::cmt_sp_before_doxygen_javadoc_tags();
1239       doxygen_javadoc_continuation_indent += doxygen_javadoc_param_name_indent;
1240    }
1241 } // calculate_doxygen_javadoc_indent_alignment
1242 
1243 
calculate_comment_body_indent(cmt_reflow & cmt,const unc_text & str)1244 static void calculate_comment_body_indent(cmt_reflow &cmt, const unc_text &str)
1245 {
1246    cmt.xtra_indent = 0;
1247 
1248    log_rule_B("cmt_indent_multi");
1249 
1250    if (!options::cmt_indent_multi())
1251    {
1252       return;
1253    }
1254    size_t idx      = 0;
1255    size_t len      = str.size();
1256    size_t last_len = 0;
1257 
1258    log_rule_B("cmt_multi_check_last");
1259 
1260    if (options::cmt_multi_check_last())
1261    {
1262       // find the last line length
1263       for (idx = len - 1; idx > 0; idx--)
1264       {
1265          if (  str[idx] == '\n'
1266             || str[idx] == '\r')
1267          {
1268             idx++;
1269 
1270             while (  idx < len
1271                   && (  str[idx] == ' '
1272                      || str[idx] == '\t'))
1273             {
1274                idx++;
1275             }
1276             last_len = len - idx;
1277             break;
1278          }
1279       }
1280    }
1281    // find the first line length
1282    size_t first_len = 0;
1283 
1284    for (idx = 0; idx < len; idx++)
1285    {
1286       if (  str[idx] == '\n'
1287          || str[idx] == '\r')
1288       {
1289          first_len = idx;
1290 
1291          while (  str[first_len - 1] == ' '
1292                || str[first_len - 1] == '\t')
1293          {
1294             if (first_len == 0)
1295             {
1296                fprintf(stderr, "%s(%d): first_len is ZERO, cannot be decremented.\n",
1297                        __func__, __LINE__);
1298                log_flush(true);
1299                exit(EX_SOFTWARE);
1300             }
1301             first_len--;
1302          }
1303 
1304          // handle DOS endings
1305          if (  str[idx] == '\r'
1306             && str[idx + 1] == '\n')
1307          {
1308             idx++;
1309          }
1310          idx++;
1311          break;
1312       }
1313    }
1314 
1315    // Scan the second line
1316    size_t width = 0;
1317 
1318    for ( ; idx < len - 1; idx++)
1319    {
1320       if (  str[idx] == ' '
1321          || str[idx] == '\t')
1322       {
1323          if (width > 0)
1324          {
1325             break;
1326          }
1327          continue;
1328       }
1329 
1330       if (  str[idx] == '\n'
1331          || str[idx] == '\r')
1332       {
1333          break;  // Done with second line
1334       }
1335 
1336       // Count the leading chars
1337       if (  str[idx] == '*'
1338          || str[idx] == '|'
1339          || str[idx] == '\\'
1340          || str[idx] == '#'
1341          || str[idx] == '+')
1342       {
1343          width++;
1344       }
1345       else
1346       {
1347          if (  width != 1
1348             || str[idx - 1] != '*')
1349          {
1350             width = 0;
1351          }
1352          break;
1353       }
1354    }
1355 
1356    // LOG_FMT(LSYS, "%s: first=%d last=%d width=%d\n", __func__, first_len, last_len, width);
1357 
1358    /*
1359     * If the first and last line are the same length and don't contain any
1360     * alphanumeric chars and (the first line len > cmt_multi_first_len_minimum
1361     * or the second leader is the same as the first line length), then the
1362     * indent is 0.
1363     */
1364    log_rule_B("cmt_multi_first_len_minimum");
1365 
1366    if (  first_len == last_len
1367       && (  first_len > options::cmt_multi_first_len_minimum()
1368          || first_len == width))
1369    {
1370       return;
1371    }
1372    cmt.xtra_indent = (width == 2) ? 0 : 1;
1373 } // calculate_comment_body_indent
1374 
1375 
1376 // TODO: can we use search_next_chunk here?
get_next_function(chunk_t * pc)1377 static chunk_t *get_next_function(chunk_t *pc)
1378 {
1379    while ((pc = chunk_get_next(pc)) != nullptr)
1380    {
1381       if (  chunk_is_token(pc, CT_FUNC_DEF)
1382          || chunk_is_token(pc, CT_FUNC_PROTO)
1383          || chunk_is_token(pc, CT_FUNC_CLASS_DEF)
1384          || chunk_is_token(pc, CT_FUNC_CLASS_PROTO)
1385          || chunk_is_token(pc, CT_OC_MSG_DECL))
1386       {
1387          return(pc);
1388       }
1389    }
1390    return(nullptr);
1391 }
1392 
1393 
get_next_class(chunk_t * pc)1394 static chunk_t *get_next_class(chunk_t *pc)
1395 {
1396    return(chunk_get_next(chunk_search_next_cat(pc, CT_CLASS)));
1397 }
1398 
1399 
get_prev_category(chunk_t * pc)1400 static chunk_t *get_prev_category(chunk_t *pc)
1401 {
1402    return(chunk_search_prev_cat(pc, CT_OC_CATEGORY));
1403 }
1404 
1405 
get_next_scope(chunk_t * pc)1406 static chunk_t *get_next_scope(chunk_t *pc)
1407 {
1408    return(chunk_search_next_cat(pc, CT_OC_SCOPE));
1409 }
1410 
1411 
get_prev_oc_class(chunk_t * pc)1412 static chunk_t *get_prev_oc_class(chunk_t *pc)
1413 {
1414    return(chunk_search_prev_cat(pc, CT_OC_CLASS));
1415 }
1416 
1417 
next_up(const unc_text & text,size_t idx,const unc_text & tag)1418 static int next_up(const unc_text &text, size_t idx, const unc_text &tag)
1419 {
1420    size_t offs = 0;
1421 
1422    while (  idx < text.size()
1423          && unc_isspace(text[idx]))
1424    {
1425       idx++;
1426       offs++;
1427    }
1428 
1429    if (text.startswith(tag, idx))
1430    {
1431       return(offs);
1432    }
1433    return(-1);
1434 }
1435 
1436 
add_comment_text(const unc_text & text,cmt_reflow & cmt,bool esc_close,size_t continuation_indent)1437 static void add_comment_text(const unc_text &text,
1438                              cmt_reflow     &cmt,
1439                              bool           esc_close,
1440                              size_t         continuation_indent)
1441 {
1442    bool   was_star  = false;
1443    bool   was_slash = false;
1444    bool   in_word   = false;
1445    size_t len       = text.size();
1446    size_t ch_cnt    = 0; // chars since newline
1447 
1448    // If the '//' is included write it first else we may wrap an empty line
1449    size_t idx = 0;
1450 
1451    if (text.startswith("//"))
1452    {
1453       add_text("//");
1454       idx += 2;
1455 
1456       while (unc_isspace(text[idx]))
1457       {
1458          add_char(text[idx++]);
1459       }
1460    }
1461 
1462    for ( ; idx < len; idx++)  // TODO: avoid modifying idx in loop
1463    {
1464       // Split the comment
1465       if (text[idx] == '\n')
1466       {
1467          in_word = false;
1468          add_char('\n');
1469          cmt_output_indent(cmt.brace_col, cmt.base_col, cmt.column);
1470 
1471          if (cmt.xtra_indent > 0)
1472          {
1473             add_char(' ');
1474          }
1475          // hack to get escaped newlines to align and not duplicate the leading '//'
1476          int tmp = next_up(text, idx + 1, "//");
1477 
1478          if (tmp < 0)
1479          {
1480             add_text(cmt.cont_text);
1481          }
1482          else
1483          {
1484             idx += tmp;
1485          }
1486          ch_cnt = 0;
1487       }
1488       else if (  cmt.reflow
1489               && text[idx] == ' '
1490               && options::cmt_width() > 0
1491               && (  cpd.column > options::cmt_width()
1492                  || (  ch_cnt > 1
1493                     && next_word_exceeds_limit(text, idx))))
1494       {
1495          log_rule_B("cmt_width");
1496          in_word = false;
1497          add_char('\n');
1498          cmt_output_indent(cmt.brace_col, cmt.base_col, cmt.column);
1499 
1500          if (cmt.xtra_indent > 0)
1501          {
1502             add_char(' ');
1503          }
1504          // The number of spaces to insert after the star on subsequent comment lines.
1505          log_rule_B("cmt_sp_after_star_cont");
1506 
1507          /**
1508           * calculate the output column
1509           */
1510          size_t column = options::cmt_sp_after_star_cont();
1511 
1512          if (  text[idx + 1] == 42                 // this is star *
1513             && text[idx + 2] == 47)                // this is      /
1514          {
1515             LOG_FMT(LCONTTEXT, "%s(%d): we have a comment end\n",
1516                     __func__, __LINE__);
1517 
1518             column += cmt.column;
1519          }
1520          else
1521          {
1522             add_text(cmt.cont_text);
1523 
1524             if (continuation_indent > 0)
1525             {
1526                if (options::cmt_align_doxygen_javadoc_tags())
1527                {
1528                   log_rule_B("cmt_align_doxygen_javadoc_tags");
1529                }
1530                else if (options::cmt_reflow_indent_to_paragraph_start())
1531                {
1532                   log_rule_B("cmt_reflow_indent_to_paragraph_start");
1533                }
1534                column += continuation_indent;
1535 
1536                log_rule_B("cmt_sp_after_star_cont");
1537 
1538                if (column >= options::cmt_sp_after_star_cont())
1539                {
1540                   column -= options::cmt_sp_after_star_cont();
1541                }
1542             }
1543             /**
1544              * count the number trailing spaces in the comment continuation text
1545              */
1546             size_t num_trailing_sp = 0;
1547 
1548             while (  num_trailing_sp < cmt.cont_text.size()
1549                   && unc_isspace(cmt.cont_text[cmt.cont_text.size() - 1 - num_trailing_sp]))
1550             {
1551                ++num_trailing_sp;
1552             }
1553             column += cpd.column;
1554 
1555             if (column >= num_trailing_sp)
1556             {
1557                column -= num_trailing_sp;
1558             }
1559          }
1560          output_to_column(column,
1561                           false);
1562          ch_cnt = 0;
1563       }
1564       else
1565       {
1566          // Escape a C closure in a CPP comment
1567          if (  esc_close
1568             && (  (  was_star
1569                   && text[idx] == '/')
1570                || (  was_slash
1571                   && text[idx] == '*')))
1572          {
1573             add_char(' ');
1574          }
1575 
1576          if (  !in_word
1577             && !unc_isspace(text[idx]))
1578          {
1579             cmt.word_count++;
1580          }
1581          in_word = !unc_isspace(text[idx]);
1582 
1583          add_char(text[idx]);
1584          was_star  = (text[idx] == '*');
1585          was_slash = (text[idx] == '/');
1586          ch_cnt++;
1587       }
1588    }
1589 } // add_comment_text
1590 
1591 
output_cmt_start(cmt_reflow & cmt,chunk_t * pc)1592 static void output_cmt_start(cmt_reflow &cmt, chunk_t *pc)
1593 {
1594    cmt.pc          = pc;
1595    cmt.column      = pc->column;
1596    cmt.brace_col   = pc->column_indent;
1597    cmt.base_col    = pc->column_indent;
1598    cmt.word_count  = 0;
1599    cmt.xtra_indent = 0;
1600    cmt.cont_text.clear();
1601    cmt.reflow = false;
1602 
1603    // Issue #2752
1604    log_rule_B("cmt_insert_file_header");
1605    log_rule_B("cmt_insert_file_footer");
1606    log_rule_B("cmt_insert_func_header)");
1607    log_rule_B("cmt_insert_class_header");
1608    log_rule_B("cmt_insert_oc_msg_header");
1609 
1610    if (  options::cmt_insert_file_header().size() > 0
1611       || options::cmt_insert_file_footer().size() > 0
1612       || options::cmt_insert_func_header().size() > 0
1613       || options::cmt_insert_class_header().size() > 0
1614       || options::cmt_insert_oc_msg_header().size() > 0)
1615    {
1616       LOG_FMT(LCONTTEXT, "%s(%d): cmt_insert_file\n", __func__, __LINE__);
1617       do_kw_subst(pc);
1618    }
1619    else
1620    {
1621       LOG_FMT(LCONTTEXT, "%s(%d): no cmt_insert_file\n", __func__, __LINE__);
1622    }
1623 
1624    if (cmt.brace_col == 0)
1625    {
1626       log_rule_B("output_tab_size");
1627       cmt.brace_col = 1 + (pc->brace_level * options::output_tab_size());
1628    }
1629    // LOG_FMT(LSYS, "%s: line %zd, brace=%zd base=%zd col=%zd orig=%zd aligned=%x\n",
1630    //        __func__, pc->orig_line, cmt.brace_col, cmt.base_col, cmt.column, pc->orig_col,
1631    //        pc->flags & (PCF_WAS_ALIGNED | PCF_RIGHT_COMMENT));
1632 
1633    if (  get_chunk_parent_type(pc) == CT_COMMENT_START
1634       || get_chunk_parent_type(pc) == CT_COMMENT_WHOLE)
1635    {
1636       log_rule_B("indent_col1_comment");
1637 
1638       if (  !options::indent_col1_comment()
1639          && pc->orig_col == 1
1640          && !pc->flags.test(PCF_INSERTED))
1641       {
1642          cmt.column    = 1;
1643          cmt.base_col  = 1;
1644          cmt.brace_col = 1;
1645       }
1646    }
1647    // tab aligning code
1648    log_rule_B("indent_cmt_with_tabs");
1649 
1650    if (  options::indent_cmt_with_tabs()
1651       && (  get_chunk_parent_type(pc) == CT_COMMENT_END
1652          || get_chunk_parent_type(pc) == CT_COMMENT_WHOLE))
1653    {
1654       cmt.column = align_tab_column(cmt.column - 1);
1655       // LOG_FMT(LSYS, "%s: line %d, orig:%d new:%d\n",
1656       //        __func__, pc->orig_line, pc->column, cmt.column);
1657       pc->column = cmt.column;
1658    }
1659    cmt.base_col = cmt.column;
1660 
1661    // LOG_FMT(LSYS, "%s: -- brace=%d base=%d col=%d\n",
1662    //        __func__, cmt.brace_col, cmt.base_col, cmt.column);
1663 
1664    // Bump out to the column
1665    cmt_output_indent(cmt.brace_col, cmt.base_col, cmt.column);
1666 } // output_cmt_start
1667 
1668 
can_combine_comment(chunk_t * pc,cmt_reflow & cmt)1669 static bool can_combine_comment(chunk_t *pc, cmt_reflow &cmt)
1670 {
1671    // We can't combine if there is something other than a newline next
1672    if (get_chunk_parent_type(pc) == CT_COMMENT_START)
1673    {
1674       return(false);
1675    }
1676    // next is a newline for sure, make sure it is a single newline
1677    chunk_t *next = chunk_get_next(pc);
1678 
1679    if (  next != nullptr
1680       && next->nl_count == 1)
1681    {
1682       // Make sure the comment is the same type at the same column
1683       next = chunk_get_next(next);
1684 
1685       if (  chunk_is_token(next, pc->type)
1686          && (  (  next->column == 1
1687                && pc->column == 1)
1688             || (  next->column == cmt.base_col
1689                && pc->column == cmt.base_col)
1690             || (  next->column > cmt.base_col
1691                && get_chunk_parent_type(pc) == CT_COMMENT_END)))
1692       {
1693          return(true);
1694       }
1695    }
1696    return(false);
1697 } // can_combine_comment
1698 
1699 
output_comment_c(chunk_t * first)1700 static chunk_t *output_comment_c(chunk_t *first)
1701 {
1702    cmt_reflow cmt;
1703 
1704    output_cmt_start(cmt, first);
1705    log_rule_B("cmt_reflow_mode");
1706    cmt.reflow = (options::cmt_reflow_mode() != 1);
1707 
1708    // See if we can combine this comment with the next comment
1709    log_rule_B("cmt_c_group");
1710 
1711    if (  !options::cmt_c_group()
1712       || !can_combine_comment(first, cmt))
1713    {
1714       // Just add the single comment
1715       log_rule_B("cmt_star_cont");
1716       cmt.cont_text = options::cmt_star_cont() ? " * " : "   ";
1717       LOG_CONTTEXT();
1718 
1719       log_rule_B("cmt_trailing_single_line_c_to_cpp");
1720 
1721       if (options::cmt_trailing_single_line_c_to_cpp() && chunk_is_last_on_line(*first))
1722       {
1723          add_text("//");
1724 
1725          unc_text tmp;
1726          tmp.set(first->str, 2, first->len() - 4);
1727          cmt_trim_whitespace(tmp, false);
1728          add_comment_text(tmp, cmt, false);
1729       }
1730       else
1731       {
1732          add_comment_text(first->str, cmt, false);
1733       }
1734       return(first);
1735    }
1736    log_rule_B("cmt_star_cont");
1737    cmt.cont_text = options::cmt_star_cont() ? " *" : "  ";
1738    LOG_CONTTEXT();
1739 
1740    add_text("/*");
1741 
1742    log_rule_B("cmt_c_nl_start");
1743 
1744    if (options::cmt_c_nl_start())
1745    {
1746       add_comment_text("\n", cmt, false);
1747    }
1748    chunk_t  *pc = first;
1749    unc_text tmp;
1750 
1751    while (can_combine_comment(pc, cmt))
1752    {
1753       LOG_FMT(LCONTTEXT, "%s(%d): text() is '%s'\n",
1754               __func__, __LINE__, pc->text());
1755       tmp.set(pc->str, 2, pc->len() - 4);
1756 
1757       if (  cpd.last_char == '*'
1758          && (  tmp[0] == '/'
1759             || tmp[0] != ' '))                 // Issue #1908
1760       {
1761          LOG_FMT(LCONTTEXT, "%s(%d): add_text a " "\n", __func__, __LINE__);
1762          add_text(" ");
1763       }
1764       // In case of reflow, original comment could contain trailing spaces before closing the comment, we don't need them after reflow
1765       LOG_FMT(LCONTTEXT, "%s(%d): trim\n", __func__, __LINE__);
1766       cmt_trim_whitespace(tmp, false);
1767       LOG_FMT(LCONTTEXT, "%s(%d): add_comment_text(tmp is '%s')\n",
1768               __func__, __LINE__, tmp.c_str());
1769       add_comment_text(tmp, cmt, false);
1770       LOG_FMT(LCONTTEXT, "%s(%d): add_comment_text(newline)\n",
1771               __func__, __LINE__);
1772       add_comment_text("\n", cmt, false);
1773       pc = chunk_get_next(pc);
1774       pc = chunk_get_next(pc);
1775    }
1776    tmp.set(pc->str, 2, pc->len() - 4);
1777 
1778    if (  cpd.last_char == '*'
1779       && tmp[0] == '/')
1780    {
1781       add_text(" ");
1782    }
1783    // In case of reflow, original comment could contain trailing spaces before closing the comment, we don't need them after reflow
1784    cmt_trim_whitespace(tmp, false);
1785    add_comment_text(tmp, cmt, false);
1786 
1787    log_rule_B("cmt_c_nl_end");
1788 
1789    if (options::cmt_c_nl_end())
1790    {
1791       cmt.cont_text = " ";
1792       LOG_CONTTEXT();
1793       add_comment_text("\n", cmt, false);
1794    }
1795    add_comment_text("*/", cmt, false);
1796    return(pc);
1797 } // output_comment_c
1798 
1799 
output_comment_cpp(chunk_t * first)1800 static chunk_t *output_comment_cpp(chunk_t *first)
1801 {
1802    cmt_reflow cmt;
1803 
1804    output_cmt_start(cmt, first);
1805    log_rule_B("cmt_reflow_mode");
1806    cmt.reflow = (options::cmt_reflow_mode() != 1);
1807 
1808    unc_text leadin = "//";             // default setting to keep previous behaviour
1809 
1810    // If true, space is added with sp_cmt_cpp_start will be added after doxygen
1811    // sequences like '///', '///<', '//!' and '//!<'.
1812    log_rule_B("sp_cmt_cpp_doxygen");
1813 
1814    if (options::sp_cmt_cpp_doxygen())  // special treatment for doxygen style comments (treat as unity)
1815    {
1816       const char *sComment = first->text();
1817       bool       grouping  = (sComment[2] == '@');
1818       size_t     brace     = 3;
1819 
1820       if (  sComment[2] == '/'
1821          || sComment[2] == '!') // doxygen style found!
1822       {
1823          leadin += sComment[2];                     // at least one additional char (either "///" or "//!")
1824 
1825          if (sComment[3] == '<')                    // and a further one (either "///<" or "//!<")
1826          {
1827             leadin += '<';
1828          }
1829          else
1830          {
1831             grouping = (sComment[3] == '@');  // or a further one (grouping)
1832             brace    = 4;
1833          }
1834       }
1835 
1836       if (  grouping
1837          && (  sComment[brace] == '{'
1838             || sComment[brace] == '}'))
1839       {
1840          leadin += '@';
1841          leadin += sComment[brace];
1842       }
1843    }
1844    // Special treatment for Qt translator or meta-data comments (treat as unity)
1845    // If true, space is added with sp_cmt_cpp_start will be added after Qt
1846    // translator or meta-data comments like '//:', '//=', and '//~'.
1847    log_rule_B("sp_cmt_cpp_qttr");
1848 
1849    if (options::sp_cmt_cpp_qttr())
1850    {
1851       const int c = first->str[2];
1852 
1853       if (  c == ':'
1854          || c == '='
1855          || c == '~')
1856       {
1857          leadin += c;
1858       }
1859    }
1860    // CPP comments can't be grouped unless they are converted to C comments
1861    log_rule_B("cmt_cpp_to_c");
1862 
1863    if (!options::cmt_cpp_to_c())
1864    {
1865       auto const *cmt_text = first->str.c_str() + 2;
1866       // Add or remove space after the opening of a C++ comment,
1867       // i.e. '// A' vs. '//A'.
1868       auto *sp_cmt = &options::sp_cmt_cpp_start;
1869 
1870       cmt.cont_text = leadin;
1871 
1872       // Get start of comment text
1873       while (  *cmt_text != '\0'
1874             && unc_isspace(*cmt_text))
1875       {
1876          ++cmt_text;
1877       }
1878 
1879       // Determine if we are dealing with a region marker
1880       if (  (  !first->prev
1881             || first->prev->orig_line != first->orig_line)
1882          && (  strncmp(cmt_text, "BEGIN", 5) == 0
1883             || strncmp(cmt_text, "END", 3) == 0))
1884       {
1885          // If sp_cmt_cpp_region is not ignore, use that instead of
1886          // sp_cmt_cpp_start
1887          if (options::sp_cmt_cpp_region() != IARF_IGNORE)
1888          {
1889             sp_cmt = &options::sp_cmt_cpp_region;
1890          }
1891       }
1892       // Add or remove space after the opening of a C++ comment,
1893       // i.e. '// A' vs. '//A'.
1894       log_rule_B(sp_cmt->name());
1895 
1896       if ((*sp_cmt)() != IARF_REMOVE)
1897       {
1898          cmt.cont_text += ' ';
1899       }
1900       LOG_CONTTEXT();
1901 
1902       // Add or remove space after the opening of a C++ comment,
1903       // i.e. '// A' vs. '//A'.
1904       log_rule_B(sp_cmt->name());
1905 
1906       if ((*sp_cmt)() == IARF_IGNORE)
1907       {
1908          add_comment_text(first->str, cmt, false);
1909       }
1910       else
1911       {
1912          size_t   iLISz = leadin.size();
1913          unc_text tmp(first->str, 0, iLISz);
1914          add_comment_text(tmp, cmt, false);
1915 
1916          tmp.set(first->str, iLISz, first->len() - iLISz);
1917 
1918          // Add or remove space after the opening of a C++ comment,
1919          // i.e. '// A' vs. '//A'.
1920          log_rule_B("sp_cmt_cpp_start");
1921 
1922          if ((*sp_cmt)() & IARF_REMOVE)
1923          {
1924             while (  (tmp.size() > 0)
1925                   && unc_isspace(tmp[0]))
1926             {
1927                tmp.pop_front();
1928             }
1929          }
1930 
1931          if (tmp.size() > 0)
1932          {
1933             // Add or remove space after the opening of a C++ comment,
1934             // i.e. '// A' vs. '//A'.
1935             log_rule_B("sp_cmt_cpp_start");
1936 
1937             if ((*sp_cmt)() & IARF_ADD)
1938             {
1939                if (  !unc_isspace(tmp[0])
1940                   && (tmp[0] != '/'))
1941                {
1942                   add_comment_text(" ", cmt, false);
1943                }
1944             }
1945             add_comment_text(tmp, cmt, false);
1946          }
1947       }
1948       return(first);
1949    }
1950    // We are going to convert the CPP comments to C comments
1951    log_rule_B("cmt_star_cont");
1952    cmt.cont_text = options::cmt_star_cont() ? " * " : "   ";
1953    LOG_CONTTEXT();
1954 
1955    unc_text tmp;
1956 
1957    // See if we can combine this comment with the next comment
1958    log_rule_B("cmt_cpp_group");
1959 
1960    if (  !options::cmt_cpp_group()
1961       || !can_combine_comment(first, cmt))
1962    {
1963       // nothing to group: just output a single line
1964       add_text("/*");
1965 
1966       // patch # 32, 2012-03-23
1967       // Add or remove space after the opening of a C++ comment,
1968       // i.e. '// A' vs. '//A'.
1969       log_rule_B("sp_cmt_cpp_start");
1970 
1971       if (  !unc_isspace(first->str[2])
1972          && (options::sp_cmt_cpp_start() & IARF_ADD))
1973       {
1974          add_char(' ');
1975       }
1976       tmp.set(first->str, 2, first->len() - 2);
1977       add_comment_text(tmp, cmt, true);
1978       add_text(" */");
1979       return(first);
1980    }
1981    add_text("/*");
1982 
1983    log_rule_B("cmt_cpp_nl_start");
1984 
1985    if (options::cmt_cpp_nl_start())
1986    {
1987       add_comment_text("\n", cmt, false);
1988    }
1989    else
1990    {
1991       add_text(" ");
1992    }
1993    chunk_t *pc = first;
1994    int     offs;
1995 
1996    while (can_combine_comment(pc, cmt))
1997    {
1998       offs = unc_isspace(pc->str[2]) ? 1 : 0;
1999       tmp.set(pc->str, 2 + offs, pc->len() - (2 + offs));
2000 
2001       if (  cpd.last_char == '*'
2002          && tmp[0] == '/')
2003       {
2004          add_text(" ");
2005       }
2006       add_comment_text(tmp, cmt, true);
2007       add_comment_text("\n", cmt, false);
2008       pc = chunk_get_next(chunk_get_next(pc));
2009    }
2010    offs = unc_isspace(pc->str[2]) ? 1 : 0;
2011    tmp.set(pc->str, 2 + offs, pc->len() - (2 + offs));
2012    add_comment_text(tmp, cmt, true);
2013 
2014    log_rule_B("cmt_cpp_nl_end");
2015 
2016    if (options::cmt_cpp_nl_end())
2017    {
2018       cmt.cont_text = "";
2019       LOG_CONTTEXT();
2020       add_comment_text("\n", cmt, false);
2021    }
2022    add_comment_text(" */", cmt, false);
2023    return(pc);
2024 } // output_comment_cpp
2025 
2026 
cmt_trim_whitespace(unc_text & line,bool in_preproc)2027 static void cmt_trim_whitespace(unc_text &line, bool in_preproc)
2028 {
2029    // Remove trailing whitespace on the line
2030    while (  line.size() > 0
2031          && (  line.back() == ' '
2032             || line.back() == '\t'))
2033    {
2034       line.pop_back();
2035    }
2036 
2037    // Shift back to the comment text, ...
2038    if (  in_preproc             // if in a preproc ...
2039       && line.size() > 1        // with a line that holds ...
2040       && line.back() == '\\')   // a backslash-newline ...
2041    {
2042       bool do_space = false;
2043 
2044       // If there was any space before the backslash, change it to 1 space
2045       line.pop_back();
2046 
2047       while (  line.size() > 0
2048             && (  line.back() == ' '
2049                || line.back() == '\t'))
2050       {
2051          do_space = true;
2052          line.pop_back();
2053       }
2054 
2055       if (do_space)
2056       {
2057          line.append(' ');
2058       }
2059       line.append('\\');
2060    }
2061 } // cmt_trim_whitespace
2062 
2063 
2064 /**
2065  * Return an indexed-map of reflow fold end of line/beginning of line regex pairs read
2066  * from file
2067  */
get_reflow_fold_regex_map()2068 static std::map<std::size_t, std::pair<std::wregex, std::wregex> > get_reflow_fold_regex_map()
2069 {
2070    /**
2071     * TODO: should the following be static to prevent initializing it multiple times?
2072     */
2073    static std::map<std::size_t, std::pair<std::wregex, std::wregex> > regex_map;
2074 
2075    if (regex_map.empty())
2076    {
2077       if (!options::cmt_reflow_fold_regex_file().empty())
2078       {
2079          std::wstring raw_wstring(cpd.reflow_fold_regex.raw.begin(),
2080                                   cpd.reflow_fold_regex.raw.end());
2081 
2082          std::wregex criteria(L"\\s*(?:(?:(beg_of_next)|(end_of_prev))_line_regex)"
2083                               "\\s*\\[\\s*([0-9]+)\\s*\\]\\s*=\\s*\"(.*)\"\\s*"
2084                               "(?=\\r\\n|\\r|\\n|$)");
2085          std::wsregex_iterator it_regex(raw_wstring.cbegin(), raw_wstring.cend(), criteria);
2086          std::wsregex_iterator it_regex_end = std::wsregex_iterator();
2087 
2088          while (it_regex != it_regex_end)
2089          {
2090             std::wsmatch match = *it_regex;
2091 
2092             if (  ((  match[1].matched
2093                    || match[2].matched))
2094                && match[3].matched
2095                && match[4].matched)
2096             {
2097                auto        &&index   = std::stoi(match[3].str());
2098                std::wregex *p_wregex = match[1].matched ? &regex_map[index].second
2099                                                         : &regex_map[index].first;
2100                *p_wregex = match[4].str();
2101             }
2102             ++it_regex;
2103          }
2104       }
2105       else
2106       {
2107          regex_map.emplace(0L, std::make_pair(L"[\\w,\\]\\)]$", L"^[\\w,\\[\\(]"));
2108          regex_map.emplace(1L, std::make_pair(L"\\.$", L"^[A-Z]"));
2109       }
2110    }
2111    return(regex_map);
2112 } // get_reflow_fold_regex_map
2113 
2114 
/**
 * Outputs a multi-line comment chunk.
 *
 * The text in pc->str is walked one character at a time and collected into
 * 'line'. CR and CRLF are converted to LF; leading spaces/tabs of a line are
 * not stored but only advance 'ccol'. Each time a newline or the end of the
 * comment is reached the collected line is emitted: the first line through
 * add_comment_text() as collected, every later line after the indentation and
 * continuation text ('cmt.cont_text') have been written.
 *
 * Optional processing driven by the options read in this function:
 *  - cmt_align_doxygen_javadoc_tags: re-spaces '@' tags that
 *    match_doxygen_javadoc_tag() recognizes
 *  - cmt_reflow_mode == 2: folds a line with the following one when a regex
 *    pair from get_reflow_fold_regex_map() matches
 *  - cmt_indent_multi, cmt_star_cont, cmt_sp_before_star_cont,
 *    cmt_sp_after_star_cont: indentation/lead of the lines after the first
 *
 * Text between a disable-processing marker and a later enable-processing
 * marker is passed to add_text() verbatim.
 *
 * @param pc  the comment chunk to output; the function returns immediately if nullptr
 */
output_comment_multi(chunk_t * pc)2115 static void output_comment_multi(chunk_t *pc)
2116 {
2117    if (pc == nullptr)
2118    {
2119       return;
2120    }
2121    cmt_reflow cmt;
2122 
        // scratch buffer handed to elided_text() for the log message below
2123    char       copy[1000];
2124 
2125    LOG_FMT(LCONTTEXT, "%s(%d): text() is '%s', type is %s, orig_col is %zu, column is %zu\n",
2126            __func__, __LINE__, pc->elided_text(copy), get_token_name(pc->type), pc->orig_col, pc->column);
2127 
2128    output_cmt_start(cmt, pc);
2129    log_rule_B("cmt_reflow_mode");
        // reflow is on for every cmt_reflow_mode value other than 1
2130    cmt.reflow = (options::cmt_reflow_mode() != 1);
2131 
2132    size_t cmt_col  = cmt.base_col;
        // original column minus base column; subtracted from ccol for the lines after the first
2133    int    col_diff = pc->orig_col - cmt.base_col;
2134 
2135    calculate_comment_body_indent(cmt, pc->str);
2136 
2137    log_rule_B("cmt_indent_multi");
2138    log_rule_B("cmt_star_cont");
2139    cmt.cont_text = !options::cmt_indent_multi() ? "" :
2140                    (options::cmt_star_cont() ? "* " : "  ");
2141    LOG_CONTTEXT();
2142 
        // wide-character copy of the comment text, used by the doxygen/javadoc helpers below
2143    std::wstring pc_wstring(pc->str.get().cbegin(),
2144                            pc->str.get().cend());
2145 
2146    size_t doxygen_javadoc_param_name_indent    = 0;
2147    size_t doxygen_javadoc_continuation_indent  = 0;
2148    size_t reflow_paragraph_continuation_indent = 0;
2149 
2150    calculate_doxygen_javadoc_indent_alignment(pc_wstring,
2151                                               doxygen_javadoc_param_name_indent,
2152                                               doxygen_javadoc_continuation_indent);
2153 
2154    size_t   line_count                   = 0;
2155    size_t   ccol                         = pc->column; // the col of subsequent comment lines
2156    size_t   cmt_idx                      = 0;
2157    bool     nl_end                       = false;
2158    bool     doxygen_javadoc_indent_align = false;
2159    unc_text line;
2160 
2161    /*
2162     * Get a map of regex pairs that define expressions to match at both the end
2163     * of the previous line and the beginning of the next line
2164     */
2165    auto &&cmt_reflow_regex_map = get_reflow_fold_regex_map();
2166 
2167    line.clear();
2168    LOG_FMT(LCONTTEXT, "%s(%d): pc->len() is %zu\n",
2169            __func__, __LINE__, pc->len());
2170    //LOG_FMT(LCONTTEXT, "%s(%d): pc->str is %s\n",
2171    //        __func__, __LINE__, pc->str.c_str());
2172 
2173    /**
2174     * check for enable/disable processing comment strings that may
2175     * both be embedded within the same multi-line comment
2176     */
2177    auto disable_processing_cmt_idx = find_disable_processing_comment_marker(pc->str);
2178    auto enable_processing_cmt_idx  = find_enable_processing_comment_marker(pc->str);
2179 
2180    while (cmt_idx < pc->len())
2181    {
           // fetch the next character; cmt_idx now points one past it
2182       int ch = pc->str[cmt_idx];
2183       cmt_idx++;
2184 
           // Past a disable marker that is followed by an enable marker:
           // emit the span between the two markers verbatim and resume at the enable marker.
2185       if (  cmt_idx > std::size_t(disable_processing_cmt_idx)
2186          && enable_processing_cmt_idx > disable_processing_cmt_idx)
2187       {
2188          auto     length = enable_processing_cmt_idx - disable_processing_cmt_idx;
2189          unc_text verbatim_text(pc->str,
2190                                 disable_processing_cmt_idx,
2191                                 length);
2192 
2193          add_text(verbatim_text);
2194 
              // NOTE(review): 'ch' is not re-read after this jump, so the rest of this
              // iteration still works on the character fetched before it - confirm intended
2195          cmt_idx = enable_processing_cmt_idx;
2196 
2197          /**
2198           * check for additional enable/disable processing comment strings that may
2199           * both be embedded within the same multi-line comment
2200           */
2201          disable_processing_cmt_idx = find_disable_processing_comment_marker(pc->str,
2202                                                                              enable_processing_cmt_idx);
2203          enable_processing_cmt_idx = find_enable_processing_comment_marker(pc->str,
2204                                                                            enable_processing_cmt_idx);
2205 
2206          /**
2207           * it's probably necessary to reset the line count to prevent line
2208           * continuation characters from being added to the end of the current line
2209           */
2210          line_count = 0;
2211       }
2212 
2213       // handle the CRLF and CR endings. convert both to LF
2214       if (ch == '\r')
2215       {
2216          ch = '\n';
2217 
2218          if (  cmt_idx < pc->len()
2219             && pc->str[cmt_idx] == '\n')
2220          {
2221             cmt_idx++;
2222          }
2223       }
2224 
2225       // Find the start column
           // (leading spaces/tabs are not stored in 'line'; they only advance ccol)
2226       if (line.size() == 0)
2227       {
2228          nl_end = false;
2229 
2230          if (ch == ' ')
2231          {
2232             ccol++;
2233             continue;
2234          }
2235          else if (ch == '\t')
2236          {
2237             log_rule_B("input_tab_size");
2238             ccol = calc_next_tab_column(ccol, options::input_tab_size());
2239             continue;
2240          }
2241          else
2242          {
2243             LOG_FMT(LCONTTEXT, "%s(%d):ch is %d, %c\n", __func__, __LINE__, ch, char(ch));
2244          }
2245       }
2246 
           // Doxygen/javadoc tag alignment: only attempted on an '@' when the option is enabled
2247       if (  ch == '@'
2248          && options::cmt_align_doxygen_javadoc_tags())
2249       {
              // start_idx is the index of the '@' that was just read
2250          int start_idx = cmt_idx - 1;
2251          int end_idx   = match_doxygen_javadoc_tag(pc_wstring, start_idx);
2252 
2253          if (end_idx > start_idx)
2254          {
2255             doxygen_javadoc_indent_align = true;
2256 
                 // the text in [start_idx, end_idx) with all whitespace removed
2257             std::string match(pc->str.get().cbegin() + start_idx,
2258                               pc->str.get().cbegin() + end_idx);
2259 
2260             match.erase(std::remove_if(match.begin(),
2261                                        match.end(),
2262                                        ::isspace),
2263                         match.end());
2264 
2265             /**
2266              * remove whitespace before the '@'
2267              */
2268             int line_size_before_indent = line.size();
2269 
2270             while (  line_size_before_indent > 0
2271                   && unc_isspace(line.back()))
2272             {
2273                line.pop_back();
2274                --line_size_before_indent;
2275             }
2276             log_rule_B("cmt_sp_before_doxygen_javadoc_tags");
2277 
2278             int indent = options::cmt_sp_before_doxygen_javadoc_tags();
2279 
2280             while (indent-- > 0)
2281             {
2282                line.append(' ');
2283             }
                 // cmt_idx is start_idx + 1 here, so this leaves it at end_idx + 1.
                 // NOTE(review): that is one past end_idx - presumably skipping the
                 // separator that follows the tag; TODO confirm against match_doxygen_javadoc_tag()
2284             cmt_idx += (end_idx - start_idx);
2285             line.append(match.c_str());
2286 
2287             bool is_exception_tag = match.find("@exception") != std::string::npos;
2288             bool is_param_tag     = match.find("@param") != std::string::npos;
2289             bool is_throws_tag    = match.find("@throws") != std::string::npos;
2290 
                 // these three tags are followed by a name that gets its own alignment column
2291             if (  is_exception_tag
2292                || is_param_tag
2293                || is_throws_tag)
2294             {
2295                indent = int(doxygen_javadoc_param_name_indent) - int(line.size());
2296 
2297                while (indent-- > -line_size_before_indent)
2298                {
2299                   line.append(' ');
2300                }
2301 
                    // copy one name (up to whitespace or ','); for @param keep going over
                    // comma-separated names, writing the separator as ", "
2302                while (true)
2303                {
2304                   cmt_idx = eat_line_whitespace(pc->str,
2305                                                 cmt_idx);
2306 
2307                   while (  cmt_idx < pc->len()
2308                         && !unc_isspace(pc->str[cmt_idx])
2309                         && pc->str[cmt_idx] != ',')
2310                   {
2311                      line.append(pc->str[cmt_idx++]);
2312                   }
2313 
2314                   if (!is_param_tag)
2315                   {
2316                      break;
2317                   }
2318                   /**
2319                    * check for the possibility that comma-separated parameter names are present
2320                    */
2321                   cmt_idx = eat_line_whitespace(pc->str,
2322                                                 cmt_idx);
2323 
                       // NOTE(review): cmt_idx is not compared with pc->len() before this read;
                       // relies on how unc_text::operator[] treats an index at the end - confirm
2324                   if (pc->str[cmt_idx] != ',')
2325                   {
2326                      break;
2327                   }
2328                   ++cmt_idx;
2329                   line.append(", ");
2330                }
2331             }
                 // pad up to the continuation column, then copy the next word of the description
2332             cmt_idx = eat_line_whitespace(pc->str,
2333                                           cmt_idx);
2334             indent = int(doxygen_javadoc_continuation_indent) - int(line.size());
2335 
2336             while (indent-- > -line_size_before_indent)
2337             {
2338                line.append(' ');
2339             }
2340 
2341             while (  cmt_idx < pc->len()
2342                   && !unc_isspace(pc->str[cmt_idx]))
2343             {
2344                line.append(pc->str[cmt_idx++]);
2345             }
                 // the tag (including its '@') is already in 'line'; skip the generic append below
2346             continue;
2347          }
2348       }
2349       /*
2350        * Now see if we need/must fold the next line with the current to enable
2351        * full reflow
2352        */
2353       log_rule_B("cmt_reflow_mode");
2354 
2355       if (  options::cmt_reflow_mode() == 2
2356          && ch == '\n'
2357          && cmt_idx < pc->len())
2358       {
2359          int    next_nonempty_line = -1;
2360          int    prev_nonempty_line = -1;
2361          size_t nwidx              = line.size();
2362 
2363          // strip trailing whitespace from the line collected so far
              // (nothing is removed from 'line' here: the loop scans backwards and records in
              // prev_nonempty_line the last index whose character is not whitespace, not '*'
              // and - inside a preprocessor - not a backslash followed by CR/LF)
2364          while (nwidx > 0)
2365          {
2366             nwidx--;
2367 
2368             if (  prev_nonempty_line < 0
2369                && !unc_isspace(line[nwidx])
2370                && line[nwidx] != '*'    // block comment: skip '*' at end of line
2371                && (pc->flags.test(PCF_IN_PREPROC)
2372                    ? (  line[nwidx] != '\\'
2373                      || (  line[nwidx + 1] != '\r'
2374                         && line[nwidx + 1] != '\n'))
2375                    : true))
2376             {
2377                prev_nonempty_line = nwidx; // last non-whitespace char in the previous line
2378             }
2379          }
2380 
              // same test applied forwards over the next input line (up to its CR/LF or the end)
2381          for (size_t nxt_idx = cmt_idx;
2382               (  nxt_idx < pc->len()
2383               && pc->str[nxt_idx] != '\r'
2384               && pc->str[nxt_idx] != '\n');
2385               nxt_idx++)
2386          {
2387             if (  next_nonempty_line < 0
2388                && !unc_isspace(pc->str[nxt_idx])
2389                && pc->str[nxt_idx] != '*'
2390                && (pc->flags.test(PCF_IN_PREPROC)
2391                    ? (  pc->str[nxt_idx] != '\\'
2392                      || (  pc->str[nxt_idx + 1] != '\r'
2393                         && pc->str[nxt_idx + 1] != '\n'))
2394                    : true))
2395             {
2396                next_nonempty_line = nxt_idx;  // first non-whitespace char in the next line
2397             }
2398          }
2399 
2400          if (  options::cmt_reflow_indent_to_paragraph_start()
2401             && next_nonempty_line >= 0
2402             && (  prev_nonempty_line <= 0
2403                || doxygen_javadoc_indent_align))
2404          {
2405             log_rule_B("cmt_reflow_indent_to_paragraph_start");
2406 
                 // count the characters between the nearest preceding '*' (or the start of
                 // the comment text) and the first text character of the next line
2407             int cmt_star_indent = 0;
2408 
2409             while (  next_nonempty_line > cmt_star_indent
2410                   && pc->str[next_nonempty_line - cmt_star_indent - 1] != '*')
2411             {
2412                ++cmt_star_indent;
2413             }
2414             reflow_paragraph_continuation_indent = size_t(cmt_star_indent);
2415          }
2416 
2417          /*
2418           * see if we should fold up; usually that'd be a YES, but there are a few
2419           * situations where folding/reflowing by merging lines is frowned upon:
2420           *
2421           * - ASCII art in the comments (most often, these are drawings done in +-\/|.,*)
2422           *
2423           * - Doxygen/JavaDoc/etc. parameters: these often start with \ or @, at least
2424           *   something clearly non-alphanumeric (you see where we're going with this?)
2425           *
2426           * - bullet lists that are closely spaced: bullets are always non-alphanumeric
2427           *   characters, such as '-' or '+' (or, oh horror, '*' - that's bloody ambiguous
2428           *   to parse :-( ... with or without '*' comment start prefix, that's the
2429           *   question, then.)
2430           *
2431           * - semi-HTML formatted code, e.g. <pre>...</pre> comment sections (NDoc, etc.)
2432           *
2433           * - New lines which form a new paragraph without there having been added an
2434           *   extra empty line between the last sentence and the new one.
2435           *   A bit like this, really; so it is opportune to check if the last line ended
2436           *   in a terminal (that would be the set '.:;!?') and the new line starts with
2437           *   a capital.
2438           *   Though new lines starting with comment delimiters, such as '(', should be
2439           *   pulled up.
2440           *
2441           * So it bores down to this: the only folding (& reflowing) that's going to happen
2442           * is when the next line starts with an alphanumeric character AND the last
2443           * line didn't end with an non-alphanumeric character, except: ',' AND the next
2444           * line didn't start with a '*' all of a sudden while the previous one didn't
2445           * (the ambiguous '*'-for-bullet case!)
2446           */
2447          if (  prev_nonempty_line >= 0
2448             && next_nonempty_line >= int(cmt_idx))
2449          {
2450             std::wstring prev_line(line.get().cbegin(),
2451                                    line.get().cend());
2452             std::wstring next_line(pc->str.get().cbegin() + next_nonempty_line,
2453                                    pc->str.get().cend());
2454 
                 // fold on the first pair whose end-of-previous regex matches at the very end
                 // of 'line' and whose beginning-of-next regex matches at position 0 of next_line
2455             for (auto &&cmt_reflow_regex_map_entry : cmt_reflow_regex_map)
2456             {
2457                auto         &&cmt_reflow_regex_pair  = cmt_reflow_regex_map_entry.second;
2458                auto         &&end_of_prev_line_regex = cmt_reflow_regex_pair.first;
2459                auto         &&beg_of_next_line_regex = cmt_reflow_regex_pair.second;
2460                std::wsmatch match[2];
2461 
2462                if (  std::regex_search(prev_line, match[0], end_of_prev_line_regex)
2463                   && match[0].position(0) + match[0].length(0) == std::wsmatch::difference_type(line.size())
2464                   && std::regex_search(next_line, match[1], beg_of_next_line_regex)
2465                   && match[1].position(0) == 0)
2466                {
2467                   // rewind the line to the last non-alpha:
2468                   line.resize(prev_nonempty_line + 1);
2469 
2470                   // roll the current line forward to the first non-alpha:
2471                   cmt_idx = next_nonempty_line;
2472                   // override the NL and make it a single whitespace:
2473                   ch = ' ';
2474 
2475                   break;
2476                }
2477             }
2478          }
2479       }
2480 
2481       if (ch == '\n')
2482       {
2483          LOG_FMT(LCONTTEXT, "%s(%d):ch is newline\n", __func__, __LINE__);
2484       }
2485       else
2486       {
2487          LOG_FMT(LCONTTEXT, "%s(%d):ch is %d, %c\n", __func__, __LINE__, ch, char(ch));
2488       }
2489       line.append(ch);
2490 
2491       // If we just hit an end of line OR we just hit end-of-comment...
2492       if (  ch == '\n'
2493          || cmt_idx == pc->len())
2494       {
2495          if (ch == '\n')
2496          {
2497             LOG_FMT(LCONTTEXT, "%s(%d):ch is newline\n", __func__, __LINE__);
2498          }
2499          else
2500          {
2501             LOG_FMT(LCONTTEXT, "%s(%d):ch is %d, %c\n", __func__, __LINE__, ch, char(ch));
2502          }
2503          line_count++;
2504          LOG_FMT(LCONTTEXT, "%s(%d):line_count is %zu\n", __func__, __LINE__, line_count);
2505 
2506          // strip trailing tabs and spaces before the newline
2507          if (ch == '\n')
2508          {
2509             nl_end = true;
2510             line.pop_back();
2511             cmt_trim_whitespace(line, pc->flags.test(PCF_IN_PREPROC));
2512          }
2513 
2514          if (line_count == 1)
2515          {
2516             // this is the first line - add unchanged
2517             add_comment_text(line, cmt, false);
2518 
2519             if (nl_end)
2520             {
2521                add_char('\n');
2522             }
2523          }
2524          else
2525          {
2526             /*
2527              * This is not the first line, so we need to indent to the
2528              * correct column. Each line is indented 0 or more spaces.
2529              */
2530             // Ensure ccol is not negative
2531             if (static_cast<int>(ccol) >= col_diff)
2532             {
2533                ccol -= col_diff;
2534             }
2535 
                 // the text of a continuation line never starts left of cmt_col + 3
2536             if (ccol < (cmt_col + 3))
2537             {
2538                ccol = cmt_col + 3;
2539             }
2540 
2541             if (line.size() == 0)
2542             {
2543                // Empty line - just a '\n'
2544                log_rule_B("cmt_star_cont");
2545 
2546                if (options::cmt_star_cont())
2547                {
2548                   // The number of spaces to insert at the start of subsequent comment lines.
2549                   log_rule_B("cmt_sp_before_star_cont");
2550                   cmt.column = cmt_col + options::cmt_sp_before_star_cont();
2551                   cmt_output_indent(cmt.brace_col, cmt.base_col, cmt.column);
2552 
2553                   if (cmt.xtra_indent > 0)
2554                   {
2555                      add_char(' ');
2556                   }
2557                   // multiline comments can have empty lines with some spaces in them for alignment
2558                   // while adding * symbol and aligning them we don't want to keep these trailing spaces
2559                   unc_text tmp = unc_text(cmt.cont_text);
2560                   cmt_trim_whitespace(tmp, false);
2561                   add_text(tmp);
2562                }
2563                add_char('\n');
2564             }
2565             else
2566             {
2567                /*
2568                 * If this doesn't start with a '*' or '|'.
2569                 * '\name' is a common parameter documentation thing.
2570                 */
2571                log_rule_B("cmt_indent_multi");
2572 
2573                if (  options::cmt_indent_multi()
2574                   && line[0] != '*'
2575                   && line[0] != '|'
2576                   && line[0] != '#'
2577                   && (  line[0] != '\\'
2578                      || unc_isalpha(line[1]))
2579                   && line[0] != '+')
2580                {
2581                   // The number of spaces to insert at the start of subsequent comment lines.
2582                   log_rule_B("cmt_sp_before_star_cont");
2583                   size_t start_col = cmt_col + options::cmt_sp_before_star_cont();
2584 
2585                   log_rule_B("cmt_star_cont");
2586 
2587                   if (options::cmt_star_cont())
2588                   {
2589                      cmt.column = start_col;
2590                      cmt_output_indent(cmt.brace_col, cmt.base_col, cmt.column);
2591 
2592                      if (cmt.xtra_indent > 0)
2593                      {
2594                         add_char(' ');
2595                      }
2596                      add_text(cmt.cont_text);
2597                      // The number of spaces to insert after the star on subsequent comment lines.
2598                      log_rule_B("cmt_sp_after_star_cont");
2599                      output_to_column(ccol + options::cmt_sp_after_star_cont(), false);
2600                   }
2601                   else
2602                   {
2603                      cmt.column = ccol;
2604                      cmt_output_indent(cmt.brace_col, cmt.base_col, cmt.column);
2605                   }
2606                }
2607                else
2608                {
2609                   // The number of spaces to insert at the start of subsequent comment lines.
2610                   log_rule_B("cmt_sp_before_star_cont");
2611                   cmt.column = cmt_col + options::cmt_sp_before_star_cont();
2612                   cmt_output_indent(cmt.brace_col, cmt.base_col, cmt.column);
2613 
2614                   if (cmt.xtra_indent > 0)
2615                   {
2616                      add_char(' ');
2617                   }
2618                   size_t idx;
2619 
2620                   // Checks for and updates the lead chars.
2621                   // @return 0=not present, >0=number of chars that are part of the lead
2622                   idx = cmt_parse_lead(line, (cmt_idx == pc->len()));
2623 
2624                   if (idx > 0)
2625                   {
2626                      // >0=number of chars that are part of the lead
2627                      cmt.cont_text.set(line, 0, idx);
2628                      LOG_CONTTEXT();
2629 
                          // "*word" becomes "* word"
2630                      if (  (line.size() >= 2)
2631                         && (line[0] == '*')
2632                         && unc_isalnum(line[1]))
2633                      {
2634                         line.insert(1, ' ');
2635                      }
2636                   }
2637                   else
2638                   {
2639                      // bug #653
2640                      if (language_is_set(LANG_D))
2641                      {
2642                         // 0=no lead char present
2643                         add_text(cmt.cont_text);
2644                      }
2645                   }
2646                }
                    // doxygen/javadoc alignment takes precedence over the reflow paragraph indent
2647                size_t continuation_indent = 0;
2648 
2649                if (doxygen_javadoc_indent_align)
2650                {
2651                   continuation_indent = doxygen_javadoc_continuation_indent;
2652                }
2653                else if (reflow_paragraph_continuation_indent > 0)
2654                {
2655                   continuation_indent = reflow_paragraph_continuation_indent;
2656                }
2657                add_comment_text(line,
2658                                 cmt,
2659                                 false,
2660                                 continuation_indent);
2661 
2662                if (nl_end)
2663                {
2664                   add_text("\n");
2665                }
2666             }
2667          }
              // reset the per-line state before collecting the next line
2668          line.clear();
2669          doxygen_javadoc_indent_align = false;
2670          ccol                         = 1;
2671       }
2672    }
2673 } // output_comment_multi
2674 
2675 
kw_fcn_filename(chunk_t * cmt,unc_text & out_txt)2676 static bool kw_fcn_filename(chunk_t *cmt, unc_text &out_txt)
2677 {
2678    UNUSED(cmt);
2679    out_txt.append(path_basename(cpd.filename.c_str()));
2680    return(true);
2681 }
2682 
2683 
kw_fcn_class(chunk_t * cmt,unc_text & out_txt)2684 static bool kw_fcn_class(chunk_t *cmt, unc_text &out_txt)
2685 {
2686    chunk_t *tmp = nullptr;
2687 
2688    if (language_is_set(LANG_CPP | LANG_OC))
2689    {
2690       chunk_t *fcn = get_next_function(cmt);
2691 
2692       if (chunk_is_token(fcn, CT_OC_MSG_DECL))
2693       {
2694          tmp = get_prev_oc_class(cmt);
2695       }
2696       else
2697       {
2698          tmp = get_next_class(cmt);
2699       }
2700    }
2701    else if (language_is_set(LANG_OC))
2702    {
2703       tmp = get_prev_oc_class(cmt);
2704    }
2705 
2706    if (tmp == nullptr)
2707    {
2708       tmp = get_next_class(cmt);
2709    }
2710 
2711    if (tmp != nullptr)
2712    {
2713       out_txt.append(tmp->str);
2714 
2715       while ((tmp = chunk_get_next(tmp)) != nullptr)
2716       {
2717          if (tmp->type != CT_DC_MEMBER)
2718          {
2719             break;
2720          }
2721          tmp = chunk_get_next(tmp);
2722 
2723          if (tmp != nullptr)
2724          {
2725             out_txt.append("::");
2726             out_txt.append(tmp->str);
2727          }
2728       }
2729       return(true);
2730    }
2731    return(false);
2732 } // kw_fcn_class
2733 
2734 
kw_fcn_message(chunk_t * cmt,unc_text & out_txt)2735 static bool kw_fcn_message(chunk_t *cmt, unc_text &out_txt)
2736 {
2737    chunk_t *fcn = get_next_function(cmt);
2738 
2739    if (!fcn)
2740    {
2741       return(false);
2742    }
2743    out_txt.append(fcn->str);
2744 
2745    chunk_t *tmp  = chunk_get_next_ncnnl(fcn);
2746    chunk_t *word = nullptr;
2747 
2748    while (tmp != nullptr)
2749    {
2750       if (  chunk_is_token(tmp, CT_BRACE_OPEN)
2751          || chunk_is_token(tmp, CT_SEMICOLON))
2752       {
2753          break;
2754       }
2755 
2756       if (chunk_is_token(tmp, CT_OC_COLON))
2757       {
2758          if (word != nullptr)
2759          {
2760             out_txt.append(word->str);
2761             word = nullptr;
2762          }
2763          out_txt.append(":");
2764       }
2765 
2766       if (chunk_is_token(tmp, CT_WORD))
2767       {
2768          word = tmp;
2769       }
2770       tmp = chunk_get_next_ncnnl(tmp);
2771    }
2772    return(true);
2773 } // kw_fcn_message
2774 
2775 
kw_fcn_category(chunk_t * cmt,unc_text & out_txt)2776 static bool kw_fcn_category(chunk_t *cmt, unc_text &out_txt)
2777 {
2778    chunk_t *category = get_prev_category(cmt);
2779 
2780    if (category)
2781    {
2782       out_txt.append('(');
2783       out_txt.append(category->str);
2784       out_txt.append(')');
2785    }
2786    return(true);
2787 } // kw_fcn_category
2788 
2789 
kw_fcn_scope(chunk_t * cmt,unc_text & out_txt)2790 static bool kw_fcn_scope(chunk_t *cmt, unc_text &out_txt)
2791 {
2792    chunk_t *scope = get_next_scope(cmt);
2793 
2794    if (scope)
2795    {
2796       out_txt.append(scope->str);
2797       return(true);
2798    }
2799    return(false);
2800 } // kw_fcn_scope
2801 
2802 
kw_fcn_function(chunk_t * cmt,unc_text & out_txt)2803 static bool kw_fcn_function(chunk_t *cmt, unc_text &out_txt)
2804 {
2805    chunk_t *fcn = get_next_function(cmt);
2806 
2807    if (fcn)
2808    {
2809       if (get_chunk_parent_type(fcn) == CT_OPERATOR)
2810       {
2811          out_txt.append("operator ");
2812       }
2813 
2814       if (  fcn->prev != nullptr
2815          && fcn->prev->type == CT_DESTRUCTOR)
2816       {
2817          out_txt.append('~');
2818       }
2819       out_txt.append(fcn->str);
2820       return(true);
2821    }
2822    return(false);
2823 }
2824 
2825 
kw_fcn_javaparam(chunk_t * cmt,unc_text & out_txt)2826 static bool kw_fcn_javaparam(chunk_t *cmt, unc_text &out_txt)
2827 {
2828    chunk_t *fcn = get_next_function(cmt);
2829 
2830    if (!fcn)
2831    {
2832       return(false);
2833    }
2834    chunk_t *fpo;
2835    chunk_t *fpc;
2836    bool    has_param = true;
2837    bool    need_nl   = false;
2838 
2839    if (chunk_is_token(fcn, CT_OC_MSG_DECL))
2840    {
2841       chunk_t *tmp = chunk_get_next_ncnnl(fcn);
2842       has_param = false;
2843 
2844       while (tmp != nullptr)
2845       {
2846          if (  chunk_is_token(tmp, CT_BRACE_OPEN)
2847             || chunk_is_token(tmp, CT_SEMICOLON))
2848          {
2849             break;
2850          }
2851 
2852          if (has_param)
2853          {
2854             if (need_nl)
2855             {
2856                out_txt.append("\n");
2857             }
2858             need_nl = true;
2859             out_txt.append("@param");
2860             out_txt.append(" ");
2861             out_txt.append(tmp->str);
2862             out_txt.append(" TODO");
2863          }
2864          has_param = false;
2865 
2866          if (chunk_is_token(tmp, CT_PAREN_CLOSE))
2867          {
2868             has_param = true;
2869          }
2870          tmp = chunk_get_next_ncnnl(tmp);
2871       }
2872       fpo = fpc = nullptr;
2873    }
2874    else
2875    {
2876       fpo = chunk_get_next_type(fcn, CT_FPAREN_OPEN, fcn->level);
2877 
2878       if (fpo == nullptr)
2879       {
2880          return(true);
2881       }
2882       fpc = chunk_get_next_type(fpo, CT_FPAREN_CLOSE, fcn->level);
2883 
2884       if (fpc == nullptr)
2885       {
2886          return(true);
2887       }
2888    }
2889    chunk_t *tmp;
2890 
2891    // Check for 'foo()' and 'foo(void)'
2892    if (chunk_get_next_ncnnl(fpo) == fpc)
2893    {
2894       has_param = false;
2895    }
2896    else
2897    {
2898       tmp = chunk_get_next_ncnnl(fpo);
2899 
2900       if (  (tmp == chunk_get_prev_ncnnl(fpc))
2901          && chunk_is_str(tmp, "void", 4))
2902       {
2903          has_param = false;
2904       }
2905    }
2906 
2907    if (has_param)
2908    {
2909       chunk_t *prev = nullptr;
2910       tmp = fpo;
2911 
2912       while ((tmp = chunk_get_next(tmp)) != nullptr)
2913       {
2914          if (  chunk_is_token(tmp, CT_COMMA)
2915             || tmp == fpc)
2916          {
2917             if (need_nl)
2918             {
2919                out_txt.append("\n");
2920             }
2921             need_nl = true;
2922             out_txt.append("@param");
2923 
2924             if (prev != nullptr)
2925             {
2926                out_txt.append(" ");
2927                out_txt.append(prev->str);
2928                out_txt.append(" TODO");
2929             }
2930             prev = nullptr;
2931 
2932             if (tmp == fpc)
2933             {
2934                break;
2935             }
2936          }
2937 
2938          if (chunk_is_token(tmp, CT_WORD))
2939          {
2940             prev = tmp;
2941          }
2942       }
2943    }
2944    // Do the return stuff
2945    tmp = chunk_get_prev_ncnnl(fcn);
2946 
2947    // For Objective-C we need to go to the previous chunk
2948    if (  tmp != nullptr
2949       && get_chunk_parent_type(tmp) == CT_OC_MSG_DECL
2950       && chunk_is_token(tmp, CT_PAREN_CLOSE))
2951    {
2952       tmp = chunk_get_prev_ncnnl(tmp);
2953    }
2954 
2955    if (  tmp != nullptr
2956       && !chunk_is_str(tmp, "void", 4))
2957    {
2958       if (need_nl)
2959       {
2960          out_txt.append("\n");
2961       }
2962       out_txt.append("@return TODO");
2963    }
2964    return(true);
2965 } // kw_fcn_javaparam
2966 
2967 
kw_fcn_fclass(chunk_t * cmt,unc_text & out_txt)2968 static bool kw_fcn_fclass(chunk_t *cmt, unc_text &out_txt)
2969 {
2970    chunk_t *fcn = get_next_function(cmt);
2971 
2972    if (!fcn)
2973    {
2974       return(false);
2975    }
2976 
2977    if (fcn->flags.test(PCF_IN_CLASS))
2978    {
2979       // if inside a class, we need to find to the class name
2980       chunk_t *tmp = chunk_get_prev_type(fcn, CT_BRACE_OPEN, fcn->level - 1);
2981       tmp = chunk_get_prev_type(tmp, CT_CLASS, tmp->level);
2982       tmp = chunk_get_next_ncnnl(tmp);
2983 
2984       while (chunk_is_token(chunk_get_next_ncnnl(tmp), CT_DC_MEMBER))
2985       {
2986          tmp = chunk_get_next_ncnnl(tmp);
2987          tmp = chunk_get_next_ncnnl(tmp);
2988       }
2989 
2990       if (tmp != nullptr)
2991       {
2992          out_txt.append(tmp->str);
2993          return(true);
2994       }
2995    }
2996    else
2997    {
2998       // if outside a class, we expect "CLASS::METHOD(...)"
2999       chunk_t *tmp = chunk_get_prev_ncnnl(fcn);
3000 
3001       if (chunk_is_token(tmp, CT_OPERATOR))
3002       {
3003          tmp = chunk_get_prev_ncnnl(tmp);
3004       }
3005 
3006       if (  tmp != nullptr
3007          && (  chunk_is_token(tmp, CT_DC_MEMBER)
3008             || chunk_is_token(tmp, CT_MEMBER)))
3009       {
3010          tmp = chunk_get_prev_ncnnl(tmp);
3011          out_txt.append(tmp->str);
3012          return(true);
3013       }
3014    }
3015    return(false);
3016 } // kw_fcn_fclass
3017 
3018 
kw_fcn_year(chunk_t * cmt,unc_text & out_txt)3019 static bool kw_fcn_year(chunk_t *cmt, unc_text &out_txt)
3020 {
3021    UNUSED(cmt);
3022    time_t now = time(nullptr);
3023 
3024    out_txt.append(std::to_string(1900 + localtime(&now)->tm_year));
3025    return(true);
3026 }
3027 
3028 
3029 struct kw_subst_t
3030 {
3031    const char *tag;
3032    bool       (*func)(chunk_t *cmt, unc_text &out_txt);
3033 };
3034 
3035 
3036 static const kw_subst_t kw_subst_table[] =
3037 {
3038    { "$(filename)",  kw_fcn_filename  },
3039    { "$(class)",     kw_fcn_class     },
3040    { "$(message)",   kw_fcn_message   },
3041    { "$(category)",  kw_fcn_category  },
3042    { "$(scope)",     kw_fcn_scope     },
3043    { "$(function)",  kw_fcn_function  },
3044    { "$(javaparam)", kw_fcn_javaparam },
3045    { "$(fclass)",    kw_fcn_fclass    },
3046    { "$(year)",      kw_fcn_year      },
3047 };
3048 
3049 
do_kw_subst(chunk_t * pc)3050 static void do_kw_subst(chunk_t *pc)
3051 {
3052    for (const auto &kw : kw_subst_table)
3053    {
3054       int idx = pc->str.find(kw.tag);
3055 
3056       if (idx < 0)
3057       {
3058          continue;
3059       }
3060       unc_text tmp_txt;
3061       tmp_txt.clear();
3062 
3063       if (kw.func(pc, tmp_txt))
3064       {
3065          // if the replacement contains '\n' we need to fix the lead
3066          if (tmp_txt.find("\n") >= 0)
3067          {
3068             size_t nl_idx = pc->str.rfind("\n", idx);
3069 
3070             if (nl_idx > 0)
3071             {
3072                // idx and nl_idx are both positive
3073                unc_text nl_txt;
3074                nl_txt.append("\n");
3075                nl_idx++;
3076 
3077                while (  (nl_idx < static_cast<size_t>(idx))
3078                      && !unc_isalnum(pc->str[nl_idx]))
3079                {
3080                   nl_txt.append(pc->str[nl_idx++]);
3081                }
3082                tmp_txt.replace("\n", nl_txt);
3083             }
3084          }
3085          pc->str.replace(kw.tag, tmp_txt);
3086       }
3087    }
3088 } // do_kw_subst
3089 
3090 
output_comment_multi_simple(chunk_t * pc)3091 static void output_comment_multi_simple(chunk_t *pc)
3092 {
3093    if (pc == nullptr)
3094    {
3095       return;
3096    }
3097    cmt_reflow cmt;
3098 
3099    LOG_FMT(LCONTTEXT, "%s(%d): text() is '%s', type is %s, orig_col is %zu, column is %zu\n",
3100            __func__, __LINE__, pc->text(), get_token_name(pc->type), pc->orig_col, pc->column);
3101 
3102    output_cmt_start(cmt, pc);
3103 
3104    // The multiline comment is saved inside one chunk. If the comment is
3105    // shifted all lines of the comment need to be shifted by the same amount.
3106    // Save the difference of initial and current position to apply it on every
3107    // line_column
3108    const int col_diff = [pc]()
3109    {
3110       int diff = 0;
3111 
3112       if (chunk_is_newline(chunk_get_prev(pc)))
3113       {
3114          // The comment should be indented correctly
3115          diff = pc->column - pc->orig_col;
3116       }
3117       return(diff);
3118    }();
3119 
3120    /**
3121     * check for enable/disable processing comment strings that may
3122     * both be embedded within the same multi-line comment
3123     */
3124    auto     disable_processing_cmt_idx = find_disable_processing_comment_marker(pc->str);
3125    auto     enable_processing_cmt_idx  = find_enable_processing_comment_marker(pc->str);
3126 
3127    unc_text line;
3128    size_t   line_count  = 0;
3129    size_t   line_column = pc->column;
3130    size_t   cmt_idx     = 0;
3131 
3132    while (cmt_idx < pc->len())
3133    {
3134       int ch = pc->str[cmt_idx];
3135       cmt_idx++;
3136 
3137       if (  cmt_idx > std::size_t(disable_processing_cmt_idx)
3138          && enable_processing_cmt_idx > disable_processing_cmt_idx)
3139       {
3140          auto     length = enable_processing_cmt_idx - disable_processing_cmt_idx;
3141          unc_text verbatim_text(pc->str,
3142                                 disable_processing_cmt_idx,
3143                                 length);
3144 
3145          add_text(verbatim_text);
3146 
3147          cmt_idx = enable_processing_cmt_idx;
3148 
3149          /**
3150           * check for additional enable/disable processing comment strings that may
3151           * both be embedded within the same multi-line comment
3152           */
3153          disable_processing_cmt_idx = find_disable_processing_comment_marker(pc->str,
3154                                                                              enable_processing_cmt_idx);
3155          enable_processing_cmt_idx = find_enable_processing_comment_marker(pc->str,
3156                                                                            enable_processing_cmt_idx);
3157 
3158          line.clear();
3159 
3160          continue;
3161       }
3162       // 1: step through leading tabs and spaces to find the start column
3163       log_rule_B("cmt_convert_tab_to_spaces");
3164 
3165       if (  line.size() == 0
3166          && (  line_column < cmt.base_col
3167             || options::cmt_convert_tab_to_spaces()))
3168       {
3169          if (ch == ' ')
3170          {
3171             line_column++;
3172             continue;
3173          }
3174          else if (ch == '\t')
3175          {
3176             log_rule_B("input_tab_size");
3177             line_column = calc_next_tab_column(line_column, options::input_tab_size());
3178             continue;
3179          }
3180          else
3181          {
3182             LOG_FMT(LCONTTEXT, "%s(%d):ch is %d, %c\n", __func__, __LINE__, ch, char(ch));
3183          }
3184       }
3185 
3186       // 2: add chars to line, handle the CRLF and CR endings (convert both to LF)
3187       if (ch == '\r')
3188       {
3189          ch = '\n';
3190 
3191          if (  (cmt_idx < pc->len())
3192             && (pc->str[cmt_idx] == '\n'))
3193          {
3194             cmt_idx++;
3195          }
3196       }
3197       LOG_FMT(LCONTTEXT, "%s(%d):Line is %s\n", __func__, __LINE__, line.c_str());
3198       line.append(ch);
3199       LOG_FMT(LCONTTEXT, "%s(%d):Line is %s\n", __func__, __LINE__, line.c_str());
3200 
3201       // If we just hit an end of line OR we just hit end-of-comment...
3202       if (  ch == '\n'
3203          || cmt_idx == pc->len())
3204       {
3205          line_count++;
3206          LOG_FMT(LCONTTEXT, "%s(%d):line_count is %zu\n", __func__, __LINE__, line_count);
3207 
3208          // strip trailing tabs and spaces before the newline
3209          if (ch == '\n')
3210          {
3211             line.pop_back();
3212 
3213             // Say we aren't in a preproc to prevent changing any bs-nl
3214             cmt_trim_whitespace(line, false);
3215 
3216             line.append('\n');
3217          }
3218 
3219          if (line.size() > 0)
3220          {
3221             // unless line contains only a single newline char, indent if the
3222             // line consists of either:
3223             if (  line.size() > 1 // more than a single newline char or
3224                || ch != '\n')     // (end-of-comment) a single non newline char
3225             {
3226                if (line_count > 1)
3227                {
3228                   // apply comment column shift without underflowing
3229                   line_column = (  col_diff < 0
3230                                 && (cast_abs(line_column, col_diff) > line_column))
3231                                 ? 0 : line_column + col_diff;
3232                }
3233                cmt.column = line_column;
3234                cmt_output_indent(cmt.brace_col, cmt.base_col, cmt.column);
3235             }
3236             add_text(line);
3237 
3238             line.clear();
3239          }
3240          line_column = 1;
3241       }
3242    }
3243 } // output_comment_multi_simple
3244 
3245 
generate_if_conditional_as_text(unc_text & dst,chunk_t * ifdef)3246 static void generate_if_conditional_as_text(unc_text &dst, chunk_t *ifdef)
3247 {
3248    int column = -1;
3249 
3250    dst.clear();
3251 
3252    for (chunk_t *pc = ifdef; pc != nullptr; pc = chunk_get_next(pc))
3253    {
3254       if (column == -1)
3255       {
3256          column = pc->column;
3257       }
3258 
3259       if (  chunk_is_token(pc, CT_NEWLINE)
3260          || chunk_is_token(pc, CT_COMMENT_MULTI)
3261          || chunk_is_token(pc, CT_COMMENT_CPP))
3262       {
3263          break;
3264       }
3265       else if (chunk_is_token(pc, CT_NL_CONT))
3266       {
3267          dst   += ' ';
3268          column = -1;
3269       }
3270       else if (  chunk_is_token(pc, CT_COMMENT)
3271               || chunk_is_token(pc, CT_COMMENT_EMBED))
3272       {
3273       }
3274       else // if (chunk_is_token(pc, CT_JUNK)) || else
3275       {
3276          for (int spacing = pc->column - column; spacing > 0; spacing--)
3277          {
3278             dst += ' ';
3279             column++;
3280          }
3281 
3282          dst.append(pc->str);
3283          column += pc->len();
3284       }
3285    }
3286 } // generate_if_conditional_as_text
3287 
3288 
add_long_preprocessor_conditional_block_comment(void)3289 void add_long_preprocessor_conditional_block_comment(void)
3290 {
3291    chunk_t *pp_start = nullptr;
3292    chunk_t *pp_end   = nullptr;
3293 
3294    for (chunk_t *pc = chunk_get_head(); pc; pc = chunk_get_next_ncnnl(pc))
3295    {
3296       // just track the preproc level:
3297       if (chunk_is_token(pc, CT_PREPROC))
3298       {
3299          pp_end = pp_start = pc;
3300       }
3301 
3302       if (  pc->type != CT_PP_IF
3303          || !pp_start)
3304       {
3305          continue;
3306       }
3307 #if 0
3308       if (pc->flags.test(PCF_IN_PREPROC))
3309       {
3310          continue;
3311       }
3312 #endif
3313 
3314       chunk_t *br_close;
3315       chunk_t *br_open = pc;
3316       size_t  nl_count = 0;
3317 
3318       chunk_t *tmp = pc;
3319 
3320       while ((tmp = chunk_get_next(tmp)) != nullptr)
3321       {
3322          // just track the preproc level:
3323          if (chunk_is_token(tmp, CT_PREPROC))
3324          {
3325             pp_end = tmp;
3326          }
3327 
3328          if (chunk_is_newline(tmp))
3329          {
3330             nl_count += tmp->nl_count;
3331          }
3332          else if (  pp_end->pp_level == pp_start->pp_level
3333                  && (  chunk_is_token(tmp, CT_PP_ENDIF)
3334                     || ((chunk_is_token(br_open, CT_PP_IF)) ? (chunk_is_token(tmp, CT_PP_ELSE)) : 0)))
3335          {
3336             br_close = tmp;
3337 
3338             LOG_FMT(LPPIF, "found #if / %s section on lines %zu and %zu, nl_count=%zu\n",
3339                     (chunk_is_token(tmp, CT_PP_ENDIF) ? "#endif" : "#else"),
3340                     br_open->orig_line, br_close->orig_line, nl_count);
3341 
3342             // Found the matching #else or #endif - make sure a newline is next
3343             tmp = chunk_get_next(tmp);
3344 
3345             LOG_FMT(LPPIF, "next item type %d (is %s)\n",
3346                     (tmp ? tmp->type : -1), (tmp ? chunk_is_newline(tmp) ? "newline"
3347                                              : chunk_is_comment(tmp) ? "comment" : "other" : "---"));
3348 
3349             if (  tmp == nullptr
3350                || chunk_is_token(tmp, CT_NEWLINE)) // chunk_is_newline(tmp))
3351             {
3352                size_t nl_min;
3353 
3354                if (chunk_is_token(br_close, CT_PP_ENDIF))
3355                {
3356                   log_rule_B("mod_add_long_ifdef_endif_comment");
3357                   nl_min = options::mod_add_long_ifdef_endif_comment();
3358                }
3359                else
3360                {
3361                   log_rule_B("mod_add_long_ifdef_else_comment");
3362                   nl_min = options::mod_add_long_ifdef_else_comment();
3363                }
3364                const char *txt = !tmp ? "EOF" : ((chunk_is_token(tmp, CT_PP_ENDIF)) ? "#endif" : "#else");
3365                LOG_FMT(LPPIF, "#if / %s section candidate for augmenting when over NL threshold %zu != 0 (nl_count=%zu)\n",
3366                        txt, nl_min, nl_count);
3367 
3368                if (  nl_min > 0
3369                   && nl_count > nl_min) // nl_count is 1 too large at all times as #if line was counted too
3370                {
3371                   // determine the added comment style
3372                   c_token_t style = (language_is_set(LANG_CPP)) ?
3373                                     CT_COMMENT_CPP : CT_COMMENT;
3374 
3375                   unc_text str;
3376                   generate_if_conditional_as_text(str, br_open);
3377 
3378                   LOG_FMT(LPPIF, "#if / %s section over threshold %zu (nl_count=%zu) --> insert comment after the %s: %s\n",
3379                           txt, nl_min, nl_count, txt, str.c_str());
3380 
3381                   // Add a comment after the close brace
3382                   insert_comment_after(br_close, style, str);
3383                }
3384             }
3385 
3386             // checks both the #else and #endif for a given level, only then look further in the main loop
3387             if (chunk_is_token(br_close, CT_PP_ENDIF))
3388             {
3389                break;
3390             }
3391          }
3392       }
3393    }
3394 } // add_long_preprocessor_conditional_block_comment
3395