1 /* GNU fmt -- simple text formatter.
2    Copyright (C) 1994-2020 Free Software Foundation, Inc.
3 
4    This program is free software: you can redistribute it and/or modify
5    it under the terms of the GNU General Public License as published by
6    the Free Software Foundation, either version 3 of the License, or
7    (at your option) any later version.
8 
9    This program is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12    GNU General Public License for more details.
13 
14    You should have received a copy of the GNU General Public License
15    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
16 
17 /* Written by Ross Paterson <rap@doc.ic.ac.uk>.  */
18 
19 #include <config.h>
20 #include <stdio.h>
21 #include <sys/types.h>
22 #include <getopt.h>
23 #include <assert.h>
24 
25 /* Redefine.  Otherwise, systems (Unicos for one) with headers that define
26    it to be a type get syntax errors for the variable declaration below.  */
27 #define word unused_word_type
28 
29 #include "system.h"
30 #include "error.h"
31 #include "fadvise.h"
32 #include "xdectoint.h"
33 
34 /* The official name of this program (e.g., no 'g' prefix).  */
35 #define PROGRAM_NAME "fmt"
36 
37 #define AUTHORS proper_name ("Ross Paterson")
38 
39 /* The following parameters represent the program's idea of what is
40    "best".  Adjust to taste, subject to the caveats given.  */
41 
42 /* Default longest permitted line length (max_width).  */
43 #define WIDTH	75
44 
45 /* Prefer lines to be LEEWAY % shorter than the maximum width, giving
46    room for optimization.  */
47 #define LEEWAY	7
48 
49 /* The default secondary indent of tagged paragraph used for unindented
50    one-line paragraphs not preceded by any multi-line paragraphs.  */
51 #define DEF_INDENT 3
52 
53 /* Costs and bonuses are expressed as the equivalent departure from the
54    optimal line length, multiplied by 10.  e.g. assigning something a
55    cost of 50 means that it is as bad as a line 5 characters too short
56    or too long.  The definition of SHORT_COST(n) should not be changed.
57    However, EQUIV(n) may need tuning.  */
58 
59 /* FIXME: "fmt" misbehaves given large inputs or options.  One
60    possible workaround for part of the problem is to change COST to be
61    a floating-point type.  There are other problems besides COST,
62    though; see MAXWORDS below.  */
63 
64 typedef long int COST;
65 
66 #define MAXCOST	TYPE_MAXIMUM (COST)
67 
68 #define SQR(n)		((n) * (n))
69 #define EQUIV(n)	SQR ((COST) (n))
70 
71 /* Cost of a filled line n chars longer or shorter than goal_width.  */
72 #define SHORT_COST(n)	EQUIV ((n) * 10)
73 
74 /* Cost of the difference between adjacent filled lines.  */
75 #define RAGGED_COST(n)	(SHORT_COST (n) / 2)
76 
77 /* Basic cost per line.  */
78 #define LINE_COST	EQUIV (70)
79 
80 /* Cost of breaking a line after the first word of a sentence, where
81    the length of the word is N.  */
82 #define WIDOW_COST(n)	(EQUIV (200) / ((n) + 2))
83 
84 /* Cost of breaking a line before the last word of a sentence, where
85    the length of the word is N.  */
86 #define ORPHAN_COST(n)	(EQUIV (150) / ((n) + 2))
87 
88 /* Bonus for breaking a line at the end of a sentence.  */
89 #define SENTENCE_BONUS	EQUIV (50)
90 
91 /* Cost of breaking a line after a period not marking end of a sentence.
92    With the definition of sentence we are using (borrowed from emacs, see
93    get_line()) such a break would then look like a sentence break.  Hence
94    we assign a very high cost -- it should be avoided unless things are
95    really bad.  */
96 #define NOBREAK_COST	EQUIV (600)
97 
98 /* Bonus for breaking a line before open parenthesis.  */
99 #define PAREN_BONUS	EQUIV (40)
100 
101 /* Bonus for breaking a line after other punctuation.  */
102 #define PUNCT_BONUS	EQUIV(40)
103 
104 /* Credit for breaking a long paragraph one line later.  */
105 #define LINE_CREDIT	EQUIV(3)
106 
107 /* Size of paragraph buffer, in words and characters.  Longer paragraphs
108    are handled neatly (cf. flush_paragraph()), so long as these values
109    are considerably greater than required by the width.  These values
110    cannot be extended indefinitely: doing so would run into size limits
111    and/or cause more overflows in cost calculations.  FIXME: Remove these
112    arbitrary limits.  */
113 
114 #define MAXWORDS	1000
115 #define MAXCHARS	5000
116 
117 /* Extra ctype(3)-style macros.  */
118 
119 #define isopen(c)	(strchr ("(['`\"", c) != NULL)
120 #define isclose(c)	(strchr (")]'\"", c) != NULL)
121 #define isperiod(c)	(strchr (".?!", c) != NULL)
122 
123 /* Size of a tab stop, for expansion on input and re-introduction on
124    output.  */
125 #define TABWIDTH	8
126 
127 /* Word descriptor structure.  */
128 
129 typedef struct Word WORD;
130 
131 struct Word
132   {
133 
134     /* Static attributes determined during input.  */
135 
136     const char *text;		/* the text of the word */
137     int length;			/* length of this word */
138     int space;			/* the size of the following space */
139     unsigned int paren:1;	/* starts with open paren */
140     unsigned int period:1;	/* ends in [.?!])* */
141     unsigned int punct:1;	/* ends in punctuation */
142     unsigned int final:1;	/* end of sentence */
143 
144     /* The remaining fields are computed during the optimization.  */
145 
146     int line_length;		/* length of the best line starting here */
147     COST best_cost;		/* cost of best paragraph starting here */
148     WORD *next_break;		/* break which achieves best_cost */
149   };
150 
151 /* Forward declarations.  */
152 
153 static void set_prefix (char *p);
154 static void fmt (FILE *f);
155 static bool get_paragraph (FILE *f);
156 static int get_line (FILE *f, int c);
157 static int get_prefix (FILE *f);
158 static int get_space (FILE *f, int c);
159 static int copy_rest (FILE *f, int c);
160 static bool same_para (int c);
161 static void flush_paragraph (void);
162 static void fmt_paragraph (void);
163 static void check_punctuation (WORD *w);
164 static COST base_cost (WORD *this);
165 static COST line_cost (WORD *next, int len);
166 static void put_paragraph (WORD *finish);
167 static void put_line (WORD *w, int indent);
168 static void put_word (WORD *w);
169 static void put_space (int space);
170 
171 /* Option values.  */
172 
173 /* If true, first 2 lines may have different indent (default false).  */
174 static bool crown;
175 
176 /* If true, first 2 lines _must_ have different indent (default false).  */
177 static bool tagged;
178 
179 /* If true, each line is a paragraph on its own (default false).  */
180 static bool split;
181 
182 /* If true, don't preserve inter-word spacing (default false).  */
183 static bool uniform;
184 
185 /* Prefix minus leading and trailing spaces (default "").  */
186 static const char *prefix;
187 
188 /* User-supplied maximum line width (default WIDTH).  The only output
189    lines longer than this will each comprise a single word.  */
190 static int max_width;
191 
192 /* Values derived from the option values.  */
193 
194 /* The length of prefix minus leading space.  */
195 static int prefix_full_length;
196 
197 /* The length of the leading space trimmed from the prefix.  */
198 static int prefix_lead_space;
199 
200 /* The length of prefix minus leading and trailing space.  */
201 static int prefix_length;
202 
203 /* The preferred width of text lines, set to LEEWAY % less than max_width.  */
204 static int goal_width;
205 
206 /* Dynamic variables.  */
207 
208 /* Start column of the character most recently read from the input file.  */
209 static int in_column;
210 
211 /* Start column of the next character to be written to stdout.  */
212 static int out_column;
213 
214 /* Space for the paragraph text -- longer paragraphs are handled neatly
215    (cf. flush_paragraph()).  */
216 static char parabuf[MAXCHARS];
217 
218 /* A pointer into parabuf, indicating the first unused character position.  */
219 static char *wptr;
220 
221 /* The words of a paragraph -- longer paragraphs are handled neatly
222    (cf. flush_paragraph()).  */
223 static WORD word[MAXWORDS];
224 
225 /* A pointer into the above word array, indicating the first position
226    after the last complete word.  Sometimes it will point at an incomplete
227    word.  */
228 static WORD *word_limit;
229 
230 /* If true, current input file contains tab characters, and so tabs can be
231    used for white space on output.  */
232 static bool tabs;
233 
234 /* Space before trimmed prefix on each line of the current paragraph.  */
235 static int prefix_indent;
236 
237 /* Indentation of the first line of the current paragraph.  */
238 static int first_indent;
239 
240 /* Indentation of other lines of the current paragraph */
241 static int other_indent;
242 
243 /* To detect the end of a paragraph, we need to look ahead to the first
244    non-blank character after the prefix on the next line, or the first
245    character on the following line that failed to match the prefix.
246    We can reconstruct the lookahead from that character (next_char), its
247    position on the line (in_column) and the amount of space before the
248    prefix (next_prefix_indent).  See get_paragraph() and copy_rest().  */
249 
250 /* The last character read from the input file.  */
251 static int next_char;
252 
253 /* The space before the trimmed prefix (or part of it) on the next line
254    after the current paragraph.  */
255 static int next_prefix_indent;
256 
257 /* If nonzero, the length of the last line output in the current
258    paragraph, used to charge for raggedness at the split point for long
259    paragraphs chosen by fmt_paragraph().  */
260 static int last_line_length;
261 
262 void
usage(int status)263 usage (int status)
264 {
265   if (status != EXIT_SUCCESS)
266     emit_try_help ();
267   else
268     {
269       printf (_("Usage: %s [-WIDTH] [OPTION]... [FILE]...\n"), program_name);
270       fputs (_("\
271 Reformat each paragraph in the FILE(s), writing to standard output.\n\
272 The option -WIDTH is an abbreviated form of --width=DIGITS.\n\
273 "), stdout);
274 
275       emit_stdin_note ();
276       emit_mandatory_arg_note ();
277 
278       fputs (_("\
279   -c, --crown-margin        preserve indentation of first two lines\n\
280   -p, --prefix=STRING       reformat only lines beginning with STRING,\n\
281                               reattaching the prefix to reformatted lines\n\
282   -s, --split-only          split long lines, but do not refill\n\
283 "),
284              stdout);
285       /* Tell xgettext that the "% o" below is not a printf-style
286          format string:  xgettext:no-c-format */
287       fputs (_("\
288   -t, --tagged-paragraph    indentation of first line different from second\n\
289   -u, --uniform-spacing     one space between words, two after sentences\n\
290   -w, --width=WIDTH         maximum line width (default of 75 columns)\n\
291   -g, --goal=WIDTH          goal width (default of 93% of width)\n\
292 "), stdout);
293       fputs (HELP_OPTION_DESCRIPTION, stdout);
294       fputs (VERSION_OPTION_DESCRIPTION, stdout);
295       emit_ancillary_info (PROGRAM_NAME);
296     }
297   exit (status);
298 }
299 
300 /* Decode options and launch execution.  */
301 
302 static struct option const long_options[] =
303 {
304   {"crown-margin", no_argument, NULL, 'c'},
305   {"prefix", required_argument, NULL, 'p'},
306   {"split-only", no_argument, NULL, 's'},
307   {"tagged-paragraph", no_argument, NULL, 't'},
308   {"uniform-spacing", no_argument, NULL, 'u'},
309   {"width", required_argument, NULL, 'w'},
310   {"goal", required_argument, NULL, 'g'},
311   {GETOPT_HELP_OPTION_DECL},
312   {GETOPT_VERSION_OPTION_DECL},
313   {NULL, 0, NULL, 0},
314 };
315 
316 int
main(int argc,char ** argv)317 main (int argc, char **argv)
318 {
319   int optchar;
320   bool ok = true;
321   char const *max_width_option = NULL;
322   char const *goal_width_option = NULL;
323 
324   initialize_main (&argc, &argv);
325   set_program_name (argv[0]);
326   setlocale (LC_ALL, "");
327   bindtextdomain (PACKAGE, LOCALEDIR);
328   textdomain (PACKAGE);
329 
330   atexit (close_stdout);
331 
332   crown = tagged = split = uniform = false;
333   max_width = WIDTH;
334   prefix = "";
335   prefix_length = prefix_lead_space = prefix_full_length = 0;
336 
337   if (argc > 1 && argv[1][0] == '-' && ISDIGIT (argv[1][1]))
338     {
339       /* Old option syntax; a dash followed by one or more digits.  */
340       max_width_option = argv[1] + 1;
341 
342       /* Make the option we just parsed invisible to getopt.  */
343       argv[1] = argv[0];
344       argv++;
345       argc--;
346     }
347 
348   while ((optchar = getopt_long (argc, argv, "0123456789cstuw:p:g:",
349                                  long_options, NULL))
350          != -1)
351     switch (optchar)
352       {
353       default:
354         if (ISDIGIT (optchar))
355           error (0, 0, _("invalid option -- %c; -WIDTH is recognized\
356  only when it is the first\noption; use -w N instead"),
357                  optchar);
358         usage (EXIT_FAILURE);
359 
360       case 'c':
361         crown = true;
362         break;
363 
364       case 's':
365         split = true;
366         break;
367 
368       case 't':
369         tagged = true;
370         break;
371 
372       case 'u':
373         uniform = true;
374         break;
375 
376       case 'w':
377         max_width_option = optarg;
378         break;
379 
380       case 'g':
381         goal_width_option = optarg;
382         break;
383 
384       case 'p':
385         set_prefix (optarg);
386         break;
387 
388       case_GETOPT_HELP_CHAR;
389 
390       case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
391 
392       }
393 
394   if (max_width_option)
395     {
396       /* Limit max_width to MAXCHARS / 2; otherwise, the resulting
397          output can be quite ugly.  */
398       max_width = xdectoumax (max_width_option, 0, MAXCHARS / 2, "",
399                               _("invalid width"), 0);
400     }
401 
402   if (goal_width_option)
403     {
404       /* Limit goal_width to max_width.  */
405       goal_width = xdectoumax (goal_width_option, 0, max_width, "",
406                                _("invalid width"), 0);
407       if (max_width_option == NULL)
408         max_width = goal_width + 10;
409     }
410   else
411     {
412       goal_width = max_width * (2 * (100 - LEEWAY) + 1) / 200;
413     }
414 
415   if (optind == argc)
416     fmt (stdin);
417   else
418     {
419       for (; optind < argc; optind++)
420         {
421           char *file = argv[optind];
422           if (STREQ (file, "-"))
423             fmt (stdin);
424           else
425             {
426               FILE *in_stream;
427               in_stream = fopen (file, "r");
428               if (in_stream != NULL)
429                 {
430                   fmt (in_stream);
431                   if (fclose (in_stream) == EOF)
432                     {
433                       error (0, errno, "%s", quotef (file));
434                       ok = false;
435                     }
436                 }
437               else
438                 {
439                   error (0, errno, _("cannot open %s for reading"),
440                          quoteaf (file));
441                   ok = false;
442                 }
443             }
444         }
445     }
446 
447   return ok ? EXIT_SUCCESS : EXIT_FAILURE;
448 }
449 
450 /* Trim space from the front and back of the string P, yielding the prefix,
451    and record the lengths of the prefix and the space trimmed.  */
452 
453 static void
set_prefix(char * p)454 set_prefix (char *p)
455 {
456   char *s;
457 
458   prefix_lead_space = 0;
459   while (*p == ' ')
460     {
461       prefix_lead_space++;
462       p++;
463     }
464   prefix = p;
465   prefix_full_length = strlen (p);
466   s = p + prefix_full_length;
467   while (s > p && s[-1] == ' ')
468     s--;
469   *s = '\0';
470   prefix_length = s - p;
471 }
472 
473 /* read file F and send formatted output to stdout.  */
474 
475 static void
fmt(FILE * f)476 fmt (FILE *f)
477 {
478   fadvise (f, FADVISE_SEQUENTIAL);
479   tabs = false;
480   other_indent = 0;
481   next_char = get_prefix (f);
482   while (get_paragraph (f))
483     {
484       fmt_paragraph ();
485       put_paragraph (word_limit);
486     }
487 }
488 
489 /* Set the global variable 'other_indent' according to SAME_PARAGRAPH
490    and other global variables.  */
491 
492 static void
set_other_indent(bool same_paragraph)493 set_other_indent (bool same_paragraph)
494 {
495   if (split)
496     other_indent = first_indent;
497   else if (crown)
498     {
499       other_indent = (same_paragraph ? in_column : first_indent);
500     }
501   else if (tagged)
502     {
503       if (same_paragraph && in_column != first_indent)
504         {
505           other_indent = in_column;
506         }
507 
508       /* Only one line: use the secondary indent from last time if it
509          splits, or 0 if there have been no multi-line paragraphs in the
510          input so far.  But if these rules make the two indents the same,
511          pick a new secondary indent.  */
512 
513       else if (other_indent == first_indent)
514         other_indent = first_indent == 0 ? DEF_INDENT : 0;
515     }
516   else
517     {
518       other_indent = first_indent;
519     }
520 }
521 
522 /* Read a paragraph from input file F.  A paragraph consists of a
523    maximal number of non-blank (excluding any prefix) lines subject to:
524    * In split mode, a paragraph is a single non-blank line.
525    * In crown mode, the second and subsequent lines must have the
526    same indentation, but possibly different from the indent of the
527    first line.
528    * Tagged mode is similar, but the first and second lines must have
529    different indentations.
530    * Otherwise, all lines of a paragraph must have the same indent.
531    If a prefix is in effect, it must be present at the same indent for
532    each line in the paragraph.
533 
534    Return false if end-of-file was encountered before the start of a
535    paragraph, else true.  */
536 
537 static bool
get_paragraph(FILE * f)538 get_paragraph (FILE *f)
539 {
540   int c;
541 
542   last_line_length = 0;
543   c = next_char;
544 
545   /* Scan (and copy) blank lines, and lines not introduced by the prefix.  */
546 
547   while (c == '\n' || c == EOF
548          || next_prefix_indent < prefix_lead_space
549          || in_column < next_prefix_indent + prefix_full_length)
550     {
551       c = copy_rest (f, c);
552       if (c == EOF)
553         {
554           next_char = EOF;
555           return false;
556         }
557       putchar ('\n');
558       c = get_prefix (f);
559     }
560 
561   /* Got a suitable first line for a paragraph.  */
562 
563   prefix_indent = next_prefix_indent;
564   first_indent = in_column;
565   wptr = parabuf;
566   word_limit = word;
567   c = get_line (f, c);
568   set_other_indent (same_para (c));
569 
570   /* Read rest of paragraph (unless split is specified).  */
571 
572   if (split)
573     {
574       /* empty */
575     }
576   else if (crown)
577     {
578       if (same_para (c))
579         {
580           do
581             {			/* for each line till the end of the para */
582               c = get_line (f, c);
583             }
584           while (same_para (c) && in_column == other_indent);
585         }
586     }
587   else if (tagged)
588     {
589       if (same_para (c) && in_column != first_indent)
590         {
591           do
592             {			/* for each line till the end of the para */
593               c = get_line (f, c);
594             }
595           while (same_para (c) && in_column == other_indent);
596         }
597     }
598   else
599     {
600       while (same_para (c) && in_column == other_indent)
601         c = get_line (f, c);
602     }
603 
604   /* Tell static analysis tools that using word_limit[-1] is ok.
605      word_limit is guaranteed to have been incremented by get_line.  */
606   assert (word < word_limit);
607 
608   (word_limit - 1)->period = (word_limit - 1)->final = true;
609   next_char = c;
610   return true;
611 }
612 
613 /* Copy to the output a line that failed to match the prefix, or that
614    was blank after the prefix.  In the former case, C is the character
615    that failed to match the prefix.  In the latter, C is \n or EOF.
616    Return the character (\n or EOF) ending the line.  */
617 
618 static int
copy_rest(FILE * f,int c)619 copy_rest (FILE *f, int c)
620 {
621   const char *s;
622 
623   out_column = 0;
624   if (in_column > next_prefix_indent || (c != '\n' && c != EOF))
625     {
626       put_space (next_prefix_indent);
627       for (s = prefix; out_column != in_column && *s; out_column++)
628         putchar (*s++);
629       if (c != EOF && c != '\n')
630         put_space (in_column - out_column);
631       if (c == EOF && in_column >= next_prefix_indent + prefix_length)
632         putchar ('\n');
633     }
634   while (c != '\n' && c != EOF)
635     {
636       putchar (c);
637       c = getc (f);
638     }
639   return c;
640 }
641 
642 /* Return true if a line whose first non-blank character after the
643    prefix (if any) is C could belong to the current paragraph,
644    otherwise false.  */
645 
646 static bool
same_para(int c)647 same_para (int c)
648 {
649   return (next_prefix_indent == prefix_indent
650           && in_column >= next_prefix_indent + prefix_full_length
651           && c != '\n' && c != EOF);
652 }
653 
654 /* Read a line from input file F, given first non-blank character C
655    after the prefix, and the following indent, and break it into words.
656    A word is a maximal non-empty string of non-white characters.  A word
657    ending in [.?!]["')\]]* and followed by end-of-line or at least two
658    spaces ends a sentence, as in emacs.
659 
660    Return the first non-blank character of the next line.  */
661 
662 static int
get_line(FILE * f,int c)663 get_line (FILE *f, int c)
664 {
665   int start;
666   char *end_of_parabuf;
667   WORD *end_of_word;
668 
669   end_of_parabuf = &parabuf[MAXCHARS];
670   end_of_word = &word[MAXWORDS - 2];
671 
672   do
673     {				/* for each word in a line */
674 
675       /* Scan word.  */
676 
677       word_limit->text = wptr;
678       do
679         {
680           if (wptr == end_of_parabuf)
681             {
682               set_other_indent (true);
683               flush_paragraph ();
684             }
685           *wptr++ = c;
686           c = getc (f);
687         }
688       while (c != EOF && !isspace (c));
689       in_column += word_limit->length = wptr - word_limit->text;
690       check_punctuation (word_limit);
691 
692       /* Scan inter-word space.  */
693 
694       start = in_column;
695       c = get_space (f, c);
696       word_limit->space = in_column - start;
697       word_limit->final = (c == EOF
698                            || (word_limit->period
699                                && (c == '\n' || word_limit->space > 1)));
700       if (c == '\n' || c == EOF || uniform)
701         word_limit->space = word_limit->final ? 2 : 1;
702       if (word_limit == end_of_word)
703         {
704           set_other_indent (true);
705           flush_paragraph ();
706         }
707       word_limit++;
708     }
709   while (c != '\n' && c != EOF);
710   return get_prefix (f);
711 }
712 
713 /* Read a prefix from input file F.  Return either first non-matching
714    character, or first non-blank character after the prefix.  */
715 
716 static int
get_prefix(FILE * f)717 get_prefix (FILE *f)
718 {
719   int c;
720 
721   in_column = 0;
722   c = get_space (f, getc (f));
723   if (prefix_length == 0)
724     next_prefix_indent = prefix_lead_space < in_column ?
725       prefix_lead_space : in_column;
726   else
727     {
728       const char *p;
729       next_prefix_indent = in_column;
730       for (p = prefix; *p != '\0'; p++)
731         {
732           unsigned char pc = *p;
733           if (c != pc)
734             return c;
735           in_column++;
736           c = getc (f);
737         }
738       c = get_space (f, c);
739     }
740   return c;
741 }
742 
743 /* Read blank characters from input file F, starting with C, and keeping
744    in_column up-to-date.  Return first non-blank character.  */
745 
746 static int
get_space(FILE * f,int c)747 get_space (FILE *f, int c)
748 {
749   while (true)
750     {
751       if (c == ' ')
752         in_column++;
753       else if (c == '\t')
754         {
755           tabs = true;
756           in_column = (in_column / TABWIDTH + 1) * TABWIDTH;
757         }
758       else
759         return c;
760       c = getc (f);
761     }
762 }
763 
764 /* Set extra fields in word W describing any attached punctuation.  */
765 
766 static void
check_punctuation(WORD * w)767 check_punctuation (WORD *w)
768 {
769   char const *start = w->text;
770   char const *finish = start + (w->length - 1);
771   unsigned char fin = *finish;
772 
773   w->paren = isopen (*start);
774   w->punct = !! ispunct (fin);
775   while (start < finish && isclose (*finish))
776     finish--;
777   w->period = isperiod (*finish);
778 }
779 
780 /* Flush part of the paragraph to make room.  This function is called on
781    hitting the limit on the number of words or characters.  */
782 
783 static void
flush_paragraph(void)784 flush_paragraph (void)
785 {
786   WORD *split_point;
787   WORD *w;
788   int shift;
789   COST best_break;
790 
791   /* In the special case where it's all one word, just flush it.  */
792 
793   if (word_limit == word)
794     {
795       fwrite (parabuf, sizeof *parabuf, wptr - parabuf, stdout);
796       wptr = parabuf;
797       return;
798     }
799 
800   /* Otherwise:
801      - format what you have so far as a paragraph,
802      - find a low-cost line break near the end,
803      - output to there,
804      - make that the start of the paragraph.  */
805 
806   fmt_paragraph ();
807 
808   /* Choose a good split point.  */
809 
810   split_point = word_limit;
811   best_break = MAXCOST;
812   for (w = word->next_break; w != word_limit; w = w->next_break)
813     {
814       if (w->best_cost - w->next_break->best_cost < best_break)
815         {
816           split_point = w;
817           best_break = w->best_cost - w->next_break->best_cost;
818         }
819       if (best_break <= MAXCOST - LINE_CREDIT)
820         best_break += LINE_CREDIT;
821     }
822   put_paragraph (split_point);
823 
824   /* Copy text of words down to start of parabuf -- we use memmove because
825      the source and target may overlap.  */
826 
827   memmove (parabuf, split_point->text, wptr - split_point->text);
828   shift = split_point->text - parabuf;
829   wptr -= shift;
830 
831   /* Adjust text pointers.  */
832 
833   for (w = split_point; w <= word_limit; w++)
834     w->text -= shift;
835 
836   /* Copy words from split_point down to word -- we use memmove because
837      the source and target may overlap.  */
838 
839   memmove (word, split_point, (word_limit - split_point + 1) * sizeof *word);
840   word_limit -= split_point - word;
841 }
842 
843 /* Compute the optimal formatting for the whole paragraph by computing
844    and remembering the optimal formatting for each suffix from the empty
845    one to the whole paragraph.  */
846 
847 static void
fmt_paragraph(void)848 fmt_paragraph (void)
849 {
850   WORD *start, *w;
851   int len;
852   COST wcost, best;
853   int saved_length;
854 
855   word_limit->best_cost = 0;
856   saved_length = word_limit->length;
857   word_limit->length = max_width;	/* sentinel */
858 
859   for (start = word_limit - 1; start >= word; start--)
860     {
861       best = MAXCOST;
862       len = start == word ? first_indent : other_indent;
863 
864       /* At least one word, however long, in the line.  */
865 
866       w = start;
867       len += w->length;
868       do
869         {
870           w++;
871 
872           /* Consider breaking before w.  */
873 
874           wcost = line_cost (w, len) + w->best_cost;
875           if (start == word && last_line_length > 0)
876             wcost += RAGGED_COST (len - last_line_length);
877           if (wcost < best)
878             {
879               best = wcost;
880               start->next_break = w;
881               start->line_length = len;
882             }
883 
884           /* This is a kludge to keep us from computing 'len' as the
885              sum of the sentinel length and some non-zero number.
886              Since the sentinel w->length may be INT_MAX, adding
887              to that would give a negative result.  */
888           if (w == word_limit)
889             break;
890 
891           len += (w - 1)->space + w->length;	/* w > start >= word */
892         }
893       while (len < max_width);
894       start->best_cost = best + base_cost (start);
895     }
896 
897   word_limit->length = saved_length;
898 }
899 
900 /* Return the constant component of the cost of breaking before the
901    word THIS.  */
902 
903 static COST
base_cost(WORD * this)904 base_cost (WORD *this)
905 {
906   COST cost;
907 
908   cost = LINE_COST;
909 
910   if (this > word)
911     {
912       if ((this - 1)->period)
913         {
914           if ((this - 1)->final)
915             cost -= SENTENCE_BONUS;
916           else
917             cost += NOBREAK_COST;
918         }
919       else if ((this - 1)->punct)
920         cost -= PUNCT_BONUS;
921       else if (this > word + 1 && (this - 2)->final)
922         cost += WIDOW_COST ((this - 1)->length);
923     }
924 
925   if (this->paren)
926     cost -= PAREN_BONUS;
927   else if (this->final)
928     cost += ORPHAN_COST (this->length);
929 
930   return cost;
931 }
932 
933 /* Return the component of the cost of breaking before word NEXT that
934    depends on LEN, the length of the line beginning there.  */
935 
936 static COST
line_cost(WORD * next,int len)937 line_cost (WORD *next, int len)
938 {
939   int n;
940   COST cost;
941 
942   if (next == word_limit)
943     return 0;
944   n = goal_width - len;
945   cost = SHORT_COST (n);
946   if (next->next_break != word_limit)
947     {
948       n = len - next->line_length;
949       cost += RAGGED_COST (n);
950     }
951   return cost;
952 }
953 
954 /* Output to stdout a paragraph from word up to (but not including)
955    FINISH, which must be in the next_break chain from word.  */
956 
957 static void
put_paragraph(WORD * finish)958 put_paragraph (WORD *finish)
959 {
960   WORD *w;
961 
962   put_line (word, first_indent);
963   for (w = word->next_break; w != finish; w = w->next_break)
964     put_line (w, other_indent);
965 }
966 
967 /* Output to stdout the line beginning with word W, beginning in column
968    INDENT, including the prefix (if any).  */
969 
970 static void
put_line(WORD * w,int indent)971 put_line (WORD *w, int indent)
972 {
973   WORD *endline;
974 
975   out_column = 0;
976   put_space (prefix_indent);
977   fputs (prefix, stdout);
978   out_column += prefix_length;
979   put_space (indent - out_column);
980 
981   endline = w->next_break - 1;
982   for (; w != endline; w++)
983     {
984       put_word (w);
985       put_space (w->space);
986     }
987   put_word (w);
988   last_line_length = out_column;
989   putchar ('\n');
990 }
991 
992 /* Output to stdout the word W.  */
993 
994 static void
put_word(WORD * w)995 put_word (WORD *w)
996 {
997   const char *s;
998   int n;
999 
1000   s = w->text;
1001   for (n = w->length; n != 0; n--)
1002     putchar (*s++);
1003   out_column += w->length;
1004 }
1005 
1006 /* Output to stdout SPACE spaces, or equivalent tabs.  */
1007 
1008 static void
put_space(int space)1009 put_space (int space)
1010 {
1011   int space_target, tab_target;
1012 
1013   space_target = out_column + space;
1014   if (tabs)
1015     {
1016       tab_target = space_target / TABWIDTH * TABWIDTH;
1017       if (out_column + 1 < tab_target)
1018         while (out_column < tab_target)
1019           {
1020             putchar ('\t');
1021             out_column = (out_column / TABWIDTH + 1) * TABWIDTH;
1022           }
1023     }
1024   while (out_column < space_target)
1025     {
1026       putchar (' ');
1027       out_column++;
1028     }
1029 }
1030