1#!MUNGERPATH
2
3; Copyright (c) 2004, James Bailie <jimmy@mammothcheese.ca>.
4; All rights reserved.
5;
6; Redistribution and use in source form, with or without
7; modification, are permitted provided that the following conditions are met:
8;
9;     * Redistributions of source code must retain the above copyright
10; notice, this list of conditions and the following disclaimer.
11;     * The name of James Bailie may not be used to endorse or promote
12; products derived from this software without specific prior written permission.
13;
14; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS "AS IS"
15; AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16; IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17; ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
18; LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
19; CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
20; SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
21; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
22; CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
23; ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
24; POSSIBILITY OF SUCH DAMAGE.
25
26; This script emulates the "fmt" utility.
27
28; The script recognizes three optional arguments:
29
30; -l n specifies the desired line length of output lines.  Defaults to 75
31;      characters.
32
33; -t n specifies where tabstops occur.  Defaults to every 8 columns.
34
35; -p   by itself indicates the script should only format the text appearing
36;      after a prefix string consisting of non-alphanumeric characters,
37;      optionally delimited by whitespace.  The prefix string is extracted
38;      from the first input line and printed at the beginning of all output
39;      lines.  This option can also be used to cause the leading whitespace
40;      of the first input line to be propagated to all the output lines,
41;      which means with this option, you can prevent leading whitespace
42;      from being consumed during the reflow operation.
43;      Prefixes are not recognized by default.
44
45; If the script joins sentence endpoints together which had previously
46; occupied adjacent lines, it will insert two spaces between the sentences.
47
48(fatal)                  ; Exit on lisp errors.
49
50(setq len 75)            ; Default desired line length of output lines.
51(setq tabstop 8)         ; Default tabstop periodicity.
52(setq prefix "")         ; Variable to hold quote or comment
53                         ; prefix if we are recognizing one.
54
55(setq current_line "")   ; Working variable holding text accumulated
56                         ; from the current paragraph.
57
58; Regexp used to detect quote and comment prefixes, and indentation.
59
60(setq prefix_rx (regcomp "^([\b\t]*[^A-Za-z0-9\b\t\"'`&<(]*[\b\t]*)([^\b\t])?"))
61
62; Regexps used to detect leading and trailing whitespace, and strings composed
63; entirely of whitespace.
64
65(setq trail_rx (regcomp "\b*$"))
66(setq lead_rx (regcomp "^\b+(.*)"))
67(setq space_rx (regcomp "^\b*$"))
68
69; Regexp used to find the breakpoint when wrapping long lines.
70
71(setq wrap_rx (regcomp "^(.*[^\b])?\b+([^\b]+)?$"))
72
73; Regexps used to detect sentence endpoints.  We insert two spaces between
74; sentences we join together.
75
76(setq start_rx (regcomp "^\b*[\"`]?[A-Z]"))
77(setq end_rx (regcomp "[.?!]['\"]?\b*$"))
78
79; Regexp used to detect lines ending with a colon.  We insert two spaces
80; between a line ending with a colon and the subsequent line, if we are
81; joining the two lines together.
82
83(setq colon_rx (regcomp ":\b*$"))
84
85; Check for options.  Don't allow a value <= 0.
86
87(load (join "/" (libdir) "options.munger"))
88(getopt "p")
89
90(let ((arg ()))
91
92   (when (setq arg (lookup options "l"))
93      (setq len (or (abs (digitize arg)) 75)))
94
95   (when (setq arg (lookup options "p"))
96      (setq prefix arg))
97
98   (when (setq arg (lookup options "t"))
99      (setq tabstop (or (abs (digitize arg)) 8))))
100
101; This function is called to continue to process unprocessed input for the
102; current paragraph, after we have stopped receiving input for the current
103; paragraph.
104
105(defun print_rest (last)
106
107   ; Feed process_line dummy lines, to get it to work through the remaining
108   ; stored text.
109
110   (while (process_line ""))
111
112   ; process_line will return 0 when there is insufficient stored text left to
113   ; break into two lines.  This means a last short line may still be present
114   ; in current_line.  If so, we print it out.
115
116   (when (not (eq current_line ""))
117      (print prefix current_line (char 10)))
118
119   ; If this function is called with a "last" argument of 0, then the script
120   ; has encountered a blank line between paragraphs, so we need to print a
121   ; blank line and clear out the last line of input data.  Otherwise, the
122   ; script has reached the end of input data, and we need do nothing more.
123
124   (unless last
125      (print prefix (char 10))
126      (setq current_line "")))
127
128; Wrapper function for process_line, which detects paragraph-separating blank
129; lines, and removes quote and comment prefixes if we are recognizing them.
130
131(defun wrapper (line)
132
133   ; Remove terminators and expand tabs.
134
135   (setq line (expand tabstop (chomp line)))
136
137   ; If we're recognizing prefixes and this is the first line read, record the
138   ; prefix and alter the line length as necessary.
139
140   (when (eq prefix 1)
141      (setq prefix (cadr (matches prefix_rx line)))
142
143      ; If the prefix found is longer than the specified line length, ignore
144      ; the request to recognize prefixes.
145
146      (let ((old len))
147         (when (<= (setq len (- len (length prefix))) 0)
148            (setq prefix "")
149            (setq len old))))
150
151   ; If we're recognizing prefixes, remove the prefix from the line.
152
153   (when (not (eq prefix ""))
154      (setq line (substitute prefix_rx "\2" line)))
155
156   ; Do we have a paragraph separating blank line, or text?
157
158   (if (match space_rx line)
159      (print_rest 0)
160      (process_line line)))
161
162; Function which does the reflow.
163
164(defun process_line (line)
165
166   ; First section appends new input line to stored text, with appropriate
167   ; separator.  Trailing whitespace is removed from input lines.
168
169   (setq current_line
170      (concat current_line
171
172              (cond ((or (match colon_rx current_line)
173                         (and (match start_rx line)
174                              (match end_rx current_line)))
175                     "  ")
176
177                    ((or (eq line "") (eq current_line "")) "")
178
179                    (1 " "))
180
181              (substitute trail_rx "" line)))
182
183   ; Second section is executed only when we have enough stored text to split
184   ; off a new line of the specified width.
185
186   (when (> (length current_line) len)
187
188      ; The stored text is split at the desired width.
189
190      (letn ((before (substring current_line 0 len))
191             (after (substring current_line len 0))
192             (m ())
193             (m2 (matches lead_rx after))
194             (len2 (length after)))
195
196         (if m2
197
198            ; This clause is executed if the stored text splits naturally
199            ; before a chunk of whitespace.  If the part of the stored text
200            ; before the split is not whitespace, we print it as a new line,
201            ; trimming any trailing whitespace, and update the stored text.
202
203            (progn
204               (unless (match space_rx before)
205                  (print prefix (substitute trail_rx "" before) (char 10)))
206               (setq current_line (cadr m2)))
207
208            ; Otherwise if wrap_rx does not match the segment of the stored
209            ; text before the split location, that means there is no
210            ; whitespace in that segment, so we move the location of the split
211            ; forward until we either run out of characters, or we find some
212            ; whitespace to break the line at.
213
214            (while (and (not (setq m (matches wrap_rx before))) len2)
215               (setq before (concat before (substring after 0 1)))
216               (setq after (if (> len2 1) (substring after 1 0) ""))
217               (dec len2))
218
219            (if (not m)
220
221               ; This clause executes if there is no whitespace left in the
222               ; stored input text.  We just spit it all out as a one big
223               ; line, and clear out the stored text.
224
225               (progn
226                  (print prefix current_line (char 10))
227                  (setq current_line ""))
228
229               ; Otherwise, we print the segment of the stored text before the
230               ; (possibly new) split location and update the stored text
231               ; text.
232
233               (when (cadr m) (print prefix (cadr m) (char 10)))
234               (setq current_line (concat (car (cddr m)) after)))))))
235
236(foreach_line wrapper)
237(print_rest 1)
238(exit 0)
239