1#!MUNGERPATH 2 3; Copyright (c) 2004, James Bailie <jimmy@mammothcheese.ca>. 4; All rights reserved. 5; 6; Redistribution and use in source form, with or without 7; modification, are permitted provided that the following conditions are met: 8; 9; * Redistributions of source code must retain the above copyright 10; notice, this list of conditions and the following disclaimer. 11; * The name of James Bailie may not be used to endorse or promote 12; products derived from this software without specific prior written permission. 13; 14; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS "AS IS" 15; AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16; IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17; ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 18; LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 19; CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 20; SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 21; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 22; CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 23; ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 24; POSSIBILITY OF SUCH DAMAGE. 25 26; This script emulates the "fmt" utility. 27 28; The script recognizes three optional arguments: 29 30; -l n specifies the desired line length of output lines. Defaults to 75 31; characters. 32 33; -t n specifies where tabstops occur. Defaults to every 8 columns. 34 35; -p by itself indicates the script should only format the text appearing 36; after a prefix string consisting of non-alphanumeric characters, 37; optionally delimited by whitespace. The prefix string is extracted 38; from the first input line and printed at the beginning of all output 39; lines. This option can also be used to cause the leading whitespace 40; of the first input line to be propagated to all the output lines, 41; which means with this option, you can prevent leading whitespace 42; from being consumed during the reflow operation. 43; Prefixes are not recognized by default. 44 45; If the script joins sentence endpoints together which had previously 46; occupied adjacent lines, it will insert two spaces between the sentences. 47 48(fatal) ; Exit on lisp errors. 49 50(setq len 75) ; Default desired line length of output lines. 51(setq tabstop 8) ; Default tabstop periodicity. 52(setq prefix "") ; Variable to hold quote or comment 53 ; prefix if we are recognizing one. 54 55(setq current_line "") ; Working variable holding text accumulated 56 ; from the current paragraph. 57 58; Regexp used to detect quote and comment prefixes, and indentation. 59 60(setq prefix_rx (regcomp "^([\b\t]*[^A-Za-z0-9\b\t\"'`&<(]*[\b\t]*)([^\b\t])?")) 61 62; Regexps used to detect leading and trailing whitespace, and strings composed 63; entirely of whitespace. 64 65(setq trail_rx (regcomp "\b*$")) 66(setq lead_rx (regcomp "^\b+(.*)")) 67(setq space_rx (regcomp "^\b*$")) 68 69; Regexp used to find the breakpoint when wrapping long lines. 70 71(setq wrap_rx (regcomp "^(.*[^\b])?\b+([^\b]+)?$")) 72 73; Regexps used to detect sentence endpoints. We insert two spaces between 74; sentences we join together. 75 76(setq start_rx (regcomp "^\b*[\"`]?[A-Z]")) 77(setq end_rx (regcomp "[.?!]['\"]?\b*$")) 78 79; Regexp used to detect lines ending with a colon. We insert two spaces 80; between a line ending with a colon and the subsequent line, if we are 81; joining the two lines together. 82 83(setq colon_rx (regcomp ":\b*$")) 84 85; Check for options. Don't allow a value <= 0. 86 87(load (join "/" (libdir) "options.munger")) 88(getopt "p") 89 90(let ((arg ())) 91 92 (when (setq arg (lookup options "l")) 93 (setq len (or (abs (digitize arg)) 75))) 94 95 (when (setq arg (lookup options "p")) 96 (setq prefix arg)) 97 98 (when (setq arg (lookup options "t")) 99 (setq tabstop (or (abs (digitize arg)) 8)))) 100 101; This function is called to continue to process unprocessed input for the 102; current paragraph, after we have stopped receiving input for the current 103; paragraph. 104 105(defun print_rest (last) 106 107 ; Feed process_line dummy lines, to get it to work through the remaining 108 ; stored text. 109 110 (while (process_line "")) 111 112 ; process_line will return 0 when there is insufficient stored text left to 113 ; break into two lines. This means a last short line may still be present 114 ; in current_line. If so, we print it out. 115 116 (when (not (eq current_line "")) 117 (print prefix current_line (char 10))) 118 119 ; If this function is called with a "last" argument of 0, then the script 120 ; has encountered a blank line between paragraphs, so we need to print a 121 ; blank line and clear out the last line of input data. Otherwise, the 122 ; script has reached the end of input data, and we need do nothing more. 123 124 (unless last 125 (print prefix (char 10)) 126 (setq current_line ""))) 127 128; Wrapper function for process_line, which detects paragraph-separating blank 129; lines, and removes quote and comment prefixes if we are recognizing them. 130 131(defun wrapper (line) 132 133 ; Remove terminators and expand tabs. 134 135 (setq line (expand tabstop (chomp line))) 136 137 ; If we're recognizing prefixes and this is the first line read, record the 138 ; prefix and alter the line length as necessary. 139 140 (when (eq prefix 1) 141 (setq prefix (cadr (matches prefix_rx line))) 142 143 ; If the prefix found is longer than the specified line length, ignore 144 ; the request to recognize prefixes. 145 146 (let ((old len)) 147 (when (<= (setq len (- len (length prefix))) 0) 148 (setq prefix "") 149 (setq len old)))) 150 151 ; If we're recognizing prefixes, remove the prefix from the line. 152 153 (when (not (eq prefix "")) 154 (setq line (substitute prefix_rx "\2" line))) 155 156 ; Do we have a paragraph separating blank line, or text? 157 158 (if (match space_rx line) 159 (print_rest 0) 160 (process_line line))) 161 162; Function which does the reflow. 163 164(defun process_line (line) 165 166 ; First section appends new input line to stored text, with appropriate 167 ; separator. Trailing whitespace is removed from input lines. 168 169 (setq current_line 170 (concat current_line 171 172 (cond ((or (match colon_rx current_line) 173 (and (match start_rx line) 174 (match end_rx current_line))) 175 " ") 176 177 ((or (eq line "") (eq current_line "")) "") 178 179 (1 " ")) 180 181 (substitute trail_rx "" line))) 182 183 ; Second section is executed only when we have enough stored text to split 184 ; off a new line of the specified width. 185 186 (when (> (length current_line) len) 187 188 ; The stored text is split at the desired width. 189 190 (letn ((before (substring current_line 0 len)) 191 (after (substring current_line len 0)) 192 (m ()) 193 (m2 (matches lead_rx after)) 194 (len2 (length after))) 195 196 (if m2 197 198 ; This clause is executed if the stored text splits naturally 199 ; before a chunk of whitespace. If the part of the stored text 200 ; before the split is not whitespace, we print it as a new line, 201 ; trimming any trailing whitespace, and update the stored text. 202 203 (progn 204 (unless (match space_rx before) 205 (print prefix (substitute trail_rx "" before) (char 10))) 206 (setq current_line (cadr m2))) 207 208 ; Otherwise if wrap_rx does not match the segment of the stored 209 ; text before the split location, that means there is no 210 ; whitespace in that segment, so we move the location of the split 211 ; forward until we either run out of characters, or we find some 212 ; whitespace to break the line at. 213 214 (while (and (not (setq m (matches wrap_rx before))) len2) 215 (setq before (concat before (substring after 0 1))) 216 (setq after (if (> len2 1) (substring after 1 0) "")) 217 (dec len2)) 218 219 (if (not m) 220 221 ; This clause executes if there is no whitespace left in the 222 ; stored input text. We just spit it all out as a one big 223 ; line, and clear out the stored text. 224 225 (progn 226 (print prefix current_line (char 10)) 227 (setq current_line "")) 228 229 ; Otherwise, we print the segment of the stored text before the 230 ; (possibly new) split location and update the stored text 231 ; text. 232 233 (when (cadr m) (print prefix (cadr m) (char 10))) 234 (setq current_line (concat (car (cddr m)) after))))))) 235 236(foreach_line wrapper) 237(print_rest 1) 238(exit 0) 239