1;;; 2;;; gauche.pp - pretty printer 3;;; 4;;; Copyright (c) 2000-2020 Shiro Kawai <shiro@acm.org> 5;;; 6;;; Redistribution and use in source and binary forms, with or without 7;;; modification, are permitted provided that the following conditions 8;;; are met: 9;;; 10;;; 1. Redistributions of source code must retain the above copyright 11;;; notice, this list of conditions and the following disclaimer. 12;;; 13;;; 2. Redistributions in binary form must reproduce the above copyright 14;;; notice, this list of conditions and the following disclaimer in the 15;;; documentation and/or other materials provided with the distribution. 16;;; 17;;; 3. Neither the name of the authors nor the names of its contributors 18;;; may be used to endorse or promote products derived from this 19;;; software without specific prior written permission. 20;;; 21;;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22;;; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23;;; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24;;; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25;;; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26;;; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 27;;; TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 28;;; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 29;;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 30;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 31;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32;;; 33 34;; Experimental 35 36(define-module gauche.pputil 37 (use util.match) 38 (export pprint)) 39(select-module gauche.pputil) 40 41;; List printing modes: 42;; 43;; oneline: 44;; (Lorem ipsum dolor sit amet consectetur adipisicing) 45;; 46;; fill: 47;; (Lorem ipsum dolor sit amet consectetur adipisicing elit 48;; sed do eiusmod tempor incididunt ut labore et dolore) 49;; 50;; linear: 51;; (Lorem 52;; ipsum 53;; dolor 54;; ...) 55;; 56;; (0) If all the elements can be printed in one line, do so. 57;; 58;; (1) If the list itself doesn't fit in one line, but each element 59;; does, then we use fill mode. 60;; 61;; (2) Otherwise, we fall back to linear mode. 62 63;; Pretty printer steps 64;; 65;; 1. Scan the tree and mark shared substrctures. 66;; 2. Scan the tree again, to build a layouter network. 67;; 3. Run the layouter network to produce "formatted" tree, which is 68;; a tree of strings with separator / line-break directives. 69;; 4. Render the formatted tree into string. 70 71;; Various states carried around while the layouter network is built. 72;; 'Shared' table is used for shared/circular structure output. 73;; The first pass maps object => # of encounter. If # > 1, we use labels. 74;; In the second pass we replace its value with -N where N is the label. 75(define-class <pp-context> () 76 ([writer :init-form write :init-keyword :writer] 77 [shared :init-form (make-hash-table 'eq?) :init-keyword :shared] 78 [counter :init-value 0] ;shared label counter 79 [controls :init-keyword :controls])) 80 81;; for internal convenience 82(define-inline (rp-shared c) (~ c 'shared)) 83(define-inline (rp-length c) (~ c 'controls 'length)) 84(define-inline (rp-level c) (~ c 'controls 'level)) 85(define-inline (rp-width c) (~ c 'controls 'width)) 86 87(define simple-obj? 88 (any-pred number? boolean? char? port? symbol? null? 89 (cut member <> '("" #())))) 90 91;; Maybe-ish monadic comparison 92(define-inline (=* a b) (and a b (= a b))) 93(define-inline (<* a b) (and a b (< a b))) 94(define-inline (>* a b) (and a b (> a b))) 95(define-inline (<=* a b) (and a b (<= a b))) 96(define-inline (>=* a b) (and a b (>= a b))) 97(define-inline (-* a b . args) 98 (and a b (if (null? args) (- a b) (apply -* (- a b) args)))) 99(define-inline (min* a b) (if a (if b (min a b) a) b)) 100 101;; Recurse to the system's writer to handle objects other than 102;; lists and vectors. We want to pass down the controls (for 103;; print-base etc.), but the system's writer directly recurse into 104;; %pretty-print if print-pretty is true, causing infinite loop. 105;; So we drop pretty-print. 106(define (rec-writer c) 107 (let ([w (~ c'writer)] 108 [c2 (write-controls-copy (~ c'controls) :pretty #f)]) 109 (^x (w x c2)))) 110 111(define (need-label? obj c) (> (hash-table-get (rp-shared c) obj 0) 1)) 112(define (has-label? obj c) (<= (hash-table-get (rp-shared c) obj 1) 0)) 113(define (add-label! obj c) 114 (rlet1 n (~ c'counter) 115 (hash-table-put! (rp-shared c) obj (- n)) 116 (set! (~ c'counter) (+ n 1)))) 117 118;; Creates a layouter that takes width and try to find the best 119;; layout of obj. A layouter returns a pair of two values---a formatted 120;; string tree (FSTree), and the width the layout occupies. The width may 121;; be #f if the layout spills to more than one lines. FSTree is a 122;; tree of strings, with a symbol 's or 'b, indicating inter-datum space 123;; and line break. A layouter may be called more than once on 124;; the same object if the layout is "retried". 125;; 126;; Because of retrying, running layouter might cost exponential time 127;; in the worst case, when leaf layouter is invoked again and again with 128;; the same width parameter. So we also carry aronud a hashtable to 129;; memoize the result. The table uses (cons Width Layouter) as key, and 130;; (cons FStree Width) as result. 131 132;; Layouter = Integer, Memo -> (FSTree . Integer) 133;; FSTree = String | 's | 'b | (FSTree ...) 134 135;; Memoizing lambda 136(define-syntax memo^ 137 (er-macro-transformer 138 (^[f r c] 139 (match f 140 [(_ (w m) . body) 141 (quasirename r 142 `(rec (fn ,w ,m) 143 (or (hash-table-get ,m (cons ,w fn) #f) 144 (rlet1 p (begin ,@body) 145 (hash-table-put! ,m (cons ,w fn) p)))))])))) 146 147(define (make-memo-hash) 148 (make-hash-table 149 (make-comparator pair? 150 (^[a b] (and (eqv? (car a) (car b)) 151 (eq? (cdr a) (cdr b)))) 152 #f 153 (^p (combine-hash-value (eqv-hash (car p)) 154 (eq-hash (cdr p))))))) 155 156;; layout :: (Obj, Integer, Context) -> Layouter 157(define (layout obj level c) 158 (cond [(has-label? obj c) (layout-ref obj c)] 159 [(simple-obj? obj) (layout-simple (write-to-string obj (rec-writer c)))] 160 [(>=* level (rp-level c)) (layout-simple "#")] 161 [else (layout-misc obj (cute layout <> (+ level 1) c) c)])) 162 163;; layout-misc :: (Obj, (Obj -> Layouter), Context) -> Layouter 164(define (layout-misc obj rec c) 165 166 ;; mapi :: (Obj -> Layouter), Vector -> Layouter 167 (define (mapi fn vec) 168 (let* ([s (vector-length vec)] 169 [rs (map (^i (fn (vector-ref vec i))) 170 (iota (min* s (rp-length c))))]) 171 (if (>=* s (rp-length c)) `(,@rs ,dots) rs))) 172 173 ;; mapu :: (Obj -> Layouter), UVector -> Layouter 174 (define (mapu fn vec) 175 (let* ([s (uvector-length vec)] 176 [rs (map (^i (fn (uvector-ref vec i))) 177 (iota (min* s (rp-length c))))]) 178 (if (>=* s (rp-length c)) `(,@rs ,dots) rs))) 179 180 ;; map+ :: (Obj -> Layouter), List -> Layouter 181 ;; map considering dotted list, print-length, and shared structure 182 (define (map+ fn lis) 183 (let loop ([lis lis] [len 0] [rs '()]) 184 (match lis 185 [() (reverse rs)] 186 [((and (or 'quote 'quasiquote 'unquote 'unquote-splicing) p) _) 187 (if (has-label? (cdr lis) c) 188 (reverse (list* (layout-ref (cdr lis) c) dot (fn p) rs)) 189 (reverse (list* (layout-shorthand lis fn c) dot rs)))] 190 [(l . lis) 191 (if (>=* len (rp-length c)) 192 (reverse `(,dots ,@rs)) 193 (let1 r (fn l) 194 (cond [(has-label? lis c) 195 (reverse `(,(layout-ref lis c) ,dot ,r ,@rs))] 196 [(need-label? lis c) 197 (reverse `(,(fn lis) ,dot ,r ,@rs))] 198 [else (loop lis (+ len 1) (cons r rs))])))] 199 [x (reverse `(,(fn x) ,dot ,@rs))]))) 200 201 (cond [(pair? obj) 202 (or (and (pair? (cdr obj)) 203 (null? (cddr obj)) 204 (not (has-label? (cdr obj) c)) 205 (memq (car obj) '(quote quasiquote unquote unquote-splicing)) 206 (layout-shorthand obj rec c)) 207 (layout-list (sprefix obj "(" c) (map+ rec obj) c))] 208 [(vector? obj) (layout-list (sprefix obj "#(" c) (mapi rec obj) c)] 209 [(is-a? obj <uvector>) 210 (let1 tag (rxmatch-substring 211 (#/[csuf]\d+/ (x->string (class-name (class-of obj))))) 212 (layout-list (sprefix obj (format "#~a(" tag) c) (mapu rec obj) c))] 213 [else 214 (layout-simple (sprefix obj (write-to-string obj (rec-writer c)) c))])) 215 216;; :: Layouter 217(define dots (^[w m] '("...." . 4))) 218(define dot (^[w m] '("." . 1))) 219 220;; layout-simple :: String -> Layouter 221(define (layout-simple str) (^[w m] (cons str (string-length str)))) 222 223;; layout-ref :: Object -> Layouter 224(define (layout-ref obj c) 225 (layout-simple (format "#~d#" (- (hash-table-get (rp-shared c) obj))))) 226 227;; layout-shorthand :: (Object, (Object -> Layouter), Context) -> Layouter 228;; handles quote etc. 229;; Object is a two-element list, e.g. (quote X) 230;; The second argument is to recursively layout X. 231;; The case that cdr of the form is shared should be handled by the caller. 232(define (layout-shorthand form rec c) 233 (let* ([pfx (sprefix form 234 (assq-ref '((quote . "'") 235 (quasiquote . "`") 236 (unquote . ",") 237 (unquote-splicing . ",@")) 238 (car form)) 239 c)] 240 [plen (string-length pfx)] 241 [inner (if (has-label? (cadr form) c) 242 (layout-ref (cadr form) c) 243 (rec (cadr form)))]) 244 (memo^ [room memo] 245 (match-let1 (s . w) (inner (-* room plen) memo) 246 (cons (list pfx s) (and w (+ w plen))))))) 247 248;; layout-list :: (String, [Layouter], Context) -> Layouter 249(define (layout-list prefix elts c) 250 (let1 plen (string-length prefix) 251 (memo^ [room memo] 252 (match-let1 (s . w) (do-layout-elements (-* room plen) memo elts) 253 (cons (cons prefix (reverse s)) (and w (+ w plen))))))) 254 255;; sprefix :: (Object, String, Context) -> String 256(define (sprefix obj s c) 257 (if (need-label? obj c) 258 (format "#~d=~a" (add-label! obj c) s) 259 s)) 260 261;; do-layout-elements :: Integer, [Layouter] -> (Formatted, Integer) 262;; This is the core of layout&retry algorithm. Invoke layouters to 263;; find out best fit. Each layouter may be invoked more than once, 264;; when retry happens. 265(define (do-layout-elements room memo elts) 266 (define (do-oneline r es strs) 267 (match es 268 [() (cons strs (-* room r))] 269 [(e . es) (match-let1 (s . w) (e room memo) 270 (cond [(not w) (do-linear room elts)] ;giveup 271 [(>* w room) ;too big 272 (do-fill room es (list* 'b s 'b strs))] 273 [(>* w r) 274 (do-fill (-* room w) es (list* s 'b strs))] 275 [else 276 (do-oneline (-* r w 1) es (list* s 's strs))]))])) 277 (define (do-fill r es strs) 278 (match es 279 [() (cons strs #f)] 280 [(e . es) (match-let1 (s . w) (e room memo) 281 (cond [(not w) (do-linear room elts)] 282 [(>* w (-* r 1)) 283 (do-fill (-* room w 1) es (list* s 'b strs))] 284 [else (do-fill (-* r w 1) es (list* s 's strs))]))])) 285 (define (do-linear r es) 286 (cons (fold (^[e strs] (match-let1 (s . w) (e room memo) (list* s 'b strs))) 287 '() es) 288 #f)) 289 (match-let1 (s . w) (do-oneline room elts '()) 290 (cons (cons ")" s) w))) 291 292;; Render the nested list of strings. Some trick: S's and b's right 293;; after open paren are ignored. S's right after b's are also ignored. 294;; B's insert a newline and a proper indentation. 295(define (render stree indent port) 296 (define (next-line col) (newline port) (dotimes [i col] (display " " port))) 297 (define (drop-while pred xs) ; avoid depending on srfi-1 (for now) 298 (cond [(null? xs) '()] 299 [(pred (car xs)) (drop-while pred (cdr xs))] 300 [else xs])) 301 (match stree 302 [(prefix . es) 303 (display prefix port) 304 (let1 ind (+ indent (string-length prefix)) 305 (let loop ([es (drop-while symbol? es)]) 306 (match es 307 [() #f] 308 [('b . es) (next-line ind) (loop (drop-while symbol? es))] 309 [('s . es) (display " " port) (loop es)] 310 [(s . es) (render s ind port) (loop es)])))] 311 [else (display stree port)])) 312 313;; Stitch together. This is called from Scm_Write() family. 314(define-in-module gauche (%pretty-print obj port shared-table controls) 315 (assume shared-table) 316 (let1 context (make <pp-context> 317 :controls controls 318 :shared shared-table) 319 (let* ([layouter (layout obj 0 context)] 320 [memo (make-memo-hash)] 321 [fstree (car (layouter (rp-width context) memo))]) 322 (render fstree 0 port)))) 323 324;; Write controls used by pprint 325(define *default-controls* (make-write-controls :length #f 326 :level #f 327 :width 79 328 :pretty #t)) 329 330;; External API 331(define (pprint obj 332 :key (port (current-output-port)) 333 (controls *default-controls*) 334 width length level 335 ((:newline nl) #t)) 336 (let1 controls (write-controls-copy controls 337 :width width 338 :length length 339 :level level 340 :pretty #t) 341 (write obj port controls) 342 (when nl (newline port)))) 343