1;;;
2;;; gauche.pp - pretty printer
3;;;
4;;;   Copyright (c) 2000-2020  Shiro Kawai  <shiro@acm.org>
5;;;
6;;;   Redistribution and use in source and binary forms, with or without
7;;;   modification, are permitted provided that the following conditions
8;;;   are met:
9;;;
10;;;   1. Redistributions of source code must retain the above copyright
11;;;      notice, this list of conditions and the following disclaimer.
12;;;
13;;;   2. Redistributions in binary form must reproduce the above copyright
14;;;      notice, this list of conditions and the following disclaimer in the
15;;;      documentation and/or other materials provided with the distribution.
16;;;
17;;;   3. Neither the name of the authors nor the names of its contributors
18;;;      may be used to endorse or promote products derived from this
19;;;      software without specific prior written permission.
20;;;
21;;;   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22;;;   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23;;;   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24;;;   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25;;;   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26;;;   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
27;;;   TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
28;;;   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
29;;;   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
30;;;   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31;;;   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32;;;
33
34;; Experimental
35
36(define-module gauche.pputil
37  (use util.match)
38  (export pprint))
39(select-module gauche.pputil)
40
41;; List printing modes:
42;;
43;; oneline:
44;;   (Lorem ipsum dolor sit amet consectetur adipisicing)
45;;
46;; fill:
47;;   (Lorem ipsum dolor sit amet consectetur adipisicing elit
48;;    sed do eiusmod tempor incididunt ut labore et dolore)
49;;
50;; linear:
51;;   (Lorem
52;;    ipsum
53;;    dolor
54;;    ...)
55;;
56;; (0) If all the elements can be printed in one line, do so.
57;;
58;; (1) If the list itself doesn't fit in one line, but each element
59;;     does, then we use fill mode.
60;;
61;; (2) Otherwise, we fall back to linear mode.
62
63;; Pretty printer steps
64;;
65;; 1. Scan the tree and mark shared substrctures.
66;; 2. Scan the tree again, to build a layouter network.
67;; 3. Run the layouter network to produce "formatted" tree, which is
68;;    a tree of strings with separator / line-break directives.
69;; 4. Render the formatted tree into string.
70
71;; Various states carried around while the layouter network is built.
72;; 'Shared' table is used for shared/circular structure output.
73;; The first pass maps object => # of encounter.  If # > 1, we use labels.
74;; In the second pass we replace its value with -N where N is the label.
75(define-class <pp-context> ()
76  ([writer      :init-form write :init-keyword :writer]
77   [shared      :init-form (make-hash-table 'eq?) :init-keyword :shared]
78   [counter     :init-value 0]          ;shared label counter
79   [controls    :init-keyword :controls]))
80
81;; for internal convenience
82(define-inline (rp-shared c) (~ c 'shared))
83(define-inline (rp-length c) (~ c 'controls 'length))
84(define-inline (rp-level c)  (~ c 'controls 'level))
85(define-inline (rp-width c)  (~ c 'controls 'width))
86
87(define simple-obj?
88  (any-pred number? boolean? char? port? symbol? null?
89            (cut member <> '("" #()))))
90
91;; Maybe-ish monadic comparison
92(define-inline (=* a b) (and a b (= a b)))
93(define-inline (<* a b) (and a b (< a b)))
94(define-inline (>* a b) (and a b (> a b)))
95(define-inline (<=* a b) (and a b (<= a b)))
96(define-inline (>=* a b) (and a b (>= a b)))
97(define-inline (-* a b . args)
98  (and a b (if (null? args) (- a b) (apply -* (- a b) args))))
99(define-inline (min* a b) (if a (if b (min a b) a) b))
100
101;; Recurse to the system's writer to handle objects other than
102;; lists and vectors.  We want to pass down the controls (for
103;; print-base etc.), but the system's writer directly recurse into
104;; %pretty-print if print-pretty is true, causing infinite loop.
105;; So we drop pretty-print.
106(define (rec-writer c)
107  (let ([w  (~ c'writer)]
108        [c2 (write-controls-copy (~ c'controls) :pretty #f)])
109    (^x (w x c2))))
110
111(define (need-label? obj c) (> (hash-table-get (rp-shared c) obj 0) 1))
112(define (has-label? obj c) (<= (hash-table-get (rp-shared c) obj 1) 0))
113(define (add-label! obj c)
114  (rlet1 n (~ c'counter)
115    (hash-table-put! (rp-shared c) obj (- n))
116    (set! (~ c'counter) (+ n 1))))
117
118;; Creates a layouter that takes width and try to find the best
119;; layout of obj.  A layouter returns a pair of two values---a formatted
120;; string tree (FSTree), and the width the layout occupies.  The width may
121;; be #f if the layout spills to more than one lines.  FSTree is a
122;; tree of strings, with a symbol 's or 'b, indicating inter-datum space
123;; and line break.   A layouter may be called more than once on
124;; the same object if the layout is "retried".
125;;
126;; Because of retrying, running layouter might cost exponential time
127;; in the worst case, when leaf layouter is invoked again and again with
128;; the same width parameter.  So we also carry aronud a hashtable to
129;; memoize the result. The table uses (cons Width Layouter) as key, and
130;; (cons FStree Width) as result.
131
132;; Layouter = Integer, Memo -> (FSTree . Integer)
133;; FSTree = String | 's | 'b | (FSTree ...)
134
135;; Memoizing lambda
136(define-syntax memo^
137  (er-macro-transformer
138   (^[f r c]
139     (match f
140       [(_ (w m) . body)
141        (quasirename r
142          `(rec (fn ,w ,m)
143             (or (hash-table-get ,m (cons ,w fn) #f)
144                 (rlet1 p (begin ,@body)
145                   (hash-table-put! ,m (cons ,w fn) p)))))]))))
146
147(define (make-memo-hash)
148  (make-hash-table
149   (make-comparator pair?
150                    (^[a b] (and (eqv? (car a) (car b))
151                                 (eq? (cdr a) (cdr b))))
152                    #f
153                    (^p (combine-hash-value (eqv-hash (car p))
154                                            (eq-hash (cdr p)))))))
155
156;; layout :: (Obj, Integer, Context) -> Layouter
157(define (layout obj level c)
158  (cond [(has-label? obj c) (layout-ref obj c)]
159        [(simple-obj? obj) (layout-simple (write-to-string obj (rec-writer c)))]
160        [(>=* level (rp-level c)) (layout-simple "#")]
161        [else (layout-misc obj (cute layout <> (+ level 1) c) c)]))
162
163;; layout-misc :: (Obj, (Obj -> Layouter), Context) -> Layouter
164(define (layout-misc obj rec c)
165
166  ;; mapi :: (Obj -> Layouter), Vector -> Layouter
167  (define (mapi fn vec)
168    (let* ([s (vector-length vec)]
169           [rs (map (^i (fn (vector-ref vec i)))
170                    (iota (min* s (rp-length c))))])
171      (if (>=* s (rp-length c)) `(,@rs ,dots) rs)))
172
173  ;; mapu :: (Obj -> Layouter), UVector -> Layouter
174  (define (mapu fn vec)
175    (let* ([s (uvector-length vec)]
176           [rs (map (^i (fn (uvector-ref vec i)))
177                    (iota (min* s (rp-length c))))])
178      (if (>=* s (rp-length c)) `(,@rs ,dots) rs)))
179
180  ;; map+ :: (Obj -> Layouter), List -> Layouter
181  ;; map considering dotted list, print-length, and shared structure
182  (define (map+ fn lis)
183    (let loop ([lis lis] [len 0] [rs '()])
184      (match lis
185        [() (reverse rs)]
186        [((and (or 'quote 'quasiquote 'unquote 'unquote-splicing) p) _)
187         (if (has-label? (cdr lis) c)
188           (reverse (list* (layout-ref (cdr lis) c) dot (fn p) rs))
189           (reverse (list* (layout-shorthand lis fn c) dot rs)))]
190        [(l . lis)
191         (if (>=* len (rp-length c))
192           (reverse `(,dots ,@rs))
193           (let1 r (fn l)
194             (cond [(has-label? lis c)
195                    (reverse `(,(layout-ref lis c) ,dot ,r ,@rs))]
196                   [(need-label? lis c)
197                    (reverse `(,(fn lis) ,dot ,r ,@rs))]
198                   [else (loop lis (+ len 1) (cons r rs))])))]
199        [x (reverse `(,(fn x) ,dot ,@rs))])))
200
201  (cond [(pair? obj)
202         (or (and (pair? (cdr obj))
203                  (null? (cddr obj))
204                  (not (has-label? (cdr obj) c))
205                  (memq (car obj) '(quote quasiquote unquote unquote-splicing))
206                  (layout-shorthand obj rec c))
207             (layout-list (sprefix obj "(" c) (map+ rec obj) c))]
208        [(vector? obj) (layout-list (sprefix obj "#(" c) (mapi rec obj) c)]
209        [(is-a? obj <uvector>)
210         (let1 tag (rxmatch-substring
211                     (#/[csuf]\d+/ (x->string (class-name (class-of obj)))))
212           (layout-list (sprefix obj (format "#~a(" tag) c) (mapu rec obj) c))]
213        [else
214         (layout-simple (sprefix obj (write-to-string obj (rec-writer c)) c))]))
215
216;; :: Layouter
217(define dots (^[w m] '("...." . 4)))
218(define dot  (^[w m] '("." . 1)))
219
220;; layout-simple :: String -> Layouter
221(define (layout-simple str) (^[w m] (cons str (string-length str))))
222
223;; layout-ref :: Object -> Layouter
224(define (layout-ref obj c)
225  (layout-simple (format "#~d#" (- (hash-table-get (rp-shared c) obj)))))
226
227;; layout-shorthand :: (Object, (Object -> Layouter), Context) -> Layouter
228;;   handles quote etc.
229;;   Object is a two-element list, e.g. (quote X)
230;;   The second argument is to recursively layout X.
231;;   The case that cdr of the form is shared should be handled by the caller.
232(define (layout-shorthand form rec c)
233  (let* ([pfx (sprefix form
234                       (assq-ref '((quote . "'")
235                                   (quasiquote . "`")
236                                   (unquote . ",")
237                                   (unquote-splicing . ",@"))
238                                 (car form))
239                       c)]
240         [plen (string-length pfx)]
241         [inner (if (has-label? (cadr form) c)
242                  (layout-ref (cadr form) c)
243                  (rec (cadr form)))])
244    (memo^ [room memo]
245           (match-let1 (s . w) (inner (-* room plen) memo)
246             (cons (list pfx s) (and w (+ w plen)))))))
247
248;; layout-list :: (String, [Layouter], Context) -> Layouter
249(define (layout-list prefix elts c)
250  (let1 plen (string-length prefix)
251    (memo^ [room memo]
252           (match-let1 (s . w) (do-layout-elements (-* room plen) memo elts)
253             (cons (cons prefix (reverse s)) (and w (+ w plen)))))))
254
255;; sprefix :: (Object, String, Context) -> String
256(define (sprefix obj s c)
257  (if (need-label? obj c)
258    (format "#~d=~a" (add-label! obj c) s)
259    s))
260
261;; do-layout-elements :: Integer, [Layouter] -> (Formatted, Integer)
262;; This is the core of layout&retry algorithm.  Invoke layouters to
263;; find out best fit.  Each layouter may be invoked more than once,
264;; when retry happens.
265(define (do-layout-elements room memo elts)
266  (define (do-oneline r es strs)
267    (match es
268      [() (cons strs (-* room r))]
269      [(e . es) (match-let1 (s . w) (e room memo)
270                  (cond [(not w) (do-linear room elts)] ;giveup
271                        [(>* w room)                            ;too big
272                         (do-fill room es (list* 'b s 'b strs))]
273                        [(>* w r)
274                         (do-fill (-* room w) es (list* s 'b strs))]
275                        [else
276                         (do-oneline (-* r w 1) es (list* s 's strs))]))]))
277  (define (do-fill r es strs)
278    (match es
279      [() (cons strs #f)]
280      [(e . es) (match-let1 (s . w) (e room memo)
281                  (cond [(not w) (do-linear room elts)]
282                        [(>* w (-* r 1))
283                         (do-fill (-* room w 1) es (list* s 'b strs))]
284                        [else (do-fill (-* r w 1) es (list* s 's strs))]))]))
285  (define (do-linear r es)
286    (cons (fold (^[e strs] (match-let1 (s . w) (e room memo) (list* s 'b strs)))
287                '() es)
288          #f))
289  (match-let1 (s . w) (do-oneline room elts '())
290    (cons (cons ")" s) w)))
291
292;; Render the nested list of strings.  Some trick: S's and b's right
293;; after open paren are ignored.  S's right after b's are also ignored.
294;; B's insert a newline and a proper indentation.
295(define (render stree indent port)
296  (define (next-line col) (newline port) (dotimes [i col] (display " " port)))
297  (define (drop-while pred xs) ; avoid depending on srfi-1 (for now)
298    (cond [(null? xs) '()]
299          [(pred (car xs)) (drop-while pred (cdr xs))]
300          [else xs]))
301  (match stree
302    [(prefix . es)
303     (display prefix port)
304     (let1 ind (+ indent (string-length prefix))
305       (let loop ([es (drop-while symbol? es)])
306         (match es
307           [() #f]
308           [('b . es) (next-line ind) (loop (drop-while symbol? es))]
309           [('s . es) (display " " port) (loop es)]
310           [(s . es)  (render s ind port) (loop es)])))]
311    [else (display stree port)]))
312
313;; Stitch together.  This is called from Scm_Write() family.
314(define-in-module gauche (%pretty-print obj port shared-table controls)
315  (assume shared-table)
316  (let1 context (make <pp-context>
317                  :controls controls
318                  :shared shared-table)
319    (let* ([layouter (layout obj 0 context)]
320           [memo (make-memo-hash)]
321           [fstree (car (layouter (rp-width context) memo))])
322      (render fstree 0 port))))
323
324;; Write controls used by pprint
325(define *default-controls* (make-write-controls :length #f
326                                                :level #f
327                                                :width 79
328                                                :pretty #t))
329
330;; External API
331(define (pprint obj
332                :key (port (current-output-port))
333                     (controls *default-controls*)
334                     width length level
335                     ((:newline nl) #t))
336  (let1 controls (write-controls-copy controls
337                                      :width width
338                                      :length length
339                                      :level level
340                                      :pretty #t)
341    (write obj port controls)
342    (when nl (newline port))))
343