1#lang racket/base
2(require "../locale/string.rkt"
3         "../format/main.rkt"
4         "check.rkt"
5         "path.rkt"
6         "sep.rkt"
7         "windows.rkt")
8
9(provide build-path
10         build-path/convention-type)
11
12(define (build-path base . subs)
13  (build 'build-path #f base subs))
14
15(define (build-path/convention-type convention base . subs)
16  (build 'build-path/convention-type convention base subs))
17
18(define (build who init-convention base subs)
19  (check-build-path-arg who base)
20  (define convention
21    (let loop ([convention (argument->convention base init-convention who #:first? #t)]
22               [subs subs])
23      (cond
24       [(null? subs) convention]
25       [else
26        (define sub (car subs))
27        (check-build-path-arg who sub)
28        (loop (argument->convention sub convention who #:first? #f)
29              (cdr subs))])))
30  (define final-convention (or convention (system-path-convention-type)))
31  (path (append-path-parts final-convention who base subs)
32        final-convention))
33
34;; ----------------------------------------
35
36(define (check-build-path-arg who p)
37  (check who
38         (lambda (p) (or (path-string? p)
39                    (path-for-some-system? p)
40                    (eq? p 'up)
41                    (eq? p 'same)))
42         #:contract "(or/c path-string? path-for-some-system? 'up 'same)"
43         p))
44
45(define (argument->convention p convention who #:first? first?)
46  (define (check c)
47    (when (and convention (not (eq? c convention)))
48      (raise-arguments-error who
49                             (format
50                              (if first?
51                                  "specified convention incompatible with ~a path element"
52                                  "preceding path's convention incompatible with ~a path element")
53                              (if (string? p)
54                                  "string"
55                                  "given"))
56                             "path element" p
57                             (if first? "convention" "preceding path's convention")
58                             convention))
59    c)
60  (cond
61   [(path? p) (check (path-convention p))]
62   [(string? p) (check (system-path-convention-type))]
63   [else convention]))
64
65;; ----------------------------------------
66
67(define (append-path-parts convention who base subs)
68  (define result-is-backslash-backslash-questionmark?
69    (and (eq? convention 'windows)
70         (for/or ([sub (in-list (cons base subs))])
71           (backslash-backslash-questionmark? (as-bytes sub)))))
72  (define base-accum
73    (let ([bstr (as-bytes base)])
74      (cond
75        [(eq? convention 'windows)
76         (if result-is-backslash-backslash-questionmark?
77             (convert-to-initial-backslash-backslash-questionmark bstr)
78             (list (strip-trailing-spaces bstr)))]
79        [else (list bstr)])))
80  (define unc-result?
81    (and (eq? convention 'windows)
82         (not result-is-backslash-backslash-questionmark?)
83         (parse-unc (car base-accum) 0)))
84  ;; The `accum` list accumulates byte strings in reverse order to be
85  ;; appended. On Windows in \\?\ mode, each byte string corresponds
86  ;; to a single path element with a leading backslash, except that
87  ;; the last item is a starting-point; otherwise, the byte strings can
88  ;; be a mixture of compound path elements and separators
89  (let loop ([accum base-accum] [subs subs] [first? #t])
90    (cond
91      [(null? subs)
92       (define elems (reverse accum))
93       (combine-build-elements elems unc-result?)]
94      [else
95       (define sub (car subs))
96       (define bstr (as-bytes sub))
97       (case convention
98         [(unix)
99          ;; Unix is fairly straightforward
100          (when (is-sep? (bytes-ref bstr 0) 'unix)
101            (raise-arguments-error who
102                                   "absolute path cannot be added to a path"
103                                   "absolute path" sub))
104          (define prev (car accum))
105          (cond
106            [(is-sep? (bytes-ref prev (sub1 (bytes-length prev))) 'unix)
107             (loop (cons bstr accum) (cdr subs) #f)]
108            [else
109             (loop (list* bstr #"/" accum) (cdr subs) #f)])]
110         [(windows)
111          ;; For Windows, the implementation immediately here is
112          ;; mostly error checking, and actual combining work is in
113          ;; `combine-windows-path`
114          (define len (bytes-length bstr))
115          (define (combine is-rel? is-complete? is-drive?)
116            (when (or is-complete?
117                      (and (not is-rel?)
118                           (or (not first?)
119                               (not (and (null? (cdr accum))
120                                         (drive? (car accum)))))))
121              (define what (if is-drive? "drive" "absolute path"))
122              (raise-arguments-error who
123                                     (string-append what " cannot be added to a base path")
124                                     what sub
125                                     "base path" (path (combine-build-elements (reverse accum) unc-result?)
126                                                       'windows)))
127            (loop (combine-windows-path (if (and (null? subs)
128                                                 ;; because \\?\ mode does its own stripping:
129                                                 (not result-is-backslash-backslash-questionmark?))
130                                            bstr
131                                            (strip-trailing-spaces bstr))
132                                        accum
133                                        result-is-backslash-backslash-questionmark?
134                                        (null? (cdr subs)))
135                  (cdr subs)
136                  #f))
137          (cond
138            [(is-sep? (bytes-ref bstr 0) 'windows)
139             (cond
140               [(backslash-backslash-questionmark? bstr)
141                (define-values (kind drive-len orig-drive-len clean-start-pos add-sep-pos)
142                  (parse-backslash-backslash-questionmark bstr))
143                (define abs? (or (eq? kind 'abs) (eq? kind 'unc)))
144                (combine (eq? kind 'rel)
145                         abs?
146                         (and abs?
147                              (just-backslashes-after? bstr drive-len)))]
148               [(parse-unc bstr 0)
149                => (lambda (drive-len)
150                     (combine #t #t (just-separators-after? bstr drive-len)))]
151               [else
152                (combine #f #f #f)])]
153            [(letter-drive-start? bstr len)
154             (combine #f #t (just-separators-after? bstr 2))]
155            [else
156             (combine #t #f #f)])])])))
157
158(define (combine-windows-path bstr accum result-is-backslash-backslash-questionmark? is-last?)
159  (cond
160    [result-is-backslash-backslash-questionmark?
161     ;; Split `bstr` into pieces, and handle the pieces one-by-one
162     (let loop ([elems (windows-split-into-path-elements bstr is-last?)] [accum accum] [to-dir? #f])
163       (cond
164         [(null? elems)
165          (if (and is-last? to-dir? (pair? (cdr accum)))
166              (cons (bytes-append (car accum) #"\\") (cdr accum))
167              accum)]
168         [else
169          (define sub (car elems))
170          (cond
171            [(eq? 'same sub)
172             ;; Ignore 'same for \\?\ mode
173             (loop (cdr elems) accum #t)]
174            [(eq? 'up sub)
175             ;; Drop previous element for 'up in \\?\ mode
176             (loop (cdr elems)
177                   (if (null? (cdr accum))
178                       (list (starting-point-add-up (car accum)))
179                       (cdr accum))
180                   #t)]
181            [else
182             (loop (cdr elems) (cons sub accum) #f)])]))]
183    [else
184     ;; Not in \\?\ mode, so `bstr` must not be a \\?\ path.
185     ;; In case `accum` is drive-relative, start by dropping any
186     ;; leading slashes.
187     (define len (bytes-length bstr))
188     (define sub (let loop ([i 0])
189                   (cond
190                     [(= i len) #""]
191                     [(is-sep? (bytes-ref bstr i) 'windows)
192                      (loop (add1 i))]
193                     [(zero? i) bstr]
194                     [else (subbytes bstr i)])))
195     ;; Now, relatively simple: add a slash if needed between the parts
196     (define prev-bstr (car accum))
197     (define new-accum (if (is-sep? (bytes-ref prev-bstr (sub1 (bytes-length prev-bstr))) 'windows)
198                           accum
199                           (cons #"\\" accum)))
200     (if (equal? sub #"") ; in case the argument was just "/"
201         new-accum
202         (cons sub new-accum))]))
203
204(define (windows-split-into-path-elements bstr keep-trailing-separator?)
205  (cond
206    [(backslash-backslash-questionmark? bstr)
207     ;; It must be REL or RED (with only a drive to build on)
208     (define-values (dots-end literal-start)
209       (backslash-backslash-questionmark-dot-ups-end bstr (bytes-length bstr)))
210     (append (extract-dot-ups bstr 8 (or dots-end 8))
211             (extract-separate-parts bstr literal-start
212                                     #:bbq-mode? #t
213                                     #:keep-trailing-separator? keep-trailing-separator?))]
214    [else
215     (extract-separate-parts bstr 0 #:keep-trailing-separator? keep-trailing-separator?)]))
216
217(define (as-bytes p)
218  (cond
219   [(eq? p 'up) #".."]
220   [(eq? p 'same) #"."]
221   [(path? p) (path-bytes p)]
222   [else (string->bytes/locale p (char->integer #\?))]))
223
224(define (just-separators-after? bstr drive-len)
225  (for/and ([b (in-bytes bstr drive-len)])
226    (is-sep? b 'windows)))
227
228(define (just-backslashes-after? bstr drive-len)
229  (for/and ([b (in-bytes bstr drive-len)])
230    (eqv? b (char->integer #\\))))
231
232;; Check whether `s`, a byte string or a `starting-point`,
233;; is just a drive, in which case we can add a non-complete
234;; absolute path
235(define (drive? s)
236  (cond
237    [(starting-point? s) (starting-point-drive? s)]
238    ;; must be a byte string
239    [(parse-unc s 0)
240     => (lambda (drive-len) (just-separators-after? s drive-len))]
241    [(letter-drive-start? s (bytes-length s))
242     (just-separators-after? s 2)]
243    [else #f]))
244
245(struct starting-point (kind        ; 'rel, 'red, 'unc, or 'abs
246                        bstr        ; byte string that contains the starting path
247                        len         ; number of bytes to use when adding more element
248                        orig-len    ; number of bytes to use when not adding more elements
249                        extra-sep   ; extra separator before first added element
250                        add-ups?    ; whether to add `up`s to the base string, as opposed to dropping them
251                        drive?))    ; is bstr an absolute root?
252
253(define (make-starting-point kind
254                             bstr
255                             len
256                             #:orig-len [orig-len len]
257                             #:extra-sep [extra-sep #""]
258                             #:add-ups? [add-ups? #f]
259                             #:drive? [drive? #t])
260  (list
261   (starting-point kind bstr len orig-len extra-sep add-ups? drive?)))
262
263(define (combine-build-elements elems unc-result?)
264  (cond
265    [(starting-point? (car elems))
266     ;; in \\?\ mode for Windows
267     (define s (car elems))
268     (cond
269       [(null? (cdr elems))
270        (let ([bstr (subbytes (starting-point-bstr s)
271                              0
272                              (starting-point-orig-len s))])
273          (cond
274            [(equal? bstr #"\\\\?\\REL")
275             #"."]
276            [(equal? bstr #"\\\\?\\RED")
277             #"\\"]
278            [else
279             (case (starting-point-kind s)
280               [(rel unc)
281                ;; Canonical form of \\?\REL\..[\..[etc.]] or \\?\UNC\[etc.] ends in slash:
282                (if (eqv? (bytes-ref bstr (sub1 (bytes-length bstr))) (char->integer #\\))
283                    bstr
284                    (bytes-append bstr #"\\"))]
285               [else bstr])]))]
286       [else
287        (define init-bstr (subbytes (starting-point-bstr s)
288                                    0
289                                    (starting-point-len s)))
290        (apply bytes-append
291               init-bstr
292               (case (starting-point-kind s)
293                 [(rel red) #"\\"]
294                 [else #""])
295               (starting-point-extra-sep s)
296               (cdr elems))])]
297    [else
298     ;; simple case...
299     (define bstr (apply bytes-append elems))
300     ;; ... unless we've accidentally constructed something that
301     ;; looks like a \\?\ path or a UNC path, in which case we can
302     ;; correct by dropping a leading [back]slash
303     (cond
304       [(backslash-backslash-questionmark? bstr)
305        (subbytes bstr 1)]
306       [(and (not unc-result?)
307             (parse-unc bstr 0))
308        (subbytes bstr 1)]
309       [else bstr])]))
310
311(define (convert-to-initial-backslash-backslash-questionmark bstr)
312  (cond
313    [(backslash-backslash-questionmark? bstr)
314     (define-values (kind drive-len orig-drive-len clean-start-pos add-sep)
315       (parse-backslash-backslash-questionmark bstr))
316     (case kind
317       [(abs unc)
318        (append (reverse (extract-separate-parts bstr drive-len #:bbq-mode? #t))
319                (if (equal? add-sep #"")
320                    ;; drop implicit terminator in drive:
321                    (make-starting-point kind bstr (sub1 drive-len) #:orig-len orig-drive-len)
322                    (make-starting-point kind bstr drive-len #:orig-len orig-drive-len #:extra-sep (subbytes add-sep 1))))]
323       [else
324        ;; We can't back up over any dots before `dots-end`,
325        ;; so keep those toegether with \\?\REL
326        (define-values (dots-end literal-start)
327          (backslash-backslash-questionmark-dot-ups-end bstr (bytes-length bstr)))
328        (append (reverse (extract-separate-parts bstr literal-start #:bbq-mode? #t))
329                (make-starting-point kind bstr (or dots-end 7) #:add-ups? (eq? kind 'rel) #:drive? #f))])]
330    [(parse-unc bstr 0)
331     => (lambda (root-len)
332          (define-values (machine volume)
333            (let ([l (extract-separate-parts (subbytes bstr 0 root-len) 0)])
334              (values (car l) (cadr l))))
335          (append (reverse (simplify-dots (extract-separate-parts bstr root-len) #:drop-leading? #t))
336                  (let* ([unc-bstr (bytes-append #"\\\\?\\UNC" machine volume)]
337                         [unc-len (bytes-length unc-bstr)])
338                    (make-starting-point 'unc unc-bstr unc-len))))]
339    [(bytes=? #"." bstr)
340     (make-starting-point 'rel #"\\\\?\\REL" 7 #:add-ups? #t #:drive? #f)]
341    [(bytes=? #".." bstr)
342     (make-starting-point 'rel #"\\\\?\\REL\\.." 10  #:add-ups? #t #:drive? #f)]
343    [(is-sep? (bytes-ref bstr 0) 'windows)
344     (append (reverse (extract-separate-parts bstr 0))
345             (make-starting-point 'red #"\\\\?\\RED" 7 #:drive? #f))]
346    [(and ((bytes-length bstr) . >= . 2)
347          (drive-letter? (bytes-ref bstr 0))
348          (eqv? (bytes-ref bstr 1) (char->integer #\:)))
349     (append (reverse (simplify-dots (extract-separate-parts bstr 2) #:drop-leading? #t))
350             (let ([drive-bstr (bytes-append #"\\\\?\\" (subbytes bstr 0 2) #"\\")])
351               (make-starting-point 'abs drive-bstr 6 #:orig-len 7)))]
352    [else
353     ;; Create \\?\REL, combinding any leading dots into the \\?\REL part:
354     (define elems (simplify-dots (extract-separate-parts bstr 0) #:drop-leading? #f))
355     (let loop ([dots null] [elems elems])
356       (cond
357         [(or (null? elems)
358              (not (equal? (car elems) 'up)))
359          (append (reverse elems)
360                  (let* ([rel-bstr (apply bytes-append #"\\\\?\\REL" dots)]
361                         [rel-len (bytes-length rel-bstr)])
362                    (make-starting-point 'rel rel-bstr rel-len #:add-ups? #t #:drive? #f)))]
363         [else
364          (loop (cons (car elems) dots) (cdr elems))]))]))
365
366;; Split on separators, removing trailing whitespace from the last
367;; element, and prefix each extracted element with a backslash:
368(define (extract-separate-parts bstr pos
369                                #:bbq-mode? [bbq-mode? #f]
370                                #:keep-trailing-separator? [keep-trailing-separator? #f])
371  (define (is-a-sep? b)
372    (if bbq-mode?
373        (eqv? b (char->integer #\\))
374        (is-sep? b 'windows)))
375  (define len (bytes-length bstr))
376  (let loop ([pos pos])
377    (cond
378      [(= pos len) null]
379      [(is-a-sep? (bytes-ref bstr pos))
380       (loop (add1 pos))]
381      [else
382       (let e-loop ([end-pos (add1 pos)])
383         (cond
384           [(or (= end-pos len)
385                (is-a-sep? (bytes-ref bstr end-pos)))
386            (define rest (loop end-pos))
387            (define elem-bstr (subbytes bstr pos end-pos))
388            (define new-bstr (if (and (null? rest)
389                                      (not bbq-mode?))
390                                 (strip-trailing-spaces elem-bstr)
391                                 elem-bstr))
392            (define new-sub (cond
393                              [(and (not bbq-mode?)
394                                    (bytes=? new-bstr #"."))
395                               'same]
396                              [(and (not bbq-mode?)
397                                    (bytes=? new-bstr #".."))
398                               'up]
399                              [else
400                               (if (and keep-trailing-separator?
401                                        (null? rest)
402                                        (end-pos . < . len))
403                                   (bytes-append #"\\" new-bstr #"\\")
404                                   (bytes-append #"\\" new-bstr))]))
405            (cons new-sub rest)]
406           [else (e-loop (add1 end-pos))]))])))
407
408;; Create a list containing one 'up for each ".." in the range:
409(define (extract-dot-ups bstr start dots-end)
410  (if (= start dots-end)
411      '()
412      (let loop ([i (add1 start)])
413        (cond
414          [(i . >= . dots-end) '()]
415          [(and (eqv? (bytes-ref bstr i) (char->integer #\.))
416                (eqv? (bytes-ref bstr (sub1 i)) (char->integer #\.)))
417           (cons 'up (loop (add1 i)))]
418          [else (loop (add1 i))]))))
419
420;; For \\?\REL paths, add an 'up at the start to the initial path.
421;; Otherwise, at a root, just drop an 'up.
422(define (starting-point-add-up s)
423  (cond
424    [(starting-point-add-ups? s)
425     (define bstr (bytes-append (subbytes (starting-point-bstr s)
426                                          0
427                                          (starting-point-len s))
428                                #"\\.."))
429     (define len (bytes-length bstr))
430     (struct-copy starting-point s
431                  [bstr bstr]
432                  [len len]
433                  [orig-len len])]
434    [else s]))
435
436(define (simplify-dots bstrs #:drop-leading? [drop-leading? #t])
437  (let loop ([bstrs bstrs] [accum null])
438    (cond
439      [(null? bstrs) (reverse accum)]
440      [(eq? 'same (car bstrs)) (loop (cdr bstrs) accum)]
441      [(eq? 'up (car bstrs)) (if (null? accum)
442                                 (if drop-leading?
443                                     (loop (cdr bstrs) accum)
444                                     (loop (cdr bstrs) (cons (car bstrs) accum)))
445                                 (loop (cdr bstrs) (cdr accum)))]
446      [else (loop (cdr bstrs) (cons (car bstrs) accum))])))
447