1#lang racket/base 2(require "../locale/string.rkt" 3 "../format/main.rkt" 4 "check.rkt" 5 "path.rkt" 6 "sep.rkt" 7 "windows.rkt") 8 9(provide build-path 10 build-path/convention-type) 11 12(define (build-path base . subs) 13 (build 'build-path #f base subs)) 14 15(define (build-path/convention-type convention base . subs) 16 (build 'build-path/convention-type convention base subs)) 17 18(define (build who init-convention base subs) 19 (check-build-path-arg who base) 20 (define convention 21 (let loop ([convention (argument->convention base init-convention who #:first? #t)] 22 [subs subs]) 23 (cond 24 [(null? subs) convention] 25 [else 26 (define sub (car subs)) 27 (check-build-path-arg who sub) 28 (loop (argument->convention sub convention who #:first? #f) 29 (cdr subs))]))) 30 (define final-convention (or convention (system-path-convention-type))) 31 (path (append-path-parts final-convention who base subs) 32 final-convention)) 33 34;; ---------------------------------------- 35 36(define (check-build-path-arg who p) 37 (check who 38 (lambda (p) (or (path-string? p) 39 (path-for-some-system? p) 40 (eq? p 'up) 41 (eq? p 'same))) 42 #:contract "(or/c path-string? path-for-some-system? 'up 'same)" 43 p)) 44 45(define (argument->convention p convention who #:first? first?) 46 (define (check c) 47 (when (and convention (not (eq? c convention))) 48 (raise-arguments-error who 49 (format 50 (if first? 51 "specified convention incompatible with ~a path element" 52 "preceding path's convention incompatible with ~a path element") 53 (if (string? p) 54 "string" 55 "given")) 56 "path element" p 57 (if first? "convention" "preceding path's convention") 58 convention)) 59 c) 60 (cond 61 [(path? p) (check (path-convention p))] 62 [(string? p) (check (system-path-convention-type))] 63 [else convention])) 64 65;; ---------------------------------------- 66 67(define (append-path-parts convention who base subs) 68 (define result-is-backslash-backslash-questionmark? 69 (and (eq? convention 'windows) 70 (for/or ([sub (in-list (cons base subs))]) 71 (backslash-backslash-questionmark? (as-bytes sub))))) 72 (define base-accum 73 (let ([bstr (as-bytes base)]) 74 (cond 75 [(eq? convention 'windows) 76 (if result-is-backslash-backslash-questionmark? 77 (convert-to-initial-backslash-backslash-questionmark bstr) 78 (list (strip-trailing-spaces bstr)))] 79 [else (list bstr)]))) 80 (define unc-result? 81 (and (eq? convention 'windows) 82 (not result-is-backslash-backslash-questionmark?) 83 (parse-unc (car base-accum) 0))) 84 ;; The `accum` list accumulates byte strings in reverse order to be 85 ;; appended. On Windows in \\?\ mode, each byte string corresponds 86 ;; to a single path element with a leading backslash, except that 87 ;; the last item is a starting-point; otherwise, the byte strings can 88 ;; be a mixture of compound path elements and separators 89 (let loop ([accum base-accum] [subs subs] [first? #t]) 90 (cond 91 [(null? subs) 92 (define elems (reverse accum)) 93 (combine-build-elements elems unc-result?)] 94 [else 95 (define sub (car subs)) 96 (define bstr (as-bytes sub)) 97 (case convention 98 [(unix) 99 ;; Unix is fairly straightforward 100 (when (is-sep? (bytes-ref bstr 0) 'unix) 101 (raise-arguments-error who 102 "absolute path cannot be added to a path" 103 "absolute path" sub)) 104 (define prev (car accum)) 105 (cond 106 [(is-sep? (bytes-ref prev (sub1 (bytes-length prev))) 'unix) 107 (loop (cons bstr accum) (cdr subs) #f)] 108 [else 109 (loop (list* bstr #"/" accum) (cdr subs) #f)])] 110 [(windows) 111 ;; For Windows, the implementation immediately here is 112 ;; mostly error checking, and actual combining work is in 113 ;; `combine-windows-path` 114 (define len (bytes-length bstr)) 115 (define (combine is-rel? is-complete? is-drive?) 116 (when (or is-complete? 117 (and (not is-rel?) 118 (or (not first?) 119 (not (and (null? (cdr accum)) 120 (drive? (car accum))))))) 121 (define what (if is-drive? "drive" "absolute path")) 122 (raise-arguments-error who 123 (string-append what " cannot be added to a base path") 124 what sub 125 "base path" (path (combine-build-elements (reverse accum) unc-result?) 126 'windows))) 127 (loop (combine-windows-path (if (and (null? subs) 128 ;; because \\?\ mode does its own stripping: 129 (not result-is-backslash-backslash-questionmark?)) 130 bstr 131 (strip-trailing-spaces bstr)) 132 accum 133 result-is-backslash-backslash-questionmark? 134 (null? (cdr subs))) 135 (cdr subs) 136 #f)) 137 (cond 138 [(is-sep? (bytes-ref bstr 0) 'windows) 139 (cond 140 [(backslash-backslash-questionmark? bstr) 141 (define-values (kind drive-len orig-drive-len clean-start-pos add-sep-pos) 142 (parse-backslash-backslash-questionmark bstr)) 143 (define abs? (or (eq? kind 'abs) (eq? kind 'unc))) 144 (combine (eq? kind 'rel) 145 abs? 146 (and abs? 147 (just-backslashes-after? bstr drive-len)))] 148 [(parse-unc bstr 0) 149 => (lambda (drive-len) 150 (combine #t #t (just-separators-after? bstr drive-len)))] 151 [else 152 (combine #f #f #f)])] 153 [(letter-drive-start? bstr len) 154 (combine #f #t (just-separators-after? bstr 2))] 155 [else 156 (combine #t #f #f)])])]))) 157 158(define (combine-windows-path bstr accum result-is-backslash-backslash-questionmark? is-last?) 159 (cond 160 [result-is-backslash-backslash-questionmark? 161 ;; Split `bstr` into pieces, and handle the pieces one-by-one 162 (let loop ([elems (windows-split-into-path-elements bstr is-last?)] [accum accum] [to-dir? #f]) 163 (cond 164 [(null? elems) 165 (if (and is-last? to-dir? (pair? (cdr accum))) 166 (cons (bytes-append (car accum) #"\\") (cdr accum)) 167 accum)] 168 [else 169 (define sub (car elems)) 170 (cond 171 [(eq? 'same sub) 172 ;; Ignore 'same for \\?\ mode 173 (loop (cdr elems) accum #t)] 174 [(eq? 'up sub) 175 ;; Drop previous element for 'up in \\?\ mode 176 (loop (cdr elems) 177 (if (null? (cdr accum)) 178 (list (starting-point-add-up (car accum))) 179 (cdr accum)) 180 #t)] 181 [else 182 (loop (cdr elems) (cons sub accum) #f)])]))] 183 [else 184 ;; Not in \\?\ mode, so `bstr` must not be a \\?\ path. 185 ;; In case `accum` is drive-relative, start by dropping any 186 ;; leading slashes. 187 (define len (bytes-length bstr)) 188 (define sub (let loop ([i 0]) 189 (cond 190 [(= i len) #""] 191 [(is-sep? (bytes-ref bstr i) 'windows) 192 (loop (add1 i))] 193 [(zero? i) bstr] 194 [else (subbytes bstr i)]))) 195 ;; Now, relatively simple: add a slash if needed between the parts 196 (define prev-bstr (car accum)) 197 (define new-accum (if (is-sep? (bytes-ref prev-bstr (sub1 (bytes-length prev-bstr))) 'windows) 198 accum 199 (cons #"\\" accum))) 200 (if (equal? sub #"") ; in case the argument was just "/" 201 new-accum 202 (cons sub new-accum))])) 203 204(define (windows-split-into-path-elements bstr keep-trailing-separator?) 205 (cond 206 [(backslash-backslash-questionmark? bstr) 207 ;; It must be REL or RED (with only a drive to build on) 208 (define-values (dots-end literal-start) 209 (backslash-backslash-questionmark-dot-ups-end bstr (bytes-length bstr))) 210 (append (extract-dot-ups bstr 8 (or dots-end 8)) 211 (extract-separate-parts bstr literal-start 212 #:bbq-mode? #t 213 #:keep-trailing-separator? keep-trailing-separator?))] 214 [else 215 (extract-separate-parts bstr 0 #:keep-trailing-separator? keep-trailing-separator?)])) 216 217(define (as-bytes p) 218 (cond 219 [(eq? p 'up) #".."] 220 [(eq? p 'same) #"."] 221 [(path? p) (path-bytes p)] 222 [else (string->bytes/locale p (char->integer #\?))])) 223 224(define (just-separators-after? bstr drive-len) 225 (for/and ([b (in-bytes bstr drive-len)]) 226 (is-sep? b 'windows))) 227 228(define (just-backslashes-after? bstr drive-len) 229 (for/and ([b (in-bytes bstr drive-len)]) 230 (eqv? b (char->integer #\\)))) 231 232;; Check whether `s`, a byte string or a `starting-point`, 233;; is just a drive, in which case we can add a non-complete 234;; absolute path 235(define (drive? s) 236 (cond 237 [(starting-point? s) (starting-point-drive? s)] 238 ;; must be a byte string 239 [(parse-unc s 0) 240 => (lambda (drive-len) (just-separators-after? s drive-len))] 241 [(letter-drive-start? s (bytes-length s)) 242 (just-separators-after? s 2)] 243 [else #f])) 244 245(struct starting-point (kind ; 'rel, 'red, 'unc, or 'abs 246 bstr ; byte string that contains the starting path 247 len ; number of bytes to use when adding more element 248 orig-len ; number of bytes to use when not adding more elements 249 extra-sep ; extra separator before first added element 250 add-ups? ; whether to add `up`s to the base string, as opposed to dropping them 251 drive?)) ; is bstr an absolute root? 252 253(define (make-starting-point kind 254 bstr 255 len 256 #:orig-len [orig-len len] 257 #:extra-sep [extra-sep #""] 258 #:add-ups? [add-ups? #f] 259 #:drive? [drive? #t]) 260 (list 261 (starting-point kind bstr len orig-len extra-sep add-ups? drive?))) 262 263(define (combine-build-elements elems unc-result?) 264 (cond 265 [(starting-point? (car elems)) 266 ;; in \\?\ mode for Windows 267 (define s (car elems)) 268 (cond 269 [(null? (cdr elems)) 270 (let ([bstr (subbytes (starting-point-bstr s) 271 0 272 (starting-point-orig-len s))]) 273 (cond 274 [(equal? bstr #"\\\\?\\REL") 275 #"."] 276 [(equal? bstr #"\\\\?\\RED") 277 #"\\"] 278 [else 279 (case (starting-point-kind s) 280 [(rel unc) 281 ;; Canonical form of \\?\REL\..[\..[etc.]] or \\?\UNC\[etc.] ends in slash: 282 (if (eqv? (bytes-ref bstr (sub1 (bytes-length bstr))) (char->integer #\\)) 283 bstr 284 (bytes-append bstr #"\\"))] 285 [else bstr])]))] 286 [else 287 (define init-bstr (subbytes (starting-point-bstr s) 288 0 289 (starting-point-len s))) 290 (apply bytes-append 291 init-bstr 292 (case (starting-point-kind s) 293 [(rel red) #"\\"] 294 [else #""]) 295 (starting-point-extra-sep s) 296 (cdr elems))])] 297 [else 298 ;; simple case... 299 (define bstr (apply bytes-append elems)) 300 ;; ... unless we've accidentally constructed something that 301 ;; looks like a \\?\ path or a UNC path, in which case we can 302 ;; correct by dropping a leading [back]slash 303 (cond 304 [(backslash-backslash-questionmark? bstr) 305 (subbytes bstr 1)] 306 [(and (not unc-result?) 307 (parse-unc bstr 0)) 308 (subbytes bstr 1)] 309 [else bstr])])) 310 311(define (convert-to-initial-backslash-backslash-questionmark bstr) 312 (cond 313 [(backslash-backslash-questionmark? bstr) 314 (define-values (kind drive-len orig-drive-len clean-start-pos add-sep) 315 (parse-backslash-backslash-questionmark bstr)) 316 (case kind 317 [(abs unc) 318 (append (reverse (extract-separate-parts bstr drive-len #:bbq-mode? #t)) 319 (if (equal? add-sep #"") 320 ;; drop implicit terminator in drive: 321 (make-starting-point kind bstr (sub1 drive-len) #:orig-len orig-drive-len) 322 (make-starting-point kind bstr drive-len #:orig-len orig-drive-len #:extra-sep (subbytes add-sep 1))))] 323 [else 324 ;; We can't back up over any dots before `dots-end`, 325 ;; so keep those toegether with \\?\REL 326 (define-values (dots-end literal-start) 327 (backslash-backslash-questionmark-dot-ups-end bstr (bytes-length bstr))) 328 (append (reverse (extract-separate-parts bstr literal-start #:bbq-mode? #t)) 329 (make-starting-point kind bstr (or dots-end 7) #:add-ups? (eq? kind 'rel) #:drive? #f))])] 330 [(parse-unc bstr 0) 331 => (lambda (root-len) 332 (define-values (machine volume) 333 (let ([l (extract-separate-parts (subbytes bstr 0 root-len) 0)]) 334 (values (car l) (cadr l)))) 335 (append (reverse (simplify-dots (extract-separate-parts bstr root-len) #:drop-leading? #t)) 336 (let* ([unc-bstr (bytes-append #"\\\\?\\UNC" machine volume)] 337 [unc-len (bytes-length unc-bstr)]) 338 (make-starting-point 'unc unc-bstr unc-len))))] 339 [(bytes=? #"." bstr) 340 (make-starting-point 'rel #"\\\\?\\REL" 7 #:add-ups? #t #:drive? #f)] 341 [(bytes=? #".." bstr) 342 (make-starting-point 'rel #"\\\\?\\REL\\.." 10 #:add-ups? #t #:drive? #f)] 343 [(is-sep? (bytes-ref bstr 0) 'windows) 344 (append (reverse (extract-separate-parts bstr 0)) 345 (make-starting-point 'red #"\\\\?\\RED" 7 #:drive? #f))] 346 [(and ((bytes-length bstr) . >= . 2) 347 (drive-letter? (bytes-ref bstr 0)) 348 (eqv? (bytes-ref bstr 1) (char->integer #\:))) 349 (append (reverse (simplify-dots (extract-separate-parts bstr 2) #:drop-leading? #t)) 350 (let ([drive-bstr (bytes-append #"\\\\?\\" (subbytes bstr 0 2) #"\\")]) 351 (make-starting-point 'abs drive-bstr 6 #:orig-len 7)))] 352 [else 353 ;; Create \\?\REL, combinding any leading dots into the \\?\REL part: 354 (define elems (simplify-dots (extract-separate-parts bstr 0) #:drop-leading? #f)) 355 (let loop ([dots null] [elems elems]) 356 (cond 357 [(or (null? elems) 358 (not (equal? (car elems) 'up))) 359 (append (reverse elems) 360 (let* ([rel-bstr (apply bytes-append #"\\\\?\\REL" dots)] 361 [rel-len (bytes-length rel-bstr)]) 362 (make-starting-point 'rel rel-bstr rel-len #:add-ups? #t #:drive? #f)))] 363 [else 364 (loop (cons (car elems) dots) (cdr elems))]))])) 365 366;; Split on separators, removing trailing whitespace from the last 367;; element, and prefix each extracted element with a backslash: 368(define (extract-separate-parts bstr pos 369 #:bbq-mode? [bbq-mode? #f] 370 #:keep-trailing-separator? [keep-trailing-separator? #f]) 371 (define (is-a-sep? b) 372 (if bbq-mode? 373 (eqv? b (char->integer #\\)) 374 (is-sep? b 'windows))) 375 (define len (bytes-length bstr)) 376 (let loop ([pos pos]) 377 (cond 378 [(= pos len) null] 379 [(is-a-sep? (bytes-ref bstr pos)) 380 (loop (add1 pos))] 381 [else 382 (let e-loop ([end-pos (add1 pos)]) 383 (cond 384 [(or (= end-pos len) 385 (is-a-sep? (bytes-ref bstr end-pos))) 386 (define rest (loop end-pos)) 387 (define elem-bstr (subbytes bstr pos end-pos)) 388 (define new-bstr (if (and (null? rest) 389 (not bbq-mode?)) 390 (strip-trailing-spaces elem-bstr) 391 elem-bstr)) 392 (define new-sub (cond 393 [(and (not bbq-mode?) 394 (bytes=? new-bstr #".")) 395 'same] 396 [(and (not bbq-mode?) 397 (bytes=? new-bstr #"..")) 398 'up] 399 [else 400 (if (and keep-trailing-separator? 401 (null? rest) 402 (end-pos . < . len)) 403 (bytes-append #"\\" new-bstr #"\\") 404 (bytes-append #"\\" new-bstr))])) 405 (cons new-sub rest)] 406 [else (e-loop (add1 end-pos))]))]))) 407 408;; Create a list containing one 'up for each ".." in the range: 409(define (extract-dot-ups bstr start dots-end) 410 (if (= start dots-end) 411 '() 412 (let loop ([i (add1 start)]) 413 (cond 414 [(i . >= . dots-end) '()] 415 [(and (eqv? (bytes-ref bstr i) (char->integer #\.)) 416 (eqv? (bytes-ref bstr (sub1 i)) (char->integer #\.))) 417 (cons 'up (loop (add1 i)))] 418 [else (loop (add1 i))])))) 419 420;; For \\?\REL paths, add an 'up at the start to the initial path. 421;; Otherwise, at a root, just drop an 'up. 422(define (starting-point-add-up s) 423 (cond 424 [(starting-point-add-ups? s) 425 (define bstr (bytes-append (subbytes (starting-point-bstr s) 426 0 427 (starting-point-len s)) 428 #"\\..")) 429 (define len (bytes-length bstr)) 430 (struct-copy starting-point s 431 [bstr bstr] 432 [len len] 433 [orig-len len])] 434 [else s])) 435 436(define (simplify-dots bstrs #:drop-leading? [drop-leading? #t]) 437 (let loop ([bstrs bstrs] [accum null]) 438 (cond 439 [(null? bstrs) (reverse accum)] 440 [(eq? 'same (car bstrs)) (loop (cdr bstrs) accum)] 441 [(eq? 'up (car bstrs)) (if (null? accum) 442 (if drop-leading? 443 (loop (cdr bstrs) accum) 444 (loop (cdr bstrs) (cons (car bstrs) accum))) 445 (loop (cdr bstrs) (cdr accum)))] 446 [else (loop (cdr bstrs) (cons (car bstrs) accum))]))) 447