1#lang racket/base 2(require racket/tcp 3 racket/format 4 racket/file 5 racket/port 6 racket/string 7 file/gunzip 8 file/private/check-path 9 openssl/sha1 10 net/base64 11 net/url 12 net/head 13 net/http-client 14 (only-in net/url-connect current-https-protocol)) 15 16;; Stefan Saasen's "Reimplementing 'git clone' in Haskell from the bottom up" 17;; http://stefan.saasen.me/articles/git-clone-in-haskell-from-the-bottom-up/ 18;; provided many helpful hints for this implementation. 19 20(provide git-checkout 21 current-git-username 22 current-git-password 23 (struct-out exn:fail:git)) 24 25(define-logger git-checkout) 26 27(define current-git-username (make-parameter #f)) 28(define current-git-password (make-parameter #f)) 29 30(struct exn:fail:git exn:fail () #:transparent) 31 32(define (raise-git-error name fmt . vals) 33 (raise (exn:fail:git (apply format (string-append "~s: " fmt) name vals) 34 (current-continuation-marks)))) 35 36;; Like `git clone`, but producing just the checkout 37(define (git-checkout host 38 repo 39 #:dest-dir dest-dir ; #f => only find checkout 40 #:transport [transport 'git] 41 #:ref [ref/head 'head] 42 #:initial-search-ref [initial-search-ref "master"] 43 #:depth [given-depth 1] 44 #:status-printf [status-printf (lambda args 45 (apply printf args) 46 (flush-output))] 47 #:initial-error [initial-error #f] 48 #:tmp-dir [given-tmp-dir #f] 49 #:clean-tmp-dir? [clean-tmp-dir? (not given-tmp-dir)] 50 #:verify-server? [verify? #t] 51 #:port [given-port #f] 52 #:strict-links? [strict-links? #f] 53 #:username [username (current-git-username)] 54 #:password [password (current-git-password)]) 55 (let retry-loop ([given-depth given-depth] [try-limit-depth (and given-depth 8)] [try-search-only-ref initial-search-ref]) 56 (define tmp-dir (or given-tmp-dir 57 (make-temporary-file "git~a" 'directory))) 58 (define port (or given-port (case transport 59 [(git) 9418] 60 [(http) 80] 61 [(https) 443]))) 62 63 (define (status fmt . args) 64 (define msg (apply format fmt args)) 65 (status-printf "~a\n" msg) 66 (log-git-checkout-info msg)) 67 68 (status "Contacting ~a" host) 69 (define-values (i o dumb-protocol?) 70 (initial-connect transport host verify? port repo status username password)) 71 ((let/ec esc 72 (dynamic-wind 73 void 74 (lambda () 75 (status "Getting refs~a" (if dumb-protocol? " [dumb protocol]" "")) 76 (write-pkt o 77 "git-upload-pack " "/" repo "\0" 78 "host=" host "\0") 79 (define pkts (if dumb-protocol? 80 ;; dumb protocol provides plain lines: 81 (for/list ([l (in-lines i)]) (string-append l "\n")) 82 ;; smart protocol provides packets: 83 (read-pkts i))) 84 (unless (pair? pkts) 85 (raise-git-error 'git-checkout "no initial pkts from the server")) 86 87 ;; Parse server's initial reply 88 (define server-capabilities (parse-server-capabilities (car pkts))) 89 (define refs ; (list (list <name> <ID>) ...) 90 (parse-initial-refs pkts initial-error)) 91 92 (define ref (head->ref ref/head 93 server-capabilities dumb-protocol? 94 transport host verify? port repo username password 95 status)) 96 97 ;; Find the commits needed for `ref`: 98 (define-values (ref-commit ; #f or an ID string 99 want-commits) ; list of ID string 100 (select-commits ref refs server-capabilities status try-search-only-ref repo)) 101 102 (unless dest-dir 103 (write-pkt o) ; clean termination 104 ((esc (lambda () 105 ;; If we get this far and `ref-commit` is #f, 106 ;; then `ref` looks like a commit 107 (or ref-commit ref))))) 108 109 (define depth (and given-depth 110 (or ref-commit (and try-limit-depth 111 (eq? given-depth 1))) 112 (cond 113 [(member "shallow" server-capabilities) 114 (if ref-commit 115 given-depth 116 try-limit-depth)] 117 [else 118 (status "Server does not support `shallow`") 119 #f]))) 120 121 (unless dumb-protocol? 122 ;; Tell the server which commits we need 123 (set!-values (i o) (want-step transport host port repo i o)) 124 (for ([want-commit (in-list want-commits)] 125 [pos (in-naturals)]) 126 (write-pkt o "want " want-commit (if (zero? pos) " " "") "\n")) 127 (when depth 128 (write-pkt o "deepen " depth "\n")) 129 (write-pkt o) 130 131 ;; Tell the server that we're ready for the objects 132 (write-pkt o "done\n") 133 (set!-values (i o) (done-step transport host verify? port repo username password i o)) 134 135 (when depth 136 ;; If we wrote `deepen`, then the server replies with `shallow`s. 137 ;; Note that these were available before writing `done` in the 138 ;; case of the 'git transport, but it works here for all transports. 139 (let loop () 140 (define r (read-pkt i)) 141 (cond 142 [(eof-object? r) 143 (void)] 144 [(regexp-match? #rx"^shallow " r) 145 (loop)] 146 [else 147 (raise-git-error 'git-checkout "expected shallow, got ~s" r)]))) 148 149 ;; Tell the server that we're ready for the objects 150 (define nak (read-pkt i)) 151 (unless (or (eof-object? nak) ; happens when `want-commits` is empty 152 (equal? #"NAK\n" nak)) 153 (raise-git-error 'git-checkout "expected NAK, got ~s" nak))) 154 155 (make-directory* tmp-dir) 156 (define tmp (make-tmp-info tmp-dir #:fresh? #t)) 157 158 (define (maybe-save-objects objs fn) 159 (unless clean-tmp-dir? 160 (call-with-output-file* 161 (build-path tmp-dir fn) 162 #:exists 'truncate 163 (lambda (o) (write objs o) (newline o))))) 164 165 (dynamic-wind 166 void 167 (lambda () 168 (define obj-ids 169 (cond 170 [dumb-protocol? 171 (read-dumb-objects want-commits tmp 172 transport host verify? port repo 173 status maybe-save-objects)] 174 [else 175 ;; Read packfile pbjects, which are written into 176 ;; `tmp-dir`. If `depth` gives the server trouble, 177 ;; we might get an EOF, in which case we'll try again: 178 (define objs 179 (read-packfile i tmp status 180 (and depth 181 (lambda () 182 (esc (lambda () 183 (status "Unexpected EOF; retrying without depth") 184 (retry-loop #f #f #f))))))) 185 (maybe-save-objects objs "objs") 186 187 ;; Convert deltas into full objects within `tmp`: 188 (rewrite-deltas objs tmp status)])) 189 190 (maybe-save-objects obj-ids "all-objs") 191 192 (define commit 193 (or ref-commit 194 (find-commit-as-reference ref obj-ids 195 (and (or try-search-only-ref 196 (and try-limit-depth 197 (eqv? depth try-limit-depth))) 198 (lambda () 199 (esc (lambda () 200 (cond 201 [(and depth (eqv? depth try-limit-depth) 202 (try-limit-depth . < . 32)) 203 (status "no matching commit found; trying deeper search") 204 (retry-loop given-depth (* try-limit-depth 2) try-search-only-ref)] 205 [else 206 (status "no matching commit found; trying broader search") 207 (retry-loop given-depth #f #f)])))))))) 208 209 ;; Extract the tree from the packfile objects: 210 (status "Extracting tree to ~a" dest-dir) 211 (extract-commit-tree (hex-string->bytes commit) 212 obj-ids tmp dest-dir 213 strict-links?) 214 215 ;; Done; return checkout id 216 (lambda () commit)) 217 (lambda () 218 (status "Cleaning up") 219 (close-tmp-info tmp) 220 (when clean-tmp-dir? 221 (delete-directory/files tmp-dir))))) 222 (lambda () 223 (close-input-port i) 224 (close-output-port o))))))) 225 226;; ---------------------------------------- 227;; Transports: git, http, and https 228 229(define (http-request-headers username password) 230 ;; bitbucket.org seems to require a "git" value for "User-Agent", 231 ;; otherwise it returns a "broken link" web page 232 (define base-headers '("User-Agent: git/1.9")) 233 ;; include an Authorization header if credentials are provided 234 (if (and username password) 235 (cons (~a "Authorization: Basic " (base64-encode (string->bytes/utf-8 (~a username ":" password)) #"")) 236 base-headers) 237 base-headers)) 238 239;; initial-connect: transport-sym string bool natural string status-proc string string 240;; -> (values input-port output-port boolean) 241;; Contacts the server and returns an output port for writing 242;; the request (ignored if not needed for the the transport) 243;; and an input port from reading the available references. The 244;; boolean result indicates whether the protocol is "dumb". 245(define (initial-connect transport host verify? port repo status username password) 246 (case transport 247 [(git) 248 (define-values (i o) (tcp-or-tunnel-connect "git" host port)) 249 (values i o #f)] 250 [(http https) 251 (define url-str 252 (~a transport "://" host ":" port "/" repo 253 "/info/refs?service=git-upload-pack")) 254 (define-values (i headers) 255 (parameterize ([current-https-protocol (ssl-context verify?)]) 256 (get-pure-port/headers (string->url url-str) 257 (http-request-headers username password) 258 #:redirections 5))) 259 (define ok? #f) 260 (dynamic-wind 261 void 262 (lambda () 263 (define dumb? 264 (cond 265 [(equal? (extract-field "Content-Type" headers) 266 "application/x-git-upload-pack-advertisement") 267 ;; "smart" protocol 268 (unless (regexp-match-peek #px#"^[0-9a-f]{4}#" i) 269 (raise-git-error 'git-checkout (~a "error reading repository content;\n" 270 " response is not consistent with the Git protocol\n" 271 " initial portion: ~s") 272 (read-bytes 640 i))) 273 (define pkt (read-pkt i)) 274 (define term-pkt (read-pkt i)) 275 (unless (eof-object? term-pkt) 276 (raise-git-error 'git-checkout (~a "expected a null packet, received something else\n" 277 " packet: ~s") 278 term-pkt)) 279 #f] 280 [else 281 ;; "dumb" protocol 282 #t])) 283 (set! ok? #t) 284 (values i (open-output-nowhere) dumb?)) 285 (lambda () 286 (unless ok? (close-input-port i))))] 287 [else 288 (raise-git-error 'git-checkout "unrecognized transport\n given: ~e" transport)])) 289 290;; want-step: transport-sym string natural string input-port output-port 291;; -> (values input-port output-port) 292;; Replaces the connection, if appropriate to the transport, for 293;; writing the wanted references. 294(define (want-step transport host port repo i o) 295 (case transport 296 [(git) (values i o)] 297 [(http https) 298 (close-input-port i) 299 (values (open-input-bytes #"") (open-output-bytes))])) 300 301;; done-step: transport-sym string bool natural string string string input-port output-port 302;; -> (values input-port output-port) 303;; Replaces the connection, if appropriate to the transport, after 304;; writing the wanted references and before reading the server's 305;; response. 306(define (done-step transport host verify? port repo username password i o) 307 (case transport 308 [(git) (values i o)] 309 [(http https) 310 (define s (get-output-bytes o)) 311 (define i 312 (parameterize ([current-https-protocol (ssl-context verify?)]) 313 (post-pure-port 314 (string->url 315 (~a transport "://" host ":" port "/" repo 316 "/git-upload-pack")) 317 s 318 (append 319 (http-request-headers username password) 320 (list "Content-Type: application/x-git-upload-pack-request"))))) 321 (values i (open-output-nowhere))])) 322 323;; converts 'head to a branch/tag/commit ref 324(define (head->ref ref/head 325 server-capabilities dumb-protocol? 326 transport host verify? port repo username password 327 status) 328 (cond 329 [(eq? ref/head 'head) 330 (or 331 ;; Git 1.8.5 and later (smart protocol) maps HEAD in capabilities 332 (for/or ([cap (in-list server-capabilities)]) 333 (define m (regexp-match #rx"^symref=HEAD:(.*)$" cap)) 334 (and m (refspec->ref (cadr m)))) 335 ;; dumb protocol: fetch the "HEAD" reference 336 (case (and dumb-protocol? transport) 337 [(http https) 338 (status "Getting HEAD") 339 (define i 340 (parameterize ([current-https-protocol (ssl-context verify?)]) 341 (get-pure-port 342 (string->url 343 (~a transport "://" host ":" port "/" repo 344 "/HEAD")) 345 (append 346 (http-request-headers username password) 347 (list "Content-Type: application/x-git-upload-pack-request"))))) 348 (define s (port->string i)) 349 (define m (regexp-match #rx"(?m:^ref: (.*)$)" s)) 350 (and m (refspec->ref (cadr m)))] 351 [else #f]) 352 ;; If all else fails, keep 'head and try to match "HEAD" in refs 353 'head)] 354 [else ref/head])) 355 356(define (ssl-context verify?) 357 (cond 358 [(or (not verify?) 359 (getenv "GIT_SSL_NO_VERIFY")) 360 (current-https-protocol)] 361 [else 362 'secure])) 363 364(define (refspec->ref refspec) 365 (cond 366 [(regexp-match #rx"^refs/(?:heads|tags)/(.*)$" refspec) 367 => (lambda (m) (cadr m))] 368 [else refspec])) 369 370;; ---------------------------------------- 371 372;; parse-server-capabilities : bytes -> (listof string) 373(define (parse-server-capabilities first-pkt) 374 (let ([m (regexp-match #rx#"\0(.*)\n$" first-pkt)]) 375 (cond 376 [m (string-split (bytes->string/utf-8 (cadr m)))] 377 [else null]))) 378 379;; parse-initial-refs : (listof bytes) -> (listof (list bytes string)) 380;; In each element of the returned list, first item is 381;; the branch or tag name, second is the ID 382(define (parse-initial-refs pkts initial-error) 383 (filter 384 values 385 (for/list ([pkt (in-list pkts)]) 386 (define m (regexp-match #px#"^([0-9a-fA-F]{40})[ \t]([^\0\n]+)[\0\n]" pkt)) 387 (unless m 388 (when initial-error (initial-error)) 389 (raise-git-error 'git-checkout "could not parse ref pkt\n pkt: ~s" pkt)) 390 (define name (caddr m)) 391 (define id (bytes->string/utf-8 (cadr m))) 392 (cond 393 [(regexp-match? #rx#"\\^{}$" name) 394 ;; drop parent references 395 #f] 396 [else (list name id)])))) 397 398;; select-commits : (or/c string 'head) (listof (list bytes string)) .... 399;; -> (values string-or-#f (listof string)) 400;; Convert the user's request `ref`, which is a branch or tag or ID, 401;; into a specific ID --- if we can determine it from the server's 402;; initial response. If we can, the list of requested IDs will be 403;; just that one. Otherwise, we'll have to return a list of all 404;; IDs, and then we'll look for the reference later. 405(define (select-commits ref refs server-capabilities status try-search-only-ref repo) 406 (define ref-looks-like-id? (and (string? ref) 407 (regexp-match? #rx"^[0-9a-f]+$" ref))) 408 (define ref-rx (cond 409 [(eq? ref 'head) 410 ;; some servers report "HEAD" early, and we can 411 ;; expect it early in the list; if that fails, 412 ;; fall back to trying a "master" branch: 413 #"^HEAD|refs/heads/master$"] 414 [else (branch-or-tag->regexp ref)])) 415 416 (define ref-commit 417 (or 418 ;; Search list of branch and tag names: 419 (for/or ([ref (in-list refs)]) 420 (and (regexp-match? ref-rx (car ref)) 421 (cadr ref))) 422 ;; Try matching the references as a commit/tag ID of a branch or tag: 423 (and (string? ref) 424 (let ([rx (id-ref->regexp ref)]) 425 (for/or ([a-ref (in-list refs)]) 426 (and (regexp-match? rx (cadr a-ref)) 427 (begin 428 (status "Commit id ~s matches ~a" ref (car a-ref)) 429 (cadr a-ref)))))))) 430 (define want-commits 431 (cond 432 [ref-commit (list ref-commit)] 433 [ref-looks-like-id? 434 (cond 435 [try-search-only-ref 436 (status "Requested reference looks like commit id; try within ~a" try-search-only-ref) 437 (define master-rx (branch-or-tag->regexp try-search-only-ref)) 438 (cond 439 [(for/or ([ref (in-list refs)]) 440 (regexp-match? master-rx (car ref))) 441 (define-values (master-ref-commit want-commits) 442 (select-commits try-search-only-ref refs '() status #f repo)) 443 want-commits] 444 [else 445 (status "There does not appear to be a ~a branch or tag, however" try-search-only-ref) 446 ;; the branch to try appears not to exist 447 null])] 448 [else 449 (status "Requested reference looks like commit id; getting all commits") 450 (for/list ([ref (in-list refs)]) 451 (cadr ref))])] 452 [else 453 (raise-git-error 'git "could not find requested reference\n reference: ~a\n repo: ~a" ref repo)])) 454 455 (values ref-commit want-commits)) 456 457(define (branch-or-tag->regexp ref) 458 (byte-regexp (bytes-append 459 #"^refs/(?:heads|tags)/" 460 (regexp-quote (string->bytes/utf-8 ref)) 461 #"$"))) 462 463;; ---------------------------------------- 464;; A "pkt" is the basic unit of communication in many parts 465;; of the git protocol. The first four bytes specify the 466;; length of the package (including those bytes). 467 468;; write-pkt : output-port any ... -> void 469;; `display`s each argument to create a package 470(define (write-pkt o . args) 471 (define s (open-output-bytes)) 472 (for ([arg (in-list args)]) 473 (display arg s)) 474 (define msg (get-output-bytes s)) 475 (define len (bytes-length msg)) 476 (define full-msg 477 (cond 478 [(zero? len) #"0000"] 479 [else 480 (define len-bstr (string->bytes/utf-8 (format "~x" (+ 4 len)))) 481 (bytes-append (make-bytes (- 4 (bytes-length len-bstr)) 482 (char->integer #\0)) 483 len-bstr 484 msg)])) 485 (write-bytes full-msg o) 486 (flush-output o)) 487 488;; read-pkt : input-port -> bstr-or-eof 489;; Reads one pkt, returning eof of the special "null" pkt 490(define (read-pkt i) 491 (define len-bstr (read-bytes 4 i)) 492 (cond 493 [(eof-object? len-bstr) eof] 494 [else 495 (unless (and (bytes? len-bstr) 496 (= 4 (bytes-length len-bstr))) 497 (raise-git-error 'git-checkout "error getting pkt length")) 498 (define len (string->number (bytes->string/utf-8 len-bstr #\?) 16)) 499 (unless len 500 (raise-git-error 'git-checkout "error getting pkt length\n length string: ~e" len-bstr)) 501 (cond 502 [(= len 0) eof] ; flush pkt 503 [else 504 (define payload-len (- len 4)) 505 (unless (payload-len . >= . 0) 506 (raise-git-error 'git-checkout "pkt length makes no sense\n length: ~a" len)) 507 (read-bytes-exactly 'payload payload-len i)])])) 508 509;; read a list of pkts until an empty packet is found 510(define (read-pkts i) 511 (define pkt (read-pkt i)) 512 (if (or (eof-object? pkt) 513 (equal? #"" pkt)) 514 null 515 (cons pkt (read-pkts i)))) 516 517;; ---------------------------------------- 518;; Packfile objects 519 520(struct object (location ; filename within tmp or position in small-object file 521 type ; 'blob, 'commit, etc.; see `type-num->sym` 522 [type-info #:mutable] ; #f, id, or object 523 id ; sha1 as bytes 524 [undelta #:mutable]) 525 #:prefab) 526 527;; read-packfile : input-port tmp-info status-proc (or/c #f (-> any)) -> (listof object) 528;; The `initial-eof-handler` argument should escape, if it's not #f 529(define (read-packfile i tmp status initial-eof-handler) 530 (define pack-bstr (read-bytes 4 i)) 531 (unless (equal? pack-bstr #"PACK") 532 (when (and (eof-object? pack-bstr) 533 initial-eof-handler) 534 (initial-eof-handler)) 535 (raise-git-error 'git-checkout "header error\n bytes: ~s" pack-bstr)) 536 (define vers (read-bytes 4 i)) 537 (unless (equal? vers #"\0\0\0\2") 538 (raise-git-error 'git-checkout "only version 2 supported")) 539 (define count-bstr (read-bytes-exactly 'count 4 i)) 540 (define count (integer-bytes->integer count-bstr #t #t)) 541 (define obj-stream-poses (make-hash)) ; for OBJ_OFS_DELTA references 542 (status "Getting ~a objects" count) 543 (for/list ([pos (in-range count)]) 544 (read-object i tmp obj-stream-poses))) 545 546(define OBJ_COMMIT 1) 547(define OBJ_TREE 2) 548(define OBJ_BLOB 3) 549(define OBJ_TAG 4) 550(define OBJ_OFS_DELTA 6) 551(define OBJ_REF_DELTA 7) 552 553(define type-num->sym (hash OBJ_COMMIT 'commit 554 OBJ_TREE 'tree 555 OBJ_BLOB 'blob 556 OBJ_TAG 'tag 557 OBJ_OFS_DELTA 'ofs-delta 558 OBJ_REF_DELTA 'ref-delta)) 559(define valid-types (for/list ([v (in-hash-values type-num->sym)]) v)) 560 561;; read-object : input-port tmp-info (hash-of integer obj) -> object 562(define (read-object i tmp obj-stream-poses) 563 (define obj-stream-pos (file-position i)) 564 (define c (read-byte-only 'type-and-size i)) 565 (define type (bitwise-and (arithmetic-shift c -4) #x7)) 566 (when (zero? type) (raise-git-error 'git-checkout "bad packfile type")) 567 (define init-len (bitwise-and c #xF)) 568 (define len 569 (if (msb-set? c) 570 (+ init-len (arithmetic-shift (read-integer i) 4)) 571 init-len)) 572 (define type-sym (hash-ref type-num->sym type)) 573 (define type-info 574 (case type-sym 575 [(ref-delta) 576 (read-bytes-exactly 'referenced-id 20 i)] 577 [(ofs-delta) 578 (define delta (read-offset-integer i)) 579 (hash-ref obj-stream-poses (- obj-stream-pos delta) 580 (lambda () (raise-git-error 'git-checkout "OBJ_OFS_DELTA object not found")))] 581 [else #f])) 582 (define obj 583 (save-object (lambda (o) (zlib-inflate i o)) len type-sym type-info tmp)) 584 (hash-set! obj-stream-poses obj-stream-pos obj) 585 obj) 586 587;; save-object : (output-port ->) integer symbol any tmp-info -> object 588(define (save-object write-data len type-sym type-info tmp) 589 (define filename (~a (case type-sym 590 [(ref-delta ofs-delta) "delta"] 591 [else "obj"]) 592 (increment-object-count! tmp))) 593 (define location 594 (call-with-output-object 595 tmp 596 filename 597 len 598 write-data)) 599 (construct-object location type-sym type-info len tmp)) 600 601;; To build an `object`, we need to construct a SHA-1 from the object 602;; content, which is in a file in `tmp` 603(define (construct-object filename type-sym type-info size tmp) 604 (object filename type-sym type-info 605 (call-with-input-object 606 tmp 607 filename 608 (lambda (i) 609 (define prefix (~a type-sym " " size "\0")) 610 (sha1-bytes (input-port-append #f 611 (open-input-string prefix) 612 i)))) 613 #f)) 614 615;; rewrite-deltas : (listof object) tmp status -> (hashof bytes object) 616;; Given a mapping from ids to objects, combine each "delta" file with 617;; a referenced object to create a new object file. The deltas, 618;; referenced objects, and generated objects all are in `tmp`. The 619;; result is an id-to-object mapping that includes all the given 620;; objects plus the generated ones. 621(define (rewrite-deltas objs tmp status) 622 (status "Applying deltas") 623 (define ids (hash-copy 624 (for/hash ([obj (in-list objs)]) 625 (values (object-id obj) obj)))) 626 (for ([obj (in-list objs)]) 627 (case (object-type obj) 628 [(ref-delta ofs-delta) 629 (define base-obj-id (if (eq? (object-type obj) 'ref-delta) 630 ;; Base object is referenced directly: 631 (object-type-info obj) 632 ;; We have to find the object generated by another 633 ;; "object", where the "object" may be a delta: 634 (let ([v (object-type-info obj)]) 635 (set-object-type-info! obj (object-id v)) 636 (case (object-type v) 637 [(ref-delta ofs-delta) (object-undelta v)] 638 [else (object-id v)])))) 639 (define base-obj (hash-ref ids base-obj-id)) 640 (define new-filename (~a "obj" (increment-object-count! tmp))) 641 (call-with-input-object 642 tmp 643 ;; the delta file: 644 (object-location obj) 645 (lambda (i) 646 (call-with-input-object 647 tmp 648 ;; apply delta to this base object: 649 (object-location base-obj) 650 (lambda (src-in) 651 (define src-len (read-integer i)) 652 (define dest-len (read-integer i)) 653 (define location 654 (call-with-output-object 655 tmp 656 ;; write to this new object: 657 new-filename 658 dest-len 659 (lambda (o) 660 ;; Each delta command is either "copy" or "insert" 661 (let loop () 662 (define c (read-byte i)) 663 (cond 664 [(eof-object? c) 665 (void)] 666 [(msb-set? c) 667 ;; Copy 668 (define src-offset (read-number-by-bits i (bitwise-and c #xF))) 669 (define raw-src-len (read-number-by-bits i (bitwise-and (arithmetic-shift c -4) 670 #x7))) 671 (define src-len (if (zero? raw-src-len) #x10000 raw-src-len)) 672 (file-position src-in src-offset) 673 (copy-port-n src-in o src-len) 674 (loop)] 675 [else 676 ;; Insert 677 (copy-port-n i o c) 678 (loop)]))))) 679 ;; Add the generated object to our table: 680 (define new-obj (construct-object location (object-type base-obj) #f 681 dest-len tmp)) 682 (hash-set! ids (object-id new-obj) new-obj) 683 ;; Record undelta id: 684 (set-object-undelta! obj (object-id new-obj))))))])) 685 ids) 686 687;; ---------------------------------------- 688;; Finding a commit id 689 690(define (find-commit-as-reference ref obj-ids fail-not-found) 691 (define rx (id-ref->regexp ref)) 692 (define matches 693 (for/list ([(id obj) (in-hash obj-ids)] 694 #:when (eq? 'commit (object-type obj)) 695 #:when (regexp-match? rx (bytes->hex-string id))) 696 (bytes->hex-string id))) 697 (cond 698 [(= 1 (length matches)) (car matches)] 699 [(null? matches) 700 (if fail-not-found 701 (fail-not-found) 702 (raise-git-error 'git-checkout "no commit found matching id: ~a" ref))] 703 [else 704 (raise-git-error 'git-checkout "found multiple commits matching id: ~a" ref)])) 705 706(define (id-ref->regexp ref) 707 (regexp (~a "^" (regexp-quote (string-downcase ref))))) 708 709;; ---------------------------------------- 710;; Extract a checkout tree 711 712;; extract-commit-tree : bytes (hash/c bytes object) tmp-info path -> void 713;; Extract the designated commit to `dest-dir`, using objects from `tmp` 714(define (extract-commit-tree obj-id obj-ids tmp dest-dir strict-links?) 715 (define obj (hash-ref obj-ids obj-id)) 716 (case (object-type obj) 717 [(commit) 718 (define-values (tree-id-str parent-id-strs) 719 (call-with-input-object 720 tmp 721 (object-location obj) 722 (lambda (i) 723 (extract-commit-info i obj-id)))) 724 (define tree-id (hex-string->bytes tree-id-str)) 725 (extract-tree tree-id obj-ids tmp dest-dir strict-links?)] 726 [(tag) 727 (define commit-id-bstr 728 (call-with-input-object 729 tmp 730 (object-location obj) 731 (lambda (i) 732 (define m (regexp-try-match #px"^object ([0-9a-fA-F]{40})" i)) 733 (unless m 734 (raise-git-error 'git-checkout "cannot extract commit from tag file for ~s" 735 (bytes->hex-string obj-id))) 736 (cadr m)))) 737 (define commit-id (hex-string->bytes (bytes->string/utf-8 commit-id-bstr))) 738 (extract-commit-tree commit-id obj-ids tmp dest-dir strict-links?)] 739 [(tree) 740 (extract-tree obj-id obj-ids tmp dest-dir strict-links?)] 741 [else 742 (raise-git-error 'git-checkout "cannot extract tree from ~a: ~s" 743 (object-type obj) 744 (bytes->hex-string obj-id))])) 745 746;; extract-commit-info: input-port bytes -> string (listof string) 747;; Returns the commit's tree and parent ids. 748;; The `obj-id` argument is used for error reporting, only. 749(define (extract-commit-info i obj-id) 750 (define m (regexp-try-match #px"^tree ([0-9a-fA-F]{40})" i)) 751 (unless m 752 (raise-git-error 'git-checkout 753 (~a "cannot extract tree from commit file for ~s\n" 754 " content starts: ~s") 755 (bytes->hex-string obj-id) 756 (peek-bytes 64 0 i))) 757 (values 758 ;; tree id string: 759 (bytes->string/utf-8 (cadr m)) 760 ;; Loop for parent ids strings: 761 (let loop () 762 (define m (regexp-try-match #px"^\nparent ([0-9a-fA-F]{40})" i)) 763 (if m 764 (cons (bytes->string/utf-8 (cadr m)) 765 (loop)) 766 null)))) 767 768;; extract-tree : bytes (hash/c bytes object) tmp-info path -> void 769;; Extract the designated tree to `dest-dir`, using objects from `tmp` 770(define (extract-tree tree-id obj-ids tmp dest-dir strict-links?) 771 (make-directory* dest-dir) 772 (define tree-obj (hash-ref obj-ids tree-id)) 773 (call-with-input-object 774 tmp 775 (object-location tree-obj) 776 (lambda (i) 777 (let loop () 778 (define-values (id mode fn) (extract-tree-entry i)) 779 (when id 780 (define (this-object-location) 781 (object-location (hash-ref obj-ids id))) 782 (define (copy-this-object perms) 783 (copy-object tmp 784 (this-object-location) 785 perms 786 (build-path dest-dir fn))) 787 (case (datum-intern-literal mode) 788 [(#"100755") #"755" 789 (copy-this-object #o755)] 790 [(#"100644" #"644") 791 (copy-this-object #o644)] 792 [(#"40000" #"040000") 793 (extract-tree id obj-ids tmp (build-path dest-dir fn) strict-links?)] 794 [(#"120000") 795 (define target (bytes->path (object->bytes tmp (this-object-location)))) 796 (when strict-links? 797 (check-unpack-path 'git-checkout target #:kind "link")) 798 (make-file-or-directory-link target (build-path dest-dir fn))] 799 [(#"160000") 800 ;; submodule; just make a directory placeholder 801 (make-directory* (build-path dest-dir fn))] 802 [else 803 (raise-git-error 'extract-tree "unknown mode: ~s" mode)]) 804 (loop)))))) 805 806;; extract-tree-entry: input-port -> bytes-or-#f bytes-or-#f path-or-#f 807(define (extract-tree-entry i) 808 (define m (regexp-try-match #px"^([0-7]{3,6}) ([^\0]+)\0" i)) 809 (cond 810 [m 811 (define id (read-bytes-exactly 'id 20 i)) 812 (define mode (cadr m)) 813 (define fn (bytes->path-element (caddr m))) 814 (values id mode fn)] 815 [else 816 (values #f #f #f)])) 817 818;; ---------------------------------------- 819;; ``Dumb'' HTTP(S) server support 820 821;; read-dumb-objects : (listof string) (hash-of string object) 822;; symbol string boolean integer string 823;; status maybe-save-objects 824;; -> (hash-of string object) 825;; Read the package files available on the server, then round up 826;; any additional loose objects that we'll need. 827(define (read-dumb-objects id-strs tmp 828 transport host verify? port repo 829 status maybe-save-objects) 830 (define conn (http-conn)) 831 (http-conn-open! conn 832 host 833 #:ssl? (if (eq? transport 'https) 834 (ssl-context verify?) 835 #f) 836 #:port port 837 #:auto-reconnect? #t) 838 839 (define packfiles 840 (get-packfile-list conn repo)) 841 842 (define packed-objects 843 (for/fold ([objects (hash)]) ([packfile (in-list packfiles)]) 844 (read-dumb-packfile packfile objects tmp conn repo status))) 845 846 (maybe-save-objects packed-objects "packed-objs") 847 848 (status "Downloading loose objects") 849 (define objects 850 (read-dumb-loose-objects id-strs packed-objects (make-hash) 851 tmp conn repo status)) 852 853 (http-conn-close! conn) 854 855 (for/hash ([obj (in-hash-values objects)]) 856 (values (object-id obj) obj))) 857 858;; get-packfile-list : conn string -> (listof string) 859;; Get a list of packfiles available from the server 860(define (get-packfile-list conn repo) 861 (define-values (status-line headers i) 862 (http-conn-sendrecv! conn 863 (~a "/" repo "/objects/info/packs"))) 864 (check-status status-line "error getting packfile list") 865 866 (for/list ([l (in-lines i)] 867 #:unless (equal? l "")) 868 (define m (regexp-match #rx"^P (.*)" l)) 869 (unless m (raise-git-error 'git-checkout "error parsing packfile list line\n line: ~e" l)) 870 (cadr m))) 871 872;; read-dumb-packfile : string (hashof string object) tmp conn strung status 873;; -> (hashof string object) 874;; Read a packfile and apply its deltas, producing an updated mapping of objects 875;; that we have unpacked so far. 876(define (read-dumb-packfile packfile objects tmp conn repo status) 877 (define-values (status-line headers i) 878 (http-conn-sendrecv! conn 879 (~a "/" repo "/objects/pack/" packfile))) 880 (check-status status-line (~a "error getting packfile " packfile)) 881 882 (define obj-list (read-packfile i tmp status #f)) 883 (define obj-ids (rewrite-deltas obj-list tmp status)) 884 885 ;; Add new objects to hash table: 886 (for/fold ([objects objects]) ([obj (in-hash-values obj-ids)]) 887 (hash-set objects (bytes->hex-string (object-id obj)) obj))) 888 889;; read-dumb-loose-objects : (listof string) (hash-of string object) 890;; (mutable-hash-of string #t) 891;; conn string status 892;; -> (hash-of string object) 893;; Traverse the tree, looking for extra objects (not supplied by a packfile) 894;; that we need to download 895(define (read-dumb-loose-objects id-strs objects seen tmp conn repo status) 896 (for/fold ([objects objects]) ([id-str (in-list id-strs)]) 897 (cond 898 [(hash-ref seen id-str #f) objects] 899 [else 900 (define obj 901 (cond 902 [(hash-ref objects id-str #f) 903 => (lambda (obj) obj)] 904 [else 905 (define-values (status-line headers compressed-i) 906 (http-conn-sendrecv! conn 907 (~a "/" repo 908 "/objects/" (substring id-str 0 2) 909 "/" (substring id-str 2)))) 910 (check-status status-line (format "error getting object ~a" id-str)) 911 912 ;; Set up decompression of stream: 913 (define-values (i decompressed-o) (make-pipe 4096)) 914 (define exn #f) 915 (define inflate-thread 916 (thread (lambda () 917 (dynamic-wind 918 void 919 (lambda () 920 (with-handlers ([values (lambda (x) (set! exn x))]) 921 (zlib-inflate compressed-i decompressed-o))) 922 (lambda () 923 (close-output-port decompressed-o) 924 (close-input-port compressed-i)))))) 925 926 ;; Parse the object description: 927 (define header-m (regexp-try-match #rx#"^[^\0]*\0" i)) 928 (unless header-m 929 (raise-git-error 'git-checkout "bad initial line for object content")) 930 (define header (car header-m)) 931 (define header-len (bytes-length header)) 932 (define type-sym (string->symbol 933 (bytes->string/utf-8 (car (regexp-match #rx"^[^ ]*" header))))) 934 (define data-len (string->number 935 (bytes->string/utf-8 (cadr (or (regexp-match #rx"[^ ]* ([0-9]+)" header) 936 '(#"" #"")))))) 937 (unless (memq type-sym valid-types) 938 (raise-git-error 'git-checkout "bad type: ~e" type-sym)) 939 940 (define obj 941 (save-object (lambda (o) (copy-port i o)) 942 data-len type-sym #f tmp)) 943 944 ;; Just in case: 945 (kill-thread inflate-thread) 946 (close-input-port compressed-i) 947 (when exn (raise exn)) 948 949 obj])) 950 951 ;; Add the (potentially) new object to out table: 952 (define new-objects (hash-set objects id-str obj)) 953 (hash-set! seen id-str #t) 954 955 ;; Inspect the new object, looking for additional objects to download: 956 (define id (object-id obj)) 957 (define (call-with-content proc) 958 (call-with-input-object 959 tmp 960 (object-location obj) 961 proc)) 962 (define more-id-strs 963 (case (object-type obj) 964 [(commit) 965 (define-values (tree parents) 966 (call-with-content (lambda (i) (extract-commit-info i id)))) 967 (cons tree parents)] 968 [(tree) 969 (call-with-content 970 (lambda (i) 971 (let loop () 972 (define-values (content-id mode fn) (extract-tree-entry i)) 973 (cond 974 [(not content-id) null] 975 [(equal? mode #"160000") 976 ;; don't try to get a submodule commit 977 (loop)] 978 [else 979 (cons (bytes->hex-string content-id) 980 (loop))]))))] 981 [else 982 null])) 983 984 (read-dumb-loose-objects more-id-strs new-objects seen 985 tmp conn repo status)]))) 986 987;; check-status : string string -> any 988;; Check an HTTP status result and complain if there's a problem 989(define (check-status status-line msg) 990 (define status (let ([m (regexp-match #rx"^[^ ]* ([0-9]+)" status-line)]) 991 (and m (string->number (bytes->string/utf-8 (cadr m)))))) 992 (unless (memv status '(200)) 993 (raise-git-error 'git-checkout "~a\n server respone: ~a" 994 msg 995 status-line))) 996 997;; ---------------------------------------- 998;; Temporary directory & database 999 1000(struct tmp-info (dir small-i small-o [pos #:mutable] [flush? #:mutable] [obj-counter #:mutable])) 1001 1002;; make-tmp-info : path -> tmp-info 1003(define (make-tmp-info tmp-dir #:fresh? [fresh? #f]) 1004 (define-values (i o) (open-input-output-file 1005 (build-path tmp-dir "objs-small") 1006 #:exists (if fresh? 'truncate 'update))) 1007 (file-stream-buffer-mode i 'none) 1008 (tmp-info tmp-dir i o 0 #f 0)) 1009 1010;; close-tmp-info : tmp-info -> void 1011(define (close-tmp-info tmp) 1012 (close-input-port (tmp-info-small-i tmp)) 1013 (close-output-port (tmp-info-small-o tmp))) 1014 1015;; increment-object-count! : tmp-info -> integer 1016(define (increment-object-count! tmp) 1017 (define n (add1 (tmp-info-obj-counter tmp))) 1018 (set-tmp-info-obj-counter! tmp n) 1019 n) 1020 1021;; call-with-output-object : tmp-info string natural (output-port -> any) -> location 1022(define (call-with-output-object tmp filename len proc) 1023 (define (check-len got-len) 1024 (unless (= len got-len) 1025 (raise-git-error 'git-checkout "size mismatch\n expected: ~a\n received: ~a" 1026 len 1027 got-len))) 1028 (cond 1029 [(len . < . 256) 1030 (define location (tmp-info-pos tmp)) 1031 (define s (open-output-bytes)) 1032 (proc s) 1033 (let ([bstr (get-output-bytes s)]) 1034 (check-len (bytes-length bstr)) 1035 (define o (tmp-info-small-o tmp)) 1036 (file-position o location) 1037 (write-bytes bstr o) 1038 (set-tmp-info-pos! tmp (+ len (tmp-info-pos tmp))) 1039 (set-tmp-info-flush?! tmp #t)) 1040 (cons location len)] 1041 [else 1042 (define path (build-path (tmp-info-dir tmp) filename)) 1043 (call-with-output-file* path proc #:exists 'truncate) 1044 (check-len (file-size path)) 1045 filename])) 1046 1047;; call-with-input-object : tmp-info location (input-port -> X) -> X 1048(define (call-with-input-object tmp location proc) 1049 (cond 1050 [(pair? location) 1051 (define bstr (object->bytes tmp location)) 1052 (proc (open-input-bytes bstr))] 1053 [else 1054 (call-with-input-file* (build-path (tmp-info-dir tmp) location) proc)])) 1055 1056;; copy-object : tmp-info location integer path -> void 1057(define (copy-object tmp location perms dest-file) 1058 (cond 1059 [(pair? location) 1060 (define bstr (object->bytes tmp location)) 1061 (call-with-output-file* 1062 dest-file 1063 #:exists 'truncate 1064 (lambda (o) (write-bytes bstr o)))] 1065 [else 1066 (copy-file (build-path (tmp-info-dir tmp) location) 1067 dest-file 1068 #t)]) 1069 (unless (equal? 'windows (system-type 'os)) 1070 (file-or-directory-permissions dest-file perms))) 1071 1072;; object->bytes : tmp-info location -> bytes 1073(define (object->bytes tmp location) 1074 (cond 1075 [(pair? location) 1076 (when (tmp-info-flush? tmp) 1077 (flush-output (tmp-info-small-o tmp)) 1078 (set-tmp-info-flush?! tmp #f)) 1079 (define i (tmp-info-small-i tmp)) 1080 (file-position i (car location)) 1081 (read-bytes (cdr location) i)] 1082 [else 1083 (file->bytes (build-path (tmp-info-dir tmp) location))])) 1084 1085;; ---------------------------------------- 1086;; Utils 1087 1088(define (read-bytes-exactly what len i) 1089 (define bstr (read-bytes len i)) 1090 (unless (and (bytes? bstr) 1091 (= (bytes-length bstr) len)) 1092 (raise-git-error 'git-checkout (~a "error getting bytes for ~a\n" 1093 " expected length: ~a\n" 1094 " got length: ~a") 1095 what 1096 len 1097 (if (eof-object? bstr) 1098 eof 1099 (bytes-length bstr)))) 1100 bstr) 1101 1102(define (read-byte-only what i) 1103 (define c (read-byte i)) 1104 (unless (byte? c) 1105 (raise-git-error 'git-checkout "expected to get a byte for ~a, got enf-of-file" what)) 1106 c) 1107 1108;; copy-port-n : input-port output-port natural -> void 1109(define (copy-port-n i o n) 1110 (cond 1111 [(n . <= . 4096) 1112 (define bstr (read-bytes n i)) 1113 (unless (and (bytes? bstr) 1114 (= (bytes-length bstr) n)) 1115 (raise-git-error 'git-checkout "not enough bytes during copy")) 1116 (write-bytes bstr o)] 1117 [else 1118 (copy-port-n i o 4096) 1119 (copy-port-n i o (- n 4096))])) 1120 1121(define (msb-set? c) 1122 (bitwise-bit-set? c 7)) 1123 1124;; A common integer encoding is a sequence of 7-bit 1125;; piecs of the number, where the most-significant bit 1126;; indicates whether the number continues 1127(define (read-integer i) 1128 (let loop ([amt 0] [shift 0]) 1129 (define c (read-byte i)) 1130 (cond 1131 [(eof-object? c) amt] 1132 [else 1133 (define new-amt (+ amt 1134 (arithmetic-shift (bitwise-and c #x7F) shift))) 1135 (if (msb-set? c) 1136 (loop new-amt (+ shift 7)) 1137 new-amt)]))) 1138 1139;; Similar to read-integer, but for (negative) offsets 1140(define (read-offset-integer i) 1141 (define c (read-byte i)) 1142 (cond 1143 [(eof-object? c) 0] 1144 [else 1145 (define delta (bitwise-and c #x7F)) 1146 (cond 1147 [(not (msb-set? c)) delta] 1148 [else 1149 (let loop ([delta delta]) 1150 (define c (read-byte i)) 1151 (cond 1152 [(eof-object? c) delta] 1153 [else 1154 (let ([delta (+ (arithmetic-shift (+ delta 1) 7) 1155 (bitwise-and c #x7F))]) 1156 (if (msb-set? c) 1157 (loop delta) 1158 delta))]))])])) 1159 1160;; Another number format, where a bitmap `n` indicates 1161;; when to read a byte 1162(define (read-number-by-bits i n) 1163 (cond 1164 [(zero? n) 0] 1165 [else 1166 (+ (if (bitwise-bit-set? n 0) 1167 (read-byte i) 1168 0) 1169 (arithmetic-shift (read-number-by-bits i (arithmetic-shift n -1)) 1170 8))])) 1171 1172;; ADLER32 implementation 1173;; https://www.ietf.org/rfc/rfc1950.txt 1174(define (adler32-through-ports in out) 1175 (define ADLER 65521) 1176 (define bstr (make-bytes 4096)) 1177 (let loop ([s1 1] [s2 0]) 1178 (define n (read-bytes! bstr in)) 1179 (cond 1180 [(eof-object? n) 1181 (bitwise-ior (arithmetic-shift s2 16) s1)] 1182 [else 1183 (write-bytes bstr out 0 n) 1184 (define-values (new-s1 new-s2) 1185 (for/fold ([s1 s1] 1186 [s2 s2]) 1187 ([bits (in-bytes bstr 0 n)]) 1188 (define a (modulo (+ s1 bits) ADLER)) 1189 (define b (modulo (+ s2 a) ADLER)) 1190 (values a b))) 1191 (loop new-s1 new-s2)]))) 1192 1193;; zlib-inflate : input-port output-port 1194;; Reads compressed data from `i`, writes uncompressed to `o` 1195(define (zlib-inflate i o) 1196 (define cmf (read-byte-only 'zlib-cmf i)) 1197 (define flg (read-byte-only 'zlib-flag i)) 1198 (unless (= 8 (bitwise-and cmf #xF)) 1199 (raise-git-error 'git-checkout "compression is not `deflate`")) 1200 (when (bitwise-bit-set? flg 5) 1201 ;; read dictid 1202 (read-bytes-exactly 'dictid 4 i)) 1203 ;; Include adler32 checksum in the pipeline, writing to `o`: 1204 (define-values (checksum-in checksum-out) (make-pipe 4096)) 1205 (define uncompressed-adler #f) 1206 (define checksum-thread 1207 (thread 1208 (lambda () (set! uncompressed-adler (adler32-through-ports checksum-in o))))) 1209 ;; Inflate, sending output to checksum (and then to `o`): 1210 (inflate i checksum-out) 1211 (close-output-port checksum-out) 1212 (sync checksum-thread) 1213 ;; Verify checksum 1214 (define adler (read-bytes-exactly 'adler-checksum 4 i)) 1215 (unless (= (integer-bytes->integer adler #f #t) 1216 uncompressed-adler) 1217 (raise-git-error 'git-checkout "adler32 checksum failed")) 1218 (void)) 1219 1220;; ---------------------------------------- 1221 1222(module+ main 1223 (require racket/cmdline) 1224 1225 (define depth 1) 1226 (define ref "master") 1227 (define tmp-dir #f) 1228 (define transport 'git) 1229 (define status-printf 1230 (lambda args 1231 (apply printf args) 1232 (flush-output))) 1233 1234 (define-values (host repo dest) 1235 (command-line 1236 #:once-any 1237 [("--git") "Use the Git transport (the default)" 1238 (set! transport 'git)] 1239 [("--http") "Use the \"smart\" HTTP transport" 1240 (set! transport 'http)] 1241 [("--https") "Use the \"smart\" HTTPS transport" 1242 (set! transport 'https)] 1243 #:once-each 1244 [("--depth") d "Commit depth of <d> (default is 1, 0 means \"all\")" 1245 (set! depth (string->number d)) 1246 (unless (exact-nonnegative-integer? depth) 1247 (raise-user-error 'git-checkout "bad depth: ~a" d))] 1248 [("--ref") branch/tag/commit "Checkout specified commit" 1249 (set! ref branch/tag/commit)] 1250 [("--tmp") dir "Write temporary files to <dir>" 1251 (set! tmp-dir dir)] 1252 [("-u" "--username") username "Username used to authenticate over HTTP(S)" 1253 (current-git-username username)] 1254 [("-p" "--password") password "Password used to authenticate over HTTP(S)" 1255 (current-git-password password)] 1256 [("--quiet") "Suppress status printouts" 1257 (set! status-printf void)] 1258 #:args (host repo dest) 1259 (values host repo dest))) 1260 1261 (git-checkout host repo 1262 #:transport transport 1263 #:dest-dir dest 1264 #:tmp-dir tmp-dir 1265 #:ref ref 1266 #:depth (if (eq? 0 depth) #f depth) 1267 #:status-printf status-printf)) 1268