1#lang racket/base
2(require racket/tcp
3         racket/format
4         racket/file
5         racket/port
6         racket/string
7         file/gunzip
8         file/private/check-path
9         openssl/sha1
10         net/base64
11         net/url
12         net/head
13         net/http-client
14         (only-in net/url-connect current-https-protocol))
15
16;; Stefan Saasen's "Reimplementing 'git clone' in Haskell from the bottom up"
17;;  http://stefan.saasen.me/articles/git-clone-in-haskell-from-the-bottom-up/
18;; provided many helpful hints for this implementation.
19
20(provide git-checkout
21         current-git-username
22         current-git-password
23         (struct-out exn:fail:git))
24
25(define-logger git-checkout)
26
27(define current-git-username (make-parameter #f))
28(define current-git-password (make-parameter #f))
29
30(struct exn:fail:git exn:fail () #:transparent)
31
32(define (raise-git-error name fmt . vals)
33  (raise (exn:fail:git (apply format (string-append "~s: " fmt) name vals)
34                       (current-continuation-marks))))
35
36;; Like `git clone`, but producing just the checkout
37(define (git-checkout host
38                      repo
39                      #:dest-dir dest-dir ; #f => only find checkout
40                      #:transport [transport 'git]
41                      #:ref [ref/head 'head]
42                      #:initial-search-ref [initial-search-ref "master"]
43                      #:depth [given-depth 1]
44                      #:status-printf [status-printf (lambda args
45                                                       (apply printf args)
46                                                       (flush-output))]
47                      #:initial-error [initial-error #f]
48                      #:tmp-dir [given-tmp-dir #f]
49                      #:clean-tmp-dir? [clean-tmp-dir? (not given-tmp-dir)]
50                      #:verify-server? [verify? #t]
51                      #:port [given-port #f]
52                      #:strict-links? [strict-links? #f]
53                      #:username [username (current-git-username)]
54                      #:password [password (current-git-password)])
55  (let retry-loop ([given-depth given-depth] [try-limit-depth (and given-depth 8)] [try-search-only-ref initial-search-ref])
56    (define tmp-dir (or given-tmp-dir
57                        (make-temporary-file "git~a" 'directory)))
58    (define port (or given-port (case transport
59                                  [(git) 9418]
60                                  [(http) 80]
61                                  [(https) 443])))
62
63    (define (status fmt . args)
64      (define msg (apply format fmt args))
65      (status-printf "~a\n" msg)
66      (log-git-checkout-info msg))
67
68    (status "Contacting ~a" host)
69    (define-values (i o dumb-protocol?)
70      (initial-connect transport host verify? port repo status username password))
71    ((let/ec esc
72       (dynamic-wind
73        void
74        (lambda ()
75          (status "Getting refs~a" (if dumb-protocol? " [dumb protocol]" ""))
76          (write-pkt o
77                     "git-upload-pack " "/" repo "\0"
78                     "host=" host "\0")
79          (define pkts (if dumb-protocol?
80                           ;; dumb protocol provides plain lines:
81                           (for/list ([l (in-lines i)]) (string-append l "\n"))
82                           ;; smart protocol provides packets:
83                           (read-pkts i)))
84          (unless (pair? pkts)
85            (raise-git-error 'git-checkout "no initial pkts from the server"))
86
87          ;; Parse server's initial reply
88          (define server-capabilities (parse-server-capabilities (car pkts)))
89          (define refs ; (list (list <name> <ID>) ...)
90            (parse-initial-refs pkts initial-error))
91
92          (define ref (head->ref ref/head
93                                 server-capabilities dumb-protocol?
94                                 transport host verify? port repo username password
95                                 status))
96
97          ;; Find the commits needed for `ref`:
98          (define-values (ref-commit    ; #f or an ID string
99                          want-commits) ; list of ID string
100            (select-commits ref refs server-capabilities status try-search-only-ref repo))
101
102          (unless dest-dir
103            (write-pkt o) ; clean termination
104            ((esc (lambda ()
105                    ;; If we get this far and `ref-commit` is #f,
106                    ;; then `ref` looks like a commit
107                    (or ref-commit ref)))))
108
109          (define depth (and given-depth
110                             (or ref-commit (and try-limit-depth
111                                                 (eq? given-depth 1)))
112                             (cond
113                               [(member "shallow" server-capabilities)
114                                (if ref-commit
115                                    given-depth
116                                    try-limit-depth)]
117                               [else
118                                (status "Server does not support `shallow`")
119                                #f])))
120
121          (unless dumb-protocol?
122            ;; Tell the server which commits we need
123            (set!-values (i o) (want-step transport host port repo i o))
124            (for ([want-commit (in-list want-commits)]
125                  [pos (in-naturals)])
126              (write-pkt o "want " want-commit (if (zero? pos) " " "") "\n"))
127            (when depth
128              (write-pkt o "deepen " depth "\n"))
129            (write-pkt o)
130
131            ;; Tell the server that we're ready for the objects
132            (write-pkt o "done\n")
133            (set!-values (i o) (done-step transport host verify? port repo username password i o))
134
135            (when depth
136              ;; If we wrote `deepen`, then the server replies with `shallow`s.
137              ;; Note that these were available before writing `done` in the
138              ;; case of the 'git transport, but it works here for all transports.
139              (let loop ()
140                (define r (read-pkt i))
141                (cond
142                 [(eof-object? r)
143                  (void)]
144                 [(regexp-match? #rx"^shallow " r)
145                  (loop)]
146                 [else
147                  (raise-git-error 'git-checkout "expected shallow, got ~s" r)])))
148
149            ;; Tell the server that we're ready for the objects
150            (define nak (read-pkt i))
151            (unless (or (eof-object? nak) ; happens when `want-commits` is empty
152                        (equal? #"NAK\n" nak))
153              (raise-git-error 'git-checkout "expected NAK, got ~s" nak)))
154
155          (make-directory* tmp-dir)
156          (define tmp (make-tmp-info tmp-dir #:fresh? #t))
157
158          (define (maybe-save-objects objs fn)
159            (unless clean-tmp-dir?
160              (call-with-output-file*
161               (build-path tmp-dir fn)
162               #:exists 'truncate
163               (lambda (o) (write objs o) (newline o)))))
164
165          (dynamic-wind
166           void
167           (lambda ()
168             (define obj-ids
169               (cond
170                [dumb-protocol?
171                 (read-dumb-objects want-commits tmp
172                                    transport host verify? port repo
173                                    status maybe-save-objects)]
174                [else
175                 ;; Read packfile pbjects, which are written into
176                 ;; `tmp-dir`. If `depth` gives the server trouble,
177                 ;; we might get an EOF, in which case we'll try again:
178                 (define objs
179                   (read-packfile i tmp status
180                                  (and depth
181                                       (lambda ()
182                                         (esc (lambda ()
183                                                (status "Unexpected EOF; retrying without depth")
184                                                (retry-loop #f #f #f)))))))
185                 (maybe-save-objects objs "objs")
186
187                 ;; Convert deltas into full objects within `tmp`:
188                 (rewrite-deltas objs tmp status)]))
189
190             (maybe-save-objects obj-ids "all-objs")
191
192             (define commit
193               (or ref-commit
194                   (find-commit-as-reference ref obj-ids
195                                             (and (or try-search-only-ref
196                                                      (and try-limit-depth
197                                                           (eqv? depth try-limit-depth)))
198                                                  (lambda ()
199                                                    (esc (lambda ()
200                                                           (cond
201                                                             [(and depth (eqv? depth try-limit-depth)
202                                                                   (try-limit-depth . < . 32))
203                                                              (status "no matching commit found; trying deeper search")
204                                                              (retry-loop given-depth (* try-limit-depth 2) try-search-only-ref)]
205                                                             [else
206                                                              (status "no matching commit found; trying broader search")
207                                                              (retry-loop given-depth #f #f)]))))))))
208
209             ;; Extract the tree from the packfile objects:
210             (status "Extracting tree to ~a" dest-dir)
211             (extract-commit-tree (hex-string->bytes commit)
212                                  obj-ids tmp dest-dir
213                                  strict-links?)
214
215             ;; Done; return checkout id
216             (lambda () commit))
217           (lambda ()
218             (status "Cleaning up")
219             (close-tmp-info tmp)
220             (when clean-tmp-dir?
221               (delete-directory/files tmp-dir)))))
222        (lambda ()
223          (close-input-port i)
224          (close-output-port o)))))))
225
226;; ----------------------------------------
227;; Transports: git, http, and https
228
229(define (http-request-headers username password)
230  ;; bitbucket.org seems to require a "git" value for "User-Agent",
231  ;; otherwise it returns a "broken link" web page
232  (define base-headers '("User-Agent: git/1.9"))
233  ;; include an Authorization header if credentials are provided
234  (if (and username password)
235      (cons (~a "Authorization: Basic " (base64-encode (string->bytes/utf-8 (~a username ":" password)) #""))
236            base-headers)
237      base-headers))
238
239;; initial-connect: transport-sym string bool natural string status-proc string string
240;;                  -> (values input-port output-port boolean)
241;;  Contacts the server and returns an output port for writing
242;;  the request (ignored if not needed for the the transport)
243;;  and an input port from reading the available references. The
244;;  boolean result indicates whether the protocol is "dumb".
245(define (initial-connect transport host verify? port repo status username password)
246  (case transport
247    [(git)
248     (define-values (i o) (tcp-or-tunnel-connect "git" host port))
249     (values i o #f)]
250    [(http https)
251     (define url-str
252       (~a transport "://" host ":" port "/" repo
253           "/info/refs?service=git-upload-pack"))
254     (define-values (i headers)
255       (parameterize ([current-https-protocol (ssl-context verify?)])
256         (get-pure-port/headers (string->url url-str)
257                                (http-request-headers username password)
258                                #:redirections 5)))
259     (define ok? #f)
260     (dynamic-wind
261      void
262      (lambda ()
263        (define dumb?
264          (cond
265           [(equal? (extract-field "Content-Type" headers)
266                    "application/x-git-upload-pack-advertisement")
267            ;; "smart" protocol
268            (unless (regexp-match-peek #px#"^[0-9a-f]{4}#" i)
269              (raise-git-error 'git-checkout (~a "error reading repository content;\n"
270                                                 " response is not consistent with the Git protocol\n"
271                                                 "  initial portion: ~s")
272                               (read-bytes 640 i)))
273            (define pkt (read-pkt i))
274            (define term-pkt (read-pkt i))
275            (unless (eof-object? term-pkt)
276              (raise-git-error 'git-checkout (~a "expected a null packet, received something else\n"
277                                                 "  packet: ~s")
278                               term-pkt))
279            #f]
280           [else
281            ;; "dumb" protocol
282            #t]))
283        (set! ok? #t)
284        (values i (open-output-nowhere) dumb?))
285      (lambda ()
286        (unless ok? (close-input-port i))))]
287    [else
288     (raise-git-error 'git-checkout "unrecognized transport\n  given: ~e" transport)]))
289
290;; want-step: transport-sym string natural string input-port output-port
291;;            -> (values input-port output-port)
292;;  Replaces the connection, if appropriate to the transport, for
293;;  writing the wanted references.
294(define (want-step transport host port repo i o)
295  (case transport
296    [(git) (values i o)]
297    [(http https)
298     (close-input-port i)
299     (values (open-input-bytes #"") (open-output-bytes))]))
300
301;; done-step: transport-sym string bool natural string string string input-port output-port
302;;            -> (values input-port output-port)
303;;  Replaces the connection, if appropriate to the transport, after
304;;  writing the wanted references and before reading the server's
305;;  response.
306(define (done-step transport host verify? port repo username password i o)
307  (case transport
308    [(git) (values i o)]
309    [(http https)
310     (define s (get-output-bytes o))
311     (define i
312       (parameterize ([current-https-protocol (ssl-context verify?)])
313         (post-pure-port
314          (string->url
315           (~a transport "://" host ":" port "/" repo
316               "/git-upload-pack"))
317          s
318          (append
319           (http-request-headers username password)
320           (list "Content-Type: application/x-git-upload-pack-request")))))
321     (values i (open-output-nowhere))]))
322
323;; converts 'head to a branch/tag/commit ref
324(define (head->ref ref/head
325                   server-capabilities dumb-protocol?
326                   transport host verify? port repo username password
327                   status)
328  (cond
329    [(eq? ref/head 'head)
330     (or
331      ;; Git 1.8.5 and later (smart protocol) maps HEAD in capabilities
332      (for/or ([cap (in-list server-capabilities)])
333        (define m (regexp-match #rx"^symref=HEAD:(.*)$" cap))
334        (and m (refspec->ref (cadr m))))
335      ;; dumb protocol: fetch the "HEAD" reference
336      (case (and dumb-protocol? transport)
337        [(http https)
338         (status "Getting HEAD")
339         (define i
340           (parameterize ([current-https-protocol (ssl-context verify?)])
341             (get-pure-port
342              (string->url
343               (~a transport "://" host ":" port "/" repo
344                   "/HEAD"))
345              (append
346               (http-request-headers username password)
347               (list "Content-Type: application/x-git-upload-pack-request")))))
348         (define s (port->string i))
349         (define m (regexp-match #rx"(?m:^ref: (.*)$)" s))
350         (and m (refspec->ref (cadr m)))]
351        [else #f])
352      ;; If all else fails, keep 'head and try to match "HEAD" in refs
353      'head)]
354    [else ref/head]))
355
356(define (ssl-context verify?)
357  (cond
358   [(or (not verify?)
359        (getenv "GIT_SSL_NO_VERIFY"))
360    (current-https-protocol)]
361   [else
362    'secure]))
363
364(define (refspec->ref refspec)
365  (cond
366    [(regexp-match #rx"^refs/(?:heads|tags)/(.*)$" refspec)
367     => (lambda (m) (cadr m))]
368    [else refspec]))
369
370;; ----------------------------------------
371
372;; parse-server-capabilities : bytes -> (listof string)
373(define (parse-server-capabilities first-pkt)
374  (let ([m (regexp-match #rx#"\0(.*)\n$" first-pkt)])
375    (cond
376     [m (string-split (bytes->string/utf-8 (cadr m)))]
377     [else null])))
378
379;; parse-initial-refs : (listof bytes) -> (listof (list bytes string))
380;;  In each element of the returned list, first item is
381;;  the branch or tag name, second is the ID
382(define (parse-initial-refs pkts initial-error)
383  (filter
384   values
385   (for/list ([pkt (in-list pkts)])
386     (define m (regexp-match #px#"^([0-9a-fA-F]{40})[ \t]([^\0\n]+)[\0\n]" pkt))
387     (unless m
388       (when initial-error (initial-error))
389       (raise-git-error 'git-checkout "could not parse ref pkt\n  pkt: ~s" pkt))
390     (define name (caddr m))
391     (define id (bytes->string/utf-8 (cadr m)))
392     (cond
393      [(regexp-match? #rx#"\\^{}$" name)
394       ;; drop parent references
395       #f]
396      [else (list name id)]))))
397
398;; select-commits : (or/c string 'head) (listof (list bytes string)) ....
399;;                  -> (values string-or-#f (listof string))
400;;  Convert the user's request `ref`, which is a branch or tag or ID,
401;;  into a specific ID --- if we can determine it from the server's
402;;  initial response. If we can, the list of requested IDs will be
403;;  just that one. Otherwise, we'll have to return a list of all
404;;  IDs, and then we'll look for the reference later.
405(define (select-commits ref refs server-capabilities status try-search-only-ref repo)
406  (define ref-looks-like-id? (and (string? ref)
407                                  (regexp-match? #rx"^[0-9a-f]+$" ref)))
408  (define ref-rx (cond
409                   [(eq? ref 'head)
410                    ;; some servers report "HEAD" early, and we can
411                    ;; expect it early in the list; if that fails,
412                    ;; fall back to trying a "master" branch:
413                    #"^HEAD|refs/heads/master$"]
414                   [else (branch-or-tag->regexp ref)]))
415
416  (define ref-commit
417    (or
418     ;; Search list of branch and tag names:
419     (for/or ([ref (in-list refs)])
420       (and (regexp-match? ref-rx (car ref))
421            (cadr ref)))
422     ;; Try matching the references as a commit/tag ID of a branch or tag:
423     (and (string? ref)
424          (let ([rx (id-ref->regexp ref)])
425            (for/or ([a-ref (in-list refs)])
426              (and (regexp-match? rx (cadr a-ref))
427                   (begin
428                     (status "Commit id ~s matches ~a" ref (car a-ref))
429                     (cadr a-ref))))))))
430  (define want-commits
431    (cond
432     [ref-commit (list ref-commit)]
433     [ref-looks-like-id?
434      (cond
435        [try-search-only-ref
436         (status "Requested reference looks like commit id; try within ~a" try-search-only-ref)
437         (define master-rx (branch-or-tag->regexp try-search-only-ref))
438         (cond
439           [(for/or ([ref (in-list refs)])
440              (regexp-match? master-rx (car ref)))
441            (define-values (master-ref-commit want-commits)
442              (select-commits try-search-only-ref refs '() status #f repo))
443            want-commits]
444           [else
445            (status "There does not appear to be a ~a branch or tag, however" try-search-only-ref)
446            ;; the branch to try appears not to exist
447            null])]
448        [else
449         (status "Requested reference looks like commit id; getting all commits")
450         (for/list ([ref (in-list refs)])
451           (cadr ref))])]
452     [else
453      (raise-git-error 'git "could not find requested reference\n  reference: ~a\n  repo: ~a" ref repo)]))
454
455  (values ref-commit want-commits))
456
457(define (branch-or-tag->regexp ref)
458  (byte-regexp (bytes-append
459                #"^refs/(?:heads|tags)/"
460                (regexp-quote (string->bytes/utf-8 ref))
461                #"$")))
462
463;; ----------------------------------------
464;; A "pkt" is the basic unit of communication in many parts
465;; of the git protocol. The first four bytes specify the
466;; length of the package (including those bytes).
467
468;; write-pkt : output-port any ... -> void
469;;  `display`s each argument to create a package
470(define (write-pkt o . args)
471  (define s (open-output-bytes))
472  (for ([arg (in-list args)])
473    (display arg s))
474  (define msg (get-output-bytes s))
475  (define len (bytes-length msg))
476  (define full-msg
477    (cond
478     [(zero? len) #"0000"]
479     [else
480      (define len-bstr (string->bytes/utf-8 (format "~x" (+ 4 len))))
481      (bytes-append (make-bytes (- 4 (bytes-length len-bstr))
482                                (char->integer #\0))
483                    len-bstr
484                    msg)]))
485  (write-bytes full-msg o)
486  (flush-output o))
487
488;; read-pkt : input-port -> bstr-or-eof
489;;  Reads one pkt, returning eof of the special "null" pkt
490(define (read-pkt i)
491  (define len-bstr (read-bytes 4 i))
492  (cond
493   [(eof-object? len-bstr) eof]
494   [else
495    (unless (and (bytes? len-bstr)
496                 (= 4 (bytes-length len-bstr)))
497      (raise-git-error 'git-checkout "error getting pkt length"))
498    (define len (string->number (bytes->string/utf-8 len-bstr #\?) 16))
499    (unless len
500      (raise-git-error 'git-checkout "error getting pkt length\n  length string: ~e" len-bstr))
501    (cond
502     [(= len 0) eof] ; flush pkt
503     [else
504      (define payload-len (- len 4))
505      (unless (payload-len . >= . 0)
506        (raise-git-error 'git-checkout "pkt length makes no sense\n  length: ~a" len))
507      (read-bytes-exactly 'payload payload-len i)])]))
508
509;; read a list of pkts until an empty packet is found
510(define (read-pkts i)
511  (define pkt (read-pkt i))
512  (if (or (eof-object? pkt)
513          (equal? #"" pkt))
514      null
515      (cons pkt (read-pkts i))))
516
517;; ----------------------------------------
518;; Packfile objects
519
520(struct object (location  ; filename within tmp or position in small-object file
521                type      ; 'blob, 'commit, etc.; see `type-num->sym`
522                [type-info #:mutable] ; #f, id, or object
523                id        ; sha1 as bytes
524                [undelta #:mutable])
525  #:prefab)
526
527;; read-packfile : input-port tmp-info status-proc (or/c #f (-> any)) -> (listof object)
528;;  The `initial-eof-handler` argument should escape, if it's not #f
529(define (read-packfile i tmp status initial-eof-handler)
530  (define pack-bstr (read-bytes 4 i))
531  (unless (equal? pack-bstr #"PACK")
532    (when (and (eof-object? pack-bstr)
533               initial-eof-handler)
534      (initial-eof-handler))
535    (raise-git-error 'git-checkout "header error\n  bytes: ~s" pack-bstr))
536  (define vers (read-bytes 4 i))
537  (unless (equal? vers #"\0\0\0\2")
538    (raise-git-error 'git-checkout "only version 2 supported"))
539  (define count-bstr (read-bytes-exactly 'count 4 i))
540  (define count (integer-bytes->integer count-bstr #t #t))
541  (define obj-stream-poses (make-hash)) ; for OBJ_OFS_DELTA references
542  (status "Getting ~a objects" count)
543  (for/list ([pos (in-range count)])
544    (read-object i tmp obj-stream-poses)))
545
546(define OBJ_COMMIT 1)
547(define OBJ_TREE 2)
548(define OBJ_BLOB 3)
549(define OBJ_TAG 4)
550(define OBJ_OFS_DELTA 6)
551(define OBJ_REF_DELTA 7)
552
553(define type-num->sym (hash OBJ_COMMIT 'commit
554                            OBJ_TREE 'tree
555                            OBJ_BLOB 'blob
556                            OBJ_TAG 'tag
557                            OBJ_OFS_DELTA 'ofs-delta
558                            OBJ_REF_DELTA 'ref-delta))
559(define valid-types (for/list ([v (in-hash-values type-num->sym)]) v))
560
561;; read-object : input-port tmp-info (hash-of integer obj) -> object
562(define (read-object i tmp obj-stream-poses)
563  (define obj-stream-pos (file-position i))
564  (define c (read-byte-only 'type-and-size i))
565  (define type (bitwise-and (arithmetic-shift c -4) #x7))
566  (when (zero? type) (raise-git-error 'git-checkout "bad packfile type"))
567  (define init-len (bitwise-and c #xF))
568  (define len
569    (if (msb-set? c)
570        (+ init-len (arithmetic-shift (read-integer i) 4))
571        init-len))
572  (define type-sym (hash-ref type-num->sym type))
573  (define type-info
574    (case type-sym
575     [(ref-delta)
576      (read-bytes-exactly 'referenced-id 20 i)]
577     [(ofs-delta)
578      (define delta (read-offset-integer i))
579      (hash-ref obj-stream-poses (- obj-stream-pos delta)
580                (lambda () (raise-git-error 'git-checkout "OBJ_OFS_DELTA object not found")))]
581     [else #f]))
582  (define obj
583    (save-object (lambda (o) (zlib-inflate i o)) len type-sym type-info tmp))
584  (hash-set! obj-stream-poses obj-stream-pos obj)
585  obj)
586
587;; save-object : (output-port ->) integer symbol any tmp-info -> object
588(define (save-object write-data len type-sym type-info tmp)
589  (define filename (~a (case type-sym
590                        [(ref-delta ofs-delta) "delta"]
591                        [else "obj"])
592                       (increment-object-count! tmp)))
593  (define location
594    (call-with-output-object
595     tmp
596     filename
597     len
598     write-data))
599  (construct-object location type-sym type-info len tmp))
600
601;; To build an `object`, we need to construct a SHA-1 from the object
602;; content, which is in a file in `tmp`
603(define (construct-object filename type-sym type-info size tmp)
604  (object filename type-sym type-info
605          (call-with-input-object
606           tmp
607           filename
608           (lambda (i)
609             (define prefix (~a type-sym " " size "\0"))
610             (sha1-bytes (input-port-append #f
611                                            (open-input-string prefix)
612                                            i))))
613          #f))
614
615;; rewrite-deltas : (listof object) tmp status -> (hashof bytes object)
616;; Given a mapping from ids to objects, combine each "delta" file with
617;; a referenced object to create a new object file. The deltas,
618;; referenced objects, and generated objects all are in `tmp`. The
619;; result is an id-to-object mapping that includes all the given
620;; objects plus the generated ones.
621(define (rewrite-deltas objs tmp status)
622  (status "Applying deltas")
623  (define ids (hash-copy
624               (for/hash ([obj (in-list objs)])
625                 (values (object-id obj) obj))))
626  (for ([obj (in-list objs)])
627    (case (object-type obj)
628      [(ref-delta ofs-delta)
629       (define base-obj-id (if (eq? (object-type obj) 'ref-delta)
630                               ;; Base object is referenced directly:
631                               (object-type-info obj)
632                               ;; We have to find the object generated by another
633                               ;; "object", where the "object" may be a delta:
634                               (let ([v (object-type-info obj)])
635                                 (set-object-type-info! obj (object-id v))
636                                 (case (object-type v)
637                                   [(ref-delta ofs-delta) (object-undelta v)]
638                                   [else (object-id v)]))))
639       (define base-obj (hash-ref ids base-obj-id))
640       (define new-filename (~a "obj" (increment-object-count! tmp)))
641       (call-with-input-object
642        tmp
643        ;; the delta file:
644        (object-location obj)
645        (lambda (i)
646          (call-with-input-object
647           tmp
648           ;; apply delta to this base object:
649           (object-location base-obj)
650           (lambda (src-in)
651             (define src-len (read-integer i))
652             (define dest-len (read-integer i))
653             (define location
654               (call-with-output-object
655                tmp
656                ;; write to this new object:
657                new-filename
658                dest-len
659                (lambda (o)
660                  ;; Each delta command is either "copy" or "insert"
661                  (let loop ()
662                    (define c (read-byte i))
663                    (cond
664                     [(eof-object? c)
665                      (void)]
666                     [(msb-set? c)
667                      ;; Copy
668                      (define src-offset (read-number-by-bits i (bitwise-and c #xF)))
669                      (define raw-src-len (read-number-by-bits i (bitwise-and (arithmetic-shift c -4)
670                                                                              #x7)))
671                      (define src-len (if (zero? raw-src-len) #x10000 raw-src-len))
672                      (file-position src-in src-offset)
673                      (copy-port-n src-in o src-len)
674                      (loop)]
675                     [else
676                      ;; Insert
677                      (copy-port-n i o c)
678                      (loop)])))))
679             ;; Add the generated object to our table:
680             (define new-obj (construct-object location (object-type base-obj) #f
681                                               dest-len tmp))
682             (hash-set! ids (object-id new-obj) new-obj)
683             ;; Record undelta id:
684             (set-object-undelta! obj (object-id new-obj))))))]))
685  ids)
686
687;; ----------------------------------------
688;; Finding a commit id
689
690(define (find-commit-as-reference ref obj-ids fail-not-found)
691  (define rx (id-ref->regexp ref))
692  (define matches
693    (for/list ([(id obj) (in-hash obj-ids)]
694               #:when (eq? 'commit (object-type obj))
695               #:when (regexp-match? rx (bytes->hex-string id)))
696      (bytes->hex-string id)))
697  (cond
698   [(= 1 (length matches)) (car matches)]
699   [(null? matches)
700    (if fail-not-found
701        (fail-not-found)
702        (raise-git-error 'git-checkout "no commit found matching id: ~a" ref))]
703   [else
704    (raise-git-error 'git-checkout "found multiple commits matching id: ~a" ref)]))
705
706(define (id-ref->regexp ref)
707  (regexp (~a "^" (regexp-quote (string-downcase ref)))))
708
709;; ----------------------------------------
710;; Extract a checkout tree
711
712;; extract-commit-tree : bytes (hash/c bytes object) tmp-info path -> void
713;;  Extract the designated commit to `dest-dir`, using objects from `tmp`
714(define (extract-commit-tree obj-id obj-ids tmp dest-dir strict-links?)
715  (define obj (hash-ref obj-ids obj-id))
716  (case (object-type obj)
717    [(commit)
718     (define-values (tree-id-str parent-id-strs)
719       (call-with-input-object
720        tmp
721        (object-location obj)
722        (lambda (i)
723          (extract-commit-info i obj-id))))
724     (define tree-id (hex-string->bytes tree-id-str))
725     (extract-tree tree-id obj-ids tmp dest-dir strict-links?)]
726    [(tag)
727     (define commit-id-bstr
728       (call-with-input-object
729        tmp
730        (object-location obj)
731        (lambda (i)
732          (define m (regexp-try-match #px"^object ([0-9a-fA-F]{40})" i))
733          (unless m
734            (raise-git-error 'git-checkout "cannot extract commit from tag file for ~s"
735                             (bytes->hex-string obj-id)))
736          (cadr m))))
737     (define commit-id (hex-string->bytes (bytes->string/utf-8 commit-id-bstr)))
738     (extract-commit-tree commit-id obj-ids tmp dest-dir strict-links?)]
739    [(tree)
740     (extract-tree obj-id obj-ids tmp dest-dir strict-links?)]
741    [else
742     (raise-git-error 'git-checkout "cannot extract tree from ~a: ~s"
743                      (object-type obj)
744                      (bytes->hex-string obj-id))]))
745
746;; extract-commit-info: input-port bytes -> string (listof string)
747;;  Returns the commit's tree and parent ids.
748;;  The `obj-id` argument is used for error reporting, only.
749(define (extract-commit-info i obj-id)
750  (define m (regexp-try-match #px"^tree ([0-9a-fA-F]{40})" i))
751  (unless m
752    (raise-git-error 'git-checkout
753                     (~a "cannot extract tree from commit file for ~s\n"
754                         "  content starts: ~s")
755                     (bytes->hex-string obj-id)
756                     (peek-bytes 64 0 i)))
757  (values
758   ;; tree id string:
759   (bytes->string/utf-8 (cadr m))
760   ;; Loop for parent ids strings:
761   (let loop ()
762     (define m (regexp-try-match #px"^\nparent ([0-9a-fA-F]{40})" i))
763     (if m
764         (cons (bytes->string/utf-8 (cadr m))
765               (loop))
766         null))))
767
768;; extract-tree : bytes (hash/c bytes object) tmp-info path -> void
769;;  Extract the designated tree to `dest-dir`, using objects from `tmp`
770(define (extract-tree tree-id obj-ids tmp dest-dir strict-links?)
771  (make-directory* dest-dir)
772  (define tree-obj (hash-ref obj-ids tree-id))
773  (call-with-input-object
774   tmp
775   (object-location tree-obj)
776   (lambda (i)
777     (let loop ()
778       (define-values (id mode fn) (extract-tree-entry i))
779       (when id
780         (define (this-object-location)
781           (object-location (hash-ref obj-ids id)))
782         (define (copy-this-object perms)
783           (copy-object tmp
784                        (this-object-location)
785                        perms
786                        (build-path dest-dir fn)))
787         (case (datum-intern-literal mode)
788          [(#"100755") #"755"
789           (copy-this-object #o755)]
790          [(#"100644" #"644")
791           (copy-this-object #o644)]
792          [(#"40000" #"040000")
793           (extract-tree id obj-ids tmp (build-path dest-dir fn) strict-links?)]
794          [(#"120000")
795           (define target (bytes->path (object->bytes tmp (this-object-location))))
796           (when strict-links?
797             (check-unpack-path 'git-checkout target #:kind "link"))
798           (make-file-or-directory-link target (build-path dest-dir fn))]
799          [(#"160000")
800           ;; submodule; just make a directory placeholder
801           (make-directory* (build-path dest-dir fn))]
802          [else
803           (raise-git-error 'extract-tree "unknown mode: ~s" mode)])
804         (loop))))))
805
806;; extract-tree-entry: input-port -> bytes-or-#f bytes-or-#f path-or-#f
807(define (extract-tree-entry i)
808  (define m (regexp-try-match #px"^([0-7]{3,6}) ([^\0]+)\0" i))
809  (cond
810   [m
811    (define id (read-bytes-exactly 'id 20 i))
812    (define mode (cadr m))
813    (define fn (bytes->path-element (caddr m)))
814    (values id mode fn)]
815   [else
816    (values #f #f #f)]))
817
818;; ----------------------------------------
819;; ``Dumb'' HTTP(S) server support
820
821;; read-dumb-objects : (listof string) (hash-of string object)
822;;                     symbol string boolean integer string
823;;                     status maybe-save-objects
824;;                     -> (hash-of string object)
825;;  Read the package files available on the server, then round up
826;;  any additional loose objects that we'll need.
827(define (read-dumb-objects id-strs tmp
828                           transport host verify? port repo
829                           status maybe-save-objects)
830  (define conn (http-conn))
831  (http-conn-open! conn
832                   host
833                   #:ssl? (if (eq? transport 'https)
834                              (ssl-context verify?)
835                              #f)
836                   #:port port
837                   #:auto-reconnect? #t)
838
839  (define packfiles
840    (get-packfile-list conn repo))
841
842  (define packed-objects
843    (for/fold ([objects (hash)]) ([packfile (in-list packfiles)])
844      (read-dumb-packfile packfile objects tmp conn repo status)))
845
846  (maybe-save-objects packed-objects "packed-objs")
847
848  (status "Downloading loose objects")
849  (define objects
850    (read-dumb-loose-objects id-strs packed-objects (make-hash)
851                             tmp conn repo status))
852
853  (http-conn-close! conn)
854
855  (for/hash ([obj (in-hash-values objects)])
856    (values (object-id obj) obj)))
857
858;; get-packfile-list : conn string -> (listof string)
859;;  Get a list of packfiles available from the server
860(define (get-packfile-list conn repo)
861  (define-values (status-line headers i)
862    (http-conn-sendrecv! conn
863                         (~a "/" repo "/objects/info/packs")))
864  (check-status status-line "error getting packfile list")
865
866  (for/list ([l (in-lines i)]
867             #:unless (equal? l ""))
868    (define m (regexp-match #rx"^P (.*)" l))
869    (unless m (raise-git-error 'git-checkout "error parsing packfile list line\n  line: ~e" l))
870    (cadr m)))
871
872;; read-dumb-packfile : string (hashof string object) tmp conn strung status
873;;   -> (hashof string object)
874;; Read a packfile and apply its deltas, producing an updated mapping of objects
875;; that we have unpacked so far.
876(define (read-dumb-packfile packfile objects tmp conn repo status)
877  (define-values (status-line headers i)
878    (http-conn-sendrecv! conn
879                         (~a "/" repo "/objects/pack/" packfile)))
880  (check-status status-line (~a "error getting packfile " packfile))
881
882  (define obj-list (read-packfile i tmp status #f))
883  (define obj-ids (rewrite-deltas obj-list tmp status))
884
885  ;; Add new objects to hash table:
886  (for/fold ([objects objects]) ([obj (in-hash-values obj-ids)])
887    (hash-set objects (bytes->hex-string (object-id obj)) obj)))
888
889;; read-dumb-loose-objects : (listof string) (hash-of string object)
890;;                           (mutable-hash-of string #t)
891;;                           conn string status
892;;                           -> (hash-of string object)
893;;  Traverse the tree, looking for extra objects (not supplied by a packfile)
894;;  that we need to download
895(define (read-dumb-loose-objects id-strs objects seen tmp conn repo status)
896  (for/fold ([objects objects]) ([id-str (in-list id-strs)])
897    (cond
898     [(hash-ref seen id-str #f) objects]
899     [else
900      (define obj
901        (cond
902         [(hash-ref objects id-str #f)
903          => (lambda (obj) obj)]
904         [else
905          (define-values (status-line headers compressed-i)
906            (http-conn-sendrecv! conn
907                                 (~a "/" repo
908                                     "/objects/" (substring id-str 0 2)
909                                     "/" (substring id-str 2))))
910          (check-status status-line (format "error getting object ~a" id-str))
911
912          ;; Set up decompression of stream:
913          (define-values (i decompressed-o) (make-pipe 4096))
914          (define exn #f)
915          (define inflate-thread
916            (thread (lambda ()
917                      (dynamic-wind
918                       void
919                       (lambda ()
920                         (with-handlers ([values (lambda (x) (set! exn x))])
921                           (zlib-inflate compressed-i decompressed-o)))
922                       (lambda ()
923                         (close-output-port decompressed-o)
924                         (close-input-port compressed-i))))))
925
926          ;; Parse the object description:
927          (define header-m (regexp-try-match #rx#"^[^\0]*\0" i))
928          (unless header-m
929            (raise-git-error 'git-checkout "bad initial line for object content"))
930          (define header (car header-m))
931          (define header-len (bytes-length header))
932          (define type-sym (string->symbol
933                            (bytes->string/utf-8 (car (regexp-match #rx"^[^ ]*" header)))))
934          (define data-len (string->number
935                            (bytes->string/utf-8 (cadr (or (regexp-match #rx"[^ ]* ([0-9]+)" header)
936                                                           '(#"" #""))))))
937          (unless (memq type-sym valid-types)
938            (raise-git-error 'git-checkout "bad type: ~e" type-sym))
939
940          (define obj
941            (save-object (lambda (o) (copy-port i o))
942                         data-len type-sym #f tmp))
943
944          ;; Just in case:
945          (kill-thread inflate-thread)
946          (close-input-port compressed-i)
947          (when exn (raise exn))
948
949          obj]))
950
951      ;; Add the (potentially) new object to out table:
952      (define new-objects (hash-set objects id-str obj))
953      (hash-set! seen id-str #t)
954
955      ;; Inspect the new object, looking for additional objects to download:
956      (define id (object-id obj))
957      (define (call-with-content proc)
958        (call-with-input-object
959         tmp
960         (object-location obj)
961         proc))
962      (define more-id-strs
963        (case (object-type obj)
964          [(commit)
965           (define-values (tree parents)
966             (call-with-content (lambda (i) (extract-commit-info i id))))
967           (cons tree parents)]
968          [(tree)
969           (call-with-content
970            (lambda (i)
971              (let loop ()
972                (define-values (content-id mode fn) (extract-tree-entry i))
973                (cond
974                 [(not content-id) null]
975                 [(equal? mode #"160000")
976                  ;; don't try to get a submodule commit
977                  (loop)]
978                 [else
979                  (cons (bytes->hex-string content-id)
980                        (loop))]))))]
981          [else
982           null]))
983
984      (read-dumb-loose-objects more-id-strs new-objects seen
985                               tmp conn repo status)])))
986
987;; check-status : string string -> any
988;;  Check an HTTP status result and complain if there's a problem
989(define (check-status status-line msg)
990  (define status (let ([m (regexp-match #rx"^[^ ]* ([0-9]+)" status-line)])
991                   (and m (string->number (bytes->string/utf-8 (cadr m))))))
992  (unless (memv status '(200))
993    (raise-git-error 'git-checkout "~a\n  server respone: ~a"
994                     msg
995                     status-line)))
996
997;; ----------------------------------------
998;; Temporary directory & database
999
1000(struct tmp-info (dir small-i small-o [pos #:mutable] [flush? #:mutable] [obj-counter #:mutable]))
1001
1002;; make-tmp-info : path -> tmp-info
1003(define (make-tmp-info tmp-dir #:fresh? [fresh? #f])
1004  (define-values (i o) (open-input-output-file
1005                        (build-path tmp-dir "objs-small")
1006                        #:exists (if fresh? 'truncate 'update)))
1007  (file-stream-buffer-mode i 'none)
1008  (tmp-info tmp-dir i o 0 #f 0))
1009
1010;; close-tmp-info : tmp-info -> void
1011(define (close-tmp-info tmp)
1012  (close-input-port (tmp-info-small-i tmp))
1013  (close-output-port (tmp-info-small-o tmp)))
1014
1015;; increment-object-count! : tmp-info -> integer
1016(define (increment-object-count! tmp)
1017  (define n (add1 (tmp-info-obj-counter tmp)))
1018  (set-tmp-info-obj-counter! tmp n)
1019  n)
1020
1021;; call-with-output-object : tmp-info string natural (output-port -> any) -> location
1022(define (call-with-output-object tmp filename len proc)
1023  (define (check-len got-len)
1024    (unless (= len got-len)
1025      (raise-git-error 'git-checkout "size mismatch\n  expected: ~a\n  received: ~a"
1026                       len
1027                       got-len)))
1028  (cond
1029   [(len . < . 256)
1030    (define location (tmp-info-pos tmp))
1031    (define s (open-output-bytes))
1032    (proc s)
1033    (let ([bstr (get-output-bytes s)])
1034      (check-len (bytes-length bstr))
1035      (define o (tmp-info-small-o tmp))
1036      (file-position o location)
1037      (write-bytes bstr o)
1038      (set-tmp-info-pos! tmp (+ len (tmp-info-pos tmp)))
1039      (set-tmp-info-flush?! tmp #t))
1040    (cons location len)]
1041   [else
1042    (define path (build-path (tmp-info-dir tmp) filename))
1043    (call-with-output-file* path proc #:exists 'truncate)
1044    (check-len (file-size path))
1045    filename]))
1046
1047;; call-with-input-object : tmp-info location (input-port -> X) -> X
1048(define (call-with-input-object tmp location proc)
1049  (cond
1050   [(pair? location)
1051    (define bstr (object->bytes tmp location))
1052    (proc (open-input-bytes bstr))]
1053   [else
1054    (call-with-input-file* (build-path (tmp-info-dir tmp) location) proc)]))
1055
1056;; copy-object : tmp-info location integer path -> void
1057(define (copy-object tmp location perms dest-file)
1058  (cond
1059   [(pair? location)
1060    (define bstr (object->bytes tmp location))
1061    (call-with-output-file*
1062     dest-file
1063     #:exists 'truncate
1064     (lambda (o) (write-bytes bstr o)))]
1065   [else
1066    (copy-file (build-path (tmp-info-dir tmp) location)
1067               dest-file
1068               #t)])
1069  (unless (equal? 'windows (system-type 'os))
1070    (file-or-directory-permissions dest-file perms)))
1071
1072;; object->bytes : tmp-info location -> bytes
1073(define (object->bytes tmp location)
1074  (cond
1075   [(pair? location)
1076    (when (tmp-info-flush? tmp)
1077      (flush-output (tmp-info-small-o tmp))
1078      (set-tmp-info-flush?! tmp #f))
1079    (define i (tmp-info-small-i tmp))
1080    (file-position i (car location))
1081    (read-bytes (cdr location) i)]
1082   [else
1083    (file->bytes (build-path (tmp-info-dir tmp) location))]))
1084
1085;; ----------------------------------------
1086;; Utils
1087
1088(define (read-bytes-exactly what len i)
1089  (define bstr (read-bytes len i))
1090  (unless (and (bytes? bstr)
1091               (= (bytes-length bstr) len))
1092    (raise-git-error 'git-checkout (~a "error getting bytes for ~a\n"
1093                                       "  expected length: ~a\n"
1094                                       "  got length: ~a")
1095                     what
1096                     len
1097                     (if (eof-object? bstr)
1098                         eof
1099                         (bytes-length bstr))))
1100  bstr)
1101
1102(define (read-byte-only what i)
1103  (define c (read-byte i))
1104  (unless (byte? c)
1105    (raise-git-error 'git-checkout "expected to get a byte for ~a, got enf-of-file" what))
1106  c)
1107
1108;; copy-port-n : input-port output-port natural -> void
1109(define (copy-port-n i o n)
1110  (cond
1111   [(n . <= . 4096)
1112    (define bstr (read-bytes n i))
1113    (unless (and (bytes? bstr)
1114                 (= (bytes-length bstr) n))
1115      (raise-git-error 'git-checkout "not enough bytes during copy"))
1116    (write-bytes bstr o)]
1117   [else
1118    (copy-port-n i o 4096)
1119    (copy-port-n i o (- n 4096))]))
1120
1121(define (msb-set? c)
1122  (bitwise-bit-set? c 7))
1123
1124;; A common integer encoding is a sequence of 7-bit
1125;; piecs of the number, where the most-significant bit
1126;; indicates whether the number continues
1127(define (read-integer i)
1128  (let loop ([amt 0] [shift 0])
1129    (define c (read-byte i))
1130    (cond
1131     [(eof-object? c) amt]
1132     [else
1133      (define new-amt (+ amt
1134                         (arithmetic-shift (bitwise-and c #x7F) shift)))
1135      (if (msb-set? c)
1136          (loop new-amt (+ shift 7))
1137          new-amt)])))
1138
1139;; Similar to read-integer, but for (negative) offsets
1140(define (read-offset-integer i)
1141  (define c (read-byte i))
1142  (cond
1143   [(eof-object? c) 0]
1144   [else
1145    (define delta (bitwise-and c #x7F))
1146    (cond
1147     [(not (msb-set? c)) delta]
1148     [else
1149      (let loop ([delta delta])
1150        (define c (read-byte i))
1151        (cond
1152         [(eof-object? c) delta]
1153         [else
1154          (let ([delta (+ (arithmetic-shift (+ delta 1) 7)
1155                          (bitwise-and c #x7F))])
1156            (if (msb-set? c)
1157                (loop delta)
1158                delta))]))])]))
1159
1160;; Another number format, where a bitmap `n` indicates
1161;; when to read a byte
1162(define (read-number-by-bits i n)
1163  (cond
1164   [(zero? n) 0]
1165   [else
1166    (+ (if (bitwise-bit-set? n 0)
1167           (read-byte i)
1168           0)
1169       (arithmetic-shift (read-number-by-bits i (arithmetic-shift n -1))
1170                         8))]))
1171
1172;; ADLER32 implementation
1173;; https://www.ietf.org/rfc/rfc1950.txt
1174(define (adler32-through-ports in out)
1175  (define ADLER 65521)
1176  (define bstr (make-bytes 4096))
1177  (let loop ([s1 1] [s2 0])
1178    (define n (read-bytes! bstr in))
1179    (cond
1180      [(eof-object? n)
1181       (bitwise-ior (arithmetic-shift s2 16) s1)]
1182      [else
1183       (write-bytes bstr out 0 n)
1184       (define-values (new-s1 new-s2)
1185         (for/fold ([s1 s1]
1186                    [s2 s2])
1187                   ([bits (in-bytes bstr 0 n)])
1188           (define a (modulo (+ s1 bits) ADLER))
1189           (define b (modulo (+ s2 a) ADLER))
1190           (values a b)))
1191       (loop new-s1 new-s2)])))
1192
1193;; zlib-inflate : input-port output-port
1194;;  Reads compressed data from `i`, writes uncompressed to `o`
1195(define (zlib-inflate i o)
1196  (define cmf (read-byte-only 'zlib-cmf i))
1197  (define flg (read-byte-only 'zlib-flag i))
1198  (unless (= 8 (bitwise-and cmf #xF))
1199    (raise-git-error 'git-checkout "compression is not `deflate`"))
1200  (when (bitwise-bit-set? flg 5)
1201    ;; read dictid
1202    (read-bytes-exactly 'dictid 4 i))
1203  ;; Include adler32 checksum in the pipeline, writing to `o`:
1204  (define-values (checksum-in checksum-out) (make-pipe 4096))
1205  (define uncompressed-adler #f)
1206  (define checksum-thread
1207    (thread
1208     (lambda () (set! uncompressed-adler (adler32-through-ports checksum-in o)))))
1209  ;; Inflate, sending output to checksum (and then to `o`):
1210  (inflate i checksum-out)
1211  (close-output-port checksum-out)
1212  (sync checksum-thread)
1213  ;; Verify checksum
1214  (define adler (read-bytes-exactly 'adler-checksum 4 i))
1215  (unless (= (integer-bytes->integer adler #f #t)
1216             uncompressed-adler)
1217    (raise-git-error 'git-checkout "adler32 checksum failed"))
1218  (void))
1219
1220;; ----------------------------------------
1221
1222(module+ main
1223  (require racket/cmdline)
1224
1225  (define depth 1)
1226  (define ref "master")
1227  (define tmp-dir #f)
1228  (define transport 'git)
1229  (define status-printf
1230    (lambda args
1231      (apply printf args)
1232      (flush-output)))
1233
1234  (define-values (host repo dest)
1235    (command-line
1236     #:once-any
1237     [("--git") "Use the Git transport (the default)"
1238      (set! transport 'git)]
1239     [("--http") "Use the \"smart\" HTTP transport"
1240      (set! transport 'http)]
1241     [("--https") "Use the \"smart\" HTTPS transport"
1242      (set! transport 'https)]
1243     #:once-each
1244     [("--depth") d "Commit depth of <d> (default is 1, 0 means \"all\")"
1245      (set! depth (string->number d))
1246      (unless (exact-nonnegative-integer? depth)
1247        (raise-user-error 'git-checkout "bad depth: ~a" d))]
1248     [("--ref") branch/tag/commit "Checkout specified commit"
1249      (set! ref branch/tag/commit)]
1250     [("--tmp") dir "Write temporary files to <dir>"
1251      (set! tmp-dir dir)]
1252     [("-u" "--username") username "Username used to authenticate over HTTP(S)"
1253      (current-git-username username)]
1254     [("-p" "--password") password "Password used to authenticate over HTTP(S)"
1255      (current-git-password password)]
1256     [("--quiet") "Suppress status printouts"
1257      (set! status-printf void)]
1258     #:args (host repo dest)
1259     (values host repo dest)))
1260
1261  (git-checkout host repo
1262                #:transport transport
1263                #:dest-dir dest
1264                #:tmp-dir tmp-dir
1265                #:ref ref
1266                #:depth (if (eq? 0 depth) #f depth)
1267                #:status-printf status-printf))
1268