1# tar.tcl --
2#
3#       Creating, extracting, and listing posix tar archives
4#
5# Copyright (c) 2004    Aaron Faupell <afaupell@users.sourceforge.net>
6# Copyright (c) 2013    Andreas Kupries <andreas_kupries@users.sourceforge.net>
7#                       (GNU tar @LongLink support).
8#
9# See the file "license.terms" for information on usage and redistribution
10# of this file, and for a DISCLAIMER OF ALL WARRANTIES.
11#
12# RCS: @(#) $Id: tar.tcl,v 1.17 2012/09/11 17:22:24 andreas_kupries Exp $
13
14package require Tcl 8.4
15package provide tar 0.11
16
17namespace eval ::tar {}
18
19proc ::tar::parseOpts {acc opts} {
20    array set flags $acc
21    foreach {x y} $acc {upvar $x $x}
22
23    set len [llength $opts]
24    set i 0
25    while {$i < $len} {
26        set name [string trimleft [lindex $opts $i] -]
27        if {![info exists flags($name)]} {
28	    return -errorcode {TAR INVALID OPTION} \
29		-code error "unknown option \"$name\""
30	}
31        if {$flags($name) == 1} {
32            set $name [lindex $opts [expr {$i + 1}]]
33            incr i $flags($name)
34        } elseif {$flags($name) > 1} {
35            set $name [lrange $opts [expr {$i + 1}] [expr {$i + $flags($name)}]]
36            incr i $flags($name)
37        } else {
38            set $name 1
39        }
40        incr i
41    }
42}
43
44proc ::tar::pad {size} {
45    set pad [expr {512 - ($size % 512)}]
46    if {$pad == 512} {return 0}
47    return $pad
48}
49
50proc ::tar::seekorskip {ch off wh} {
51    if {[tell $ch] < 0} {
52	if {$wh!="current"} {
53	    return -code error -errorcode [list TAR INVALID WHENCE $wh] \
54		"WHENCE=$wh not supported on non-seekable channel $ch"
55	}
56	skip $ch $off
57	return
58    }
59    seek $ch $off $wh
60    return
61}
62
63proc ::tar::skip {ch skipover} {
64    while {$skipover > 0} {
65	set requested $skipover
66
67	# Limit individual skips to 64K, as a compromise between speed
68	# of skipping (Number of read requests), and memory usage
69	# (Note how skipped block is read into memory!). While the
70	# read data is immediately discarded it still generates memory
71	# allocation traffic, gets copied, etc. Trying to skip the
72	# block in one go without the limit may cause us to run out of
73	# (virtual) memory, or just induce swapping, for nothing.
74
75	if {$requested > 65536} {
76	    set requested 65536
77	}
78
79	set skipped [string length [read $ch $requested]]
80
81	# Stop in short read into the end of the file.
82	if {!$skipped && [eof $ch]} break
83
84	# Keep track of how much is (not) skipped yet.
85	incr skipover -$skipped
86    }
87    return
88}
89
90proc ::tar::readHeader {data} {
91    binary scan $data a100a8a8a8a12a12a8a1a100a6a2a32a32a8a8a155 \
92                      name mode uid gid size mtime cksum type \
93                      linkname magic version uname gname devmajor devminor prefix
94
95    foreach x {name type linkname} {
96        set $x [string trim [set $x] "\x00"]
97    }
98    foreach x {uid gid size mtime cksum} {
99        set $x [format %d 0[string trim [set $x] " \x00"]]
100    }
101    set mode [string trim $mode " \x00"]
102
103    if {$magic == "ustar "} {
104        # gnu tar
105        # not fully supported
106        foreach x {uname gname prefix} {
107            set $x [string trim [set $x] "\x00"]
108        }
109        foreach x {devmajor devminor} {
110            set $x [format %d 0[string trim [set $x] " \x00"]]
111        }
112    } elseif {$magic == "ustar\x00"} {
113        # posix tar
114        foreach x {uname gname prefix} {
115            set $x [string trim [set $x] "\x00"]
116        }
117        foreach x {devmajor devminor} {
118            set $x [format %d 0[string trim [set $x] " \x00"]]
119        }
120    } else {
121        # old style tar
122        foreach x {uname gname devmajor devminor prefix} { set $x {} }
123        if {$type == ""} {
124            if {[string match */ $name]} {
125                set type 5
126            } else {
127                set type 0
128            }
129        }
130    }
131
132    return [list name $name mode $mode uid $uid gid $gid size $size mtime $mtime \
133                 cksum $cksum type $type linkname $linkname magic $magic \
134                 version $version uname $uname gname $gname devmajor $devmajor \
135                 devminor $devminor prefix $prefix]
136}
137
138proc ::tar::contents {file args} {
139    set chan 0
140    parseOpts {chan 0} $args
141    if {$chan} {
142	set fh $file
143    } else {
144	set fh [::open $file]
145	fconfigure $fh -encoding binary -translation lf -eofchar {}
146    }
147    set ret {}
148    while {![eof $fh]} {
149        array set header [readHeader [read $fh 512]]
150	HandleLongLink $fh header
151        if {$header(name) == ""} break
152	if {$header(prefix) != ""} {append header(prefix) /}
153        lappend ret $header(prefix)$header(name)
154        seekorskip $fh [expr {$header(size) + [pad $header(size)]}] current
155    }
156    if {!$chan} {
157	close $fh
158    }
159    return $ret
160}
161
162proc ::tar::stat {tar {file {}} args} {
163    set chan 0
164    parseOpts {chan 0} $args
165    if {$chan} {
166	set fh $tar
167    } else {
168	set fh [::open $tar]
169	fconfigure $fh -encoding binary -translation lf -eofchar {}
170    }
171    set ret {}
172    while {![eof $fh]} {
173        array set header [readHeader [read $fh 512]]
174	HandleLongLink $fh header
175        if {$header(name) == ""} break
176	if {$header(prefix) != ""} {append header(prefix) /}
177        seekorskip $fh [expr {$header(size) + [pad $header(size)]}] current
178        if {$file != "" && "$header(prefix)$header(name)" != $file} {continue}
179        set header(type) [string map {0 file 5 directory 3 characterSpecial 4 blockSpecial 6 fifo 2 link} $header(type)]
180        set header(mode) [string range $header(mode) 2 end]
181        lappend ret $header(prefix)$header(name) [list mode $header(mode) uid $header(uid) gid $header(gid) \
182                    size $header(size) mtime $header(mtime) type $header(type) linkname $header(linkname) \
183                    uname $header(uname) gname $header(gname) devmajor $header(devmajor) devminor $header(devminor)]
184    }
185    if {!$chan} {
186	close $fh
187    }
188    return $ret
189}
190
191proc ::tar::get {tar file args} {
192    set chan 0
193    parseOpts {chan 0} $args
194    if {$chan} {
195	set fh $tar
196    } else {
197	set fh [::open $tar]
198	fconfigure $fh -encoding binary -translation lf -eofchar {}
199    }
200    while {![eof $fh]} {
201	set data [read $fh 512]
202        array set header [readHeader $data]
203	HandleLongLink $fh header
204        if {$header(name) eq ""} break
205	if {$header(prefix) ne ""} {append header(prefix) /}
206        set name [string trimleft $header(prefix)$header(name) /]
207        if {$name eq $file} {
208            set file [read $fh $header(size)]
209            if {!$chan} {
210		close $fh
211	    }
212            return $file
213        }
214        seekorskip $fh [expr {$header(size) + [pad $header(size)]}] current
215    }
216    if {!$chan} {
217	close $fh
218    }
219    return -code error -errorcode {TAR MISSING FILE} \
220	"Tar \"$tar\": File \"$file\" not found"
221}
222
223proc ::tar::untar {tar args} {
224    set nooverwrite 0
225    set data 0
226    set nomtime 0
227    set noperms 0
228    set chan 0
229    parseOpts {dir 1 file 1 glob 1 nooverwrite 0 nomtime 0 noperms 0 chan 0} $args
230    if {![info exists dir]} {set dir [pwd]}
231    set pattern *
232    if {[info exists file]} {
233        set pattern [string map {* \\* ? \\? \\ \\\\ \[ \\\[ \] \\\]} $file]
234    } elseif {[info exists glob]} {
235        set pattern $glob
236    }
237
238    set ret {}
239    if {$chan} {
240	set fh $tar
241    } else {
242	set fh [::open $tar]
243	fconfigure $fh -encoding binary -translation lf -eofchar {}
244    }
245    while {![eof $fh]} {
246        array set header [readHeader [read $fh 512]]
247	HandleLongLink $fh header
248        if {$header(name) == ""} break
249	if {$header(prefix) != ""} {append header(prefix) /}
250        set name [string trimleft $header(prefix)$header(name) /]
251        if {![string match $pattern $name] || ($nooverwrite && [file exists $name])} {
252            seekorskip $fh [expr {$header(size) + [pad $header(size)]}] current
253            continue
254        }
255
256        set name [file join $dir $name]
257        if {![file isdirectory [file dirname $name]]} {
258            file mkdir [file dirname $name]
259            lappend ret [file dirname $name] {}
260        }
261        if {[string match {[0346]} $header(type)]} {
262            if {[catch {::open $name w+} new]} {
263                # sometimes if we dont have write permission we can still delete
264                catch {file delete -force $name}
265                set new [::open $name w+]
266            }
267            fconfigure $new -encoding binary -translation lf -eofchar {}
268            fcopy $fh $new -size $header(size)
269            close $new
270            lappend ret $name $header(size)
271        } elseif {$header(type) == 5} {
272            file mkdir $name
273            lappend ret $name {}
274        } elseif {[string match {[12]} $header(type)] && $::tcl_platform(platform) == "unix"} {
275            catch {file delete $name}
276            if {![catch {file link [string map {1 -hard 2 -symbolic} $header(type)] $name $header(linkname)}]} {
277                lappend ret $name {}
278            }
279        }
280        seekorskip $fh [pad $header(size)] current
281        if {![file exists $name]} continue
282
283        if {$::tcl_platform(platform) == "unix"} {
284            if {!$noperms} {
285                catch {file attributes $name -permissions 0[string range $header(mode) 2 end]}
286            }
287            catch {file attributes $name -owner $header(uid) -group $header(gid)}
288            catch {file attributes $name -owner $header(uname) -group $header(gname)}
289        }
290        if {!$nomtime} {
291            file mtime $name $header(mtime)
292        }
293    }
294    if {!$chan} {
295	close $fh
296    }
297    return $ret
298}
299
300##
301 # ::tar::statFile
302 #
303 # Returns stat info about a filesystem object, in the form of an info
304 # dictionary like that returned by ::tar::readHeader.
305 #
306 # The mode, uid, gid, mtime, and type entries are always present.
307 # The size and linkname entries are present if relevant for this type
308 # of object. The uname and gname entries are present if the OS supports
309 # them. No devmajor or devminor entry is present.
310 ##
311
312proc ::tar::statFile {name followlinks} {
313    if {$followlinks} {
314        file stat $name stat
315    } else {
316        file lstat $name stat
317    }
318
319    set ret {}
320
321    if {$::tcl_platform(platform) == "unix"} {
322        lappend ret mode 1[file attributes $name -permissions]
323        lappend ret uname [file attributes $name -owner]
324        lappend ret gname [file attributes $name -group]
325        if {$stat(type) == "link"} {
326            lappend ret linkname [file link $name]
327        }
328    } else {
329        lappend ret mode [lindex {100644 100755} [expr {$stat(type) == "directory"}]]
330    }
331
332    lappend ret  uid $stat(uid)  gid $stat(gid)  mtime $stat(mtime) \
333      type $stat(type)
334
335    if {$stat(type) == "file"} {lappend ret size $stat(size)}
336
337    return $ret
338}
339
340##
341 # ::tar::formatHeader
342 #
343 # Opposite operation to ::tar::readHeader; takes a file name and info
344 # dictionary as arguments, returns a corresponding (POSIX-tar) header.
345 #
346 # The following dictionary entries must be present:
347 #   mode
348 #   type
349 #
350 # The following dictionary entries are used if present, otherwise
351 # the indicated default is used:
352 #   uid       0
353 #   gid       0
354 #   size      0
355 #   mtime     [clock seconds]
356 #   linkname  {}
357 #   uname     {}
358 #   gname     {}
359 #
360 # All other dictionary entries, including devmajor and devminor, are
361 # presently ignored.
362 ##
363
364proc ::tar::formatHeader {name info} {
365    array set A {
366        linkname ""
367        uname ""
368        gname ""
369        size 0
370        gid  0
371        uid  0
372    }
373    set A(mtime) [clock seconds]
374    array set A $info
375    array set A {devmajor "" devminor ""}
376
377    set type [string map {file 0 directory 5 characterSpecial 3 \
378      blockSpecial 4 fifo 6 link 2 socket A} $A(type)]
379
380    set osize  [format %o $A(size)]
381    set ogid   [format %o $A(gid)]
382    set ouid   [format %o $A(uid)]
383    set omtime [format %o $A(mtime)]
384
385    set name [string trimleft $name /]
386    if {[string length $name] > 255} {
387        return -code error -errorcode {TAR BAD PATH LENGTH} \
388	    "path name over 255 chars"
389    } elseif {[string length $name] > 100} {
390	set common [string range $name end-99 154]
391	if {[set splitpoint [string first / $common]] == -1} {
392	    return -code error -errorcode {TAR BAD PATH UNSPLITTABLE} \
393		"path name cannot be split into prefix and name"
394	}
395	set prefix [string range $name 0 end-100][string range $common 0 $splitpoint-1]
396	set name   [string range $common $splitpoint+1 end][string range $name 155 end]
397    } else {
398        set prefix ""
399    }
400
401    set header [binary format a100A8A8A8A12A12A8a1a100A6a2a32a32a8a8a155a12 \
402                              $name $A(mode)\x00 $ouid\x00 $ogid\x00\
403                              $osize\x00 $omtime\x00 {} $type \
404                              $A(linkname) ustar\x00 00 $A(uname) $A(gname)\
405                              $A(devmajor) $A(devminor) $prefix {}]
406
407    binary scan $header c* tmp
408    set cksum 0
409    foreach x $tmp {incr cksum $x}
410
411    return [string replace $header 148 155 [binary format A8 [format %o $cksum]\x00]]
412}
413
414
415proc ::tar::recurseDirs {files followlinks} {
416    foreach x $files {
417        if {[file isdirectory $x] && ([file type $x] != "link" || $followlinks)} {
418            if {[set more [glob -dir $x -nocomplain *]] != ""} {
419                eval lappend files [recurseDirs $more $followlinks]
420            } else {
421                lappend files $x
422            }
423        }
424    }
425    return $files
426}
427
428proc ::tar::writefile {in out followlinks name} {
429     puts -nonewline $out [formatHeader $name [statFile $in $followlinks]]
430     set size 0
431     if {[file type $in] == "file" || ($followlinks && [file type $in] == "link")} {
432         set in [::open $in]
433         fconfigure $in -encoding binary -translation lf -eofchar {}
434         set size [fcopy $in $out]
435         close $in
436     }
437     puts -nonewline $out [string repeat \x00 [pad $size]]
438}
439
440proc ::tar::create {tar files args} {
441    set dereference 0
442    set chan 0
443    parseOpts {dereference 0 chan 0} $args
444
445    if {$chan} {
446	set fh $tar
447    } else {
448	set fh [::open $tar w+]
449	fconfigure $fh -encoding binary -translation lf -eofchar {}
450    }
451    foreach x [recurseDirs $files $dereference] {
452        writefile $x $fh $dereference $x
453    }
454    puts -nonewline $fh [string repeat \x00 1024]
455
456    if {!$chan} {
457	close $fh
458    }
459    return $tar
460}
461
462proc ::tar::add {tar files args} {
463    set dereference 0
464    set prefix ""
465    set quick 0
466    parseOpts {dereference 0 prefix 1 quick 0} $args
467
468    set fh [::open $tar r+]
469    fconfigure $fh -encoding binary -translation lf -eofchar {}
470
471    if {$quick} then {
472        seek $fh -1024 end
473    } else {
474        set data [read $fh 512]
475        while {[regexp {[^\0]} $data]} {
476            array set header [readHeader $data]
477            seek $fh [expr {$header(size) + [pad $header(size)]}] current
478            set data [read $fh 512]
479        }
480        seek $fh -512 current
481    }
482
483    foreach x [recurseDirs $files $dereference] {
484        writefile $x $fh $dereference $prefix$x
485    }
486    puts -nonewline $fh [string repeat \x00 1024]
487
488    close $fh
489    return $tar
490}
491
492proc ::tar::remove {tar files} {
493    set n 0
494    while {[file exists $tar$n.tmp]} {incr n}
495    set tfh [::open $tar$n.tmp w]
496    set fh [::open $tar r]
497
498    fconfigure $fh  -encoding binary -translation lf -eofchar {}
499    fconfigure $tfh -encoding binary -translation lf -eofchar {}
500
501    while {![eof $fh]} {
502        array set header [readHeader [read $fh 512]]
503        if {$header(name) == ""} {
504            puts -nonewline $tfh [string repeat \x00 1024]
505            break
506        }
507	if {$header(prefix) != ""} {append header(prefix) /}
508        set name $header(prefix)$header(name)
509        set len [expr {$header(size) + [pad $header(size)]}]
510        if {[lsearch $files $name] > -1} {
511            seek $fh $len current
512        } else {
513            seek $fh -512 current
514            fcopy $fh $tfh -size [expr {$len + 512}]
515        }
516    }
517
518    close $fh
519    close $tfh
520
521    file rename -force $tar$n.tmp $tar
522}
523
524proc ::tar::HandleLongLink {fh hv} {
525    upvar 1 $hv header thelongname thelongname
526
527    # @LongName Part I.
528    if {$header(type) == "L"} {
529	# Size == Length of name. Read it, and pad to full 512
530	# size.  After that is a regular header for the actual
531	# file, where we have to insert the name. This is handled
532	# by the next iteration and the part II below.
533	set thelongname [string trimright [read $fh $header(size)] \000]
534	seekorskip $fh [pad $header(size)] current
535	return -code continue
536    }
537    # Not supported yet: type 'K' for LongLink (long symbolic links).
538
539    # @LongName, part II, get data from previous entry, if defined.
540    if {[info exists thelongname]} {
541	set header(name) $thelongname
542	# Prevent leakage to further entries.
543	unset thelongname
544    }
545
546    return
547}
548