1# -*- tcl -*-
2# ### ### ### ######### ######### #########
3## Copyright (c) 2008-2012 ActiveState Software Inc., Andreas Kupries
4##                    2016 Andreas Kupries
5## BSD License
6##
7# Package providing commands for the decoding of basic zip-file
8# structures.
9
10package require Tcl 8.4
11package require fileutil::magic::filetype ; # Tcllib. File type determination via magic constants
12package require fileutil::decode 0.2.1    ; # Framework for easy decoding of files.
13namespace eval ::zipfile::decode {}
14if {[package vcompare $tcl_patchLevel "8.6"] < 0} {
15  # Only needed pre-8.6
16  package require Trf                       ; # Wrapper to zlib
17  package require zlibtcl                   ; # Zlib usage. No commands, access through Trf
18  set ::zipfile::decode::native_zip_functs 0
19} else {
20  set ::zipfile::decode::native_zip_functs 1
21}
22namespace eval ::zipfile::decode {
23    namespace import ::fileutil::decode::*
24}
25
26# ### ### ### ######### ######### #########
27## Convenience command, decode and copy to dir
28
29proc ::zipfile::decode::unzipfile {in out} {
30    zipfile::decode::open  $in
31    set                     zd [zipfile::decode::archive]
32    zipfile::decode::unzip $zd $out
33    zipfile::decode::close
34    return
35}
36
37## Convenience command, decode and return list of contained paths.
38proc ::zipfile::decode::content {in} {
39    zipfile::decode::open $in
40    set zd [zipfile::decode::archive]
41    set f [files $zd]
42    zipfile::decode::close
43    return $f
44}
45
46# ### ### ### ######### ######### #########
47##
48
49proc ::zipfile::decode::iszip {fname} {
50    if {[catch {
51	LocateEnd $fname
52    } msg]} {
53	return 0
54    }
55    return 1
56}
57
58proc ::zipfile::decode::open {fname} {
59    variable eoa
60    if {[catch {
61	set eoa [LocateEnd $fname]
62    } msg]} {
63	Error "\"$fname\" is not a zip file" BAD ARCHIVE
64    }
65    fileutil::decode::open $fname
66    return
67}
68
69proc ::zipfile::decode::close {} {
70    variable eoa
71    unset eoa
72    fileutil::decode::close
73    return
74}
75
76# ### ### ### ######### ######### #########
77##
78
79proc ::zipfile::decode::comment {zdict} {
80    array set _ $zdict
81    return $_(comment)
82}
83
84proc ::zipfile::decode::files {zdict} {
85    array set _ $zdict
86    array set f $_(files)
87    return [array names f]
88}
89
90proc ::zipfile::decode::hasfile {zdict fname} {
91    array set _ $zdict
92    array set f $_(files)
93    return [info exists f($fname)]
94}
95
96proc ::zipfile::decode::copyfile {zdict src dst} {
97    array set _ $zdict
98    array set f $_(files)
99
100    if {![info exists f($src)]} {
101	Error "File \"$src\" not known" BAD PATH
102    }
103
104    array set     fd $f($src)
105    CopyFile $src fd $dst
106    return
107}
108
109proc ::zipfile::decode::getfile {zdict src} {
110    array set _ $zdict
111    array set f $_(files)
112
113    if {![info exists f($src)]} {
114	Error "File \"$src\" not known" BAD PATH
115    }
116
117    array set fd $f($src)
118    return [GetFile $src fd]
119}
120
121proc ::zipfile::decode::unzip {zdict dst} {
122    array set _ $zdict
123    array set f $_(files)
124
125    foreach src [array names f] {
126	array set     fd $f($src)
127	CopyFile $src fd [file join $dst $src]
128
129	unset fd
130    }
131    return
132}
133
134proc ::zipfile::decode::CopyFile {src fdv dst} {
135    upvar 1 $fdv fd
136
137    file mkdir [file dirname $dst]
138
139    if {[string match */ $src]} {
140	# Entry is a directory. Just create.
141	file mkdir $dst
142	return
143    }
144
145    # Create files. Empty files are a special case, we have
146    # nothing to decompress.
147
148    if {$fd(ucsize) == 0} {
149	::close [::open $dst w] ; # touch
150	return
151    }
152
153    # non-empty files, work depends on type of compression.
154
155    switch -exact -- $fd(cm) {
156	uncompressed {
157	    go     $fd(fileloc)
158	    nbytes $fd(csize)
159
160	    set out [::open $dst w]
161	    fconfigure $out -translation binary -encoding binary -eofchar {}
162	    puts -nonewline $out [getval]
163	    ::close $out
164	}
165	deflate {
166	    go     $fd(fileloc)
167	    nbytes $fd(csize)
168
169	    set out [::open $dst w]
170	    fconfigure $out -translation binary -encoding binary -eofchar {}
171            if {$::zipfile::decode::native_zip_functs} {
172              puts -nonewline $out \
173		[zlib inflate [getval]]
174            } else {
175              puts -nonewline $out \
176		[zip -mode decompress -nowrap 1 -- \
177		     [getval]]
178            }
179	    ::close $out
180	}
181	default {
182	    Error "Unable to handle file \"$src\" compressed with method \"$fd(cm)\"" \
183		BAD COMPRESSION
184	}
185    }
186
187    if {
188	($::tcl_platform(platform) ne "windows") &&
189	($fd(efattr) != 0)
190    } {
191	# On unix take the permissions encoded in the external
192	# attributes and apply them to the new file. If there are
193	# permission. A value of 0 indicates an older teabag where
194	# the encoder did not yet support permissions. These we do not
195	# change from the sustem defaults. Permissions are in the
196	# lower 9 bits of the MSW.
197
198	file attributes $dst -permissions \
199	    [string map {0 --- 1 --x 2 -w- 3 -wx 4 r-- 5 r-x 6 rw- 7 rwx} \
200		 [format %03o [expr {($fd(efattr) >> 16) & 0x1ff}]]]
201    }
202
203    # FUTURE: Run crc checksum on created file and compare to the
204    # ......: stored information.
205
206    return
207}
208
209proc ::zipfile::decode::GetFile {src fdv} {
210    # See also CopyFile for similar code.
211    # TODO: Check with CopyFile for refactoring opportunity
212
213    upvar 1 $fdv fd
214
215    # Entry is a directory.
216    if {[string match */ $src]} {return {}}
217
218    # Empty files are a special case, we have
219    # nothing to decompress.
220
221    if {$fd(ucsize) == 0} {return {}}
222
223    # non-empty files, work depends on type of compression.
224
225    switch -exact -- $fd(cm) {
226	uncompressed {
227	    go     $fd(fileloc)
228	    nbytes $fd(csize)
229	    return [getval]
230	}
231	deflate {
232	    go     $fd(fileloc)
233	    nbytes $fd(csize)
234	    return [zip -mode decompress -nowrap 1 -- [getval]]
235	}
236	default {
237	    Error "Unable to handle file \"$src\" compressed with method \"$fd(cm)\"" \
238		BAD COMPRESSION
239	}
240    }
241
242    # FUTURE: Run crc checksum on created file and compare to the
243    # ......: stored information.
244
245    return {}
246}
247
248# ### ### ### ######### ######### #########
249##
250
251proc ::zipfile::decode::tag {etag} {
252    mark
253    long-le
254    return [match 0x${etag}4b50] ; # 'PK x y', little-endian integer.
255}
256
257proc ::zipfile::decode::localfileheader {} {
258    clear
259    putloc @
260    if {![tag 0403]} {clear ; return 0}
261
262    short-le ; unsigned ; recode VER ; put vnte      ; # version needed to extract
263    short-le ; unsigned ;              put gpbf      ; # general purpose bitflag
264    short-le ; unsigned ; recode CM  ; put cm        ; # compression method
265    short-le ; unsigned ;              put lmft      ; # last mod file time
266    short-le ; unsigned ;              put lmfd      ; # last mod file date
267    long-le  ; unsigned ;              put crc       ; # crc32                  | zero's here imply non-seekable,
268    long-le  ; unsigned ;              put csize     ; # compressed file size   | data is in a DDS behind the stored
269    long-le  ; unsigned ;              put ucsize    ; # uncompressed file size | file.
270    short-le ; unsigned ;              put fnamelen  ; # file name length
271    short-le ; unsigned ;              put efieldlen ; # extra field length
272
273    array set hdr [get]
274    clear
275
276    nbytes $hdr(fnamelen) ; put fname
277    putloc                      efieldloc
278    skip $hdr(efieldlen)
279    putloc                      fileloc
280
281    array set hdr [get]
282    clear
283
284    set hdr(gpbf) [GPBF $hdr(gpbf) $hdr(cm)]
285    setbuf [array get hdr]
286    return 1
287}
288
289proc ::zipfile::decode::centralfileheader {} {
290    clear
291    putloc @
292    if {![tag 0201]} {clear ; return 0}
293
294    # The items marked with ++ do not exist in the local file
295    # header. Everything else exists in the local file header as well,
296    # and has to match that information.
297
298    clear
299    short-le ; unsigned ; recode VER ; put vmb         ; # ++ version made by
300    short-le ; unsigned ; recode VER ; put vnte        ; #    version needed to extract
301    short-le ; unsigned ;              put gpbf        ; #    general purpose bitflag
302    short-le ; unsigned ; recode CM  ; put cm          ; #    compression method
303    short-le ; unsigned ;              put lmft        ; #    last mod file time
304    short-le ; unsigned ;              put lmfd        ; #    last mod file date
305    long-le  ; unsigned ;              put crc         ; #    crc32                  | zero's here imply non-seekable,
306    long-le  ; unsigned ;              put csize       ; #    compressed file size   | data is in a DDS behind the stored
307    long-le  ; unsigned ;              put ucsize      ; #    uncompressed file size | file.
308    short-le ; unsigned ;              put fnamelen    ; #    file name length
309    short-le ; unsigned ;              put efieldlen2  ; #    extra field length
310    short-le ; unsigned ;              put fcommentlen ; # ++ file comment length
311    short-le ; unsigned ;              put dns         ; # ++ disk number start
312    short-le ; unsigned ; recode IFA ; put ifattr      ; # ++ internal file attributes
313    long-le  ; unsigned ;              put efattr      ; # ++ external file attributes
314    long-le  ; unsigned ;              put localloc    ; # ++ relative offset of local file header
315
316    array set hdr [get]
317    clear
318
319    nbytes $hdr(fnamelen)    ; put fname
320    putloc                         efieldloc2
321    skip $hdr(efieldlen2)
322    nbytes $hdr(fcommentlen) ; put comment
323
324    array set hdr [get]
325    clear
326
327    set hdr(gpbf) [GPBF $hdr(gpbf) $hdr(cm)]
328    setbuf [array get hdr]
329    return 1
330}
331
332## NOT USED
333proc ::zipfile::decode::datadescriptor {} {
334    if {![tag 0807]} {return 0}
335
336    clear
337    long-le  ; unsigned ; put crc    ; # crc32
338    long-le  ; unsigned ; put csize  ; # compressed file size
339    long-le  ; unsigned ; put ucsize ; # uncompressed file size
340
341    return 1
342}
343
344proc ::zipfile::decode::endcentralfiledir {} {
345    clear
346    putloc ecdloc
347    if {![tag 0605]} {clear ; return 0}
348
349    short-le ; unsigned ; put nd         ; #
350    short-le ; unsigned ; put ndscd      ; #
351    short-le ; unsigned ; put tnecdd     ; #
352    short-le ; unsigned ; put tnecd      ; #
353    long-le  ; unsigned ; put sizecd     ; #
354    long-le  ; unsigned ; put ocd        ; #
355    short-le ; unsigned ; put commentlen ; # archive comment length
356
357    array set hdr [get] ; clear
358
359    nbytes $hdr(commentlen) ; put comment
360
361    array set hdr [get] ; clear
362
363    setbuf [array get hdr]
364    return 1
365}
366
367## NOT USED
368proc ::zipfile::decode::afile {} {
369    if {![localfileheader]} {return 0}
370
371    array set hdr [get]
372    if {($hdr(ucsize) == 0) || ($hdr(csize) > 0)} {
373	# The header entry specifies either
374	# 1. A zero-length file (possibly a directory entry), or
375	# 2. a non-empty file (compressed size > 0).
376	# In both cases we can skip the file contents directly.
377	# In both cases there should be no data descriptor behind
378	# we contents, but we check nevertheless. If there is its
379	# data overrides the current size and crc info.
380
381	skip $hdr(csize)
382
383	if {[datadescriptor]} {
384	    array set hdr [get]
385	    set hdr(ddpresent) 1
386	    setbuf [array get hdr]
387	}
388    } else {
389	Error "Search data descriptor. Not Yet Implemented" INCOMPLETE
390    }
391    return 1
392}
393
394proc ::zipfile::decode::archive {} {
395    variable eoa
396    array set cb $eoa
397
398    # Position us at the beginning of CFH, using the data provided to
399    # us by 'LocateEnd', called during 'open'.
400
401    go [expr {$cb(base) + $cb(coff)}]
402
403    array set fn {}
404
405    set nentries 0
406    while {[centralfileheader]} {
407	array set _ [set data [get]] ; clear
408
409	#parray _
410
411	# Use the information found in the CFH entry to locate and
412	# read the associated LFH. We explicitly remember where we are
413	# in the file because mark/rewind is only one level and the
414	# LFH command already used that up.
415
416	set here [at]
417	go [expr {$cb(base) + $_(localloc)}]
418	if {![localfileheader]} {
419	    ArchiveError "Directory entry without file." DIR WITHOUT FILE
420	}
421
422	array set lh [get] ; clear
423	go $here
424
425	# Compare the information in the CFH entry and associated
426	# LFH. Should match.
427
428	if {![hdrmatch lh _]} {
429	    ArchiveError "File/Dir Header mismatch." HEADER MISMATCH FILE/DIR
430	}
431
432	# Merge local and central data.
433	array set lh $data
434
435	set fn($_(fname)) [array get lh]
436	unset lh _
437	incr nentries
438    }
439
440    if {![endcentralfiledir]} {
441	ArchiveError "Bad closure." BAD CLOSURE
442    }
443
444    array set _ [get] ; clear
445
446    #parray _
447    #puts \#$nentries
448
449    if {$nentries != $_(tnecd)} {
450	ArchiveError "\#Files ($_(tnecd)) does not match \#Actual files ($nentries)" \
451	    MISMATCH COUNTS
452    }
453
454    set _(files) [array get fn]
455    return [array get _]
456}
457
458proc ::zipfile::decode::hdrmatch {lhv chv} {
459    upvar 1 $lhv lh $chv ch
460
461    #puts ______________________________________________
462    #parray lh
463    #parray ch
464
465    foreach key {
466	vnte gpbf cm lmft lmfd fnamelen fname
467    } {
468	if {$lh($key) != $ch($key)} {return 0}
469    }
470
471    if {[lsearch -exact $lh(gpbf) dd] < 0} {
472	# Compare the central and local size information only if the
473	# latter is not provided by a DDS. Which we haven't read.
474	# Because in that case the LFH information is uniformly 0, not
475	# known at the time of writing.
476
477	foreach key {
478	    crc csize ucsize
479	} {
480	    if {$lh($key) != $ch($key)} {return 0}
481	}
482    }
483
484    return 1
485}
486
487
488# ### ### ### ######### ######### #########
489##
490
491proc ::zipfile::decode::IFA {v} {
492    if {$v & 0x1} {
493	return text
494    } else {
495	return binary
496    }
497}
498
499# ### ### ### ######### ######### #########
500##
501
502namespace eval ::zipfile::decode {
503    variable  vhost
504    array set vhost {
505	0  FAT		1  Amiga
506	2  VMS		3  Unix
507	4  VM/CMS	5  Atari
508	6  HPFS		7  Macintosh
509	8  Z-System	9  CP/M
510	10 TOPS-20	11 NTFS
511	12 SMS/QDOS	13 {Acorn RISC OS}
512	14 VFAT		15 MVS
513	16 BeOS		17 Tandem
514    }
515}
516
517proc ::zipfile::decode::VER {v} {
518    variable vhost
519    set u [expr {($v & 0xff00) >> 16}]
520    set l [expr {($v & 0x00ff)}]
521
522    set major [expr {$l / 10}]
523    set minor [expr {$l % 10}]
524
525    return [list $vhost($u) ${major}.$minor]
526}
527
528# ### ### ### ######### ######### #########
529##
530
531namespace eval ::zipfile::decode {
532    variable  cm
533    array set cm {
534	0  uncompressed	1  shrink
535	2  {reduce 1}	3  {reduce 2}
536	4  {reduce 3}	5  {reduce 4}
537	6  implode	7  reserved
538	8  deflate	9  reserved
539	10 implode-pkware-dcl
540    }
541}
542
543proc ::zipfile::decode::CM {v} {
544    variable cm
545    return $cm($v)
546}
547
548# ### ### ### ######### ######### #########
549##
550
551namespace eval ::zipfile::decode {
552    variable  gbits
553    array set gbits {
554	0,1         encrypted
555	1,0,implode 4k-window
556	1,1,implode 8k-window
557	2,0,implode 2fano
558	2,1,implode 3fano
559	3,1         dd
560	5,1         patched
561
562	deflate,0 normal
563	deflate,1 maximum
564	deflate,2 fast
565	deflate,3 superfast
566   }
567}
568
569proc ::zipfile::decode::GPBF {v cm} {
570    variable gbits
571    set res {}
572
573    if {$cm eq "deflate"} {
574	# bit 1, 2 are treated together for deflate
575
576	lappend res $gbits($cm,[expr {($v >> 1) & 0x3}])
577    }
578
579    set bit 0
580    while {$v > 0} {
581	set odd [expr {$v % 2 == 1}]
582	if {[info exists gbits($bit,$odd,$cm)]} {
583	    lappend res $gbits($bit,$odd,$cm)
584	} elseif {[info exists gbits($bit,$odd)]} {
585	    lappend res $gbits($bit,$odd)
586	}
587	set v [expr {$v >> 1}]
588	incr bit
589    }
590
591    return $res
592}
593
594# ### ### ### ######### ######### #########
595
596proc ::zipfile::decode::ArchiveError {msg args} {
597    # Inlined "Error" -- Avoided eval/linsert dance
598    set code [linsert $args 0 ZIP DECODE BAD ARCHIVE]
599    return -code error -errorcode $code  "Bad zip file. $msg"
600}
601
602proc ::zipfile::decode::Error {msg args} {
603    set code [linsert $args 0 ZIP DECODE]
604    return -code error -errorcode $code $msg
605}
606
607# ### ### ### ######### ######### #########
608
609## Decode the zip file by locating its end (of the central file
610## header). The higher levels will then use the information
611## inside to locate and read the CFH. No scanning from the beginning
612## This piece of code lifted from tclvs/library/zipvfs (v 1.0.3).
613
614proc ::zipfile::decode::LocateEnd {path} {
615    set fd [::open $path r]
616    fconfigure $fd -translation binary ;#-buffering none
617
618    array set cb {}
619
620    # [SF Tclvfs Bug 1003574]. Do not seek over beginning of file.
621    seek $fd 0 end
622
623    # Just looking in the last 512 bytes may be enough to handle zip
624    # archives without comments, however for archives which have
625    # comments the chunk may start at an arbitrary distance from the
626    # end of the file. So if we do not find the header immediately we
627    # have to extend the range of our search, possibly until we have a
628    # large part of the archive in memory. We can fail only after the
629    # whole file has been searched.
630
631    set sz  [tell $fd]
632    set len 512
633    set at  512
634    while {1} {
635	if {$sz < $at} {set n -$sz} else {set n -$at}
636
637	seek $fd $n end
638	set hdr [read $fd $len]
639
640	# We are using 'string last' as we are searching the first
641	# from the end, which is the last from the beginning. See [SF
642	# Bug 2256740]. A zip archive stored in a zip archive can
643	# confuse the unmodified code, triggering on the magic
644	# sequence for the inner, uncompressed archive.
645
646	set pos [string last "PK\05\06" $hdr]
647	if {$pos == -1} {
648	    if {$at >= $sz} {
649		ArchiveError "No header found" HEADER MISSING
650	    }
651
652	    # after the 1st iteration we force an overlap with last
653	    # buffer to ensure that the pattern we look for is not
654	    # split at a buffer boundary, nor the header itself
655
656	    set len 540
657	    incr at 512
658	} else {
659	    break
660	}
661    }
662
663    set hdrlen [string length $hdr]
664    set hdr    [string range $hdr [expr {$pos + 4}] [expr {$pos + 21}]]
665    set pos    [expr {wide([tell $fd]) + $pos - $hdrlen}]
666
667    if {$pos < 0} {
668	set pos 0
669    }
670
671    binary scan $hdr ssssiis _ _ _ _ cb(csize) cb(coff) _
672
673    # Compute base for situations where ZIP file has been appended to
674    # another media (e.g. EXE). We can do this because
675    # (a) The expected location is stored in ECFH.   (-> cb(coff))
676    # (b) We know the actual location of EFCH.       (-> pos)
677    # (c) We know the size of CFH                    (-> cb(csize))
678    # (d) The CFH comes directly before the EFCH.
679    # (e) Items b...d provide us with the actual location of CFH, as (b)-(c).
680    # Thus the difference between (e) and (d) is the base in question.
681
682    set base [expr { $pos - $cb(csize) - $cb(coff) }]
683    if {$base < 0} {
684        set base 0
685    }
686    set cb(base) $base
687
688    if {$cb(coff) < 0} {
689	set cb(base) [expr {wide($cb(base)) - 4294967296}]
690	set cb(coff) [expr {wide($cb(coff)) + 4294967296}]
691    }
692
693    #--------------
694    ::close $fd
695    return [array get cb]
696}
697
698# ### ### ### ######### ######### #########
699## Ready
700package provide zipfile::decode 0.7.1
701return
702