1# -*- tcl -*- 2# ### ### ### ######### ######### ######### 3## Copyright (c) 2008-2012 ActiveState Software Inc., Andreas Kupries 4## 2016 Andreas Kupries 5## BSD License 6## 7# Package providing commands for the decoding of basic zip-file 8# structures. 9 10package require Tcl 8.4 11package require fileutil::magic::filetype ; # Tcllib. File type determination via magic constants 12package require fileutil::decode 0.2.1 ; # Framework for easy decoding of files. 13namespace eval ::zipfile::decode {} 14if {[package vcompare $tcl_patchLevel "8.6"] < 0} { 15 # Only needed pre-8.6 16 package require Trf ; # Wrapper to zlib 17 package require zlibtcl ; # Zlib usage. No commands, access through Trf 18 set ::zipfile::decode::native_zip_functs 0 19} else { 20 set ::zipfile::decode::native_zip_functs 1 21} 22namespace eval ::zipfile::decode { 23 namespace import ::fileutil::decode::* 24} 25 26# ### ### ### ######### ######### ######### 27## Convenience command, decode and copy to dir 28 29proc ::zipfile::decode::unzipfile {in out} { 30 zipfile::decode::open $in 31 set zd [zipfile::decode::archive] 32 zipfile::decode::unzip $zd $out 33 zipfile::decode::close 34 return 35} 36 37## Convenience command, decode and return list of contained paths. 38proc ::zipfile::decode::content {in} { 39 zipfile::decode::open $in 40 set zd [zipfile::decode::archive] 41 set f [files $zd] 42 zipfile::decode::close 43 return $f 44} 45 46# ### ### ### ######### ######### ######### 47## 48 49proc ::zipfile::decode::iszip {fname} { 50 if {[catch { 51 LocateEnd $fname 52 } msg]} { 53 return 0 54 } 55 return 1 56} 57 58proc ::zipfile::decode::open {fname} { 59 variable eoa 60 if {[catch { 61 set eoa [LocateEnd $fname] 62 } msg]} { 63 Error "\"$fname\" is not a zip file" BAD ARCHIVE 64 } 65 fileutil::decode::open $fname 66 return 67} 68 69proc ::zipfile::decode::close {} { 70 variable eoa 71 unset eoa 72 fileutil::decode::close 73 return 74} 75 76# ### ### ### ######### ######### ######### 77## 78 79proc ::zipfile::decode::comment {zdict} { 80 array set _ $zdict 81 return $_(comment) 82} 83 84proc ::zipfile::decode::files {zdict} { 85 array set _ $zdict 86 array set f $_(files) 87 return [array names f] 88} 89 90proc ::zipfile::decode::hasfile {zdict fname} { 91 array set _ $zdict 92 array set f $_(files) 93 return [info exists f($fname)] 94} 95 96proc ::zipfile::decode::copyfile {zdict src dst} { 97 array set _ $zdict 98 array set f $_(files) 99 100 if {![info exists f($src)]} { 101 Error "File \"$src\" not known" BAD PATH 102 } 103 104 array set fd $f($src) 105 CopyFile $src fd $dst 106 return 107} 108 109proc ::zipfile::decode::getfile {zdict src} { 110 array set _ $zdict 111 array set f $_(files) 112 113 if {![info exists f($src)]} { 114 Error "File \"$src\" not known" BAD PATH 115 } 116 117 array set fd $f($src) 118 return [GetFile $src fd] 119} 120 121proc ::zipfile::decode::unzip {zdict dst} { 122 array set _ $zdict 123 array set f $_(files) 124 125 foreach src [array names f] { 126 array set fd $f($src) 127 CopyFile $src fd [file join $dst $src] 128 129 unset fd 130 } 131 return 132} 133 134proc ::zipfile::decode::CopyFile {src fdv dst} { 135 upvar 1 $fdv fd 136 137 file mkdir [file dirname $dst] 138 139 if {[string match */ $src]} { 140 # Entry is a directory. Just create. 141 file mkdir $dst 142 return 143 } 144 145 # Create files. Empty files are a special case, we have 146 # nothing to decompress. 147 148 if {$fd(ucsize) == 0} { 149 ::close [::open $dst w] ; # touch 150 return 151 } 152 153 # non-empty files, work depends on type of compression. 154 155 switch -exact -- $fd(cm) { 156 uncompressed { 157 go $fd(fileloc) 158 nbytes $fd(csize) 159 160 set out [::open $dst w] 161 fconfigure $out -translation binary -encoding binary -eofchar {} 162 puts -nonewline $out [getval] 163 ::close $out 164 } 165 deflate { 166 go $fd(fileloc) 167 nbytes $fd(csize) 168 169 set out [::open $dst w] 170 fconfigure $out -translation binary -encoding binary -eofchar {} 171 if {$::zipfile::decode::native_zip_functs} { 172 puts -nonewline $out \ 173 [zlib inflate [getval]] 174 } else { 175 puts -nonewline $out \ 176 [zip -mode decompress -nowrap 1 -- \ 177 [getval]] 178 } 179 ::close $out 180 } 181 default { 182 Error "Unable to handle file \"$src\" compressed with method \"$fd(cm)\"" \ 183 BAD COMPRESSION 184 } 185 } 186 187 if { 188 ($::tcl_platform(platform) ne "windows") && 189 ($fd(efattr) != 0) 190 } { 191 # On unix take the permissions encoded in the external 192 # attributes and apply them to the new file. If there are 193 # permission. A value of 0 indicates an older teabag where 194 # the encoder did not yet support permissions. These we do not 195 # change from the sustem defaults. Permissions are in the 196 # lower 9 bits of the MSW. 197 198 file attributes $dst -permissions \ 199 [string map {0 --- 1 --x 2 -w- 3 -wx 4 r-- 5 r-x 6 rw- 7 rwx} \ 200 [format %03o [expr {($fd(efattr) >> 16) & 0x1ff}]]] 201 } 202 203 # FUTURE: Run crc checksum on created file and compare to the 204 # ......: stored information. 205 206 return 207} 208 209proc ::zipfile::decode::GetFile {src fdv} { 210 # See also CopyFile for similar code. 211 # TODO: Check with CopyFile for refactoring opportunity 212 213 upvar 1 $fdv fd 214 215 # Entry is a directory. 216 if {[string match */ $src]} {return {}} 217 218 # Empty files are a special case, we have 219 # nothing to decompress. 220 221 if {$fd(ucsize) == 0} {return {}} 222 223 # non-empty files, work depends on type of compression. 224 225 switch -exact -- $fd(cm) { 226 uncompressed { 227 go $fd(fileloc) 228 nbytes $fd(csize) 229 return [getval] 230 } 231 deflate { 232 go $fd(fileloc) 233 nbytes $fd(csize) 234 return [zip -mode decompress -nowrap 1 -- [getval]] 235 } 236 default { 237 Error "Unable to handle file \"$src\" compressed with method \"$fd(cm)\"" \ 238 BAD COMPRESSION 239 } 240 } 241 242 # FUTURE: Run crc checksum on created file and compare to the 243 # ......: stored information. 244 245 return {} 246} 247 248# ### ### ### ######### ######### ######### 249## 250 251proc ::zipfile::decode::tag {etag} { 252 mark 253 long-le 254 return [match 0x${etag}4b50] ; # 'PK x y', little-endian integer. 255} 256 257proc ::zipfile::decode::localfileheader {} { 258 clear 259 putloc @ 260 if {![tag 0403]} {clear ; return 0} 261 262 short-le ; unsigned ; recode VER ; put vnte ; # version needed to extract 263 short-le ; unsigned ; put gpbf ; # general purpose bitflag 264 short-le ; unsigned ; recode CM ; put cm ; # compression method 265 short-le ; unsigned ; put lmft ; # last mod file time 266 short-le ; unsigned ; put lmfd ; # last mod file date 267 long-le ; unsigned ; put crc ; # crc32 | zero's here imply non-seekable, 268 long-le ; unsigned ; put csize ; # compressed file size | data is in a DDS behind the stored 269 long-le ; unsigned ; put ucsize ; # uncompressed file size | file. 270 short-le ; unsigned ; put fnamelen ; # file name length 271 short-le ; unsigned ; put efieldlen ; # extra field length 272 273 array set hdr [get] 274 clear 275 276 nbytes $hdr(fnamelen) ; put fname 277 putloc efieldloc 278 skip $hdr(efieldlen) 279 putloc fileloc 280 281 array set hdr [get] 282 clear 283 284 set hdr(gpbf) [GPBF $hdr(gpbf) $hdr(cm)] 285 setbuf [array get hdr] 286 return 1 287} 288 289proc ::zipfile::decode::centralfileheader {} { 290 clear 291 putloc @ 292 if {![tag 0201]} {clear ; return 0} 293 294 # The items marked with ++ do not exist in the local file 295 # header. Everything else exists in the local file header as well, 296 # and has to match that information. 297 298 clear 299 short-le ; unsigned ; recode VER ; put vmb ; # ++ version made by 300 short-le ; unsigned ; recode VER ; put vnte ; # version needed to extract 301 short-le ; unsigned ; put gpbf ; # general purpose bitflag 302 short-le ; unsigned ; recode CM ; put cm ; # compression method 303 short-le ; unsigned ; put lmft ; # last mod file time 304 short-le ; unsigned ; put lmfd ; # last mod file date 305 long-le ; unsigned ; put crc ; # crc32 | zero's here imply non-seekable, 306 long-le ; unsigned ; put csize ; # compressed file size | data is in a DDS behind the stored 307 long-le ; unsigned ; put ucsize ; # uncompressed file size | file. 308 short-le ; unsigned ; put fnamelen ; # file name length 309 short-le ; unsigned ; put efieldlen2 ; # extra field length 310 short-le ; unsigned ; put fcommentlen ; # ++ file comment length 311 short-le ; unsigned ; put dns ; # ++ disk number start 312 short-le ; unsigned ; recode IFA ; put ifattr ; # ++ internal file attributes 313 long-le ; unsigned ; put efattr ; # ++ external file attributes 314 long-le ; unsigned ; put localloc ; # ++ relative offset of local file header 315 316 array set hdr [get] 317 clear 318 319 nbytes $hdr(fnamelen) ; put fname 320 putloc efieldloc2 321 skip $hdr(efieldlen2) 322 nbytes $hdr(fcommentlen) ; put comment 323 324 array set hdr [get] 325 clear 326 327 set hdr(gpbf) [GPBF $hdr(gpbf) $hdr(cm)] 328 setbuf [array get hdr] 329 return 1 330} 331 332## NOT USED 333proc ::zipfile::decode::datadescriptor {} { 334 if {![tag 0807]} {return 0} 335 336 clear 337 long-le ; unsigned ; put crc ; # crc32 338 long-le ; unsigned ; put csize ; # compressed file size 339 long-le ; unsigned ; put ucsize ; # uncompressed file size 340 341 return 1 342} 343 344proc ::zipfile::decode::endcentralfiledir {} { 345 clear 346 putloc ecdloc 347 if {![tag 0605]} {clear ; return 0} 348 349 short-le ; unsigned ; put nd ; # 350 short-le ; unsigned ; put ndscd ; # 351 short-le ; unsigned ; put tnecdd ; # 352 short-le ; unsigned ; put tnecd ; # 353 long-le ; unsigned ; put sizecd ; # 354 long-le ; unsigned ; put ocd ; # 355 short-le ; unsigned ; put commentlen ; # archive comment length 356 357 array set hdr [get] ; clear 358 359 nbytes $hdr(commentlen) ; put comment 360 361 array set hdr [get] ; clear 362 363 setbuf [array get hdr] 364 return 1 365} 366 367## NOT USED 368proc ::zipfile::decode::afile {} { 369 if {![localfileheader]} {return 0} 370 371 array set hdr [get] 372 if {($hdr(ucsize) == 0) || ($hdr(csize) > 0)} { 373 # The header entry specifies either 374 # 1. A zero-length file (possibly a directory entry), or 375 # 2. a non-empty file (compressed size > 0). 376 # In both cases we can skip the file contents directly. 377 # In both cases there should be no data descriptor behind 378 # we contents, but we check nevertheless. If there is its 379 # data overrides the current size and crc info. 380 381 skip $hdr(csize) 382 383 if {[datadescriptor]} { 384 array set hdr [get] 385 set hdr(ddpresent) 1 386 setbuf [array get hdr] 387 } 388 } else { 389 Error "Search data descriptor. Not Yet Implemented" INCOMPLETE 390 } 391 return 1 392} 393 394proc ::zipfile::decode::archive {} { 395 variable eoa 396 array set cb $eoa 397 398 # Position us at the beginning of CFH, using the data provided to 399 # us by 'LocateEnd', called during 'open'. 400 401 go [expr {$cb(base) + $cb(coff)}] 402 403 array set fn {} 404 405 set nentries 0 406 while {[centralfileheader]} { 407 array set _ [set data [get]] ; clear 408 409 #parray _ 410 411 # Use the information found in the CFH entry to locate and 412 # read the associated LFH. We explicitly remember where we are 413 # in the file because mark/rewind is only one level and the 414 # LFH command already used that up. 415 416 set here [at] 417 go [expr {$cb(base) + $_(localloc)}] 418 if {![localfileheader]} { 419 ArchiveError "Directory entry without file." DIR WITHOUT FILE 420 } 421 422 array set lh [get] ; clear 423 go $here 424 425 # Compare the information in the CFH entry and associated 426 # LFH. Should match. 427 428 if {![hdrmatch lh _]} { 429 ArchiveError "File/Dir Header mismatch." HEADER MISMATCH FILE/DIR 430 } 431 432 # Merge local and central data. 433 array set lh $data 434 435 set fn($_(fname)) [array get lh] 436 unset lh _ 437 incr nentries 438 } 439 440 if {![endcentralfiledir]} { 441 ArchiveError "Bad closure." BAD CLOSURE 442 } 443 444 array set _ [get] ; clear 445 446 #parray _ 447 #puts \#$nentries 448 449 if {$nentries != $_(tnecd)} { 450 ArchiveError "\#Files ($_(tnecd)) does not match \#Actual files ($nentries)" \ 451 MISMATCH COUNTS 452 } 453 454 set _(files) [array get fn] 455 return [array get _] 456} 457 458proc ::zipfile::decode::hdrmatch {lhv chv} { 459 upvar 1 $lhv lh $chv ch 460 461 #puts ______________________________________________ 462 #parray lh 463 #parray ch 464 465 foreach key { 466 vnte gpbf cm lmft lmfd fnamelen fname 467 } { 468 if {$lh($key) != $ch($key)} {return 0} 469 } 470 471 if {[lsearch -exact $lh(gpbf) dd] < 0} { 472 # Compare the central and local size information only if the 473 # latter is not provided by a DDS. Which we haven't read. 474 # Because in that case the LFH information is uniformly 0, not 475 # known at the time of writing. 476 477 foreach key { 478 crc csize ucsize 479 } { 480 if {$lh($key) != $ch($key)} {return 0} 481 } 482 } 483 484 return 1 485} 486 487 488# ### ### ### ######### ######### ######### 489## 490 491proc ::zipfile::decode::IFA {v} { 492 if {$v & 0x1} { 493 return text 494 } else { 495 return binary 496 } 497} 498 499# ### ### ### ######### ######### ######### 500## 501 502namespace eval ::zipfile::decode { 503 variable vhost 504 array set vhost { 505 0 FAT 1 Amiga 506 2 VMS 3 Unix 507 4 VM/CMS 5 Atari 508 6 HPFS 7 Macintosh 509 8 Z-System 9 CP/M 510 10 TOPS-20 11 NTFS 511 12 SMS/QDOS 13 {Acorn RISC OS} 512 14 VFAT 15 MVS 513 16 BeOS 17 Tandem 514 } 515} 516 517proc ::zipfile::decode::VER {v} { 518 variable vhost 519 set u [expr {($v & 0xff00) >> 16}] 520 set l [expr {($v & 0x00ff)}] 521 522 set major [expr {$l / 10}] 523 set minor [expr {$l % 10}] 524 525 return [list $vhost($u) ${major}.$minor] 526} 527 528# ### ### ### ######### ######### ######### 529## 530 531namespace eval ::zipfile::decode { 532 variable cm 533 array set cm { 534 0 uncompressed 1 shrink 535 2 {reduce 1} 3 {reduce 2} 536 4 {reduce 3} 5 {reduce 4} 537 6 implode 7 reserved 538 8 deflate 9 reserved 539 10 implode-pkware-dcl 540 } 541} 542 543proc ::zipfile::decode::CM {v} { 544 variable cm 545 return $cm($v) 546} 547 548# ### ### ### ######### ######### ######### 549## 550 551namespace eval ::zipfile::decode { 552 variable gbits 553 array set gbits { 554 0,1 encrypted 555 1,0,implode 4k-window 556 1,1,implode 8k-window 557 2,0,implode 2fano 558 2,1,implode 3fano 559 3,1 dd 560 5,1 patched 561 562 deflate,0 normal 563 deflate,1 maximum 564 deflate,2 fast 565 deflate,3 superfast 566 } 567} 568 569proc ::zipfile::decode::GPBF {v cm} { 570 variable gbits 571 set res {} 572 573 if {$cm eq "deflate"} { 574 # bit 1, 2 are treated together for deflate 575 576 lappend res $gbits($cm,[expr {($v >> 1) & 0x3}]) 577 } 578 579 set bit 0 580 while {$v > 0} { 581 set odd [expr {$v % 2 == 1}] 582 if {[info exists gbits($bit,$odd,$cm)]} { 583 lappend res $gbits($bit,$odd,$cm) 584 } elseif {[info exists gbits($bit,$odd)]} { 585 lappend res $gbits($bit,$odd) 586 } 587 set v [expr {$v >> 1}] 588 incr bit 589 } 590 591 return $res 592} 593 594# ### ### ### ######### ######### ######### 595 596proc ::zipfile::decode::ArchiveError {msg args} { 597 # Inlined "Error" -- Avoided eval/linsert dance 598 set code [linsert $args 0 ZIP DECODE BAD ARCHIVE] 599 return -code error -errorcode $code "Bad zip file. $msg" 600} 601 602proc ::zipfile::decode::Error {msg args} { 603 set code [linsert $args 0 ZIP DECODE] 604 return -code error -errorcode $code $msg 605} 606 607# ### ### ### ######### ######### ######### 608 609## Decode the zip file by locating its end (of the central file 610## header). The higher levels will then use the information 611## inside to locate and read the CFH. No scanning from the beginning 612## This piece of code lifted from tclvs/library/zipvfs (v 1.0.3). 613 614proc ::zipfile::decode::LocateEnd {path} { 615 set fd [::open $path r] 616 fconfigure $fd -translation binary ;#-buffering none 617 618 array set cb {} 619 620 # [SF Tclvfs Bug 1003574]. Do not seek over beginning of file. 621 seek $fd 0 end 622 623 # Just looking in the last 512 bytes may be enough to handle zip 624 # archives without comments, however for archives which have 625 # comments the chunk may start at an arbitrary distance from the 626 # end of the file. So if we do not find the header immediately we 627 # have to extend the range of our search, possibly until we have a 628 # large part of the archive in memory. We can fail only after the 629 # whole file has been searched. 630 631 set sz [tell $fd] 632 set len 512 633 set at 512 634 while {1} { 635 if {$sz < $at} {set n -$sz} else {set n -$at} 636 637 seek $fd $n end 638 set hdr [read $fd $len] 639 640 # We are using 'string last' as we are searching the first 641 # from the end, which is the last from the beginning. See [SF 642 # Bug 2256740]. A zip archive stored in a zip archive can 643 # confuse the unmodified code, triggering on the magic 644 # sequence for the inner, uncompressed archive. 645 646 set pos [string last "PK\05\06" $hdr] 647 if {$pos == -1} { 648 if {$at >= $sz} { 649 ArchiveError "No header found" HEADER MISSING 650 } 651 652 # after the 1st iteration we force an overlap with last 653 # buffer to ensure that the pattern we look for is not 654 # split at a buffer boundary, nor the header itself 655 656 set len 540 657 incr at 512 658 } else { 659 break 660 } 661 } 662 663 set hdrlen [string length $hdr] 664 set hdr [string range $hdr [expr {$pos + 4}] [expr {$pos + 21}]] 665 set pos [expr {wide([tell $fd]) + $pos - $hdrlen}] 666 667 if {$pos < 0} { 668 set pos 0 669 } 670 671 binary scan $hdr ssssiis _ _ _ _ cb(csize) cb(coff) _ 672 673 # Compute base for situations where ZIP file has been appended to 674 # another media (e.g. EXE). We can do this because 675 # (a) The expected location is stored in ECFH. (-> cb(coff)) 676 # (b) We know the actual location of EFCH. (-> pos) 677 # (c) We know the size of CFH (-> cb(csize)) 678 # (d) The CFH comes directly before the EFCH. 679 # (e) Items b...d provide us with the actual location of CFH, as (b)-(c). 680 # Thus the difference between (e) and (d) is the base in question. 681 682 set base [expr { $pos - $cb(csize) - $cb(coff) }] 683 if {$base < 0} { 684 set base 0 685 } 686 set cb(base) $base 687 688 if {$cb(coff) < 0} { 689 set cb(base) [expr {wide($cb(base)) - 4294967296}] 690 set cb(coff) [expr {wide($cb(coff)) + 4294967296}] 691 } 692 693 #-------------- 694 ::close $fd 695 return [array get cb] 696} 697 698# ### ### ### ######### ######### ######### 699## Ready 700package provide zipfile::decode 0.7.1 701return 702