1// Copyright 2010 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5/*
6Package zip provides support for reading and writing ZIP archives.
7
8See: https://www.pkware.com/appnote
9
10This package does not support disk spanning.
11
12A note about ZIP64:
13
14To be backwards compatible the FileHeader has both 32 and 64 bit Size
15fields. The 64 bit fields will always contain the correct value and
16for normal archives both fields will be the same. For files requiring
17the ZIP64 format the 32 bit fields will be 0xffffffff and the 64 bit
18fields must be used instead.
19*/
20package zip
21
22import (
23	"os"
24	"path"
25	"time"
26)
27
// Compression methods.
//
// These values have the same type as FileHeader.Method, whose
// documentation states that a zero Method means Store.
const (
	Store   uint16 = 0 // no compression
	Deflate uint16 = 8 // DEFLATE compressed
)
33
// Record signatures, fixed record lengths, creator codes, version numbers,
// size limits, and extra-field IDs used by the ZIP format.
const (
	fileHeaderSignature      = 0x04034b50
	directoryHeaderSignature = 0x02014b50
	directoryEndSignature    = 0x06054b50
	directory64LocSignature  = 0x07064b50
	directory64EndSignature  = 0x06064b50
	dataDescriptorSignature  = 0x08074b50 // de-facto standard; required by OS X Finder
	fileHeaderLen            = 30         // + filename + extra
	directoryHeaderLen       = 46         // + filename + extra + comment
	directoryEndLen          = 22         // + comment
	dataDescriptorLen        = 16         // four uint32: descriptor signature, crc32, compressed size, size
	dataDescriptor64Len      = 24         // descriptor with 8 byte sizes
	directory64LocLen        = 20         // length of the zip64 directory locator record
	directory64EndLen        = 56         // + extra

	// Constants for the first byte in CreatorVersion.
	creatorFAT    = 0
	creatorUnix   = 3
	creatorNTFS   = 11
	creatorVFAT   = 14
	creatorMacOSX = 19

	// Version numbers.
	zipVersion20 = 20 // 2.0
	zipVersion45 = 45 // 4.5 (reads and writes zip64 archives)

	// Limits for non zip64 files.
	uint16max = (1 << 16) - 1
	uint32max = (1 << 32) - 1

	// Extra header IDs.
	//
	// IDs 0..31 are reserved for official use by PKWARE.
	// IDs above that range are defined by third-party vendors.
	// Since ZIP lacked high precision timestamps (and an official
	// specification of the timezone used for the date fields), many competing
	// extra fields have been invented. Pervasive use effectively makes them
	// "official".
	//
	// See http://mdfs.net/Docs/Comp/Archiving/Zip/ExtraField
	zip64ExtraID       = 0x0001 // Zip64 extended information
	ntfsExtraID        = 0x000a // NTFS
	unixExtraID        = 0x000d // UNIX
	extTimeExtraID     = 0x5455 // Extended timestamp
	infoZipUnixExtraID = 0x5855 // Info-ZIP Unix extension
)
79
// FileHeader describes a file within a zip file.
// See the zip spec for details.
type FileHeader struct {
	// Name is the name of the file.
	//
	// It must be a relative path, not start with a drive letter (such as "C:"),
	// and must use forward slashes instead of back slashes. A trailing slash
	// indicates that this file is a directory and should have no data.
	//
	// When reading zip files, the Name field is populated from
	// the zip file directly and is not validated for correctness.
	// It is the caller's responsibility to sanitize it as
	// appropriate, including canonicalizing slash directions,
	// validating that paths are relative, and preventing path
	// traversal through filenames ("../../../").
	Name string

	// Comment is any arbitrary user-defined string shorter than 64KiB.
	Comment string

	// NonUTF8 indicates that Name and Comment are not encoded in UTF-8.
	//
	// By specification, the only other encoding permitted should be CP-437,
	// but historically many ZIP readers interpret Name and Comment as whatever
	// the system's local character encoding happens to be.
	//
	// This flag should only be set if the user intends to encode a non-portable
	// ZIP file for a specific localized region. Otherwise, the Writer
	// automatically sets the ZIP format's UTF-8 flag for valid UTF-8 strings.
	NonUTF8 bool

	// CreatorVersion identifies the creator of the entry. Mode and SetMode
	// use its upper byte (CreatorVersion >> 8) as the creating system
	// (one of the creator* constants) to decide how ExternalAttrs is
	// interpreted.
	CreatorVersion uint16
	ReaderVersion  uint16
	Flags          uint16

	// Method is the compression method. If zero, Store is used.
	Method uint16

	// Modified is the modified time of the file.
	//
	// When reading, an extended timestamp is preferred over the legacy MS-DOS
	// date field, and the offset between the times is used as the timezone.
	// If only the MS-DOS date is present, the timezone is assumed to be UTC.
	//
	// When writing, an extended timestamp (which is timezone-agnostic) is
	// always emitted. The legacy MS-DOS date field is encoded according to the
	// location of the Modified time.
	Modified     time.Time
	ModifiedTime uint16 // Deprecated: Legacy MS-DOS time; use Modified instead.
	ModifiedDate uint16 // Deprecated: Legacy MS-DOS date; use Modified instead.

	CRC32              uint32
	CompressedSize     uint32 // Deprecated: Use CompressedSize64 instead.
	UncompressedSize   uint32 // Deprecated: Use UncompressedSize64 instead.
	CompressedSize64   uint64
	UncompressedSize64 uint64
	Extra              []byte
	ExternalAttrs      uint32 // Meaning depends on CreatorVersion
}
139
140// FileInfo returns an os.FileInfo for the FileHeader.
141func (h *FileHeader) FileInfo() os.FileInfo {
142	return headerFileInfo{h}
143}
144
145// headerFileInfo implements os.FileInfo.
146type headerFileInfo struct {
147	fh *FileHeader
148}
149
150func (fi headerFileInfo) Name() string { return path.Base(fi.fh.Name) }
151func (fi headerFileInfo) Size() int64 {
152	if fi.fh.UncompressedSize64 > 0 {
153		return int64(fi.fh.UncompressedSize64)
154	}
155	return int64(fi.fh.UncompressedSize)
156}
157func (fi headerFileInfo) IsDir() bool        { return fi.Mode().IsDir() }
158func (fi headerFileInfo) ModTime() time.Time { return fi.fh.ModTime() }
159func (fi headerFileInfo) Mode() os.FileMode  { return fi.fh.Mode() }
160func (fi headerFileInfo) Sys() interface{}   { return fi.fh }
161
162// FileInfoHeader creates a partially-populated FileHeader from an
163// os.FileInfo.
164// Because os.FileInfo's Name method returns only the base name of
165// the file it describes, it may be necessary to modify the Name field
166// of the returned header to provide the full path name of the file.
167// If compression is desired, callers should set the FileHeader.Method
168// field; it is unset by default.
169func FileInfoHeader(fi os.FileInfo) (*FileHeader, error) {
170	size := fi.Size()
171	fh := &FileHeader{
172		Name:               fi.Name(),
173		UncompressedSize64: uint64(size),
174	}
175	fh.SetModTime(fi.ModTime())
176	fh.SetMode(fi.Mode())
177	if fh.UncompressedSize64 > uint32max {
178		fh.UncompressedSize = uint32max
179	} else {
180		fh.UncompressedSize = uint32(fh.UncompressedSize64)
181	}
182	return fh, nil
183}
184
// directoryEnd groups the values describing the end of a zip central
// directory: how many directory records there are, how large the directory
// is, where it starts, and the trailing comment.
// NOTE(review): the count, size and offset fields are 64 bit, presumably so
// the same struct can hold values from both the classic and the zip64 end
// records — confirm against the reader.
type directoryEnd struct {
	diskNbr            uint32 // unused
	dirDiskNbr         uint32 // unused
	dirRecordsThisDisk uint64 // unused
	directoryRecords   uint64
	directorySize      uint64
	directoryOffset    uint64 // relative to file
	commentLen         uint16
	comment            string
}
195
// timeZone builds an unnamed fixed *time.Location for the given UTC offset.
//
// The offset is first rounded to the nearest 15 minutes. If the rounded
// value falls outside the range -12:00 to +14:00 it is treated as
// non-sensible and an offset of zero is used instead.
func timeZone(offset time.Duration) *time.Location {
	const (
		granularity = 15 * time.Minute // E.g., Nepal at +5:45
		westernmost = -12 * time.Hour  // E.g., Baker island at -12:00
		easternmost = +14 * time.Hour  // E.g., Line island at +14:00
	)

	seconds := 0
	if rounded := offset.Round(granularity); rounded >= westernmost && rounded <= easternmost {
		seconds = int(rounded / time.Second)
	}
	return time.FixedZone("", seconds)
}
210
// msDosTimeToTime decodes an MS-DOS date and time pair into a time.Time
// in UTC. The resolution is 2s.
//
// Out-of-range field values (for example month 0 or second 62) are passed
// to time.Date unchanged, which normalizes them.
//
// See: https://msdn.microsoft.com/en-us/library/ms724247(v=VS.85).aspx
func msDosTimeToTime(dosDate, dosTime uint16) time.Time {
	// Date layout: bits 0-4 day of month, 5-8 month, 9-15 years since 1980.
	var (
		year  = int(1980 + dosDate>>9)
		month = time.Month((dosDate >> 5) & 0xf)
		day   = int(dosDate & 0x1f)
	)

	// Time layout: bits 0-4 second/2, 5-10 minute, 11-15 hour.
	var (
		hour   = int(dosTime >> 11)
		minute = int((dosTime >> 5) & 0x3f)
		second = int((dosTime & 0x1f) * 2)
	)

	return time.Date(year, month, day, hour, minute, second, 0, time.UTC)
}
230
// timeToMsDosTime encodes a time.Time as an MS-DOS date and time pair,
// using the calendar fields of t in t's own location.
// The resolution is 2s, so odd seconds are rounded down.
//
// No range checking is done: the packed values are simply truncated to
// 16 bits.
//
// See: https://msdn.microsoft.com/en-us/library/ms724274(v=VS.85).aspx
func timeToMsDosTime(t time.Time) (fDate uint16, fTime uint16) {
	year, month, day := t.Date()
	hour, minute, second := t.Clock()

	// Date layout: bits 0-4 day of month, 5-8 month, 9-15 years since 1980.
	fDate = uint16((year-1980)<<9 + int(month)<<5 + day)
	// Time layout: bits 0-4 second/2, 5-10 minute, 11-15 hour.
	fTime = uint16(hour<<11 + minute<<5 + second/2)
	return fDate, fTime
}
239
240// ModTime returns the modification time in UTC using the legacy
241// ModifiedDate and ModifiedTime fields.
242//
243// Deprecated: Use Modified instead.
244func (h *FileHeader) ModTime() time.Time {
245	return msDosTimeToTime(h.ModifiedDate, h.ModifiedTime)
246}
247
248// SetModTime sets the Modified, ModifiedTime, and ModifiedDate fields
249// to the given time in UTC.
250//
251// Deprecated: Use Modified instead.
252func (h *FileHeader) SetModTime(t time.Time) {
253	t = t.UTC() // Convert to UTC for compatibility
254	h.Modified = t
255	h.ModifiedDate, h.ModifiedTime = timeToMsDosTime(t)
256}
257
// Mode bit constants used when translating between os.FileMode and the
// ExternalAttrs field of a FileHeader.
const (
	// Unix constants. The specification doesn't mention them,
	// but these seem to be the values agreed on by tools.
	s_IFMT   = 0xf000 // mask selecting the file type bits below
	s_IFSOCK = 0xc000 // socket
	s_IFLNK  = 0xa000 // symbolic link
	s_IFREG  = 0x8000 // regular file
	s_IFBLK  = 0x6000 // block device
	s_IFDIR  = 0x4000 // directory
	s_IFCHR  = 0x2000 // character device
	s_IFIFO  = 0x1000 // named pipe
	s_ISUID  = 0x800  // setuid
	s_ISGID  = 0x400  // setgid
	s_ISVTX  = 0x200  // sticky

	// MS-DOS attribute bits, kept in the low bits of ExternalAttrs.
	msdosDir      = 0x10
	msdosReadOnly = 0x01
)
276
277// Mode returns the permission and mode bits for the FileHeader.
278func (h *FileHeader) Mode() (mode os.FileMode) {
279	switch h.CreatorVersion >> 8 {
280	case creatorUnix, creatorMacOSX:
281		mode = unixModeToFileMode(h.ExternalAttrs >> 16)
282	case creatorNTFS, creatorVFAT, creatorFAT:
283		mode = msdosModeToFileMode(h.ExternalAttrs)
284	}
285	if len(h.Name) > 0 && h.Name[len(h.Name)-1] == '/' {
286		mode |= os.ModeDir
287	}
288	return mode
289}
290
291// SetMode changes the permission and mode bits for the FileHeader.
292func (h *FileHeader) SetMode(mode os.FileMode) {
293	h.CreatorVersion = h.CreatorVersion&0xff | creatorUnix<<8
294	h.ExternalAttrs = fileModeToUnixMode(mode) << 16
295
296	// set MSDOS attributes too, as the original zip does.
297	if mode&os.ModeDir != 0 {
298		h.ExternalAttrs |= msdosDir
299	}
300	if mode&0200 == 0 {
301		h.ExternalAttrs |= msdosReadOnly
302	}
303}
304
305// isZip64 reports whether the file size exceeds the 32 bit limit
306func (h *FileHeader) isZip64() bool {
307	return h.CompressedSize64 >= uint32max || h.UncompressedSize64 >= uint32max
308}
309
310func msdosModeToFileMode(m uint32) (mode os.FileMode) {
311	if m&msdosDir != 0 {
312		mode = os.ModeDir | 0777
313	} else {
314		mode = 0666
315	}
316	if m&msdosReadOnly != 0 {
317		mode &^= 0222
318	}
319	return mode
320}
321
322func fileModeToUnixMode(mode os.FileMode) uint32 {
323	var m uint32
324	switch mode & os.ModeType {
325	default:
326		m = s_IFREG
327	case os.ModeDir:
328		m = s_IFDIR
329	case os.ModeSymlink:
330		m = s_IFLNK
331	case os.ModeNamedPipe:
332		m = s_IFIFO
333	case os.ModeSocket:
334		m = s_IFSOCK
335	case os.ModeDevice:
336		if mode&os.ModeCharDevice != 0 {
337			m = s_IFCHR
338		} else {
339			m = s_IFBLK
340		}
341	}
342	if mode&os.ModeSetuid != 0 {
343		m |= s_ISUID
344	}
345	if mode&os.ModeSetgid != 0 {
346		m |= s_ISGID
347	}
348	if mode&os.ModeSticky != 0 {
349		m |= s_ISVTX
350	}
351	return m | uint32(mode&0777)
352}
353
354func unixModeToFileMode(m uint32) os.FileMode {
355	mode := os.FileMode(m & 0777)
356	switch m & s_IFMT {
357	case s_IFBLK:
358		mode |= os.ModeDevice
359	case s_IFCHR:
360		mode |= os.ModeDevice | os.ModeCharDevice
361	case s_IFDIR:
362		mode |= os.ModeDir
363	case s_IFIFO:
364		mode |= os.ModeNamedPipe
365	case s_IFLNK:
366		mode |= os.ModeSymlink
367	case s_IFREG:
368		// nothing to do
369	case s_IFSOCK:
370		mode |= os.ModeSocket
371	}
372	if m&s_ISGID != 0 {
373		mode |= os.ModeSetgid
374	}
375	if m&s_ISUID != 0 {
376		mode |= os.ModeSetuid
377	}
378	if m&s_ISVTX != 0 {
379		mode |= os.ModeSticky
380	}
381	return mode
382}
383