1// Copyright 2010 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5/*
6Package zip provides support for reading and writing ZIP archives.
7
8See: https://www.pkware.com/appnote
9
10This package does not support disk spanning.
11
12A note about ZIP64:
13
14To be backwards compatible the FileHeader has both 32 and 64 bit Size
15fields. The 64 bit fields will always contain the correct value and
16for normal archives both fields will be the same. For files requiring
17the ZIP64 format the 32 bit fields will be 0xffffffff and the 64 bit
18fields must be used instead.
19*/
20package zip
21
22import (
23	"io/fs"
24	"path"
25	"time"
26)
27
// Compression methods.
//
// These are the values carried in FileHeader.Method; the zero value
// selects Store.
const (
	Store   uint16 = 0 // no compression
	Deflate uint16 = 8 // DEFLATE compressed
)
33
// Record signatures, fixed record lengths (in bytes, excluding the
// variable-length parts noted on each line), creator identifiers,
// version numbers, size limits and extra-field IDs.
const (
	fileHeaderSignature      = 0x04034b50
	directoryHeaderSignature = 0x02014b50
	directoryEndSignature    = 0x06054b50
	directory64LocSignature  = 0x07064b50
	directory64EndSignature  = 0x06064b50
	dataDescriptorSignature  = 0x08074b50 // de-facto standard; required by OS X Finder
	fileHeaderLen            = 30         // + filename + extra
	directoryHeaderLen       = 46         // + filename + extra + comment
	directoryEndLen          = 22         // + comment
	dataDescriptorLen        = 16         // four uint32: descriptor signature, crc32, compressed size, size
	dataDescriptor64Len      = 24         // descriptor with 8 byte sizes
	directory64LocLen        = 20         //
	directory64EndLen        = 56         // + extra

	// Constants for the first byte in CreatorVersion.
	// FileHeader.Mode compares them against CreatorVersion >> 8.
	creatorFAT    = 0
	creatorUnix   = 3
	creatorNTFS   = 11
	creatorVFAT   = 14
	creatorMacOSX = 19

	// Version numbers.
	zipVersion20 = 20 // 2.0
	zipVersion45 = 45 // 4.5 (reads and writes zip64 archives)

	// Limits for non zip64 files.
	uint16max = (1 << 16) - 1
	uint32max = (1 << 32) - 1

	// Extra header IDs.
	//
	// IDs 0..31 are reserved for official use by PKWARE.
	// IDs above that range are defined by third-party vendors.
	// Since ZIP lacked high precision timestamps (nor an official specification
	// of the timezone used for the date fields), many competing extra fields
	// have been invented. Pervasive use effectively makes them "official".
	//
	// See http://mdfs.net/Docs/Comp/Archiving/Zip/ExtraField
	zip64ExtraID       = 0x0001 // Zip64 extended information
	ntfsExtraID        = 0x000a // NTFS
	unixExtraID        = 0x000d // UNIX
	extTimeExtraID     = 0x5455 // Extended timestamp
	infoZipUnixExtraID = 0x5855 // Info-ZIP Unix extension
)
79
// FileHeader describes a file within a zip file.
// See the zip spec for details.
type FileHeader struct {
	// Name is the name of the file.
	//
	// It must be a relative path, not start with a drive letter (such as "C:"),
	// and must use forward slashes instead of back slashes. A trailing slash
	// indicates that this file is a directory and should have no data.
	//
	// When reading zip files, the Name field is populated from
	// the zip file directly and is not validated for correctness.
	// It is the caller's responsibility to sanitize it as
	// appropriate, including canonicalizing slash directions,
	// validating that paths are relative, and preventing path
	// traversal through filenames ("../../../").
	Name string

	// Comment is any arbitrary user-defined string shorter than 64KiB.
	Comment string

	// NonUTF8 indicates that Name and Comment are not encoded in UTF-8.
	//
	// By specification, the only other encoding permitted should be CP-437,
	// but historically many ZIP readers interpret Name and Comment as whatever
	// the system's local character encoding happens to be.
	//
	// This flag should only be set if the user intends to encode a non-portable
	// ZIP file for a specific localized region. Otherwise, the Writer
	// automatically sets the ZIP format's UTF-8 flag for valid UTF-8 strings.
	NonUTF8 bool

	// The upper byte of CreatorVersion identifies the creating system.
	// Mode uses it to decide how ExternalAttrs is interpreted, and
	// SetMode overwrites it with the Unix creator value.
	CreatorVersion uint16
	ReaderVersion  uint16
	Flags          uint16

	// Method is the compression method. If zero, Store is used.
	Method uint16

	// Modified is the modified time of the file.
	//
	// When reading, an extended timestamp is preferred over the legacy MS-DOS
	// date field, and the offset between the times is used as the timezone.
	// If only the MS-DOS date is present, the timezone is assumed to be UTC.
	//
	// When writing, an extended timestamp (which is timezone-agnostic) is
	// always emitted. The legacy MS-DOS date field is encoded according to the
	// location of the Modified time.
	Modified     time.Time
	ModifiedTime uint16 // Deprecated: Legacy MS-DOS time; use Modified instead.
	ModifiedDate uint16 // Deprecated: Legacy MS-DOS date; use Modified instead.

	CRC32              uint32
	CompressedSize     uint32 // Deprecated: Use CompressedSize64 instead.
	UncompressedSize   uint32 // Deprecated: Use UncompressedSize64 instead.
	CompressedSize64   uint64
	UncompressedSize64 uint64
	Extra              []byte
	ExternalAttrs      uint32 // Meaning depends on CreatorVersion
}
139
140// FileInfo returns an fs.FileInfo for the FileHeader.
141func (h *FileHeader) FileInfo() fs.FileInfo {
142	return headerFileInfo{h}
143}
144
145// headerFileInfo implements fs.FileInfo.
146type headerFileInfo struct {
147	fh *FileHeader
148}
149
150func (fi headerFileInfo) Name() string { return path.Base(fi.fh.Name) }
151func (fi headerFileInfo) Size() int64 {
152	if fi.fh.UncompressedSize64 > 0 {
153		return int64(fi.fh.UncompressedSize64)
154	}
155	return int64(fi.fh.UncompressedSize)
156}
157func (fi headerFileInfo) IsDir() bool { return fi.Mode().IsDir() }
158func (fi headerFileInfo) ModTime() time.Time {
159	if fi.fh.Modified.IsZero() {
160		return fi.fh.ModTime()
161	}
162	return fi.fh.Modified.UTC()
163}
164func (fi headerFileInfo) Mode() fs.FileMode { return fi.fh.Mode() }
165func (fi headerFileInfo) Type() fs.FileMode { return fi.fh.Mode().Type() }
166func (fi headerFileInfo) Sys() interface{}  { return fi.fh }
167
168func (fi headerFileInfo) Info() (fs.FileInfo, error) { return fi, nil }
169
170// FileInfoHeader creates a partially-populated FileHeader from an
171// fs.FileInfo.
172// Because fs.FileInfo's Name method returns only the base name of
173// the file it describes, it may be necessary to modify the Name field
174// of the returned header to provide the full path name of the file.
175// If compression is desired, callers should set the FileHeader.Method
176// field; it is unset by default.
177func FileInfoHeader(fi fs.FileInfo) (*FileHeader, error) {
178	size := fi.Size()
179	fh := &FileHeader{
180		Name:               fi.Name(),
181		UncompressedSize64: uint64(size),
182	}
183	fh.SetModTime(fi.ModTime())
184	fh.SetMode(fi.Mode())
185	if fh.UncompressedSize64 > uint32max {
186		fh.UncompressedSize = uint32max
187	} else {
188		fh.UncompressedSize = uint32(fh.UncompressedSize64)
189	}
190	return fh, nil
191}
192
// directoryEnd groups the values describing where the directory of
// records lives and how large it is.
// NOTE(review): presumably filled in from the end-of-central-directory
// record (and its zip64 counterpart); the code that populates it is not
// part of this section, so confirm against the reader.
type directoryEnd struct {
	diskNbr            uint32 // unused
	dirDiskNbr         uint32 // unused
	dirRecordsThisDisk uint64 // unused
	directoryRecords   uint64
	directorySize      uint64
	directoryOffset    uint64 // relative to file
	commentLen         uint16
	comment            string
}
203
// timeZone builds an unnamed fixed *time.Location for the given offset.
//
// The offset is first rounded to the nearest quarter hour. If the
// rounded value lies outside the range -12:00 through +14:00, it is
// treated as nonsensical and a zero offset is used instead.
func timeZone(offset time.Duration) *time.Location {
	const (
		earliest    = -12 * time.Hour  // e.g., Baker island at -12:00
		latest      = +14 * time.Hour  // e.g., Line island at +14:00
		granularity = 15 * time.Minute // e.g., Nepal at +5:45
	)
	rounded := offset.Round(granularity)
	if earliest <= rounded && rounded <= latest {
		return time.FixedZone("", int(rounded/time.Second))
	}
	return time.FixedZone("", 0)
}
218
// msDosTimeToTime decodes a packed MS-DOS date and time pair into a
// time.Time in UTC. The format has a resolution of 2s.
// Out-of-range components (such as month 0) are normalized by time.Date.
// See: https://msdn.microsoft.com/en-us/library/ms724247(v=VS.85).aspx
func msDosTimeToTime(dosDate, dosTime uint16) time.Time {
	// Date layout: bits 0-4 day of month; 5-8 month; 9-15 years since 1980.
	year := int(dosDate>>9) + 1980
	month := time.Month((dosDate >> 5) & 0xf)
	day := int(dosDate & 0x1f)

	// Time layout: bits 0-4 second/2; 5-10 minute; 11-15 hour.
	hour := int(dosTime >> 11)
	minute := int((dosTime >> 5) & 0x3f)
	second := int(dosTime&0x1f) * 2

	return time.Date(year, month, day, hour, minute, second, 0, time.UTC)
}
238
// timeToMsDosTime packs the wall-clock date and time of t (in t's own
// location) into the MS-DOS date and time words.
// The resolution is 2s, so odd seconds are rounded down.
// Years outside the representable range are not checked; the result is
// simply truncated to 16 bits.
// See: https://msdn.microsoft.com/en-us/library/ms724274(v=VS.85).aspx
func timeToMsDosTime(t time.Time) (fDate uint16, fTime uint16) {
	year, month, day := t.Date()
	hour, minute, second := t.Clock()

	// date bits 0-4: day of month; 5-8: month; 9-15: years since 1980
	fDate = uint16((year-1980)<<9 + int(month)<<5 + day)
	// time bits 0-4: second/2; 5-10: minute; 11-15: hour
	fTime = uint16(hour<<11 + minute<<5 + second/2)
	return fDate, fTime
}
247
248// ModTime returns the modification time in UTC using the legacy
249// ModifiedDate and ModifiedTime fields.
250//
251// Deprecated: Use Modified instead.
252func (h *FileHeader) ModTime() time.Time {
253	return msDosTimeToTime(h.ModifiedDate, h.ModifiedTime)
254}
255
256// SetModTime sets the Modified, ModifiedTime, and ModifiedDate fields
257// to the given time in UTC.
258//
259// Deprecated: Use Modified instead.
260func (h *FileHeader) SetModTime(t time.Time) {
261	t = t.UTC() // Convert to UTC for compatibility
262	h.Modified = t
263	h.ModifiedDate, h.ModifiedTime = timeToMsDosTime(t)
264}
265
const (
	// Unix constants. The specification doesn't mention them,
	// but these seem to be the values agreed on by tools.
	s_IFMT   = 0xf000 // mask selecting the file type bits
	s_IFSOCK = 0xc000 // socket
	s_IFLNK  = 0xa000 // symbolic link
	s_IFREG  = 0x8000 // regular file
	s_IFBLK  = 0x6000 // block device
	s_IFDIR  = 0x4000 // directory
	s_IFCHR  = 0x2000 // character device
	s_IFIFO  = 0x1000 // named pipe
	s_ISUID  = 0x800  // setuid
	s_ISGID  = 0x400  // setgid
	s_ISVTX  = 0x200  // sticky

	// MS-DOS attribute bits, stored in the low bits of ExternalAttrs.
	msdosDir      = 0x10
	msdosReadOnly = 0x01
)
284
285// Mode returns the permission and mode bits for the FileHeader.
286func (h *FileHeader) Mode() (mode fs.FileMode) {
287	switch h.CreatorVersion >> 8 {
288	case creatorUnix, creatorMacOSX:
289		mode = unixModeToFileMode(h.ExternalAttrs >> 16)
290	case creatorNTFS, creatorVFAT, creatorFAT:
291		mode = msdosModeToFileMode(h.ExternalAttrs)
292	}
293	if len(h.Name) > 0 && h.Name[len(h.Name)-1] == '/' {
294		mode |= fs.ModeDir
295	}
296	return mode
297}
298
299// SetMode changes the permission and mode bits for the FileHeader.
300func (h *FileHeader) SetMode(mode fs.FileMode) {
301	h.CreatorVersion = h.CreatorVersion&0xff | creatorUnix<<8
302	h.ExternalAttrs = fileModeToUnixMode(mode) << 16
303
304	// set MSDOS attributes too, as the original zip does.
305	if mode&fs.ModeDir != 0 {
306		h.ExternalAttrs |= msdosDir
307	}
308	if mode&0200 == 0 {
309		h.ExternalAttrs |= msdosReadOnly
310	}
311}
312
313// isZip64 reports whether the file size exceeds the 32 bit limit
314func (h *FileHeader) isZip64() bool {
315	return h.CompressedSize64 >= uint32max || h.UncompressedSize64 >= uint32max
316}
317
318func msdosModeToFileMode(m uint32) (mode fs.FileMode) {
319	if m&msdosDir != 0 {
320		mode = fs.ModeDir | 0777
321	} else {
322		mode = 0666
323	}
324	if m&msdosReadOnly != 0 {
325		mode &^= 0222
326	}
327	return mode
328}
329
330func fileModeToUnixMode(mode fs.FileMode) uint32 {
331	var m uint32
332	switch mode & fs.ModeType {
333	default:
334		m = s_IFREG
335	case fs.ModeDir:
336		m = s_IFDIR
337	case fs.ModeSymlink:
338		m = s_IFLNK
339	case fs.ModeNamedPipe:
340		m = s_IFIFO
341	case fs.ModeSocket:
342		m = s_IFSOCK
343	case fs.ModeDevice:
344		if mode&fs.ModeCharDevice != 0 {
345			m = s_IFCHR
346		} else {
347			m = s_IFBLK
348		}
349	}
350	if mode&fs.ModeSetuid != 0 {
351		m |= s_ISUID
352	}
353	if mode&fs.ModeSetgid != 0 {
354		m |= s_ISGID
355	}
356	if mode&fs.ModeSticky != 0 {
357		m |= s_ISVTX
358	}
359	return m | uint32(mode&0777)
360}
361
362func unixModeToFileMode(m uint32) fs.FileMode {
363	mode := fs.FileMode(m & 0777)
364	switch m & s_IFMT {
365	case s_IFBLK:
366		mode |= fs.ModeDevice
367	case s_IFCHR:
368		mode |= fs.ModeDevice | fs.ModeCharDevice
369	case s_IFDIR:
370		mode |= fs.ModeDir
371	case s_IFIFO:
372		mode |= fs.ModeNamedPipe
373	case s_IFLNK:
374		mode |= fs.ModeSymlink
375	case s_IFREG:
376		// nothing to do
377	case s_IFSOCK:
378		mode |= fs.ModeSocket
379	}
380	if m&s_ISGID != 0 {
381		mode |= fs.ModeSetgid
382	}
383	if m&s_ISUID != 0 {
384		mode |= fs.ModeSetuid
385	}
386	if m&s_ISVTX != 0 {
387		mode |= fs.ModeSticky
388	}
389	return mode
390}
391