1// Copyright 2011 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package zip
6
7import (
8	"bufio"
9	"encoding/binary"
10	"errors"
11	"hash"
12	"hash/crc32"
13	"io"
14	"unicode/utf8"
15)
16
// Errors returned by writeHeader when a FileHeader field is too long for
// the 16-bit length field that precedes it in the local file header.
var (
	errLongName  = errors.New("zip: FileHeader.Name too long")
	errLongExtra = errors.New("zip: FileHeader.Extra too long")
)
21
// Writer implements a zip file writer.
type Writer struct {
	// cw wraps the buffered output and counts the bytes written to it;
	// its count is used as the current offset within the zip data.
	cw          *countWriter
	// dir holds one entry per file added with CreateHeader; Close
	// writes these out as the central directory.
	dir         []*header
	// last is the most recently created file writer. It is finalized
	// by the next call to CreateHeader or by Close.
	last        *fileWriter
	// closed is set by Close and makes a second Close return an error.
	closed      bool
	// compressors holds per-Writer compressors set by RegisterCompressor.
	// It is consulted before the package-level compressor lookup.
	compressors map[uint16]Compressor
	// comment is the end-of-central-directory comment set by SetComment.
	comment     string

	// testHookCloseSizeOffset if non-nil is called with the size
	// and offset of the central directory at Close.
	testHookCloseSizeOffset func(size, offset uint64)
}
35
// header pairs a FileHeader with the position of its entry in the output.
type header struct {
	*FileHeader
	// offset is the Writer's byte count at the moment the entry was
	// created, i.e. where its local file header starts. Close stores it
	// in the entry's central directory record.
	offset uint64
}
40
41// NewWriter returns a new Writer writing a zip file to w.
42func NewWriter(w io.Writer) *Writer {
43	return &Writer{cw: &countWriter{w: bufio.NewWriter(w)}}
44}
45
46// SetOffset sets the offset of the beginning of the zip data within the
47// underlying writer. It should be used when the zip data is appended to an
48// existing file, such as a binary executable.
49// It must be called before any data is written.
50func (w *Writer) SetOffset(n int64) {
51	if w.cw.count != 0 {
52		panic("zip: SetOffset called after data was written")
53	}
54	w.cw.count = n
55}
56
57// Flush flushes any buffered data to the underlying writer.
58// Calling Flush is not normally necessary; calling Close is sufficient.
59func (w *Writer) Flush() error {
60	return w.cw.w.(*bufio.Writer).Flush()
61}
62
63// SetComment sets the end-of-central-directory comment field.
64// It can only be called before Close.
65func (w *Writer) SetComment(comment string) error {
66	if len(comment) > uint16max {
67		return errors.New("zip: Writer.Comment too long")
68	}
69	w.comment = comment
70	return nil
71}
72
73// Close finishes writing the zip file by writing the central directory.
74// It does not (and cannot) close the underlying writer.
75func (w *Writer) Close() error {
76	if w.last != nil && !w.last.closed {
77		if err := w.last.close(); err != nil {
78			return err
79		}
80		w.last = nil
81	}
82	if w.closed {
83		return errors.New("zip: writer closed twice")
84	}
85	w.closed = true
86
87	// write central directory
88	start := w.cw.count
89	for _, h := range w.dir {
90		var buf [directoryHeaderLen]byte
91		b := writeBuf(buf[:])
92		b.uint32(uint32(directoryHeaderSignature))
93		b.uint16(h.CreatorVersion)
94		b.uint16(h.ReaderVersion)
95		b.uint16(h.Flags)
96		b.uint16(h.Method)
97		b.uint16(h.ModifiedTime)
98		b.uint16(h.ModifiedDate)
99		b.uint32(h.CRC32)
100		if h.isZip64() || h.offset >= uint32max {
101			// the file needs a zip64 header. store maxint in both
102			// 32 bit size fields (and offset later) to signal that the
103			// zip64 extra header should be used.
104			b.uint32(uint32max) // compressed size
105			b.uint32(uint32max) // uncompressed size
106
107			// append a zip64 extra block to Extra
108			var buf [28]byte // 2x uint16 + 3x uint64
109			eb := writeBuf(buf[:])
110			eb.uint16(zip64ExtraID)
111			eb.uint16(24) // size = 3x uint64
112			eb.uint64(h.UncompressedSize64)
113			eb.uint64(h.CompressedSize64)
114			eb.uint64(h.offset)
115			h.Extra = append(h.Extra, buf[:]...)
116		} else {
117			b.uint32(h.CompressedSize)
118			b.uint32(h.UncompressedSize)
119		}
120
121		b.uint16(uint16(len(h.Name)))
122		b.uint16(uint16(len(h.Extra)))
123		b.uint16(uint16(len(h.Comment)))
124		b = b[4:] // skip disk number start and internal file attr (2x uint16)
125		b.uint32(h.ExternalAttrs)
126		if h.offset > uint32max {
127			b.uint32(uint32max)
128		} else {
129			b.uint32(uint32(h.offset))
130		}
131		if _, err := w.cw.Write(buf[:]); err != nil {
132			return err
133		}
134		if _, err := io.WriteString(w.cw, h.Name); err != nil {
135			return err
136		}
137		if _, err := w.cw.Write(h.Extra); err != nil {
138			return err
139		}
140		if _, err := io.WriteString(w.cw, h.Comment); err != nil {
141			return err
142		}
143	}
144	end := w.cw.count
145
146	records := uint64(len(w.dir))
147	size := uint64(end - start)
148	offset := uint64(start)
149
150	if f := w.testHookCloseSizeOffset; f != nil {
151		f(size, offset)
152	}
153
154	if records >= uint16max || size >= uint32max || offset >= uint32max {
155		var buf [directory64EndLen + directory64LocLen]byte
156		b := writeBuf(buf[:])
157
158		// zip64 end of central directory record
159		b.uint32(directory64EndSignature)
160		b.uint64(directory64EndLen - 12) // length minus signature (uint32) and length fields (uint64)
161		b.uint16(zipVersion45)           // version made by
162		b.uint16(zipVersion45)           // version needed to extract
163		b.uint32(0)                      // number of this disk
164		b.uint32(0)                      // number of the disk with the start of the central directory
165		b.uint64(records)                // total number of entries in the central directory on this disk
166		b.uint64(records)                // total number of entries in the central directory
167		b.uint64(size)                   // size of the central directory
168		b.uint64(offset)                 // offset of start of central directory with respect to the starting disk number
169
170		// zip64 end of central directory locator
171		b.uint32(directory64LocSignature)
172		b.uint32(0)           // number of the disk with the start of the zip64 end of central directory
173		b.uint64(uint64(end)) // relative offset of the zip64 end of central directory record
174		b.uint32(1)           // total number of disks
175
176		if _, err := w.cw.Write(buf[:]); err != nil {
177			return err
178		}
179
180		// store max values in the regular end record to signal that
181		// that the zip64 values should be used instead
182		records = uint16max
183		size = uint32max
184		offset = uint32max
185	}
186
187	// write end record
188	var buf [directoryEndLen]byte
189	b := writeBuf(buf[:])
190	b.uint32(uint32(directoryEndSignature))
191	b = b[4:]                        // skip over disk number and first disk number (2x uint16)
192	b.uint16(uint16(records))        // number of entries this disk
193	b.uint16(uint16(records))        // number of entries total
194	b.uint32(uint32(size))           // size of directory
195	b.uint32(uint32(offset))         // start of directory
196	b.uint16(uint16(len(w.comment))) // byte size of EOCD comment
197	if _, err := w.cw.Write(buf[:]); err != nil {
198		return err
199	}
200	if _, err := io.WriteString(w.cw, w.comment); err != nil {
201		return err
202	}
203
204	return w.cw.w.(*bufio.Writer).Flush()
205}
206
207// Create adds a file to the zip file using the provided name.
208// It returns a Writer to which the file contents should be written.
209// The file contents will be compressed using the Deflate method.
210// The name must be a relative path: it must not start with a drive
211// letter (e.g. C:) or leading slash, and only forward slashes are
212// allowed.
213// The file's contents must be written to the io.Writer before the next
214// call to Create, CreateHeader, or Close.
215func (w *Writer) Create(name string) (io.Writer, error) {
216	header := &FileHeader{
217		Name:   name,
218		Method: Deflate,
219	}
220	return w.CreateHeader(header)
221}
222
// detectUTF8 reports whether s is a valid UTF-8 string, and whether the string
// must be considered UTF-8 encoding (i.e., not compatible with CP-437, ASCII,
// or any other common encoding).
//
// If s is not valid UTF-8, both results are false.
func detectUTF8(s string) (valid, require bool) {
	for rest := s; len(rest) > 0; {
		r, width := utf8.DecodeRuneInString(rest)
		rest = rest[width:]

		// Officially, ZIP uses CP-437, but many readers use the system's
		// local character encoding. Most encodings are compatible with a
		// large subset of CP-437, which itself is ASCII-like.
		//
		// 0x7e and 0x5c are excluded from the portable range since EUC-KR
		// and Shift-JIS replace those characters with localized currency
		// and overline characters.
		if r >= 0x20 && r <= 0x7d && r != 0x5c {
			continue
		}

		if !utf8.ValidRune(r) {
			return false, false
		}
		// A one-byte RuneError means the input byte could not be decoded,
		// as opposed to a properly encoded U+FFFD.
		if r == utf8.RuneError && width == 1 {
			return false, false
		}
		require = true
	}
	return true, require
}
245
246// CreateHeader adds a file to the zip archive using the provided FileHeader
247// for the file metadata. Writer takes ownership of fh and may mutate
248// its fields. The caller must not modify fh after calling CreateHeader.
249//
250// This returns a Writer to which the file contents should be written.
251// The file's contents must be written to the io.Writer before the next
252// call to Create, CreateHeader, or Close.
253func (w *Writer) CreateHeader(fh *FileHeader) (io.Writer, error) {
254	if w.last != nil && !w.last.closed {
255		if err := w.last.close(); err != nil {
256			return nil, err
257		}
258	}
259	if len(w.dir) > 0 && w.dir[len(w.dir)-1].FileHeader == fh {
260		// See https://golang.org/issue/11144 confusion.
261		return nil, errors.New("archive/zip: invalid duplicate FileHeader")
262	}
263
264	fh.Flags |= 0x8 // we will write a data descriptor
265
266	// The ZIP format has a sad state of affairs regarding character encoding.
267	// Officially, the name and comment fields are supposed to be encoded
268	// in CP-437 (which is mostly compatible with ASCII), unless the UTF-8
269	// flag bit is set. However, there are several problems:
270	//
271	//	* Many ZIP readers still do not support UTF-8.
272	//	* If the UTF-8 flag is cleared, several readers simply interpret the
273	//	name and comment fields as whatever the local system encoding is.
274	//
275	// In order to avoid breaking readers without UTF-8 support,
276	// we avoid setting the UTF-8 flag if the strings are CP-437 compatible.
277	// However, if the strings require multibyte UTF-8 encoding and is a
278	// valid UTF-8 string, then we set the UTF-8 bit.
279	//
280	// For the case, where the user explicitly wants to specify the encoding
281	// as UTF-8, they will need to set the flag bit themselves.
282	utf8Valid1, utf8Require1 := detectUTF8(fh.Name)
283	utf8Valid2, utf8Require2 := detectUTF8(fh.Comment)
284	switch {
285	case fh.NonUTF8:
286		fh.Flags &^= 0x800
287	case (utf8Require1 || utf8Require2) && (utf8Valid1 && utf8Valid2):
288		fh.Flags |= 0x800
289	}
290
291	fh.CreatorVersion = fh.CreatorVersion&0xff00 | zipVersion20 // preserve compatibility byte
292	fh.ReaderVersion = zipVersion20
293
294	// If Modified is set, this takes precedence over MS-DOS timestamp fields.
295	if !fh.Modified.IsZero() {
296		// Contrary to the FileHeader.SetModTime method, we intentionally
297		// do not convert to UTC, because we assume the user intends to encode
298		// the date using the specified timezone. A user may want this control
299		// because many legacy ZIP readers interpret the timestamp according
300		// to the local timezone.
301		//
302		// The timezone is only non-UTC if a user directly sets the Modified
303		// field directly themselves. All other approaches sets UTC.
304		fh.ModifiedDate, fh.ModifiedTime = timeToMsDosTime(fh.Modified)
305
306		// Use "extended timestamp" format since this is what Info-ZIP uses.
307		// Nearly every major ZIP implementation uses a different format,
308		// but at least most seem to be able to understand the other formats.
309		//
310		// This format happens to be identical for both local and central header
311		// if modification time is the only timestamp being encoded.
312		var mbuf [9]byte // 2*SizeOf(uint16) + SizeOf(uint8) + SizeOf(uint32)
313		mt := uint32(fh.Modified.Unix())
314		eb := writeBuf(mbuf[:])
315		eb.uint16(extTimeExtraID)
316		eb.uint16(5)  // Size: SizeOf(uint8) + SizeOf(uint32)
317		eb.uint8(1)   // Flags: ModTime
318		eb.uint32(mt) // ModTime
319		fh.Extra = append(fh.Extra, mbuf[:]...)
320	}
321
322	fw := &fileWriter{
323		zipw:      w.cw,
324		compCount: &countWriter{w: w.cw},
325		crc32:     crc32.NewIEEE(),
326	}
327	comp := w.compressor(fh.Method)
328	if comp == nil {
329		return nil, ErrAlgorithm
330	}
331	var err error
332	fw.comp, err = comp(fw.compCount)
333	if err != nil {
334		return nil, err
335	}
336	fw.rawCount = &countWriter{w: fw.comp}
337
338	h := &header{
339		FileHeader: fh,
340		offset:     uint64(w.cw.count),
341	}
342	w.dir = append(w.dir, h)
343	fw.header = h
344
345	if err := writeHeader(w.cw, fh); err != nil {
346		return nil, err
347	}
348
349	w.last = fw
350	return fw, nil
351}
352
353func writeHeader(w io.Writer, h *FileHeader) error {
354	const maxUint16 = 1<<16 - 1
355	if len(h.Name) > maxUint16 {
356		return errLongName
357	}
358	if len(h.Extra) > maxUint16 {
359		return errLongExtra
360	}
361
362	var buf [fileHeaderLen]byte
363	b := writeBuf(buf[:])
364	b.uint32(uint32(fileHeaderSignature))
365	b.uint16(h.ReaderVersion)
366	b.uint16(h.Flags)
367	b.uint16(h.Method)
368	b.uint16(h.ModifiedTime)
369	b.uint16(h.ModifiedDate)
370	b.uint32(0) // since we are writing a data descriptor crc32,
371	b.uint32(0) // compressed size,
372	b.uint32(0) // and uncompressed size should be zero
373	b.uint16(uint16(len(h.Name)))
374	b.uint16(uint16(len(h.Extra)))
375	if _, err := w.Write(buf[:]); err != nil {
376		return err
377	}
378	if _, err := io.WriteString(w, h.Name); err != nil {
379		return err
380	}
381	_, err := w.Write(h.Extra)
382	return err
383}
384
385// RegisterCompressor registers or overrides a custom compressor for a specific
386// method ID. If a compressor for a given method is not found, Writer will
387// default to looking up the compressor at the package level.
388func (w *Writer) RegisterCompressor(method uint16, comp Compressor) {
389	if w.compressors == nil {
390		w.compressors = make(map[uint16]Compressor)
391	}
392	w.compressors[method] = comp
393}
394
395func (w *Writer) compressor(method uint16) Compressor {
396	comp := w.compressors[method]
397	if comp == nil {
398		comp = compressor(method)
399	}
400	return comp
401}
402
// fileWriter is the io.Writer returned by CreateHeader for the contents of
// a single file in the archive.
type fileWriter struct {
	*header
	// zipw is the archive output; close writes the data descriptor to it.
	zipw      io.Writer
	// rawCount sits in front of comp and counts the uncompressed bytes
	// written by the caller.
	rawCount  *countWriter
	// comp is the compressor for this file; it writes into compCount.
	comp      io.WriteCloser
	// compCount sits behind comp and counts the compressed bytes.
	compCount *countWriter
	// crc32 accumulates the checksum of the data passed to Write.
	crc32     hash.Hash32
	// closed is set by close; later writes are rejected.
	closed    bool
}
412
413func (w *fileWriter) Write(p []byte) (int, error) {
414	if w.closed {
415		return 0, errors.New("zip: write to closed file")
416	}
417	w.crc32.Write(p)
418	return w.rawCount.Write(p)
419}
420
421func (w *fileWriter) close() error {
422	if w.closed {
423		return errors.New("zip: file closed twice")
424	}
425	w.closed = true
426	if err := w.comp.Close(); err != nil {
427		return err
428	}
429
430	// update FileHeader
431	fh := w.header.FileHeader
432	fh.CRC32 = w.crc32.Sum32()
433	fh.CompressedSize64 = uint64(w.compCount.count)
434	fh.UncompressedSize64 = uint64(w.rawCount.count)
435
436	if fh.isZip64() {
437		fh.CompressedSize = uint32max
438		fh.UncompressedSize = uint32max
439		fh.ReaderVersion = zipVersion45 // requires 4.5 - File uses ZIP64 format extensions
440	} else {
441		fh.CompressedSize = uint32(fh.CompressedSize64)
442		fh.UncompressedSize = uint32(fh.UncompressedSize64)
443	}
444
445	// Write data descriptor. This is more complicated than one would
446	// think, see e.g. comments in zipfile.c:putextended() and
447	// http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=7073588.
448	// The approach here is to write 8 byte sizes if needed without
449	// adding a zip64 extra in the local header (too late anyway).
450	var buf []byte
451	if fh.isZip64() {
452		buf = make([]byte, dataDescriptor64Len)
453	} else {
454		buf = make([]byte, dataDescriptorLen)
455	}
456	b := writeBuf(buf)
457	b.uint32(dataDescriptorSignature) // de-facto standard, required by OS X
458	b.uint32(fh.CRC32)
459	if fh.isZip64() {
460		b.uint64(fh.CompressedSize64)
461		b.uint64(fh.UncompressedSize64)
462	} else {
463		b.uint32(fh.CompressedSize)
464		b.uint32(fh.UncompressedSize)
465	}
466	_, err := w.zipw.Write(buf)
467	return err
468}
469
// countWriter is an io.Writer that forwards to w and keeps a running total
// of the bytes w reported as written.
type countWriter struct {
	w     io.Writer
	count int64
}

// Write passes p to the wrapped writer and adds the number of bytes it
// accepted to count, including on a short or failed write.
func (cw *countWriter) Write(p []byte) (written int, err error) {
	written, err = cw.w.Write(p)
	cw.count += int64(written)
	return written, err
}
480
// nopCloser wraps an io.Writer and adds a Close method that does nothing.
type nopCloser struct {
	io.Writer
}

// Close always returns nil and leaves the wrapped writer untouched.
func (nopCloser) Close() error {
	return nil
}
488
// writeBuf is a byte slice used as a write cursor: each method stores a
// little-endian value at the front of the slice and then advances the
// slice past the bytes it wrote.
type writeBuf []byte

// uint8 stores v in the first byte and advances by one byte.
func (b *writeBuf) uint8(v uint8) {
	cur := *b
	cur[0] = v
	*b = cur[1:]
}

// uint16 stores v in little-endian order and advances by two bytes.
func (b *writeBuf) uint16(v uint16) {
	cur := *b
	binary.LittleEndian.PutUint16(cur, v)
	*b = cur[2:]
}

// uint32 stores v in little-endian order and advances by four bytes.
func (b *writeBuf) uint32(v uint32) {
	cur := *b
	binary.LittleEndian.PutUint32(cur, v)
	*b = cur[4:]
}

// uint64 stores v in little-endian order and advances by eight bytes.
func (b *writeBuf) uint64(v uint64) {
	cur := *b
	binary.LittleEndian.PutUint64(cur, v)
	*b = cur[8:]
}
510