1// Copyright 2009 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package tar
6
7import (
8	"bytes"
9	"fmt"
10	"io"
11	"path"
12	"sort"
13	"strings"
14	"time"
15)
16
// Writer provides sequential writing of a tar archive.
// Writer.WriteHeader begins a new file with the provided Header,
// and then Writer can be treated as an io.Writer to supply that file's data.
type Writer struct {
	w    io.Writer  // Underlying destination that receives the archive bytes
	pad  int64      // Amount of padding to write after current file entry
	curr fileWriter // Writer for current file entry
	hdr  Header     // Shallow copy of Header that is safe for mutations
	blk  block      // Buffer to use as temporary local storage

	// err is a persistent error.
	// It is only the responsibility of every exported method of Writer to
	// ensure that this error is sticky.
	err error
}
32
33// NewWriter creates a new Writer writing to w.
34func NewWriter(w io.Writer) *Writer {
35	return &Writer{w: w, curr: &regFileWriter{w, 0}}
36}
37
// fileWriter is the internal interface for the writer of a single
// archive entry's contents. It combines io.Writer with fileState
// (declared elsewhere in the package) and a ReadFrom method.
type fileWriter interface {
	io.Writer
	fileState

	// ReadFrom fills the entry with data read from the given reader and
	// returns the number of bytes consumed together with any error.
	ReadFrom(io.Reader) (int64, error)
}
44
45// Flush finishes writing the current file's block padding.
46// The current file must be fully written before Flush can be called.
47//
48// This is unnecessary as the next call to WriteHeader or Close
49// will implicitly flush out the file's padding.
50func (tw *Writer) Flush() error {
51	if tw.err != nil {
52		return tw.err
53	}
54	if nb := tw.curr.LogicalRemaining(); nb > 0 {
55		return fmt.Errorf("archive/tar: missed writing %d bytes", nb)
56	}
57	if _, tw.err = tw.w.Write(zeroBlock[:tw.pad]); tw.err != nil {
58		return tw.err
59	}
60	tw.pad = 0
61	return nil
62}
63
64// WriteHeader writes hdr and prepares to accept the file's contents.
65// The Header.Size determines how many bytes can be written for the next file.
66// If the current file is not fully written, then this returns an error.
67// This implicitly flushes any padding necessary before writing the header.
68func (tw *Writer) WriteHeader(hdr *Header) error {
69	if err := tw.Flush(); err != nil {
70		return err
71	}
72	tw.hdr = *hdr // Shallow copy of Header
73
74	// Round ModTime and ignore AccessTime and ChangeTime unless
75	// the format is explicitly chosen.
76	// This ensures nominal usage of WriteHeader (without specifying the format)
77	// does not always result in the PAX format being chosen, which
78	// causes a 1KiB increase to every header.
79	if tw.hdr.Format == FormatUnknown {
80		tw.hdr.ModTime = tw.hdr.ModTime.Round(time.Second)
81		tw.hdr.AccessTime = time.Time{}
82		tw.hdr.ChangeTime = time.Time{}
83	}
84
85	allowedFormats, paxHdrs, err := tw.hdr.allowedFormats()
86	switch {
87	case allowedFormats.has(FormatUSTAR):
88		tw.err = tw.writeUSTARHeader(&tw.hdr)
89		return tw.err
90	case allowedFormats.has(FormatPAX):
91		tw.err = tw.writePAXHeader(&tw.hdr, paxHdrs)
92		return tw.err
93	case allowedFormats.has(FormatGNU):
94		tw.err = tw.writeGNUHeader(&tw.hdr)
95		return tw.err
96	default:
97		return err // Non-fatal error
98	}
99}
100
101func (tw *Writer) writeUSTARHeader(hdr *Header) error {
102	// Check if we can use USTAR prefix/suffix splitting.
103	var namePrefix string
104	if prefix, suffix, ok := splitUSTARPath(hdr.Name); ok {
105		namePrefix, hdr.Name = prefix, suffix
106	}
107
108	// Pack the main header.
109	var f formatter
110	blk := tw.templateV7Plus(hdr, f.formatString, f.formatOctal)
111	f.formatString(blk.USTAR().Prefix(), namePrefix)
112	blk.SetFormat(FormatUSTAR)
113	if f.err != nil {
114		return f.err // Should never happen since header is validated
115	}
116	return tw.writeRawHeader(blk, hdr.Size, hdr.Typeflag)
117}
118
119func (tw *Writer) writePAXHeader(hdr *Header, paxHdrs map[string]string) error {
120	realName, realSize := hdr.Name, hdr.Size
121
122	// TODO(dsnet): Re-enable this when adding sparse support.
123	// See https://golang.org/issue/22735
124	/*
125		// Handle sparse files.
126		var spd sparseDatas
127		var spb []byte
128		if len(hdr.SparseHoles) > 0 {
129			sph := append([]sparseEntry{}, hdr.SparseHoles...) // Copy sparse map
130			sph = alignSparseEntries(sph, hdr.Size)
131			spd = invertSparseEntries(sph, hdr.Size)
132
133			// Format the sparse map.
134			hdr.Size = 0 // Replace with encoded size
135			spb = append(strconv.AppendInt(spb, int64(len(spd)), 10), '\n')
136			for _, s := range spd {
137				hdr.Size += s.Length
138				spb = append(strconv.AppendInt(spb, s.Offset, 10), '\n')
139				spb = append(strconv.AppendInt(spb, s.Length, 10), '\n')
140			}
141			pad := blockPadding(int64(len(spb)))
142			spb = append(spb, zeroBlock[:pad]...)
143			hdr.Size += int64(len(spb)) // Accounts for encoded sparse map
144
145			// Add and modify appropriate PAX records.
146			dir, file := path.Split(realName)
147			hdr.Name = path.Join(dir, "GNUSparseFile.0", file)
148			paxHdrs[paxGNUSparseMajor] = "1"
149			paxHdrs[paxGNUSparseMinor] = "0"
150			paxHdrs[paxGNUSparseName] = realName
151			paxHdrs[paxGNUSparseRealSize] = strconv.FormatInt(realSize, 10)
152			paxHdrs[paxSize] = strconv.FormatInt(hdr.Size, 10)
153			delete(paxHdrs, paxPath) // Recorded by paxGNUSparseName
154		}
155	*/
156	_ = realSize
157
158	// Write PAX records to the output.
159	isGlobal := hdr.Typeflag == TypeXGlobalHeader
160	if len(paxHdrs) > 0 || isGlobal {
161		// Sort keys for deterministic ordering.
162		var keys []string
163		for k := range paxHdrs {
164			keys = append(keys, k)
165		}
166		sort.Strings(keys)
167
168		// Write each record to a buffer.
169		var buf bytes.Buffer
170		for _, k := range keys {
171			rec, err := formatPAXRecord(k, paxHdrs[k])
172			if err != nil {
173				return err
174			}
175			buf.WriteString(rec)
176		}
177
178		// Write the extended header file.
179		var name string
180		var flag byte
181		if isGlobal {
182			name = realName
183			if name == "" {
184				name = "GlobalHead.0.0"
185			}
186			flag = TypeXGlobalHeader
187		} else {
188			dir, file := path.Split(realName)
189			name = path.Join(dir, "PaxHeaders.0", file)
190			flag = TypeXHeader
191		}
192		data := buf.String()
193		if err := tw.writeRawFile(name, data, flag, FormatPAX); err != nil || isGlobal {
194			return err // Global headers return here
195		}
196	}
197
198	// Pack the main header.
199	var f formatter // Ignore errors since they are expected
200	fmtStr := func(b []byte, s string) { f.formatString(b, toASCII(s)) }
201	blk := tw.templateV7Plus(hdr, fmtStr, f.formatOctal)
202	blk.SetFormat(FormatPAX)
203	if err := tw.writeRawHeader(blk, hdr.Size, hdr.Typeflag); err != nil {
204		return err
205	}
206
207	// TODO(dsnet): Re-enable this when adding sparse support.
208	// See https://golang.org/issue/22735
209	/*
210		// Write the sparse map and setup the sparse writer if necessary.
211		if len(spd) > 0 {
212			// Use tw.curr since the sparse map is accounted for in hdr.Size.
213			if _, err := tw.curr.Write(spb); err != nil {
214				return err
215			}
216			tw.curr = &sparseFileWriter{tw.curr, spd, 0}
217		}
218	*/
219	return nil
220}
221
222func (tw *Writer) writeGNUHeader(hdr *Header) error {
223	// Use long-link files if Name or Linkname exceeds the field size.
224	const longName = "././@LongLink"
225	if len(hdr.Name) > nameSize {
226		data := hdr.Name + "\x00"
227		if err := tw.writeRawFile(longName, data, TypeGNULongName, FormatGNU); err != nil {
228			return err
229		}
230	}
231	if len(hdr.Linkname) > nameSize {
232		data := hdr.Linkname + "\x00"
233		if err := tw.writeRawFile(longName, data, TypeGNULongLink, FormatGNU); err != nil {
234			return err
235		}
236	}
237
238	// Pack the main header.
239	var f formatter // Ignore errors since they are expected
240	var spd sparseDatas
241	var spb []byte
242	blk := tw.templateV7Plus(hdr, f.formatString, f.formatNumeric)
243	if !hdr.AccessTime.IsZero() {
244		f.formatNumeric(blk.GNU().AccessTime(), hdr.AccessTime.Unix())
245	}
246	if !hdr.ChangeTime.IsZero() {
247		f.formatNumeric(blk.GNU().ChangeTime(), hdr.ChangeTime.Unix())
248	}
249	// TODO(dsnet): Re-enable this when adding sparse support.
250	// See https://golang.org/issue/22735
251	/*
252		if hdr.Typeflag == TypeGNUSparse {
253			sph := append([]sparseEntry{}, hdr.SparseHoles...) // Copy sparse map
254			sph = alignSparseEntries(sph, hdr.Size)
255			spd = invertSparseEntries(sph, hdr.Size)
256
257			// Format the sparse map.
258			formatSPD := func(sp sparseDatas, sa sparseArray) sparseDatas {
259				for i := 0; len(sp) > 0 && i < sa.MaxEntries(); i++ {
260					f.formatNumeric(sa.Entry(i).Offset(), sp[0].Offset)
261					f.formatNumeric(sa.Entry(i).Length(), sp[0].Length)
262					sp = sp[1:]
263				}
264				if len(sp) > 0 {
265					sa.IsExtended()[0] = 1
266				}
267				return sp
268			}
269			sp2 := formatSPD(spd, blk.GNU().Sparse())
270			for len(sp2) > 0 {
271				var spHdr block
272				sp2 = formatSPD(sp2, spHdr.Sparse())
273				spb = append(spb, spHdr[:]...)
274			}
275
276			// Update size fields in the header block.
277			realSize := hdr.Size
278			hdr.Size = 0 // Encoded size; does not account for encoded sparse map
279			for _, s := range spd {
280				hdr.Size += s.Length
281			}
282			copy(blk.V7().Size(), zeroBlock[:]) // Reset field
283			f.formatNumeric(blk.V7().Size(), hdr.Size)
284			f.formatNumeric(blk.GNU().RealSize(), realSize)
285		}
286	*/
287	blk.SetFormat(FormatGNU)
288	if err := tw.writeRawHeader(blk, hdr.Size, hdr.Typeflag); err != nil {
289		return err
290	}
291
292	// Write the extended sparse map and setup the sparse writer if necessary.
293	if len(spd) > 0 {
294		// Use tw.w since the sparse map is not accounted for in hdr.Size.
295		if _, err := tw.w.Write(spb); err != nil {
296			return err
297		}
298		tw.curr = &sparseFileWriter{tw.curr, spd, 0}
299	}
300	return nil
301}
302
// stringFormatter and numberFormatter are the signatures of the field
// encoders that templateV7Plus accepts: each receives the destination
// header field as a byte slice together with the value to encode into it.
type (
	// stringFormatter encodes a string value into a header field.
	stringFormatter func([]byte, string)
	// numberFormatter encodes an integer value into a header field.
	numberFormatter func([]byte, int64)
)
307
308// templateV7Plus fills out the V7 fields of a block using values from hdr.
309// It also fills out fields (uname, gname, devmajor, devminor) that are
310// shared in the USTAR, PAX, and GNU formats using the provided formatters.
311//
312// The block returned is only valid until the next call to
313// templateV7Plus or writeRawFile.
314func (tw *Writer) templateV7Plus(hdr *Header, fmtStr stringFormatter, fmtNum numberFormatter) *block {
315	tw.blk.Reset()
316
317	modTime := hdr.ModTime
318	if modTime.IsZero() {
319		modTime = time.Unix(0, 0)
320	}
321
322	v7 := tw.blk.V7()
323	v7.TypeFlag()[0] = hdr.Typeflag
324	fmtStr(v7.Name(), hdr.Name)
325	fmtStr(v7.LinkName(), hdr.Linkname)
326	fmtNum(v7.Mode(), hdr.Mode)
327	fmtNum(v7.UID(), int64(hdr.Uid))
328	fmtNum(v7.GID(), int64(hdr.Gid))
329	fmtNum(v7.Size(), hdr.Size)
330	fmtNum(v7.ModTime(), modTime.Unix())
331
332	ustar := tw.blk.USTAR()
333	fmtStr(ustar.UserName(), hdr.Uname)
334	fmtStr(ustar.GroupName(), hdr.Gname)
335	fmtNum(ustar.DevMajor(), hdr.Devmajor)
336	fmtNum(ustar.DevMinor(), hdr.Devminor)
337
338	return &tw.blk
339}
340
341// writeRawFile writes a minimal file with the given name and flag type.
342// It uses format to encode the header format and will write data as the body.
343// It uses default values for all of the other fields (as BSD and GNU tar does).
344func (tw *Writer) writeRawFile(name, data string, flag byte, format Format) error {
345	tw.blk.Reset()
346
347	// Best effort for the filename.
348	name = toASCII(name)
349	if len(name) > nameSize {
350		name = name[:nameSize]
351	}
352	name = strings.TrimRight(name, "/")
353
354	var f formatter
355	v7 := tw.blk.V7()
356	v7.TypeFlag()[0] = flag
357	f.formatString(v7.Name(), name)
358	f.formatOctal(v7.Mode(), 0)
359	f.formatOctal(v7.UID(), 0)
360	f.formatOctal(v7.GID(), 0)
361	f.formatOctal(v7.Size(), int64(len(data))) // Must be < 8GiB
362	f.formatOctal(v7.ModTime(), 0)
363	tw.blk.SetFormat(format)
364	if f.err != nil {
365		return f.err // Only occurs if size condition is violated
366	}
367
368	// Write the header and data.
369	if err := tw.writeRawHeader(&tw.blk, int64(len(data)), flag); err != nil {
370		return err
371	}
372	_, err := io.WriteString(tw, data)
373	return err
374}
375
376// writeRawHeader writes the value of blk, regardless of its value.
377// It sets up the Writer such that it can accept a file of the given size.
378// If the flag is a special header-only flag, then the size is treated as zero.
379func (tw *Writer) writeRawHeader(blk *block, size int64, flag byte) error {
380	if err := tw.Flush(); err != nil {
381		return err
382	}
383	if _, err := tw.w.Write(blk[:]); err != nil {
384		return err
385	}
386	if isHeaderOnlyType(flag) {
387		size = 0
388	}
389	tw.curr = &regFileWriter{tw.w, size}
390	tw.pad = blockPadding(size)
391	return nil
392}
393
394// splitUSTARPath splits a path according to USTAR prefix and suffix rules.
395// If the path is not splittable, then it will return ("", "", false).
396func splitUSTARPath(name string) (prefix, suffix string, ok bool) {
397	length := len(name)
398	if length <= nameSize || !isASCII(name) {
399		return "", "", false
400	} else if length > prefixSize+1 {
401		length = prefixSize + 1
402	} else if name[length-1] == '/' {
403		length--
404	}
405
406	i := strings.LastIndex(name[:length], "/")
407	nlen := len(name) - i - 1 // nlen is length of suffix
408	plen := i                 // plen is length of prefix
409	if i <= 0 || nlen > nameSize || nlen == 0 || plen > prefixSize {
410		return "", "", false
411	}
412	return name[:i], name[i+1:], true
413}
414
415// Write writes to the current file in the tar archive.
416// Write returns the error ErrWriteTooLong if more than
417// Header.Size bytes are written after WriteHeader.
418//
419// Calling Write on special types like TypeLink, TypeSymlink, TypeChar,
420// TypeBlock, TypeDir, and TypeFifo returns (0, ErrWriteTooLong) regardless
421// of what the Header.Size claims.
422func (tw *Writer) Write(b []byte) (int, error) {
423	if tw.err != nil {
424		return 0, tw.err
425	}
426	n, err := tw.curr.Write(b)
427	if err != nil && err != ErrWriteTooLong {
428		tw.err = err
429	}
430	return n, err
431}
432
433// readFrom populates the content of the current file by reading from r.
434// The bytes read must match the number of remaining bytes in the current file.
435//
436// If the current file is sparse and r is an io.ReadSeeker,
437// then readFrom uses Seek to skip past holes defined in Header.SparseHoles,
438// assuming that skipped regions are all NULs.
439// This always reads the last byte to ensure r is the right size.
440//
441// TODO(dsnet): Re-export this when adding sparse file support.
442// See https://golang.org/issue/22735
443func (tw *Writer) readFrom(r io.Reader) (int64, error) {
444	if tw.err != nil {
445		return 0, tw.err
446	}
447	n, err := tw.curr.ReadFrom(r)
448	if err != nil && err != ErrWriteTooLong {
449		tw.err = err
450	}
451	return n, err
452}
453
454// Close closes the tar archive by flushing the padding, and writing the footer.
455// If the current file (from a prior call to WriteHeader) is not fully written,
456// then this returns an error.
457func (tw *Writer) Close() error {
458	if tw.err == ErrWriteAfterClose {
459		return nil
460	}
461	if tw.err != nil {
462		return tw.err
463	}
464
465	// Trailer: two zero blocks.
466	err := tw.Flush()
467	for i := 0; i < 2 && err == nil; i++ {
468		_, err = tw.w.Write(zeroBlock[:])
469	}
470
471	// Ensure all future actions are invalid.
472	tw.err = ErrWriteAfterClose
473	return err // Report IO errors
474}
475
// regFileWriter is a fileWriter for writing data to a regular file entry.
// It forwards data to w while counting down nb, the number of bytes the
// entry may still receive.
type regFileWriter struct {
	w  io.Writer // Underlying Writer
	nb int64     // Number of remaining bytes to write
}
481
482func (fw *regFileWriter) Write(b []byte) (n int, err error) {
483	overwrite := int64(len(b)) > fw.nb
484	if overwrite {
485		b = b[:fw.nb]
486	}
487	if len(b) > 0 {
488		n, err = fw.w.Write(b)
489		fw.nb -= int64(n)
490	}
491	switch {
492	case err != nil:
493		return n, err
494	case overwrite:
495		return n, ErrWriteTooLong
496	default:
497		return n, nil
498	}
499}
500
501func (fw *regFileWriter) ReadFrom(r io.Reader) (int64, error) {
502	return io.Copy(struct{ io.Writer }{fw}, r)
503}
504
// LogicalRemaining reports fw.nb, the number of bytes that may still be
// written to this entry.
func (fw regFileWriter) LogicalRemaining() int64 {
	return fw.nb
}
508func (fw regFileWriter) PhysicalRemaining() int64 {
509	return fw.nb
510}
511
// sparseFileWriter is a fileWriter for writing data to a sparse file entry.
// Only the bytes that fall inside the data fragments listed in sp are
// forwarded to fw; bytes that fall in the holes between them must be NUL.
type sparseFileWriter struct {
	fw  fileWriter  // Underlying fileWriter
	sp  sparseDatas // Normalized list of data fragments
	pos int64       // Current position in sparse file
}
518
// Write consumes b as the next bytes of the logical (expanded) sparse file.
// Bytes that land in a hole are only checked to be NUL via zeroWriter;
// bytes that land in a data fragment are passed on to sw.fw.
// It returns ErrWriteTooLong if b extends past the logical end of the file.
func (sw *sparseFileWriter) Write(b []byte) (n int, err error) {
	// Clamp b to the logical space that is left, remembering the overflow.
	overwrite := int64(len(b)) > sw.LogicalRemaining()
	if overwrite {
		b = b[:sw.LogicalRemaining()]
	}

	b0 := b // Kept to compute how many bytes were consumed
	endPos := sw.pos + int64(len(b))
	for endPos > sw.pos && err == nil {
		var nf int // Bytes written in fragment
		dataStart, dataEnd := sw.sp[0].Offset, sw.sp[0].endOffset()
		if sw.pos < dataStart { // In a hole fragment
			bf := b[:min(int64(len(b)), dataStart-sw.pos)]
			nf, err = zeroWriter{}.Write(bf)
		} else { // In a data fragment
			bf := b[:min(int64(len(b)), dataEnd-sw.pos)]
			nf, err = sw.fw.Write(bf)
		}
		b = b[nf:]
		sw.pos += int64(nf)
		// Advance to the next fragment once the current one is used up.
		if sw.pos >= dataEnd && len(sw.sp) > 1 {
			sw.sp = sw.sp[1:] // Ensure last fragment always remains
		}
	}

	n = len(b0) - len(b)
	switch {
	case err == ErrWriteTooLong:
		return n, errMissData // Not possible; implies bug in validation logic
	case err != nil:
		return n, err
	case sw.LogicalRemaining() == 0 && sw.PhysicalRemaining() > 0:
		return n, errUnrefData // Not possible; implies bug in validation logic
	case overwrite:
		return n, ErrWriteTooLong
	default:
		return n, nil
	}
}
558
// ReadFrom fills the sparse entry from r and returns how far the logical
// position advanced. When r is an io.ReadSeeker whose Seek actually works,
// holes are skipped with Seek and only data fragments are copied into sw.fw;
// otherwise everything is streamed through Write using io.Copy.
// A reader that ends early yields io.ErrUnexpectedEOF, and leftover input
// after the logical end is reported by ensureEOF.
func (sw *sparseFileWriter) ReadFrom(r io.Reader) (n int64, err error) {
	rs, ok := r.(io.ReadSeeker)
	if ok {
		// Probe with a no-op seek to find out whether seeking is supported.
		if _, err := rs.Seek(0, io.SeekCurrent); err != nil {
			ok = false // Not all io.Seeker can really seek
		}
	}
	if !ok {
		// Wrapping sw exposes only Write, so io.Copy cannot call back
		// into this ReadFrom.
		return io.Copy(struct{ io.Writer }{sw}, r)
	}

	var readLastByte bool
	pos0 := sw.pos // Starting position, used to compute n
	for sw.LogicalRemaining() > 0 && !readLastByte && err == nil {
		var nf int64 // Size of fragment
		dataStart, dataEnd := sw.sp[0].Offset, sw.sp[0].endOffset()
		if sw.pos < dataStart { // In a hole fragment
			nf = dataStart - sw.pos
			if sw.PhysicalRemaining() == 0 {
				// No data is left to write, so this is the final hole:
				// stop one byte short so that the last byte can be read.
				readLastByte = true
				nf--
			}
			_, err = rs.Seek(nf, io.SeekCurrent)
		} else { // In a data fragment
			nf = dataEnd - sw.pos
			nf, err = io.CopyN(sw.fw, rs, nf)
		}
		sw.pos += nf
		// Advance to the next fragment once the current one is used up.
		if sw.pos >= dataEnd && len(sw.sp) > 1 {
			sw.sp = sw.sp[1:] // Ensure last fragment always remains
		}
	}

	// If the last fragment is a hole, then seek to 1-byte before EOF, and
	// read a single byte to ensure the file is the right size.
	if readLastByte && err == nil {
		_, err = mustReadFull(rs, []byte{0})
		sw.pos++
	}

	n = sw.pos - pos0
	switch {
	case err == io.EOF:
		return n, io.ErrUnexpectedEOF
	case err == ErrWriteTooLong:
		return n, errMissData // Not possible; implies bug in validation logic
	case err != nil:
		return n, err
	case sw.LogicalRemaining() == 0 && sw.PhysicalRemaining() > 0:
		return n, errUnrefData // Not possible; implies bug in validation logic
	default:
		return n, ensureEOF(rs)
	}
}
613
614func (sw sparseFileWriter) LogicalRemaining() int64 {
615	return sw.sp[len(sw.sp)-1].endOffset() - sw.pos
616}
// PhysicalRemaining reports the PhysicalRemaining count of the underlying
// fileWriter, which receives only the data fragments.
func (sw sparseFileWriter) PhysicalRemaining() int64 {
	return sw.fw.PhysicalRemaining()
}
620
// zeroWriter is a writer that stores nothing and accepts only NUL bytes;
// writing any other byte to it fails with errWriteHole.
type zeroWriter struct{}
623
624func (zeroWriter) Write(b []byte) (int, error) {
625	for i, c := range b {
626		if c != 0 {
627			return i, errWriteHole
628		}
629	}
630	return len(b), nil
631}
632
633// ensureEOF checks whether r is at EOF, reporting ErrWriteTooLong if not so.
634func ensureEOF(r io.Reader) error {
635	n, err := tryReadFull(r, []byte{0})
636	switch {
637	case n > 0:
638		return ErrWriteTooLong
639	case err == io.EOF:
640		return nil
641	default:
642		return err
643	}
644}
645