1// Copyright 2009 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package tar
6
7import (
8	"fmt"
9	"io"
10	"path"
11	"sort"
12	"strings"
13	"time"
14)
15
// Writer provides sequential writing of a tar archive.
// Writer.WriteHeader begins a new file with the provided Header,
// and then Writer can be treated as an io.Writer to supply that file's data.
type Writer struct {
	w    io.Writer  // Underlying destination for the archive bytes
	pad  int64      // Amount of padding to write after current file entry
	curr fileWriter // Writer for current file entry
	hdr  Header     // Shallow copy of Header that is safe for mutations
	blk  block      // Buffer to use as temporary local storage

	// err is a persistent error.
	// It is only the responsibility of every exported method of Writer to
	// ensure that this error is sticky.
	err error
}
31
32// NewWriter creates a new Writer writing to w.
33func NewWriter(w io.Writer) *Writer {
34	return &Writer{w: w, curr: &regFileWriter{w, 0}}
35}
36
// fileWriter is the internal interface used to write the body of the
// current entry. It combines io.Writer and fileState with a ReadFrom
// method that fills the entry from a reader.
type fileWriter interface {
	io.Writer
	fileState

	ReadFrom(io.Reader) (int64, error)
}
43
44// Flush finishes writing the current file's block padding.
45// The current file must be fully written before Flush can be called.
46//
47// This is unnecessary as the next call to WriteHeader or Close
48// will implicitly flush out the file's padding.
49func (tw *Writer) Flush() error {
50	if tw.err != nil {
51		return tw.err
52	}
53	if nb := tw.curr.logicalRemaining(); nb > 0 {
54		return fmt.Errorf("archive/tar: missed writing %d bytes", nb)
55	}
56	if _, tw.err = tw.w.Write(zeroBlock[:tw.pad]); tw.err != nil {
57		return tw.err
58	}
59	tw.pad = 0
60	return nil
61}
62
63// WriteHeader writes hdr and prepares to accept the file's contents.
64// The Header.Size determines how many bytes can be written for the next file.
65// If the current file is not fully written, then this returns an error.
66// This implicitly flushes any padding necessary before writing the header.
67func (tw *Writer) WriteHeader(hdr *Header) error {
68	if err := tw.Flush(); err != nil {
69		return err
70	}
71	tw.hdr = *hdr // Shallow copy of Header
72
73	// Avoid usage of the legacy TypeRegA flag, and automatically promote
74	// it to use TypeReg or TypeDir.
75	if tw.hdr.Typeflag == TypeRegA {
76		if strings.HasSuffix(tw.hdr.Name, "/") {
77			tw.hdr.Typeflag = TypeDir
78		} else {
79			tw.hdr.Typeflag = TypeReg
80		}
81	}
82
83	// Round ModTime and ignore AccessTime and ChangeTime unless
84	// the format is explicitly chosen.
85	// This ensures nominal usage of WriteHeader (without specifying the format)
86	// does not always result in the PAX format being chosen, which
87	// causes a 1KiB increase to every header.
88	if tw.hdr.Format == FormatUnknown {
89		tw.hdr.ModTime = tw.hdr.ModTime.Round(time.Second)
90		tw.hdr.AccessTime = time.Time{}
91		tw.hdr.ChangeTime = time.Time{}
92	}
93
94	allowedFormats, paxHdrs, err := tw.hdr.allowedFormats()
95	switch {
96	case allowedFormats.has(FormatUSTAR):
97		tw.err = tw.writeUSTARHeader(&tw.hdr)
98		return tw.err
99	case allowedFormats.has(FormatPAX):
100		tw.err = tw.writePAXHeader(&tw.hdr, paxHdrs)
101		return tw.err
102	case allowedFormats.has(FormatGNU):
103		tw.err = tw.writeGNUHeader(&tw.hdr)
104		return tw.err
105	default:
106		return err // Non-fatal error
107	}
108}
109
110func (tw *Writer) writeUSTARHeader(hdr *Header) error {
111	// Check if we can use USTAR prefix/suffix splitting.
112	var namePrefix string
113	if prefix, suffix, ok := splitUSTARPath(hdr.Name); ok {
114		namePrefix, hdr.Name = prefix, suffix
115	}
116
117	// Pack the main header.
118	var f formatter
119	blk := tw.templateV7Plus(hdr, f.formatString, f.formatOctal)
120	f.formatString(blk.toUSTAR().prefix(), namePrefix)
121	blk.setFormat(FormatUSTAR)
122	if f.err != nil {
123		return f.err // Should never happen since header is validated
124	}
125	return tw.writeRawHeader(blk, hdr.Size, hdr.Typeflag)
126}
127
// writePAXHeader writes hdr using the PAX format.
//
// When paxHdrs is non-empty, or hdr is a global header (TypeXGlobalHeader),
// the records are first emitted as an extended header file via writeRawFile,
// sorted by key so that the output is deterministic. For a global header
// that extended header file is the only thing written. Otherwise the main
// header block follows, with its string fields passed through toASCII.
func (tw *Writer) writePAXHeader(hdr *Header, paxHdrs map[string]string) error {
	realName, realSize := hdr.Name, hdr.Size

	// TODO(dsnet): Re-enable this when adding sparse support.
	// See https://golang.org/issue/22735
	/*
		// Handle sparse files.
		var spd sparseDatas
		var spb []byte
		if len(hdr.SparseHoles) > 0 {
			sph := append([]sparseEntry{}, hdr.SparseHoles...) // Copy sparse map
			sph = alignSparseEntries(sph, hdr.Size)
			spd = invertSparseEntries(sph, hdr.Size)

			// Format the sparse map.
			hdr.Size = 0 // Replace with encoded size
			spb = append(strconv.AppendInt(spb, int64(len(spd)), 10), '\n')
			for _, s := range spd {
				hdr.Size += s.Length
				spb = append(strconv.AppendInt(spb, s.Offset, 10), '\n')
				spb = append(strconv.AppendInt(spb, s.Length, 10), '\n')
			}
			pad := blockPadding(int64(len(spb)))
			spb = append(spb, zeroBlock[:pad]...)
			hdr.Size += int64(len(spb)) // Accounts for encoded sparse map

			// Add and modify appropriate PAX records.
			dir, file := path.Split(realName)
			hdr.Name = path.Join(dir, "GNUSparseFile.0", file)
			paxHdrs[paxGNUSparseMajor] = "1"
			paxHdrs[paxGNUSparseMinor] = "0"
			paxHdrs[paxGNUSparseName] = realName
			paxHdrs[paxGNUSparseRealSize] = strconv.FormatInt(realSize, 10)
			paxHdrs[paxSize] = strconv.FormatInt(hdr.Size, 10)
			delete(paxHdrs, paxPath) // Recorded by paxGNUSparseName
		}
	*/
	_ = realSize // Only referenced by the disabled sparse code above

	// Write PAX records to the output.
	isGlobal := hdr.Typeflag == TypeXGlobalHeader
	if len(paxHdrs) > 0 || isGlobal {
		// Sort keys for deterministic ordering.
		var keys []string
		for k := range paxHdrs {
			keys = append(keys, k)
		}
		sort.Strings(keys)

		// Write each record to a buffer.
		var buf strings.Builder
		for _, k := range keys {
			rec, err := formatPAXRecord(k, paxHdrs[k])
			if err != nil {
				return err
			}
			buf.WriteString(rec)
		}

		// Write the extended header file.
		var name string
		var flag byte
		if isGlobal {
			// Global headers keep the caller's name, or a fixed
			// default when none was given.
			name = realName
			if name == "" {
				name = "GlobalHead.0.0"
			}
			flag = TypeXGlobalHeader
		} else {
			// Per-file headers are named after the real file, placed
			// under a "PaxHeaders.0" path element.
			dir, file := path.Split(realName)
			name = path.Join(dir, "PaxHeaders.0", file)
			flag = TypeXHeader
		}
		data := buf.String()
		if err := tw.writeRawFile(name, data, flag, FormatPAX); err != nil || isGlobal {
			return err // Global headers return here
		}
	}

	// Pack the main header.
	var f formatter // Ignore errors since they are expected
	fmtStr := func(b []byte, s string) { f.formatString(b, toASCII(s)) }
	blk := tw.templateV7Plus(hdr, fmtStr, f.formatOctal)
	blk.setFormat(FormatPAX)
	if err := tw.writeRawHeader(blk, hdr.Size, hdr.Typeflag); err != nil {
		return err
	}

	// TODO(dsnet): Re-enable this when adding sparse support.
	// See https://golang.org/issue/22735
	/*
		// Write the sparse map and setup the sparse writer if necessary.
		if len(spd) > 0 {
			// Use tw.curr since the sparse map is accounted for in hdr.Size.
			if _, err := tw.curr.Write(spb); err != nil {
				return err
			}
			tw.curr = &sparseFileWriter{tw.curr, spd, 0}
		}
	*/
	return nil
}
230
// writeGNUHeader writes hdr using the GNU format.
//
// If Name or Linkname is longer than nameSize, the full NUL-terminated value
// is first written as a separate "././@LongLink" file of type
// TypeGNULongName or TypeGNULongLink respectively. The main header block is
// then written with its numeric fields encoded by formatNumeric, including
// the access and change times when they are set.
func (tw *Writer) writeGNUHeader(hdr *Header) error {
	// Use long-link files if Name or Linkname exceeds the field size.
	const longName = "././@LongLink"
	if len(hdr.Name) > nameSize {
		data := hdr.Name + "\x00"
		if err := tw.writeRawFile(longName, data, TypeGNULongName, FormatGNU); err != nil {
			return err
		}
	}
	if len(hdr.Linkname) > nameSize {
		data := hdr.Linkname + "\x00"
		if err := tw.writeRawFile(longName, data, TypeGNULongLink, FormatGNU); err != nil {
			return err
		}
	}

	// Pack the main header.
	var f formatter // Ignore errors since they are expected
	// spd and spb are only filled in by the disabled sparse code below,
	// so both stay empty for now.
	var spd sparseDatas
	var spb []byte
	blk := tw.templateV7Plus(hdr, f.formatString, f.formatNumeric)
	if !hdr.AccessTime.IsZero() {
		f.formatNumeric(blk.toGNU().accessTime(), hdr.AccessTime.Unix())
	}
	if !hdr.ChangeTime.IsZero() {
		f.formatNumeric(blk.toGNU().changeTime(), hdr.ChangeTime.Unix())
	}
	// TODO(dsnet): Re-enable this when adding sparse support.
	// See https://golang.org/issue/22735
	/*
		if hdr.Typeflag == TypeGNUSparse {
			sph := append([]sparseEntry{}, hdr.SparseHoles...) // Copy sparse map
			sph = alignSparseEntries(sph, hdr.Size)
			spd = invertSparseEntries(sph, hdr.Size)

			// Format the sparse map.
			formatSPD := func(sp sparseDatas, sa sparseArray) sparseDatas {
				for i := 0; len(sp) > 0 && i < sa.MaxEntries(); i++ {
					f.formatNumeric(sa.Entry(i).Offset(), sp[0].Offset)
					f.formatNumeric(sa.Entry(i).Length(), sp[0].Length)
					sp = sp[1:]
				}
				if len(sp) > 0 {
					sa.IsExtended()[0] = 1
				}
				return sp
			}
			sp2 := formatSPD(spd, blk.GNU().Sparse())
			for len(sp2) > 0 {
				var spHdr block
				sp2 = formatSPD(sp2, spHdr.Sparse())
				spb = append(spb, spHdr[:]...)
			}

			// Update size fields in the header block.
			realSize := hdr.Size
			hdr.Size = 0 // Encoded size; does not account for encoded sparse map
			for _, s := range spd {
				hdr.Size += s.Length
			}
			copy(blk.V7().Size(), zeroBlock[:]) // Reset field
			f.formatNumeric(blk.V7().Size(), hdr.Size)
			f.formatNumeric(blk.GNU().RealSize(), realSize)
		}
	*/
	blk.setFormat(FormatGNU)
	if err := tw.writeRawHeader(blk, hdr.Size, hdr.Typeflag); err != nil {
		return err
	}

	// Write the extended sparse map and setup the sparse writer if necessary.
	// While the sparse code above is disabled, spd is always empty and this
	// branch is not taken.
	if len(spd) > 0 {
		// Use tw.w since the sparse map is not accounted for in hdr.Size.
		if _, err := tw.w.Write(spb); err != nil {
			return err
		}
		tw.curr = &sparseFileWriter{tw.curr, spd, 0}
	}
	return nil
}
311
// Formatter callback types accepted by templateV7Plus.
type (
	// stringFormatter encodes a string value into a header field.
	stringFormatter func([]byte, string)
	// numberFormatter encodes an integer value into a header field.
	numberFormatter func([]byte, int64)
)
316
317// templateV7Plus fills out the V7 fields of a block using values from hdr.
318// It also fills out fields (uname, gname, devmajor, devminor) that are
319// shared in the USTAR, PAX, and GNU formats using the provided formatters.
320//
321// The block returned is only valid until the next call to
322// templateV7Plus or writeRawFile.
323func (tw *Writer) templateV7Plus(hdr *Header, fmtStr stringFormatter, fmtNum numberFormatter) *block {
324	tw.blk.reset()
325
326	modTime := hdr.ModTime
327	if modTime.IsZero() {
328		modTime = time.Unix(0, 0)
329	}
330
331	v7 := tw.blk.toV7()
332	v7.typeFlag()[0] = hdr.Typeflag
333	fmtStr(v7.name(), hdr.Name)
334	fmtStr(v7.linkName(), hdr.Linkname)
335	fmtNum(v7.mode(), hdr.Mode)
336	fmtNum(v7.uid(), int64(hdr.Uid))
337	fmtNum(v7.gid(), int64(hdr.Gid))
338	fmtNum(v7.size(), hdr.Size)
339	fmtNum(v7.modTime(), modTime.Unix())
340
341	ustar := tw.blk.toUSTAR()
342	fmtStr(ustar.userName(), hdr.Uname)
343	fmtStr(ustar.groupName(), hdr.Gname)
344	fmtNum(ustar.devMajor(), hdr.Devmajor)
345	fmtNum(ustar.devMinor(), hdr.Devminor)
346
347	return &tw.blk
348}
349
350// writeRawFile writes a minimal file with the given name and flag type.
351// It uses format to encode the header format and will write data as the body.
352// It uses default values for all of the other fields (as BSD and GNU tar does).
353func (tw *Writer) writeRawFile(name, data string, flag byte, format Format) error {
354	tw.blk.reset()
355
356	// Best effort for the filename.
357	name = toASCII(name)
358	if len(name) > nameSize {
359		name = name[:nameSize]
360	}
361	name = strings.TrimRight(name, "/")
362
363	var f formatter
364	v7 := tw.blk.toV7()
365	v7.typeFlag()[0] = flag
366	f.formatString(v7.name(), name)
367	f.formatOctal(v7.mode(), 0)
368	f.formatOctal(v7.uid(), 0)
369	f.formatOctal(v7.gid(), 0)
370	f.formatOctal(v7.size(), int64(len(data))) // Must be < 8GiB
371	f.formatOctal(v7.modTime(), 0)
372	tw.blk.setFormat(format)
373	if f.err != nil {
374		return f.err // Only occurs if size condition is violated
375	}
376
377	// Write the header and data.
378	if err := tw.writeRawHeader(&tw.blk, int64(len(data)), flag); err != nil {
379		return err
380	}
381	_, err := io.WriteString(tw, data)
382	return err
383}
384
385// writeRawHeader writes the value of blk, regardless of its value.
386// It sets up the Writer such that it can accept a file of the given size.
387// If the flag is a special header-only flag, then the size is treated as zero.
388func (tw *Writer) writeRawHeader(blk *block, size int64, flag byte) error {
389	if err := tw.Flush(); err != nil {
390		return err
391	}
392	if _, err := tw.w.Write(blk[:]); err != nil {
393		return err
394	}
395	if isHeaderOnlyType(flag) {
396		size = 0
397	}
398	tw.curr = &regFileWriter{tw.w, size}
399	tw.pad = blockPadding(size)
400	return nil
401}
402
403// splitUSTARPath splits a path according to USTAR prefix and suffix rules.
404// If the path is not splittable, then it will return ("", "", false).
405func splitUSTARPath(name string) (prefix, suffix string, ok bool) {
406	length := len(name)
407	if length <= nameSize || !isASCII(name) {
408		return "", "", false
409	} else if length > prefixSize+1 {
410		length = prefixSize + 1
411	} else if name[length-1] == '/' {
412		length--
413	}
414
415	i := strings.LastIndex(name[:length], "/")
416	nlen := len(name) - i - 1 // nlen is length of suffix
417	plen := i                 // plen is length of prefix
418	if i <= 0 || nlen > nameSize || nlen == 0 || plen > prefixSize {
419		return "", "", false
420	}
421	return name[:i], name[i+1:], true
422}
423
424// Write writes to the current file in the tar archive.
425// Write returns the error ErrWriteTooLong if more than
426// Header.Size bytes are written after WriteHeader.
427//
428// Calling Write on special types like TypeLink, TypeSymlink, TypeChar,
429// TypeBlock, TypeDir, and TypeFifo returns (0, ErrWriteTooLong) regardless
430// of what the Header.Size claims.
431func (tw *Writer) Write(b []byte) (int, error) {
432	if tw.err != nil {
433		return 0, tw.err
434	}
435	n, err := tw.curr.Write(b)
436	if err != nil && err != ErrWriteTooLong {
437		tw.err = err
438	}
439	return n, err
440}
441
442// readFrom populates the content of the current file by reading from r.
443// The bytes read must match the number of remaining bytes in the current file.
444//
445// If the current file is sparse and r is an io.ReadSeeker,
446// then readFrom uses Seek to skip past holes defined in Header.SparseHoles,
447// assuming that skipped regions are all NULs.
448// This always reads the last byte to ensure r is the right size.
449//
450// TODO(dsnet): Re-export this when adding sparse file support.
451// See https://golang.org/issue/22735
452func (tw *Writer) readFrom(r io.Reader) (int64, error) {
453	if tw.err != nil {
454		return 0, tw.err
455	}
456	n, err := tw.curr.ReadFrom(r)
457	if err != nil && err != ErrWriteTooLong {
458		tw.err = err
459	}
460	return n, err
461}
462
463// Close closes the tar archive by flushing the padding, and writing the footer.
464// If the current file (from a prior call to WriteHeader) is not fully written,
465// then this returns an error.
466func (tw *Writer) Close() error {
467	if tw.err == ErrWriteAfterClose {
468		return nil
469	}
470	if tw.err != nil {
471		return tw.err
472	}
473
474	// Trailer: two zero blocks.
475	err := tw.Flush()
476	for i := 0; i < 2 && err == nil; i++ {
477		_, err = tw.w.Write(zeroBlock[:])
478	}
479
480	// Ensure all future actions are invalid.
481	tw.err = ErrWriteAfterClose
482	return err // Report IO errors
483}
484
// regFileWriter is a fileWriter for writing data to a regular file entry.
// It forwards data to w and accepts at most nb more bytes.
type regFileWriter struct {
	w  io.Writer // Underlying Writer
	nb int64     // Number of remaining bytes to write
}
490
491func (fw *regFileWriter) Write(b []byte) (n int, err error) {
492	overwrite := int64(len(b)) > fw.nb
493	if overwrite {
494		b = b[:fw.nb]
495	}
496	if len(b) > 0 {
497		n, err = fw.w.Write(b)
498		fw.nb -= int64(n)
499	}
500	switch {
501	case err != nil:
502		return n, err
503	case overwrite:
504		return n, ErrWriteTooLong
505	default:
506		return n, nil
507	}
508}
509
// ReadFrom copies from r into the entry with io.Copy and returns its result.
// fw is wrapped in an anonymous struct that exposes only its Write method,
// so io.Copy does not see a ReadFrom method on the destination and call
// straight back into this function.
func (fw *regFileWriter) ReadFrom(r io.Reader) (int64, error) {
	return io.Copy(struct{ io.Writer }{fw}, r)
}
513
// logicalRemaining implements fileState.logicalRemaining.
// It returns the number of bytes that remain to be written to the entry.
func (fw regFileWriter) logicalRemaining() int64 {
	return fw.nb
}
518
// physicalRemaining implements fileState.physicalRemaining.
// For a regular file entry it returns the same count as logicalRemaining.
func (fw regFileWriter) physicalRemaining() int64 {
	return fw.nb
}
523
// sparseFileWriter is a fileWriter for writing data to a sparse file entry.
// Bytes that fall inside data fragments are passed on to fw; bytes that
// fall inside holes are only checked and not stored.
type sparseFileWriter struct {
	fw  fileWriter  // Underlying fileWriter
	sp  sparseDatas // Normalized list of data fragments
	pos int64       // Current position in sparse file
}
530
// Write writes b at the current logical position of the sparse file.
// Bytes that land in a hole are handed to zeroWriter, which only accepts
// NULs, and are not forwarded; bytes that land in a data fragment are
// written to the underlying fileWriter. If b is longer than the logical
// space remaining, it is truncated and ErrWriteTooLong is returned.
func (sw *sparseFileWriter) Write(b []byte) (n int, err error) {
	overwrite := int64(len(b)) > sw.logicalRemaining()
	if overwrite {
		b = b[:sw.logicalRemaining()]
	}

	b0 := b // Remember the (possibly truncated) input to compute n later
	endPos := sw.pos + int64(len(b))
	for endPos > sw.pos && err == nil {
		var nf int // Bytes written in fragment
		dataStart, dataEnd := sw.sp[0].Offset, sw.sp[0].endOffset()
		if sw.pos < dataStart { // In a hole fragment
			bf := b[:min(int64(len(b)), dataStart-sw.pos)]
			nf, err = zeroWriter{}.Write(bf)
		} else { // In a data fragment
			bf := b[:min(int64(len(b)), dataEnd-sw.pos)]
			nf, err = sw.fw.Write(bf)
		}
		b = b[nf:]
		sw.pos += int64(nf)
		if sw.pos >= dataEnd && len(sw.sp) > 1 {
			sw.sp = sw.sp[1:] // Ensure last fragment always remains
		}
	}

	n = len(b0) - len(b) // Bytes of the input consumed, holes included
	switch {
	case err == ErrWriteTooLong:
		return n, errMissData // Not possible; implies bug in validation logic
	case err != nil:
		return n, err
	case sw.logicalRemaining() == 0 && sw.physicalRemaining() > 0:
		return n, errUnrefData // Not possible; implies bug in validation logic
	case overwrite:
		return n, ErrWriteTooLong
	default:
		return n, nil
	}
}
570
// ReadFrom populates the remainder of the sparse entry from r.
//
// If r is not an io.ReadSeeker, or a probing Seek on it fails, the data is
// copied through Write with io.Copy. Otherwise hole fragments are skipped
// with Seek and data fragments are copied to the underlying fileWriter with
// io.CopyN. When a hole is reached and no physical data remains, the seek
// stops one byte short and that final byte is read explicitly to check that
// r is long enough.
//
// io.EOF is reported as io.ErrUnexpectedEOF. On success, ensureEOF is used
// to report ErrWriteTooLong if r still has data left.
func (sw *sparseFileWriter) ReadFrom(r io.Reader) (n int64, err error) {
	rs, ok := r.(io.ReadSeeker)
	if ok {
		if _, err := rs.Seek(0, io.SeekCurrent); err != nil {
			ok = false // Not all io.Seeker can really seek
		}
	}
	if !ok {
		// Wrap sw so that only Write is visible to io.Copy; otherwise
		// io.Copy would call back into this ReadFrom.
		return io.Copy(struct{ io.Writer }{sw}, r)
	}

	var readLastByte bool
	pos0 := sw.pos // Starting position, used to compute n
	for sw.logicalRemaining() > 0 && !readLastByte && err == nil {
		var nf int64 // Size of fragment
		dataStart, dataEnd := sw.sp[0].Offset, sw.sp[0].endOffset()
		if sw.pos < dataStart { // In a hole fragment
			nf = dataStart - sw.pos
			if sw.physicalRemaining() == 0 {
				// No data fragments are left, so this is the final hole:
				// stop one byte early and read that byte after the loop.
				readLastByte = true
				nf--
			}
			_, err = rs.Seek(nf, io.SeekCurrent)
		} else { // In a data fragment
			nf = dataEnd - sw.pos
			nf, err = io.CopyN(sw.fw, rs, nf)
		}
		sw.pos += nf
		if sw.pos >= dataEnd && len(sw.sp) > 1 {
			sw.sp = sw.sp[1:] // Ensure last fragment always remains
		}
	}

	// If the last fragment is a hole, then seek to 1-byte before EOF, and
	// read a single byte to ensure the file is the right size.
	if readLastByte && err == nil {
		_, err = mustReadFull(rs, []byte{0})
		sw.pos++
	}

	n = sw.pos - pos0
	switch {
	case err == io.EOF:
		return n, io.ErrUnexpectedEOF
	case err == ErrWriteTooLong:
		return n, errMissData // Not possible; implies bug in validation logic
	case err != nil:
		return n, err
	case sw.logicalRemaining() == 0 && sw.physicalRemaining() > 0:
		return n, errUnrefData // Not possible; implies bug in validation logic
	default:
		return n, ensureEOF(rs)
	}
}
625
// logicalRemaining returns the distance from the current position to the
// end offset of the last fragment in sp.
func (sw sparseFileWriter) logicalRemaining() int64 {
	return sw.sp[len(sw.sp)-1].endOffset() - sw.pos
}
// physicalRemaining returns the physicalRemaining value of the underlying
// fileWriter.
func (sw sparseFileWriter) physicalRemaining() int64 {
	return sw.fw.physicalRemaining()
}
632
633// zeroWriter may only be written with NULs, otherwise it returns errWriteHole.
634type zeroWriter struct{}
635
636func (zeroWriter) Write(b []byte) (int, error) {
637	for i, c := range b {
638		if c != 0 {
639			return i, errWriteHole
640		}
641	}
642	return len(b), nil
643}
644
645// ensureEOF checks whether r is at EOF, reporting ErrWriteTooLong if not so.
646func ensureEOF(r io.Reader) error {
647	n, err := tryReadFull(r, []byte{0})
648	switch {
649	case n > 0:
650		return ErrWriteTooLong
651	case err == io.EOF:
652		return nil
653	default:
654		return err
655	}
656}
657