1// Copyright 2009 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package tar
6
7// TODO(dsymonds):
8// - catch more errors (no first header, etc.)
9
10import (
11	"bytes"
12	"errors"
13	"fmt"
14	"io"
15	"os"
16	"path"
17	"strconv"
18	"strings"
19	"time"
20)
21
// Errors reported to callers of Writer.
var (
	// ErrWriteTooLong is returned by Write when more data is supplied
	// than the current entry has room for.
	ErrWriteTooLong = errors.New("archive/tar: write too long")
	// ErrFieldTooLong is recorded when a string does not fit into its
	// fixed-width header field.
	ErrFieldTooLong = errors.New("archive/tar: header field too long")
	// ErrWriteAfterClose is returned by WriteHeader once the Writer
	// has been closed.
	ErrWriteAfterClose = errors.New("archive/tar: write after close")
)

// errNameTooLong is an internal signal from splitUSTARLongName that a
// name cannot be split into ustar prefix and name fields.
var errNameTooLong = errors.New("archive/tar: name too long")
28
// A Writer provides sequential writing of a tar archive in POSIX.1 format.
// A tar archive consists of a sequence of files.
// Call WriteHeader to begin a new file, and then call Write to supply that file's data,
// writing at most hdr.Size bytes in total.
// Call Close when done to pad the last entry and emit the archive trailer.
type Writer struct {
	// w is the destination passed to NewWriter; headers, data, padding
	// and the trailer are all written to it.
	w          io.Writer
	// err holds an error recorded by an earlier operation; WriteHeader
	// and Close return it instead of doing further work.
	err        error
	nb         int64 // number of unwritten bytes for current file entry
	pad        int64 // amount of padding to write after current file entry
	// closed is set by Close; WriteHeader and Write refuse to run afterwards.
	closed     bool
	usedBinary bool // whether the binary numeric field extension was used
}
41
42// NewWriter creates a new Writer writing to w.
43func NewWriter(w io.Writer) *Writer { return &Writer{w: w} }
44
45// Flush finishes writing the current file (optional).
46func (tw *Writer) Flush() error {
47	if tw.nb > 0 {
48		tw.err = fmt.Errorf("archive/tar: missed writing %d bytes", tw.nb)
49		return tw.err
50	}
51
52	n := tw.nb + tw.pad
53	for n > 0 && tw.err == nil {
54		nr := n
55		if nr > blockSize {
56			nr = blockSize
57		}
58		var nw int
59		nw, tw.err = tw.w.Write(zeroBlock[0:nr])
60		n -= int64(nw)
61	}
62	tw.nb = 0
63	tw.pad = 0
64	return tw.err
65}
66
67// Write s into b, terminating it with a NUL if there is room.
68func (tw *Writer) cString(b []byte, s string) {
69	if len(s) > len(b) {
70		if tw.err == nil {
71			tw.err = ErrFieldTooLong
72		}
73		return
74	}
75	copy(b, s)
76	if len(s) < len(b) {
77		b[len(s)] = 0
78	}
79}
80
81// Encode x as an octal ASCII string and write it into b with leading zeros.
82func (tw *Writer) octal(b []byte, x int64) {
83	s := strconv.FormatInt(x, 8)
84	// leading zeros, but leave room for a NUL.
85	for len(s)+1 < len(b) {
86		s = "0" + s
87	}
88	tw.cString(b, s)
89}
90
91// Write x into b, either as octal or as binary (GNUtar/star extension).
92func (tw *Writer) numeric(b []byte, x int64) {
93	// Try octal first.
94	s := strconv.FormatInt(x, 8)
95	if len(s) < len(b) {
96		tw.octal(b, x)
97		return
98	}
99	// Too big: use binary (big-endian).
100	tw.usedBinary = true
101	for i := len(b) - 1; x > 0 && i >= 0; i-- {
102		b[i] = byte(x)
103		x >>= 8
104	}
105	b[0] |= 0x80 // highest bit indicates binary format
106}
107
// minTime is the earliest ModTime that WriteHeader stores as-is:
// the Unix epoch.
var minTime = time.Unix(0, 0)

// maxTime is the latest ModTime that WriteHeader stores as-is.
// There is room for 11 octal digits (33 bits) of mtime, so it lies
// 1<<33 - 1 seconds after minTime.
var maxTime = time.Unix(1<<33-1, 0)
113
114// WriteHeader writes hdr and prepares to accept the file's contents.
115// WriteHeader calls Flush if it is not the first header.
116// Calling after a Close will return ErrWriteAfterClose.
117func (tw *Writer) WriteHeader(hdr *Header) error {
118	if tw.closed {
119		return ErrWriteAfterClose
120	}
121	if tw.err == nil {
122		tw.Flush()
123	}
124	if tw.err != nil {
125		return tw.err
126	}
127	// Decide whether or not to use PAX extensions
128	// TODO(shanemhansen): we might want to use PAX headers for
129	// subsecond time resolution, but for now let's just capture
130	// the long name/long symlink use case.
131	suffix := hdr.Name
132	prefix := ""
133	if len(hdr.Name) > fileNameSize || len(hdr.Linkname) > fileNameSize {
134		var err error
135		prefix, suffix, err = tw.splitUSTARLongName(hdr.Name)
136		// Either we were unable to pack the long name into ustar format
137		// or the link name is too long; use PAX headers.
138		if err == errNameTooLong || len(hdr.Linkname) > fileNameSize {
139			if err := tw.writePAXHeader(hdr); err != nil {
140				return err
141			}
142		} else if err != nil {
143			return err
144		}
145	}
146	tw.nb = int64(hdr.Size)
147	tw.pad = -tw.nb & (blockSize - 1) // blockSize is a power of two
148
149	header := make([]byte, blockSize)
150	s := slicer(header)
151	tw.cString(s.next(fileNameSize), suffix)
152
153	// Handle out of range ModTime carefully.
154	var modTime int64
155	if !hdr.ModTime.Before(minTime) && !hdr.ModTime.After(maxTime) {
156		modTime = hdr.ModTime.Unix()
157	}
158
159	tw.octal(s.next(8), hdr.Mode)          // 100:108
160	tw.numeric(s.next(8), int64(hdr.Uid))  // 108:116
161	tw.numeric(s.next(8), int64(hdr.Gid))  // 116:124
162	tw.numeric(s.next(12), hdr.Size)       // 124:136
163	tw.numeric(s.next(12), modTime)        // 136:148
164	s.next(8)                              // chksum (148:156)
165	s.next(1)[0] = hdr.Typeflag            // 156:157
166	tw.cString(s.next(100), hdr.Linkname)  // linkname (157:257)
167	copy(s.next(8), []byte("ustar\x0000")) // 257:265
168	tw.cString(s.next(32), hdr.Uname)      // 265:297
169	tw.cString(s.next(32), hdr.Gname)      // 297:329
170	tw.numeric(s.next(8), hdr.Devmajor)    // 329:337
171	tw.numeric(s.next(8), hdr.Devminor)    // 337:345
172	tw.cString(s.next(155), prefix)        // 345:500
173	// Use the GNU magic instead of POSIX magic if we used any GNU extensions.
174	if tw.usedBinary {
175		copy(header[257:265], []byte("ustar  \x00"))
176	}
177	// Use the ustar magic if we used ustar long names.
178	if len(prefix) > 0 {
179		copy(header[257:265], []byte("ustar\000"))
180	}
181
182	// The chksum field is terminated by a NUL and a space.
183	// This is different from the other octal fields.
184	chksum, _ := checksum(header)
185	tw.octal(header[148:155], chksum)
186	header[155] = ' '
187
188	if tw.err != nil {
189		// problem with header; probably integer too big for a field.
190		return tw.err
191	}
192
193	_, tw.err = tw.w.Write(header)
194
195	return tw.err
196}
197
198// writeUSTARLongName splits a USTAR long name hdr.Name.
199// name must be < 256 characters. errNameTooLong is returned
200// if hdr.Name can't be split. The splitting heuristic
201// is compatible with gnu tar.
202func (tw *Writer) splitUSTARLongName(name string) (prefix, suffix string, err error) {
203	length := len(name)
204	if length > fileNamePrefixSize+1 {
205		length = fileNamePrefixSize + 1
206	} else if name[length-1] == '/' {
207		length--
208	}
209	i := strings.LastIndex(name[:length], "/")
210	nlen := length - i - 1
211	if i <= 0 || nlen > fileNameSize || nlen == 0 {
212		err = errNameTooLong
213		return
214	}
215	prefix, suffix = name[:i], name[i+1:]
216	return
217}
218
219// writePaxHeader writes an extended pax header to the
220// archive.
221func (tw *Writer) writePAXHeader(hdr *Header) error {
222	// Prepare extended header
223	ext := new(Header)
224	ext.Typeflag = TypeXHeader
225	// Setting ModTime is required for reader parsing to
226	// succeed, and seems harmless enough.
227	ext.ModTime = hdr.ModTime
228	// The spec asks that we namespace our pseudo files
229	// with the current pid.
230	pid := os.Getpid()
231	dir, file := path.Split(hdr.Name)
232	ext.Name = path.Join(dir,
233		fmt.Sprintf("PaxHeaders.%d", pid), file)[0:100]
234	// Construct the body
235	var buf bytes.Buffer
236	if len(hdr.Name) > fileNameSize {
237		fmt.Fprint(&buf, paxHeader("path="+hdr.Name))
238	}
239	if len(hdr.Linkname) > fileNameSize {
240		fmt.Fprint(&buf, paxHeader("linkpath="+hdr.Linkname))
241	}
242	ext.Size = int64(len(buf.Bytes()))
243	if err := tw.WriteHeader(ext); err != nil {
244		return err
245	}
246	if _, err := tw.Write(buf.Bytes()); err != nil {
247		return err
248	}
249	if err := tw.Flush(); err != nil {
250		return err
251	}
252	return nil
253}
254
// paxHeader formats msg as a single pax record of the form
// "<length> <msg>\n", where length is the decimal byte count of the
// whole record, including the length digits themselves.
func paxHeader(msg string) string {
	const overhead = 2 // the separating space and the trailing newline
	base := len(msg) + overhead

	// First guess: size the length field from the record without it.
	total := base + len(strconv.Itoa(base))
	// Adding the digits may have pushed the total into one more digit;
	// if so, recompute it from the digit count of the guess.
	if actual := base + len(strconv.Itoa(total)); actual != total {
		total = actual
	}
	return fmt.Sprintf("%d %s\n", total, msg)
}
269
270// Write writes to the current entry in the tar archive.
271// Write returns the error ErrWriteTooLong if more than
272// hdr.Size bytes are written after WriteHeader.
273func (tw *Writer) Write(b []byte) (n int, err error) {
274	if tw.closed {
275		err = ErrWriteTooLong
276		return
277	}
278	overwrite := false
279	if int64(len(b)) > tw.nb {
280		b = b[0:tw.nb]
281		overwrite = true
282	}
283	n, err = tw.w.Write(b)
284	tw.nb -= int64(n)
285	if err == nil && overwrite {
286		err = ErrWriteTooLong
287		return
288	}
289	tw.err = err
290	return
291}
292
293// Close closes the tar archive, flushing any unwritten
294// data to the underlying writer.
295func (tw *Writer) Close() error {
296	if tw.err != nil || tw.closed {
297		return tw.err
298	}
299	tw.Flush()
300	tw.closed = true
301	if tw.err != nil {
302		return tw.err
303	}
304
305	// trailer: two zero blocks
306	for i := 0; i < 2; i++ {
307		_, tw.err = tw.w.Write(zeroBlock)
308		if tw.err != nil {
309			break
310		}
311	}
312	return tw.err
313}
314