1// Copyright 2016 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package tar
6
7import (
8	"bytes"
9	"fmt"
10	"strconv"
11	"strings"
12	"time"
13)
14
// hasNUL reports whether s contains at least one NUL (0x00) byte.
func hasNUL(s string) bool {
	return strings.Contains(s, "\x00")
}
19
// isASCII reports whether s is an ASCII C-style string, that is, whether
// every byte of s lies in the range 0x01 through 0x7f.
// The empty string is reported as ASCII.
func isASCII(s string) bool {
	// Every non-ASCII rune (and every invalid UTF-8 sequence) contains a
	// byte >= 0x80, so inspecting bytes is equivalent to inspecting runes.
	for i := 0; i < len(s); i++ {
		if b := s[i]; b == 0x00 || b >= 0x80 {
			return false
		}
	}
	return true
}
29
30// toASCII converts the input to an ASCII C-style string.
31// This a best effort conversion, so invalid characters are dropped.
32func toASCII(s string) string {
33	if isASCII(s) {
34		return s
35	}
36	b := make([]byte, 0, len(s))
37	for _, c := range s {
38		if c < 0x80 && c != 0x00 {
39			b = append(b, byte(c))
40		}
41	}
42	return string(b)
43}
44
// parser is the receiver for the field parsing methods.
// Those methods do not return an error value; the ones that can fail
// record the failure in err instead.
type parser struct {
	err error // Last error seen
}
48
// formatter is the receiver for the field formatting methods.
// Those methods do not return an error value; when a value cannot be
// encoded into the destination they record the failure in err instead.
type formatter struct {
	err error // Last error seen
}
52
53// parseString parses bytes as a NUL-terminated C-style string.
54// If a NUL byte is not found then the whole slice is returned as a string.
55func (*parser) parseString(b []byte) string {
56	if i := bytes.IndexByte(b, 0); i >= 0 {
57		return string(b[:i])
58	}
59	return string(b)
60}
61
62// formatString copies s into b, NUL-terminating if possible.
63func (f *formatter) formatString(b []byte, s string) {
64	if len(s) > len(b) {
65		f.err = ErrFieldTooLong
66	}
67	copy(b, s)
68	if len(s) < len(b) {
69		b[len(s)] = 0
70	}
71
72	// Some buggy readers treat regular files with a trailing slash
73	// in the V7 path field as a directory even though the full path
74	// recorded elsewhere (e.g., via PAX record) contains no trailing slash.
75	if len(s) > len(b) && b[len(b)-1] == '/' {
76		n := len(strings.TrimRight(s[:len(b)], "/"))
77		b[n] = 0 // Replace trailing slash with NUL terminator
78	}
79}
80
// fitsInBase256 reports whether x can be encoded into n bytes using base-256
// encoding. Base-256 encoding, unlike octal encoding, has no need for a
// trailing NUL character, so all n bytes are available for output.
//
// If operating in binary mode, this assumes strict GNU binary mode; which means
// that the first byte can only be either 0x80 or 0xff. Thus, the first byte is
// equivalent to the sign bit in two's complement form.
func fitsInBase256(n int, x int64) bool {
	if n >= 9 {
		return true // Any int64 fits once eight data bytes are available
	}

	// Only the n-1 bytes after the first one carry magnitude.
	shift := uint(n-1) * 8
	lowest, limit := int64(-1)<<shift, int64(1)<<shift
	return lowest <= x && x < limit
}
92
93// parseNumeric parses the input as being encoded in either base-256 or octal.
94// This function may return negative numbers.
95// If parsing fails or an integer overflow occurs, err will be set.
96func (p *parser) parseNumeric(b []byte) int64 {
97	// Check for base-256 (binary) format first.
98	// If the first bit is set, then all following bits constitute a two's
99	// complement encoded number in big-endian byte order.
100	if len(b) > 0 && b[0]&0x80 != 0 {
101		// Handling negative numbers relies on the following identity:
102		//	-a-1 == ^a
103		//
104		// If the number is negative, we use an inversion mask to invert the
105		// data bytes and treat the value as an unsigned number.
106		var inv byte // 0x00 if positive or zero, 0xff if negative
107		if b[0]&0x40 != 0 {
108			inv = 0xff
109		}
110
111		var x uint64
112		for i, c := range b {
113			c ^= inv // Inverts c only if inv is 0xff, otherwise does nothing
114			if i == 0 {
115				c &= 0x7f // Ignore signal bit in first byte
116			}
117			if (x >> 56) > 0 {
118				p.err = ErrHeader // Integer overflow
119				return 0
120			}
121			x = x<<8 | uint64(c)
122		}
123		if (x >> 63) > 0 {
124			p.err = ErrHeader // Integer overflow
125			return 0
126		}
127		if inv == 0xff {
128			return ^int64(x)
129		}
130		return int64(x)
131	}
132
133	// Normal case is base-8 (octal) format.
134	return p.parseOctal(b)
135}
136
137// formatNumeric encodes x into b using base-8 (octal) encoding if possible.
138// Otherwise it will attempt to use base-256 (binary) encoding.
139func (f *formatter) formatNumeric(b []byte, x int64) {
140	if fitsInOctal(len(b), x) {
141		f.formatOctal(b, x)
142		return
143	}
144
145	if fitsInBase256(len(b), x) {
146		for i := len(b) - 1; i >= 0; i-- {
147			b[i] = byte(x)
148			x >>= 8
149		}
150		b[0] |= 0x80 // Highest bit indicates binary format
151		return
152	}
153
154	f.formatOctal(b, 0) // Last resort, just write zero
155	f.err = ErrFieldTooLong
156}
157
158func (p *parser) parseOctal(b []byte) int64 {
159	// Because unused fields are filled with NULs, we need
160	// to skip leading NULs. Fields may also be padded with
161	// spaces or NULs.
162	// So we remove leading and trailing NULs and spaces to
163	// be sure.
164	b = bytes.Trim(b, " \x00")
165
166	if len(b) == 0 {
167		return 0
168	}
169	x, perr := strconv.ParseUint(p.parseString(b), 8, 64)
170	if perr != nil {
171		p.err = ErrHeader
172	}
173	return int64(x)
174}
175
176func (f *formatter) formatOctal(b []byte, x int64) {
177	if !fitsInOctal(len(b), x) {
178		x = 0 // Last resort, just write zero
179		f.err = ErrFieldTooLong
180	}
181
182	s := strconv.FormatInt(x, 8)
183	// Add leading zeros, but leave room for a NUL.
184	if n := len(b) - len(s) - 1; n > 0 {
185		s = strings.Repeat("0", n) + s
186	}
187	f.formatString(b, s)
188}
189
// fitsInOctal reports whether the integer x fits in a field n-bytes long
// using octal encoding with the appropriate NUL terminator.
// Negative values never fit.
func fitsInOctal(n int, x int64) bool {
	if x < 0 {
		return false
	}
	if n >= 22 {
		return true // 21 octal digits hold every non-negative int64
	}

	// One byte is reserved for the NUL; each remaining byte holds 3 bits.
	limit := int64(1) << (uint(n-1) * 3)
	return x < limit
}
196
197// parsePAXTime takes a string of the form %d.%d as described in the PAX
198// specification. Note that this implementation allows for negative timestamps,
199// which is allowed for by the PAX specification, but not always portable.
200func parsePAXTime(s string) (time.Time, error) {
201	const maxNanoSecondDigits = 9
202
203	// Split string into seconds and sub-seconds parts.
204	ss, sn := s, ""
205	if pos := strings.IndexByte(s, '.'); pos >= 0 {
206		ss, sn = s[:pos], s[pos+1:]
207	}
208
209	// Parse the seconds.
210	secs, err := strconv.ParseInt(ss, 10, 64)
211	if err != nil {
212		return time.Time{}, ErrHeader
213	}
214	if len(sn) == 0 {
215		return time.Unix(secs, 0), nil // No sub-second values
216	}
217
218	// Parse the nanoseconds.
219	if strings.Trim(sn, "0123456789") != "" {
220		return time.Time{}, ErrHeader
221	}
222	if len(sn) < maxNanoSecondDigits {
223		sn += strings.Repeat("0", maxNanoSecondDigits-len(sn)) // Right pad
224	} else {
225		sn = sn[:maxNanoSecondDigits] // Right truncate
226	}
227	nsecs, _ := strconv.ParseInt(sn, 10, 64) // Must succeed
228	if len(ss) > 0 && ss[0] == '-' {
229		return time.Unix(secs, -1*nsecs), nil // Negative correction
230	}
231	return time.Unix(secs, nsecs), nil
232}
233
// formatPAXTime converts ts into a time of the form %d.%d as described in the
// PAX specification. This function is capable of negative timestamps.
// Trailing zeros of the fraction are omitted, and a timestamp with no
// sub-second component is written as a plain integer.
func formatPAXTime(ts time.Time) (s string) {
	whole, frac := ts.Unix(), ts.Nanosecond()
	if frac == 0 {
		return strconv.FormatInt(whole, 10)
	}

	// Nanosecond is never negative, so a time before the epoch is seen here
	// as a negative second count plus a positive fraction. Rewrite it as a
	// sign with a magnitude: move one second from the seconds over to the
	// fraction and flip both.
	var sign string
	if whole < 0 {
		sign = "-"
		whole = -(whole + 1)
		frac = 1e9 - frac
	}

	formatted := fmt.Sprintf("%s%d.%09d", sign, whole, frac)
	return strings.TrimRight(formatted, "0")
}
251
252// parsePAXRecord parses the input PAX record string into a key-value pair.
253// If parsing is successful, it will slice off the currently read record and
254// return the remainder as r.
255func parsePAXRecord(s string) (k, v, r string, err error) {
256	// The size field ends at the first space.
257	sp := strings.IndexByte(s, ' ')
258	if sp == -1 {
259		return "", "", s, ErrHeader
260	}
261
262	// Parse the first token as a decimal integer.
263	n, perr := strconv.ParseInt(s[:sp], 10, 0) // Intentionally parse as native int
264	if perr != nil || n < 5 || int64(len(s)) < n {
265		return "", "", s, ErrHeader
266	}
267
268	// Extract everything between the space and the final newline.
269	rec, nl, rem := s[sp+1:n-1], s[n-1:n], s[n:]
270	if nl != "\n" {
271		return "", "", s, ErrHeader
272	}
273
274	// The first equals separates the key from the value.
275	eq := strings.IndexByte(rec, '=')
276	if eq == -1 {
277		return "", "", s, ErrHeader
278	}
279	k, v = rec[:eq], rec[eq+1:]
280
281	if !validPAXRecord(k, v) {
282		return "", "", s, ErrHeader
283	}
284	return k, v, rem, nil
285}
286
287// formatPAXRecord formats a single PAX record, prefixing it with the
288// appropriate length.
289func formatPAXRecord(k, v string) (string, error) {
290	if !validPAXRecord(k, v) {
291		return "", ErrHeader
292	}
293
294	const padding = 3 // Extra padding for ' ', '=', and '\n'
295	size := len(k) + len(v) + padding
296	size += len(strconv.Itoa(size))
297	record := strconv.Itoa(size) + " " + k + "=" + v + "\n"
298
299	// Final adjustment if adding size field increased the record size.
300	if len(record) != size {
301		size = len(record)
302		record = strconv.Itoa(size) + " " + k + "=" + v + "\n"
303	}
304	return record, nil
305}
306
307// validPAXRecord reports whether the key-value pair is valid where each
308// record is formatted as:
309//	"%d %s=%s\n" % (size, key, value)
310//
311// Keys and values should be UTF-8, but the number of bad writers out there
312// forces us to be a more liberal.
313// Thus, we only reject all keys with NUL, and only reject NULs in values
314// for the PAX version of the USTAR string fields.
315// The key must not contain an '=' character.
316func validPAXRecord(k, v string) bool {
317	if k == "" || strings.IndexByte(k, '=') >= 0 {
318		return false
319	}
320	switch k {
321	case paxPath, paxLinkpath, paxUname, paxGname:
322		return !hasNUL(v)
323	default:
324		return !hasNUL(k)
325	}
326}
327