1// Copyright 2016 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package tar
6
// Constants to identify various tar formats.
// Apart from formatUnknown, which is zero, every format occupies its own bit,
// so the values run 0, 1, 2, 4, 8, 16.
const (
	// formatUnknown means the format is unknown.
	formatUnknown = 0

	// formatV7 is the format of the original Unix V7 tar tool, which predates
	// standardization.
	formatV7 = 1 << (iota - 1) // iota is 1 on this line, so the bits start at 1

	// formatGNU covers both the old and new GNU formats, which are
	// incompatible with USTAR. The old GNU sparse extension is included here.
	// The GNU sparse extensions that use PAX headers (versions 0.0, 0.1,
	// and 1.0) are not; they are classified as PAX.
	formatGNU

	// formatSTAR is Schily's tar format, which is incompatible with USTAR.
	// STAR extensions to the PAX format are not included; they are classified
	// as PAX.
	formatSTAR

	// formatUSTAR is the earlier tar standard defined in POSIX.1-1988.
	// It is incompatible with the GNU and STAR formats.
	formatUSTAR

	// formatPAX is the latest tar standard, defined in POSIX.1-2001.
	// It extends USTAR and is "backwards compatible" with it.
	//
	// Newer formats that layer their own extensions on PAX, such as GNU sparse
	// files and SCHILY extended attributes, remain backwards compatible with
	// PAX and are therefore labelled as "PAX" as well.
	formatPAX
)
38
// Magics used to identify various formats.
const (
	// GNU headers: "ustar" plus a trailing space, with a space-NUL version.
	magicGNU   = "ustar "
	versionGNU = " \x00"

	// USTAR headers: NUL-terminated "ustar", with version "00".
	magicUSTAR   = "ustar\x00"
	versionUSTAR = "00"

	// STAR headers additionally carry this NUL-terminated trailer.
	trailerSTAR = "tar\x00"
)
45
// Size constants from various tar specifications.
const (
	// blockSize is the size, in bytes, of each block in a tar stream.
	blockSize = 512

	// nameSize is the maximum length of the name field in USTAR format.
	nameSize = 100

	// prefixSize is the maximum length of the prefix field in USTAR format.
	prefixSize = 155
)
52
53var zeroBlock block
54
55type block [blockSize]byte
56
57// Convert block to any number of formats.
58func (b *block) V7() *headerV7       { return (*headerV7)(b) }
59func (b *block) GNU() *headerGNU     { return (*headerGNU)(b) }
60func (b *block) STAR() *headerSTAR   { return (*headerSTAR)(b) }
61func (b *block) USTAR() *headerUSTAR { return (*headerUSTAR)(b) }
62func (b *block) Sparse() sparseArray { return (sparseArray)(b[:]) }
63
64// GetFormat checks that the block is a valid tar header based on the checksum.
65// It then attempts to guess the specific format based on magic values.
66// If the checksum fails, then formatUnknown is returned.
67func (b *block) GetFormat() (format int) {
68	// Verify checksum.
69	var p parser
70	value := p.parseOctal(b.V7().Chksum())
71	chksum1, chksum2 := b.ComputeChecksum()
72	if p.err != nil || (value != chksum1 && value != chksum2) {
73		return formatUnknown
74	}
75
76	// Guess the magic values.
77	magic := string(b.USTAR().Magic())
78	version := string(b.USTAR().Version())
79	trailer := string(b.STAR().Trailer())
80	switch {
81	case magic == magicUSTAR && trailer == trailerSTAR:
82		return formatSTAR
83	case magic == magicUSTAR:
84		return formatUSTAR
85	case magic == magicGNU && version == versionGNU:
86		return formatGNU
87	default:
88		return formatV7
89	}
90}
91
92// SetFormat writes the magic values necessary for specified format
93// and then updates the checksum accordingly.
94func (b *block) SetFormat(format int) {
95	// Set the magic values.
96	switch format {
97	case formatV7:
98		// Do nothing.
99	case formatGNU:
100		copy(b.GNU().Magic(), magicGNU)
101		copy(b.GNU().Version(), versionGNU)
102	case formatSTAR:
103		copy(b.STAR().Magic(), magicUSTAR)
104		copy(b.STAR().Version(), versionUSTAR)
105		copy(b.STAR().Trailer(), trailerSTAR)
106	case formatUSTAR, formatPAX:
107		copy(b.USTAR().Magic(), magicUSTAR)
108		copy(b.USTAR().Version(), versionUSTAR)
109	default:
110		panic("invalid format")
111	}
112
113	// Update checksum.
114	// This field is special in that it is terminated by a NULL then space.
115	var f formatter
116	field := b.V7().Chksum()
117	chksum, _ := b.ComputeChecksum() // Possible values are 256..128776
118	f.formatOctal(field[:7], chksum) // Never fails since 128776 < 262143
119	field[7] = ' '
120}
121
122// ComputeChecksum computes the checksum for the header block.
123// POSIX specifies a sum of the unsigned byte values, but the Sun tar used
124// signed byte values.
125// We compute and return both.
126func (b *block) ComputeChecksum() (unsigned, signed int64) {
127	for i, c := range b {
128		if 148 <= i && i < 156 {
129			c = ' ' // Treat the checksum field itself as all spaces.
130		}
131		unsigned += int64(uint8(c))
132		signed += int64(int8(c))
133	}
134	return unsigned, signed
135}
136
137type headerV7 [blockSize]byte
138
139func (h *headerV7) Name() []byte     { return h[000:][:100] }
140func (h *headerV7) Mode() []byte     { return h[100:][:8] }
141func (h *headerV7) UID() []byte      { return h[108:][:8] }
142func (h *headerV7) GID() []byte      { return h[116:][:8] }
143func (h *headerV7) Size() []byte     { return h[124:][:12] }
144func (h *headerV7) ModTime() []byte  { return h[136:][:12] }
145func (h *headerV7) Chksum() []byte   { return h[148:][:8] }
146func (h *headerV7) TypeFlag() []byte { return h[156:][:1] }
147func (h *headerV7) LinkName() []byte { return h[157:][:100] }
148
149type headerGNU [blockSize]byte
150
151func (h *headerGNU) V7() *headerV7       { return (*headerV7)(h) }
152func (h *headerGNU) Magic() []byte       { return h[257:][:6] }
153func (h *headerGNU) Version() []byte     { return h[263:][:2] }
154func (h *headerGNU) UserName() []byte    { return h[265:][:32] }
155func (h *headerGNU) GroupName() []byte   { return h[297:][:32] }
156func (h *headerGNU) DevMajor() []byte    { return h[329:][:8] }
157func (h *headerGNU) DevMinor() []byte    { return h[337:][:8] }
158func (h *headerGNU) AccessTime() []byte  { return h[345:][:12] }
159func (h *headerGNU) ChangeTime() []byte  { return h[357:][:12] }
160func (h *headerGNU) Sparse() sparseArray { return (sparseArray)(h[386:][:24*4+1]) }
161func (h *headerGNU) RealSize() []byte    { return h[483:][:12] }
162
163type headerSTAR [blockSize]byte
164
165func (h *headerSTAR) V7() *headerV7      { return (*headerV7)(h) }
166func (h *headerSTAR) Magic() []byte      { return h[257:][:6] }
167func (h *headerSTAR) Version() []byte    { return h[263:][:2] }
168func (h *headerSTAR) UserName() []byte   { return h[265:][:32] }
169func (h *headerSTAR) GroupName() []byte  { return h[297:][:32] }
170func (h *headerSTAR) DevMajor() []byte   { return h[329:][:8] }
171func (h *headerSTAR) DevMinor() []byte   { return h[337:][:8] }
172func (h *headerSTAR) Prefix() []byte     { return h[345:][:131] }
173func (h *headerSTAR) AccessTime() []byte { return h[476:][:12] }
174func (h *headerSTAR) ChangeTime() []byte { return h[488:][:12] }
175func (h *headerSTAR) Trailer() []byte    { return h[508:][:4] }
176
177type headerUSTAR [blockSize]byte
178
179func (h *headerUSTAR) V7() *headerV7     { return (*headerV7)(h) }
180func (h *headerUSTAR) Magic() []byte     { return h[257:][:6] }
181func (h *headerUSTAR) Version() []byte   { return h[263:][:2] }
182func (h *headerUSTAR) UserName() []byte  { return h[265:][:32] }
183func (h *headerUSTAR) GroupName() []byte { return h[297:][:32] }
184func (h *headerUSTAR) DevMajor() []byte  { return h[329:][:8] }
185func (h *headerUSTAR) DevMinor() []byte  { return h[337:][:8] }
186func (h *headerUSTAR) Prefix() []byte    { return h[345:][:155] }
187
188type sparseArray []byte
189
190func (s sparseArray) Entry(i int) sparseNode { return (sparseNode)(s[i*24:]) }
191func (s sparseArray) IsExtended() []byte     { return s[24*s.MaxEntries():][:1] }
192func (s sparseArray) MaxEntries() int        { return len(s) / 24 }
193
// sparseNode is a byte region holding two consecutive 12-byte fields.
type sparseNode []byte

// Offset returns the first 12 bytes of the node.
func (s sparseNode) Offset() []byte {
	return s[:12]
}

// NumBytes returns the 12 bytes that follow the Offset field.
func (s sparseNode) NumBytes() []byte {
	rest := s[12:]
	return rest[:12]
}
198