1// Copyright 2016 The Go Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file. 4 5package tar 6 7import "strings" 8 9// Format represents the tar archive format. 10// 11// The original tar format was introduced in Unix V7. 12// Since then, there have been multiple competing formats attempting to 13// standardize or extend the V7 format to overcome its limitations. 14// The most common formats are the USTAR, PAX, and GNU formats, 15// each with their own advantages and limitations. 16// 17// The following table captures the capabilities of each format: 18// 19// | USTAR | PAX | GNU 20// ------------------+--------+-----------+---------- 21// Name | 256B | unlimited | unlimited 22// Linkname | 100B | unlimited | unlimited 23// Size | uint33 | unlimited | uint89 24// Mode | uint21 | uint21 | uint57 25// Uid/Gid | uint21 | unlimited | uint57 26// Uname/Gname | 32B | unlimited | 32B 27// ModTime | uint33 | unlimited | int89 28// AccessTime | n/a | unlimited | int89 29// ChangeTime | n/a | unlimited | int89 30// Devmajor/Devminor | uint21 | uint21 | uint57 31// ------------------+--------+-----------+---------- 32// string encoding | ASCII | UTF-8 | binary 33// sub-second times | no | yes | no 34// sparse files | no | yes | yes 35// 36// The table's upper portion shows the Header fields, where each format reports 37// the maximum number of bytes allowed for each string field and 38// the integer type used to store each numeric field 39// (where timestamps are stored as the number of seconds since the Unix epoch). 40// 41// The table's lower portion shows specialized features of each format, 42// such as supported string encodings, support for sub-second timestamps, 43// or support for sparse files. 44type Format int 45 46// Constants to identify various tar formats. 47const ( 48 // Deliberately hide the meaning of constants from public API. 49 _ Format = (1 << iota) / 4 // Sequence of 0, 0, 1, 2, 4, 8, etc... 50 51 // FormatUnknown indicates that the format is unknown. 52 FormatUnknown 53 54 // The format of the original Unix V7 tar tool prior to standardization. 55 formatV7 56 57 // FormatUSTAR represents the USTAR header format defined in POSIX.1-1988. 58 // 59 // While this format is compatible with most tar readers, 60 // the format has several limitations making it unsuitable for some usages. 61 // Most notably, it cannot support sparse files, files larger than 8GiB, 62 // filenames larger than 256 characters, and non-ASCII filenames. 63 // 64 // Reference: 65 // http://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html#tag_20_92_13_06 66 FormatUSTAR 67 68 // FormatPAX represents the PAX header format defined in POSIX.1-2001. 69 // 70 // PAX extends USTAR by writing a special file with Typeflag TypeXHeader 71 // preceding the original header. This file contains a set of key-value 72 // records, which are used to overcome USTAR's shortcomings, in addition to 73 // providing the ability to have sub-second resolution for timestamps. 74 // 75 // Some newer formats add their own extensions to PAX by defining their 76 // own keys and assigning certain semantic meaning to the associated values. 77 // For example, sparse file support in PAX is implemented using keys 78 // defined by the GNU manual (e.g., "GNU.sparse.map"). 79 // 80 // Reference: 81 // http://pubs.opengroup.org/onlinepubs/009695399/utilities/pax.html 82 FormatPAX 83 84 // FormatGNU represents the GNU header format. 85 // 86 // The GNU header format is older than the USTAR and PAX standards and 87 // is not compatible with them. The GNU format supports 88 // arbitrary file sizes, filenames of arbitrary encoding and length, 89 // sparse files, and other features. 90 // 91 // It is recommended that PAX be chosen over GNU unless the target 92 // application can only parse GNU formatted archives. 93 // 94 // Reference: 95 // http://www.gnu.org/software/tar/manual/html_node/Standard.html 96 FormatGNU 97 98 // Schily's tar format, which is incompatible with USTAR. 99 // This does not cover STAR extensions to the PAX format; these fall under 100 // the PAX format. 101 formatSTAR 102 103 formatMax 104) 105 106func (f Format) has(f2 Format) bool { return f&f2 != 0 } 107func (f *Format) mayBe(f2 Format) { *f |= f2 } 108func (f *Format) mayOnlyBe(f2 Format) { *f &= f2 } 109func (f *Format) mustNotBe(f2 Format) { *f &^= f2 } 110 111var formatNames = map[Format]string{ 112 formatV7: "V7", FormatUSTAR: "USTAR", FormatPAX: "PAX", FormatGNU: "GNU", formatSTAR: "STAR", 113} 114 115func (f Format) String() string { 116 var ss []string 117 for f2 := Format(1); f2 < formatMax; f2 <<= 1 { 118 if f.has(f2) { 119 ss = append(ss, formatNames[f2]) 120 } 121 } 122 switch len(ss) { 123 case 0: 124 return "<unknown>" 125 case 1: 126 return ss[0] 127 default: 128 return "(" + strings.Join(ss, " | ") + ")" 129 } 130} 131 132// Magics used to identify various formats. 133const ( 134 magicGNU, versionGNU = "ustar ", " \x00" 135 magicUSTAR, versionUSTAR = "ustar\x00", "00" 136 trailerSTAR = "tar\x00" 137) 138 139// Size constants from various tar specifications. 140const ( 141 blockSize = 512 // Size of each block in a tar stream 142 nameSize = 100 // Max length of the name field in USTAR format 143 prefixSize = 155 // Max length of the prefix field in USTAR format 144) 145 146// blockPadding computes the number of bytes needed to pad offset up to the 147// nearest block edge where 0 <= n < blockSize. 148func blockPadding(offset int64) (n int64) { 149 return -offset & (blockSize - 1) 150} 151 152var zeroBlock block 153 154type block [blockSize]byte 155 156// Convert block to any number of formats. 157func (b *block) V7() *headerV7 { return (*headerV7)(b) } 158func (b *block) GNU() *headerGNU { return (*headerGNU)(b) } 159func (b *block) STAR() *headerSTAR { return (*headerSTAR)(b) } 160func (b *block) USTAR() *headerUSTAR { return (*headerUSTAR)(b) } 161func (b *block) Sparse() sparseArray { return (sparseArray)(b[:]) } 162 163// GetFormat checks that the block is a valid tar header based on the checksum. 164// It then attempts to guess the specific format based on magic values. 165// If the checksum fails, then FormatUnknown is returned. 166func (b *block) GetFormat() Format { 167 // Verify checksum. 168 var p parser 169 value := p.parseOctal(b.V7().Chksum()) 170 chksum1, chksum2 := b.ComputeChecksum() 171 if p.err != nil || (value != chksum1 && value != chksum2) { 172 return FormatUnknown 173 } 174 175 // Guess the magic values. 176 magic := string(b.USTAR().Magic()) 177 version := string(b.USTAR().Version()) 178 trailer := string(b.STAR().Trailer()) 179 switch { 180 case magic == magicUSTAR && trailer == trailerSTAR: 181 return formatSTAR 182 case magic == magicUSTAR: 183 return FormatUSTAR | FormatPAX 184 case magic == magicGNU && version == versionGNU: 185 return FormatGNU 186 default: 187 return formatV7 188 } 189} 190 191// SetFormat writes the magic values necessary for specified format 192// and then updates the checksum accordingly. 193func (b *block) SetFormat(format Format) { 194 // Set the magic values. 195 switch { 196 case format.has(formatV7): 197 // Do nothing. 198 case format.has(FormatGNU): 199 copy(b.GNU().Magic(), magicGNU) 200 copy(b.GNU().Version(), versionGNU) 201 case format.has(formatSTAR): 202 copy(b.STAR().Magic(), magicUSTAR) 203 copy(b.STAR().Version(), versionUSTAR) 204 copy(b.STAR().Trailer(), trailerSTAR) 205 case format.has(FormatUSTAR | FormatPAX): 206 copy(b.USTAR().Magic(), magicUSTAR) 207 copy(b.USTAR().Version(), versionUSTAR) 208 default: 209 panic("invalid format") 210 } 211 212 // Update checksum. 213 // This field is special in that it is terminated by a NULL then space. 214 var f formatter 215 field := b.V7().Chksum() 216 chksum, _ := b.ComputeChecksum() // Possible values are 256..128776 217 f.formatOctal(field[:7], chksum) // Never fails since 128776 < 262143 218 field[7] = ' ' 219} 220 221// ComputeChecksum computes the checksum for the header block. 222// POSIX specifies a sum of the unsigned byte values, but the Sun tar used 223// signed byte values. 224// We compute and return both. 225func (b *block) ComputeChecksum() (unsigned, signed int64) { 226 for i, c := range b { 227 if 148 <= i && i < 156 { 228 c = ' ' // Treat the checksum field itself as all spaces. 229 } 230 unsigned += int64(uint8(c)) 231 signed += int64(int8(c)) 232 } 233 return unsigned, signed 234} 235 236// Reset clears the block with all zeros. 237func (b *block) Reset() { 238 *b = block{} 239} 240 241type headerV7 [blockSize]byte 242 243func (h *headerV7) Name() []byte { return h[000:][:100] } 244func (h *headerV7) Mode() []byte { return h[100:][:8] } 245func (h *headerV7) UID() []byte { return h[108:][:8] } 246func (h *headerV7) GID() []byte { return h[116:][:8] } 247func (h *headerV7) Size() []byte { return h[124:][:12] } 248func (h *headerV7) ModTime() []byte { return h[136:][:12] } 249func (h *headerV7) Chksum() []byte { return h[148:][:8] } 250func (h *headerV7) TypeFlag() []byte { return h[156:][:1] } 251func (h *headerV7) LinkName() []byte { return h[157:][:100] } 252 253type headerGNU [blockSize]byte 254 255func (h *headerGNU) V7() *headerV7 { return (*headerV7)(h) } 256func (h *headerGNU) Magic() []byte { return h[257:][:6] } 257func (h *headerGNU) Version() []byte { return h[263:][:2] } 258func (h *headerGNU) UserName() []byte { return h[265:][:32] } 259func (h *headerGNU) GroupName() []byte { return h[297:][:32] } 260func (h *headerGNU) DevMajor() []byte { return h[329:][:8] } 261func (h *headerGNU) DevMinor() []byte { return h[337:][:8] } 262func (h *headerGNU) AccessTime() []byte { return h[345:][:12] } 263func (h *headerGNU) ChangeTime() []byte { return h[357:][:12] } 264func (h *headerGNU) Sparse() sparseArray { return (sparseArray)(h[386:][:24*4+1]) } 265func (h *headerGNU) RealSize() []byte { return h[483:][:12] } 266 267type headerSTAR [blockSize]byte 268 269func (h *headerSTAR) V7() *headerV7 { return (*headerV7)(h) } 270func (h *headerSTAR) Magic() []byte { return h[257:][:6] } 271func (h *headerSTAR) Version() []byte { return h[263:][:2] } 272func (h *headerSTAR) UserName() []byte { return h[265:][:32] } 273func (h *headerSTAR) GroupName() []byte { return h[297:][:32] } 274func (h *headerSTAR) DevMajor() []byte { return h[329:][:8] } 275func (h *headerSTAR) DevMinor() []byte { return h[337:][:8] } 276func (h *headerSTAR) Prefix() []byte { return h[345:][:131] } 277func (h *headerSTAR) AccessTime() []byte { return h[476:][:12] } 278func (h *headerSTAR) ChangeTime() []byte { return h[488:][:12] } 279func (h *headerSTAR) Trailer() []byte { return h[508:][:4] } 280 281type headerUSTAR [blockSize]byte 282 283func (h *headerUSTAR) V7() *headerV7 { return (*headerV7)(h) } 284func (h *headerUSTAR) Magic() []byte { return h[257:][:6] } 285func (h *headerUSTAR) Version() []byte { return h[263:][:2] } 286func (h *headerUSTAR) UserName() []byte { return h[265:][:32] } 287func (h *headerUSTAR) GroupName() []byte { return h[297:][:32] } 288func (h *headerUSTAR) DevMajor() []byte { return h[329:][:8] } 289func (h *headerUSTAR) DevMinor() []byte { return h[337:][:8] } 290func (h *headerUSTAR) Prefix() []byte { return h[345:][:155] } 291 292type sparseArray []byte 293 294func (s sparseArray) Entry(i int) sparseElem { return (sparseElem)(s[i*24:]) } 295func (s sparseArray) IsExtended() []byte { return s[24*s.MaxEntries():][:1] } 296func (s sparseArray) MaxEntries() int { return len(s) / 24 } 297 298type sparseElem []byte 299 300func (s sparseElem) Offset() []byte { return s[00:][:12] } 301func (s sparseElem) Length() []byte { return s[12:][:12] } 302