1 2 3 4TAR(5) 1994 TAR(5) 5 6 7NAME 8 tar - tape (or other media) archive file format 9 10DESCRIPTION 11 A ``tar tape'' or file contains a series of records. Each 12 record contains RECORDSIZE bytes (see below). Although 13 this format may be thought of as being on magnetic tape, 14 other media are often used. 15 16 Each file archived is represented by a header record which 17 describes the file, followed by zero or more records which 18 give the contents of the file. At the end of the archive 19 file there may be a record filled with binary zeros as an 20 end-of-file indicator. A reasonable system should write a 21 record of zeros at the end, but must not assume that an 22 end-of-file record exists when reading an archive. 23 24 The records may be blocked for physical I/O operations. 25 Each block of N records (where N is set by the -b option 26 to tar) is written with a single write() operation. On 27 open reel magnetic tapes, the result of such a write is a 28 single tape record. When writing an archive, the last 29 block of records should be written at the full size, with 30 records after the zero record containing all zeroes. When 31 reading an archive, a reasonable system should properly 32 handle an archive whose last block is shorter than the 33 rest, or which contains garbage records after a zero 34 record. 
35 36 The header record is defined in the header file <tar.h> as 37 follows: 38 /* 39 * Standard Archive Format - Standard TAR - USTAR + GNU Extensions 40 */ 41 #define RECORDSIZE 512 42 #define NAMSIZ 100 43 #define TUNMLEN 32 44 #define TGNMLEN 32 45 #define SPARSE_EXT_HDR 21 46 #define SPARSE_IN_HDR 4 47 48 struct sparse 49 { 50 char offset[12]; 51 char numbytes[12]; 52 }; 53 54 union record 55 { 56 char charptr[RECORDSIZE]; 57 struct header 58 { 59 char arch_name[NAMSIZ]; 60 char mode[8]; 61 62 63 64May 29 1 65 66 67 68 69 70TAR(5) 1994 TAR(5) 71 72 73 char uid[8]; 74 char gid[8]; 75 char size[12]; 76 char mtime[12]; 77 char chksum[8]; 78 char linkflag; 79 char arch_linkname[NAMSIZ]; 80 char magic[8]; 81 char uname[TUNMLEN]; 82 char gname[TGNMLEN]; 83 char devmajor[8]; 84 char devminor[8]; 85 /* these following fields were added by JF for gnu */ 86 /* and are NOT standard */ 87 char atime[12]; 88 char ctime[12]; 89 char offset[12]; 90 char longnames[4]; 91 #ifdef NEEDPAD 92 char pad; 93 #endif 94 struct sparse sp[SPARSE_IN_HDR]; 95 char isextended; 96 char realsize[12]; /* true size of the sparse file */ 97 /* char ending_blanks[12];*//* number of nulls at the 98 end of the file, if any */ 99 } 100 header; 101 struct extended_header 102 { 103 struct sparse sp[21]; 104 char isextended; 105 } 106 ext_hdr; 107 }; 108 109 /* The checksum field is filled with this while the checksum is computed. */ 110 #define CHKBLANKS "        " /* 8 blanks, no null */ 111 112 /* The magic field is filled with this if uname and gname are valid. 
*/ 113 #define TMAGIC "ustar  " /* 7 chars and a null */ 114 115 116 /* The linkflag defines the type of file */ 117 #define LF_OLDNORMAL '\0' /* Normal disk file, Unix compat */ 118 #define LF_NORMAL '0' /* Normal disk file */ 119 #define LF_LINK '1' /* Link to previously dumped file */ 120 #define LF_SYMLINK '2' /* Symbolic link */ 121 #define LF_CHR '3' /* Character special file */ 122 #define LF_BLK '4' /* Block special file */ 123 #define LF_DIR '5' /* Directory */ 124 #define LF_FIFO '6' /* FIFO special file */ 125 #define LF_CONTIG '7' /* Contiguous file */ 126 /* Further link types may be defined later. */ 127 128 129 130May 29 2 131 132 133 134 135 136TAR(5) 1994 TAR(5) 137 138 139 /* Note that the standards committee allows only capital A through 140 capital Z for user-defined expansion. This means that defining something 141 as, say '8' is a *bad* idea. */ 142 #define LF_DUMPDIR 'D' /* This is a dir entry that contains 143 the names of files that were in 144 the dir at the time the dump 145 was made */ 146 #define LF_LONGLINK 'K' /* Identifies the NEXT file on the tape 147 as having a long linkname */ 148 #define LF_LONGNAME 'L' /* Identifies the NEXT file on the tape 149 as having a long name. 
*/ 150 #define LF_MULTIVOL 'M' /* This is the continuation 151 of a file that began on another 152 volume */ 153 #define LF_NAMES 'N' /* For storing filenames that didn't 154 fit in 100 characters */ 155 #define LF_SPARSE 'S' /* This is for sparse files */ 156 #define LF_VOLHDR 'V' /* This file is a tape/volume header */ 157 158 /* Bits used in the mode field - values in octal */ 159 #define TSUID 04000 /* Set UID on execution */ 160 #define TSGID 02000 /* Set GID on execution */ 161 #define TSVTX 01000 /* Save text (sticky bit) */ 162 163 /* File permissions */ 164 #define TUREAD 00400 /* read by owner */ 165 #define TUWRITE 00200 /* write by owner */ 166 #define TUEXEC 00100 /* execute/search by owner */ 167 #define TGREAD 00040 /* read by group */ 168 #define TGWRITE 00020 /* write by group */ 169 #define TGEXEC 00010 /* execute/search by group */ 170 #define TOREAD 00004 /* read by other */ 171 #define TOWRITE 00002 /* write by other */ 172 #define TOEXEC 00001 /* execute/search by other */ 173 174 All characters in header records are represented using 175 8-bit characters in the local variant of ASCII. Each 176 field within the structure is contiguous; that is, there 177 is no padding used within the structure. Each character 178 on the archive medium is stored contiguously. 179 180 Bytes representing the contents of files (after the header 181 record of each file) are not translated in any way and are 182 not constrained to represent characters or to be in any 183 character set. The tar(5) format does not distinguish 184 text files from binary files, and no translation of file 185 contents should be performed. 186 187 The fields name, linkname, magic, uname, and gname are 188 null-terminated character strings. All other fields are 189 zero-filled octal numbers in ASCII. Each numeric field 190 (of width w) contains w-2 digits, a space, and a null, 191 except size and mtime, which do not contain the trailing 192 null. 
193 194 195 196May 29 3 197 198 199 200 201 202TAR(5) 1994 TAR(5) 203 204 205 The name field is the pathname of the file, with directory 206 names (if any) preceding the file name, separated by 207 slashes. 208 209 The mode field provides nine bits specifying file 210 permissions and three bits to specify the Set UID, Set GID 211 and Save Text (TSVTX) modes. Values for these bits are 212 defined above. When special permissions are required to 213 create a file with a given mode, and the user restoring 214 files from the archive does not hold such permissions, the 215 mode bit(s) specifying those special permissions are 216 ignored. Modes which are not supported by the operating 217 system restoring files from the archive will be ignored. 218 Unsupported modes should be faked up when creating an 219 archive; e.g. the group permission could be copied from 220 the `other' permission. 221 222 The uid and gid fields are the user and group ID of the 223 file owners, respectively. 224 225 The size field is the size of the file in bytes; linked 226 files are archived with this field specified as zero. 227 228 The mtime field is the modification time of the file at 229 the time it was archived. It is the ASCII representation 230 of the octal value of the last time the file was modified, 231 represented as an integer number of seconds since January 232 1, 1970, 00:00 Coordinated Universal Time. 233 234 The chksum field is the ASCII representation of the octal 235 value of the simple sum of all bytes in the header record. 236 Each 8-bit byte in the header is treated as an unsigned 237 value. These values are added to an unsigned integer, 238 initialized to zero, the precision of which shall be no 239 less than seventeen bits. When calculating the checksum, 240 the chksum field is treated as if it were all blanks. 241 242 The typeflag field specifies the type of file archived. 
243 If a particular implementation does not recognize or 244 permit the specified type, the file will be extracted as 245 if it were a regular file. As this action occurs, tar 246 issues a warning to the standard error. 247 248 LF_NORMAL or LF_OLDNORMAL 249 represents a regular file. For backward 250 compatibility, a typeflag value of LF_OLDNORMAL 251 should be silently recognized as a regular file. 252 New archives should be created using LF_NORMAL. 253 Also, for backward compatibility, tar treats a 254 regular file whose name ends with a slash as a 255 directory. 256 257 LF_LINK 258 represents a file linked to another file, of any 259 260 261 262May 29 4 263 264 265 266 267 268TAR(5) 1994 TAR(5) 269 270 271 type, previously archived. Such files are 272 identified in Unix by each file having the same 273 device and inode number. The linked-to name is 274 specified in the linkname field with a trailing 275 null. 276 277 LF_SYMLINK 278 represents a symbolic link to another file. The 279 linked-to name is specified in the linkname field 280 with a trailing null. 281 282 LF_CHR or LF_BLK 283 represent character special files and block special 284 files respectively. In this case the devmajor and 285 devminor fields will contain the major and minor 286 device numbers respectively. Operating systems may 287 map the device specifications to their own local 288 specification, or may ignore the entry. 289 290 LF_DIR specifies a directory or sub-directory. The 291 directory name in the name field should end with a 292 slash. On systems where disk allocation is 293 performed on a directory basis the size field will 294 contain the maximum number of bytes (which may be 295 rounded to the nearest disk block allocation unit) 296 which the directory may hold. A size field of zero 297 indicates no such limiting. Systems which do not 298 support limiting in this manner should ignore the 299 size field. 
300 301 LF_FIFO 302 specifies a FIFO special file. Note that the 303 archiving of a FIFO file archives the existence of 304 this file and not its contents. 305 306 LF_CONTIG 307 specifies a contiguous file, which is the same as a 308 normal file except that, in operating systems which 309 support it, all its space is allocated contiguously 310 on the disk. Operating systems which do not allow 311 contiguous allocation should silently treat this 312 type as a normal file. 313 314 `A' - `Z' 315 are reserved for custom implementations. None are 316 used by this version of the tar program. 317 318 other values are reserved for specification in future 319 revisions of the P1003 standard, and should not be 320 used by any tar program. 321 322 The magic field indicates that this archive was output in 323 the P1003 archive format. If this field contains TMAGIC, 324 then the uname and gname fields will contain the ASCII 325 326 327 328May 29 5 329 330 331 332 333 334TAR(5) 1994 TAR(5) 335 336 337 representation of the owner and group of the file 338 respectively. If found, the user and group ID represented 339 by these names will be used rather than the values 340 contained within the uid and gid fields. User names 341 longer than TUNMLEN-1 or group names longer than TGNMLEN-1 342 characters will be truncated. 343 344SEE ALSO 345 tar(1), ar(5), cpio(5), dump(8), restor(8), restore(8) 346 347BUGS 348 Names or link names longer than NAMSIZ-1 characters cannot 349 be archived. 350 351 This format does not yet address multi-volume archives. 352 353NOTES 354 This manual page was adapted by John Gilmore from Draft 6 355 of the P1003 specification. Hacked to install information 356 from the GNU tar version 1.11.2 header definition by Thos 357 Sumner. No additional explication beyond the source file 358 comments was added at this time. 
359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394May 29 6 395 396 397