xref: /openbsd/usr.bin/file/magdir/archive (revision f2dfb0a4)
1
2#------------------------------------------------------------------------------
3# archive:  file(1) magic for archive formats (see also "msdos" for self-
4#           extracting compressed archives)
5#
6# cpio, ar, arc, arj, hpack, lha/lharc, rar, squish, uc2, zip, zoo, etc.
7# pre-POSIX "tar" archives are handled in the C code.
8
9# POSIX tar archives
# Both tests read the string at offset 257.  The entries differ only in
# what follows "ustar": a NUL byte, or two spaces (\040 is the octal
# escape for the space character) and then a NUL.
10257	string		ustar\0		POSIX tar archive
11257	string		ustar\040\040\0	GNU tar archive
12
13# cpio archives
14#
15# Yes, the top two "cpio archive" formats *are* supposed to just be "short".
16# The idea is to indicate archives produced on machines with the same
17# byte order as the machine running "file" with "cpio archive", and
18# to indicate archives produced on machines with the opposite byte order
19# from the machine running "file" with "byte-swapped cpio archive".
20#
21# The SVR4 "cpio(4)" hints that there are additional formats, but they
22# are defined as "short"s; I think all the new formats are
23# character-header formats and thus are strings, not numbers.
# 0143561 is 070707 with its two bytes exchanged (0x71c7 -> 0xc771), i.e.
# the value the same magic yields when read in the opposite byte order.
240	short		070707		cpio archive
250	short		0143561		byte-swapped cpio archive
# The character-header variants share the prefix "07070" and are told
# apart by the sixth character of the magic string (7, 1 or 2).
260	string		070707		ASCII cpio archive (pre-SVR4 or odc)
270	string		070701		ASCII cpio archive (SVR4 with no CRC)
280	string		070702		ASCII cpio archive (SVR4 with CRC)
29
30# other archives
# These numeric tests use plain "long"/"short" (no le/be prefix), so each
# octal magic is tried once as a 4-byte and once as a 2-byte value.
# NOTE(review): the "=<ar>" entry below appears a second time, verbatim,
# at the end of the ar section further down this file.
310	long		0177555		very old archive
320	short		0177555		very old PDP-11 archive
330	long		0177545		old archive
340	short		0177545		old PDP-11 archive
350	long		0100554		apl workspace
360	string		=<ar>		archive
37
38# MIPS archive (needs to go first)
39#
# The test string starts with the same "!<arch>" text as the generic
# "current ar archive" entry later in this file and then continues with
# "\n__________E"; presumably "go first" means ahead of that less
# specific entry -- TODO confirm.
# Each continuation line tests a single character at offsets 19 to 22.
400	string	!<arch>\n__________E	MIPS archive
41>20	string	U			with MIPS Ucode members
42>21	string	L			with MIPSEL members
43>21	string	B			with MIPSEB members
44>19	string	L			and an EL hash table
45>19	string	B			and an EB hash table
46>22	string	X			-- out of date
47
# Matches files whose first three characters are "-h-".
480	string		-h-		Software Tools format archive text
49
50#
51# XXX - why are there multiple <ar> thingies?  Note that 0x213c6172 is
52# "!<ar", so, for new-style (4.xBSD/SVR2andup) archives, we have:
53#
54# 0	string		!<arch>		current ar archive
55# 0	long		0x213c6172	archive file
56#
57# and for SVR1 archives, we have:
58#
59# 0	string		\<ar>		System V Release 1 ar archive
60# 0	string		=<ar>		archive
61#
62# XXX - did Aegis really store shared libraries, breakpointed modules,
63# and absolute code program modules in the same format as new-style
64# "ar" archives?
65#
660	string		!<arch>		current ar archive
# The three string tests at offset 8 refine the description; presumably
# they look at the name of the first archive member -- TODO confirm.
67>8	string		__.SYMDEF	random library
68>8	string		debian-split	part of multipart Debian package
69>8	string		debian-binary	Debian binary package
# NOTE(review): the six tests below re-read offset 0, where this entry has
# already matched "!<arch>" (0x213c6172 as a 4-byte value, per the note
# above), so the values 65538/65539 and 2..5 look unreachable here.  This
# is the Aegis question raised in the XXX comment above.
70>0	belong		=65538		- pre SR9.5
71>0	belong		=65539		- post SR9.5
72>0	beshort		2		- object archive
73>0	beshort		3		- shared library module
74>0	beshort		4		- debug break-pointed module
75>0	beshort		5		- absolute code program module
# NOTE(review): "\<ar>" and "=<ar>" look like two spellings of the same
# four-character test string "<ar>" -- confirm against magic(5).  The
# "=<ar>" entry also appears earlier in this file under "other archives".
760	string		\<ar>		System V Release 1 ar archive
770	string		=<ar>		archive
78#
79# XXX - from "vax", which appears to collect a bunch of byte-swapped
80# thingies, to help you recognize VAX files on big-endian machines;
81# with "leshort", "lelong", and "string", that's no longer necessary....
82#
830	belong		0x65ff0000	VAX 3.0 archive
# 0x3c61723e is the ASCII text "<ar>" read as a big-endian 4-byte value.
840	belong		0x3c61723e	VAX 5.0 archive
85#
# 0x213c6172 is "!<ar" (see the note in the ar section above); this test
# uses plain "long", without an le/be prefix.
860	long		0x213c6172	archive file
# The remaining tests use explicit little-endian types (lelong/leshort).
870	lelong		0177555		very old VAX archive
880	leshort		0177555		very old PDP-11 archive
89#
90# XXX - "pdp" claims that 0177545 can have an __.SYMDEF member and thus
91# be a random library (it said 0xff65 rather than 0177545).
92#
930	lelong		0177545		old VAX archive
94>8	string		__.SYMDEF	random library
950	leshort		0177545		old PDP-11 archive
96>8	string		__.SYMDEF	random library
97#
98# From "pdp" (but why a 4-byte quantity?)
99#
1000	lelong		0x39bed		PDP-11 old archive
1010	lelong		0x39bee		PDP-11 4.0 archive
102
103# ARC archiver, from Daniel Quinlan (quinlan@yggdrasil.com)
104#
105# The first byte is the magic (0x1a), byte 2 is the compression type for
106# the first file (0x01 through 0x09), and bytes 3 to 15 are the MS-DOS
107# filename of the first file (null terminated).  Since some types collide
108# we only test some types on basis of frequency: 0x08 (83%), 0x09 (5%),
109# 0x02 (5%), 0x03 (3%), 0x04 (2%), 0x06 (2%).  0x01 collides with terminfo.
# How the mask works (file offsets counted from 0): with lelong the low
# 16 bits are the bytes at offsets 0 and 1, so 0x....ffff compares the
# magic 0x1a and the type byte exactly, while the two 0x80 bits require
# the top bit of the bytes at offsets 2 and 3 to be clear.
1100	lelong&0x8080ffff	0x0000081a	ARC archive data, dynamic LZW
1110	lelong&0x8080ffff	0x0000091a	ARC archive data, squashed
1120	lelong&0x8080ffff	0x0000021a	ARC archive data, uncompressed
1130	lelong&0x8080ffff	0x0000031a	ARC archive data, packed
1140	lelong&0x8080ffff	0x0000041a	ARC archive data, squeezed
1150	lelong&0x8080ffff	0x0000061a	ARC archive data, crunched
116
117# Acorn archive formats (Disaster prone simpleton, m91dps@ecs.ox.ac.uk)
118# I can't create either SPARK or ArcFS archives so I have not tested this stuff
119# [GRR:  the original entries collide with ARC, above; replaced with combined
120#  version (not tested)]
121#0	byte		0x1a		RISC OS archive
122#>1	string		archive		(ArcFS format)
# \032 is the octal escape for 0x1a, so this one string test covers both
# the byte test and the string test of the commented-out pair above.
1230	string		\032archive	RISC OS archive (ArcFS format)
124
125# ARJ archiver (jason@jarthur.Claremont.EDU)
# The magic is the little-endian 16-bit value 0xea60 at offset 0.
1260	leshort		0xea60		ARJ archive data
127>5	byte		x		\b, v%d,
# The byte at offset 8 is tested one bit at a time (0x04, 0x10, 0x20).
128>8	byte		&0x04		multi-volume,
129>8	byte		&0x10		slash-switched,
130>8	byte		&0x20		backup,
131>34	string		x		original name: %s,
# The byte at offset 7 is compared for equality to pick one OS name;
# values outside 0..9 add nothing to the output.
132>7	byte		0		os: MS-DOS
133>7	byte		1		os: PRIMOS
134>7	byte		2		os: Unix
135>7	byte		3		os: Amiga
136>7	byte		4		os: Macintosh
137>7	byte		5		os: OS/2
138>7	byte		6		os: Apple ][ GS
139>7	byte		7		os: Atari ST
140>7	byte		8		os: NeXT
141>7	byte		9		os: VAX/VMS
# NOTE(review): the message below ends in "]" although no earlier message
# in this entry opens a bracket -- confirm this is intended.
142>3	byte		>0		%d]
143
144# HA archiver (Greg Roelofs, newt@uchicago.edu)
145# This is a really bad format. A file containing HAWAII will match this...
146#0	string		HA		HA archive data,
147#>2	leshort		=1		1 file,
148#>2	leshort		>1		%u files,
149#>4	byte&0x0f	=0		first is type CPY
150#>4	byte&0x0f	=1		first is type ASC
151#>4	byte&0x0f	=2		first is type HSC
152#>4	byte&0x0f	=0x0e		first is type DIR
153#>4	byte&0x0f	=0x0f		first is type SPECIAL
154
155# HPACK archiver (Peter Gutmann, pgut1@cs.aukuni.ac.nz)
# Matches the four characters "HPAK" at the start of the file.
1560	string		HPAK		HPACK archive data
157
158# JAM Archive volume format, by Dmitry.Kohmanyuk@UA.net
# NOTE(review): the test string below ends with a backslash directly in
# front of the separating tabs -- confirm how the magic parser reads it.
1590	string		\351,\001JAM\		JAM archive,
160>7	string		>\0			version %.4s
# The ">>" lines are nested under the ">0x26" test, so label, serial and
# fstype are only looked at when the byte at 0x26 equals 0x27.
161>0x26	byte		=0x27			-
162>>0x2b	string          >\0			label %.11s,
163>>0x27	lelong		x			serial %08x,
164>>0x36	string		>\0			fstype %.8s
165
166# LHARC/LHA archiver (Greg Roelofs, newt@uchicago.edu)
# Every test looks for a five-character method id ("-l??-") at offset 2;
# the id is echoed in square brackets in the description.
1672	string		-lh0-		LHarc 1.x archive data [lh0]
1682	string		-lh1-		LHarc 1.x archive data [lh1]
1692	string		-lz4-		LHarc 1.x archive data [lz4]
1702	string		-lz5-		LHarc 1.x archive data [lz5]
171#	[never seen any but the last; -lh4- reported in comp.compression:]
1722	string		-lzs-		LHa 2.x? archive data [lzs]
1732	string		-lh -		LHa 2.x? archive data [lh ]
1742	string		-lhd-		LHa 2.x? archive data [lhd]
1752	string		-lh2-		LHa 2.x? archive data [lh2]
1762	string		-lh3-		LHa 2.x? archive data [lh3]
1772	string		-lh4-		LHa (2.x) archive data [lh4]
1782	string		-lh5-		LHa (2.x) archive data [lh5]
# NOTE(review): this continuation directly follows only the -lh5- entry;
# if continuations bind to the nearest preceding top-level entry (see
# magic(5)), the header level is reported for [lh5] archives alone.
179>20	byte		x		- header level %d
180
181# RAR archiver (Greg Roelofs, newt@uchicago.edu)
# Matches the four characters "Rar!" at the start of the file.
1820	string		Rar!		RAR archive data
183
184# SQUISH archiver (Greg Roelofs, newt@uchicago.edu)
# Matches the four characters "SQSH" at the start of the file.
1850	string		SQSH		squished archive data (Acorn RISCOS)
186
187# UC2 archiver (Greg Roelofs, newt@uchicago.edu)
188# I can't figure out the self-extracting form of these buggers...
# Matches "UC2" followed by the byte 0x1a (written as the hex escape \x1a).
1890	string		UC2\x1a		UC2 archive data
190
191# ZIP archives (Greg Roelofs, c/o zip-bugs@wkuvx1.wku.edu)
# The magic is "PK" followed by the bytes 3 and 4 (octal escapes).
1920	string		PK\003\004	Zip archive data
# The byte at offset 4 holds the version times ten (0x09 = 9 -> v0.9,
# 0x14 = 20 -> v2.0).  Only the four values listed add a version note.
193>4	byte		0x09		\b, at least v0.9 to extract
194>4	byte		0x0a		\b, at least v1.0 to extract
195>4	byte		0x0b		\b, at least v1.1 to extract
196>4	byte		0x14		\b, at least v2.0 to extract
197
198# Zoo archiver
# The magic is the little-endian 32-bit value 0xfdc4a7dc at offset 20.
19920	lelong		0xfdc4a7dc	Zoo archive data
# The bytes at offsets 4, 6 and 7 are printed as characters (%c).  The
# bounds 48 and 47 are ASCII codes: 48 is '0', so ">48" accepts '1' and
# above, and ">47" accepts '0' and above.
200>4	byte		>48		\b, v%c.
201>>6	byte		>47		\b%c
202>>>7	byte		>47		\b%c
203>32	byte		>0		\b, modify: v%d
204>>33	byte		x		\b.%d+
# The same 32-bit value as the top-level magic is tested again at offset
# 42; the "extract" version is only printed when it is found there.
205>42	lelong		0xfdc4a7dc	\b,
206>>70	byte		>0		extract: v%d
207>>>71	byte		x		\b.%d+
208
209# Shell archives
# The test reads a string at offset 10.  The spaces in the test string
# are backslash-escaped, presumably so they are taken as part of the
# string rather than as the field separator -- see magic(5).
21010	string		#\ This\ is\ a\ shell\ archive	shell archive text
211