xref: /openbsd/usr.bin/file/magdir/archive (revision 70e96360)
1#	$OpenBSD: archive,v 1.7 2016/03/06 01:33:12 sthen Exp $
2
3#------------------------------------------------------------------------------
4# archive:  file(1) magic for archive formats (see also "msdos" for self-
5#           extracting compressed archives)
6#
7# cpio, ar, arc, arj, hpack, lha/lharc, rar, squish, uc2, zip, zoo, etc.
8# pre-POSIX "tar" archives are handled in the C code.
9
10# POSIX tar archives
11257	string		ustar\0		POSIX tar archive
12!:mime	application/x-tar # encoding: posix
13257	string		ustar\040\040\0	GNU tar archive
14!:mime	application/x-tar # encoding: gnu
15
16# Incremental snapshot gnu-tar format from:
17# http://www.gnu.org/software/tar/manual/html_node/Snapshot-Files.html
180	string		GNU\ tar-	GNU tar incremental snapshot data
19>&0	regex		[0-9]\.[0-9]+-[0-9]+	version %s
20
21# cpio archives
22#
23# Yes, the top two "cpio archive" formats *are* supposed to just be "short".
24# The idea is to indicate archives produced on machines with the same
25# byte order as the machine running "file" with "cpio archive", and
26# to indicate archives produced on machines with the opposite byte order
27# from the machine running "file" with "byte-swapped cpio archive".
28#
29# The SVR4 "cpio(4)" hints that there are additional formats, but they
30# are defined as "short"s; I think all the new formats are
31# character-header formats and thus are strings, not numbers.
320	short		070707		cpio archive
33!:mime	application/x-cpio
340	short		0143561		byte-swapped cpio archive
35!:mime	application/x-cpio # encoding: swapped
360	string		070707		ASCII cpio archive (pre-SVR4 or odc)
370	string		070701		ASCII cpio archive (SVR4 with no CRC)
380	string		070702		ASCII cpio archive (SVR4 with CRC)
39
40#
41# Various archive formats used by various versions of the "ar"
42# command.
43#
44
45#
46# Original UNIX archive formats.
47# They were written with binary values in host byte order, and
48# the magic number was a host "int", which might have been 16 bits
49# or 32 bits.  We don't say "PDP-11" or "VAX", as there might have
50# been ports to little-endian 16-bit-int or 32-bit-int platforms
51# (x86?) using some of those formats; if none existed, feel free
52# to use "PDP-11" for little-endian 16-bit and "VAX" for little-endian
53# 32-bit.  There might have been big-endian ports of that sort as
54# well.
55#
560	leshort		0177555		very old 16-bit-int little-endian archive
570	beshort		0177555		very old 16-bit-int big-endian archive
580	lelong		0177555		very old 32-bit-int little-endian archive
590	belong		0177555		very old 32-bit-int big-endian archive
60
610	leshort		0177545		old 16-bit-int little-endian archive
62>2	string		__.SYMDEF	random library
630	beshort		0177545		old 16-bit-int big-endian archive
64>2	string		__.SYMDEF	random library
650	lelong		0177545		old 32-bit-int little-endian archive
66>4	string		__.SYMDEF	random library
670	belong		0177545		old 32-bit-int big-endian archive
68>4	string		__.SYMDEF	random library
69
70#
71# From "pdp" (but why a 4-byte quantity?)
72#
730	lelong		0x39bed		PDP-11 old archive
740	lelong		0x39bee		PDP-11 4.0 archive
75
76#
77# XXX - what flavor of APL used this, and was it a variant of
78# some ar archive format?  It's similar to, but not the same
79# as, the APL workspace magic numbers in pdp.
80#
810	long		0100554		apl workspace
82
83#
84# System V Release 1 portable(?) archive format.
85#
860	string		=<ar>		System V Release 1 ar archive
87!:mime	application/x-archive
88
89#
90# Debian package; it's in the portable archive format, and needs to go
91# before the entry for regular portable archives, as it's recognized as
92# a portable archive whose first member has a name beginning with
93# "debian".
94#
950	string		=!<arch>\ndebian
96>8	string		debian-split	part of multipart Debian package
97!:mime	application/vnd.debian.binary-package
98>8	string		debian-binary	Debian binary package
99!:mime	application/vnd.debian.binary-package
100>8	string		!debian
101>68	string		>\0		(format %s)
102# These next two lines do not work, because a bzip2 Debian archive
103# still uses gzip for the control.tar (first in the archive).  Only
104# data.tar varies, and the location of its filename varies too.
105# file/libmagic does not currently have support for ascii-string based
106# (offsets) as of 2005-09-15.
107#>81	string		bz2		\b, uses bzip2 compression
108#>84	string		gz		\b, uses gzip compression
109#>136	ledate		x		created: %s
110
111#
112# MIPS archive; they're in the portable archive format, and need to go
113# before the entry for regular portable archives, as it's recognized as
114# a portable archive whose first member has a name beginning with
115# "__________E".
116#
1170	string	=!<arch>\n__________E	MIPS archive
118!:mime	application/x-archive
119>20	string	U			with MIPS Ucode members
120>21	string	L			with MIPSEL members
121>21	string	B			with MIPSEB members
122>19	string	L			and an EL hash table
123>19	string	B			and an EB hash table
124>22	string	X			-- out of date
125
1260	search/1	-h-		Software Tools format archive text
127
128#
129# BSD/SVR2-and-later portable archive formats.
130#
1310	string		=!<arch>		current ar archive
132!:mime	application/x-archive
133>8	string		__.SYMDEF	random library
134>68	string		__.SYMDEF\ SORTED	random library
135
136#
137# "Thin" archive, as can be produced by GNU ar.
138#
1390	string		=!<thin>\n	thin archive with
140>68	belong		0		no symbol entries
141>68	belong		1		%d symbol entry
142>68	belong		>1		%d symbol entries
143
144# ARC archiver, from Daniel Quinlan (quinlan@yggdrasil.com)
145#
146# The first byte is the magic (0x1a), byte 2 is the compression type for
147# the first file (0x01 through 0x09), and bytes 3 to 15 are the MS-DOS
148# filename of the first file (null terminated).  Since some types collide
149# we only test some types on basis of frequency: 0x08 (83%), 0x09 (5%),
150# 0x02 (5%), 0x03 (3%), 0x04 (2%), 0x06 (2%).  0x01 collides with terminfo.
# The 0x8080ffff mask used below compares the magic and type bytes exactly
# and additionally requires the top bit of the next two bytes (the first two
# filename characters) to be clear.
1510	lelong&0x8080ffff	0x0000081a	ARC archive data, dynamic LZW
152!:mime	application/x-arc
1530	lelong&0x8080ffff	0x0000091a	ARC archive data, squashed
154!:mime	application/x-arc
1550	lelong&0x8080ffff	0x0000021a	ARC archive data, uncompressed
156!:mime	application/x-arc
1570	lelong&0x8080ffff	0x0000031a	ARC archive data, packed
158!:mime	application/x-arc
1590	lelong&0x8080ffff	0x0000041a	ARC archive data, squeezed
160!:mime	application/x-arc
1610	lelong&0x8080ffff	0x0000061a	ARC archive data, crunched
162!:mime	application/x-arc
163# [JW] stuff taken from idarc, obviously ARC successors:
1640	lelong&0x8080ffff	0x00000a1a	PAK archive data
165!:mime	application/x-arc
1660	lelong&0x8080ffff	0x0000141a	ARC+ archive data
167!:mime	application/x-arc
1680	lelong&0x8080ffff	0x0000481a	HYP archive data
169!:mime	application/x-arc
170
171# Acorn archive formats (Disaster prone simpleton, m91dps@ecs.ox.ac.uk)
172# I can't create either SPARK or ArcFS archives so I have not tested this stuff
173# [GRR:  the original entries collide with ARC, above; replaced with combined
174#  version (not tested)]
175#0	byte		0x1a		RISC OS archive (spark format)
1760	string		\032archive	RISC OS archive (ArcFS format)
1770       string          Archive\000     RISC OS archive (ArcFS format)
178
179# All these were taken from idarc, many could not be verified. Unfortunately,
180# there were many low-quality sigs, i.e. easy to trigger false positives.
181# Please notify me of any real-world fishy/ambiguous signatures and I'll try
182# to get my hands on the actual archiver and see if I find something better. [JW]
183# Many of them could probably be tightened by also matching a 0-byte or control char near the start.
184
185# idarc calls this Crush/Uncompressed... *shrug*
1860	string	CRUSH Crush archive data
187# Squeeze It (.sqz)
1880	string	HLSQZ Squeeze It archive data
189# SQWEZ
1900	string	SQWEZ SQWEZ archive data
191# HPack (.hpk)
1920	string	HPAK HPack archive data
193# HAP
1940	string	\x91\x33HF HAP archive data
195# MD/MDCD
1960	string	MDmd MDCD archive data
197# LIM
1980	string	LIM\x1a LIM archive data
199# SAR
2003	string	LH5 SAR archive data
201# BSArc/BS2
2020	string	\212\3SB\020\0	BSArc/BS2 archive data
203# Bethesda Softworks Archive (Oblivion)
2040	string	BSA\0 		BSArc archive data
205>4	lelong	x		version %d
206# MAR
2072	string	=-ah MAR archive data
208# ACB
209#0	belong&0x00f800ff	0x00800000 ACB archive data
210# CPZ
211# TODO, this is what idarc says: 0	string	\0\0\0 CPZ archive data
212# JRC
2130	string	JRchive JRC archive data
214# Quantum
2150	string	DS\0 Quantum archive data
216# ReSOF
2170	string	PK\3\6 ReSOF archive data
218# QuArk
2190	string	7\4 QuArk archive data
220# YAC
22114	string	YC YAC archive data
222# X1
2230	string	X1 X1 archive data
2240	string	XhDr X1 archive data
225# CDC Codec (.dqt)
2260	belong&0xffffe000	0x76ff2000 CDC Codec archive data
227# AMGC
2280	string	\xad6" AMGC archive data
229# NuLIB
2300	string	N\xc3\xb5F\xc3\xa9lx\xc3\xa5 NuLIB archive data
231# PakLeo
2320	string	LEOLZW PAKLeo archive data
233# ChArc
2340	string	SChF ChArc archive data
235# PSA
2360	string	PSA PSA archive data
237# CrossePAC
2380	string	DSIGDCC CrossePAC archive data
239# Freeze
2400	string	\x1f\x9f\x4a\x10\x0a Freeze archive data
241# KBoom
2420	string	\xc2\xa8MP\xc2\xa8 KBoom archive data
243# NSQ, must go after CDC Codec
2440	string	\x76\xff NSQ archive data
245# DPA
2460	string	Dirk\ Paehl DPA archive data
247# BA
248# TODO: idarc says "bytes 0-2 == bytes 3-5"
249# TTComp
2500	string	\0\6 TTComp archive data
251# ESP, could this conflict with Easy Software Products' (e.g. ESP ghostscript) documentation?
2520	string	ESP ESP archive data
253# ZPack
2540	string	\1ZPK\1 ZPack archive data
255# Sky
2560	string	\xbc\x40 Sky archive data
257# UFA
2580	string	UFA UFA archive data
259# Dry
2600	string	=-H2O DRY archive data
261# FoxSQZ
2620	string	FOXSQZ FoxSQZ archive data
263# AR7
2640	string	,AR7 AR7 archive data
265# PPMZ
2660	string	PPMZ PPMZ archive data
267# MS Compress
2684	string	\x88\xf0\x27 MS Compress archive data
269# updated by Joerg Jenderek
270>9	string	\0
271>>0	string	KWAJ
272>>>7	string	\321\003	MS Compress archive data
273>>>>14	ulong	>0		\b, original size: %d bytes
274>>>>18		ubyte	>0x65
275>>>>>18		string	x       \b, was %.8s
276>>>>>(10.b-4)	string	x       \b.%.3s
277# MP3 (archiver, not lossy audio compression)
2780	string	MP3\x1a MP3-Archiver archive data
279# ZET
2800	string	OZ\xc3\x9d ZET archive data
281# TSComp
2820	string	\x65\x5d\x13\x8c\x08\x01\x03\x00 TSComp archive data
283# ARQ
2840	string	gW\4\1 ARQ archive data
285# Squash
2863	string	OctSqu Squash archive data
287# Terse
2880	string	\5\1\1\0 Terse archive data
289# PUCrunch
2900	string	\x01\x08\x0b\x08\xef\x00\x9e\x32\x30\x36\x31 PUCrunch archive data
291# UHarc
2920	string	UHA UHarc archive data
293# ABComp
2940	string	\2AB ABComp archive data
2950	string	\3AB2 ABComp archive data
296# CMP
2970	string	CO\0 CMP archive data
298# Splint
2990	string	\x93\xb9\x06 Splint archive data
300# InstallShield
3010	string	\x13\x5d\x65\x8c InstallShield Z archive Data
302# Gather
3031	string	GTH Gather archive data
304# BOA
3050	string	BOA BOA archive data
306# RAX
3070	string	ULEB\xa RAX archive data
308# Xtreme
3090	string	ULEB\0 Xtreme archive data
310# Pack Magic
3110	string	@\xc3\xa2\1\0 Pack Magic archive data
312# BTS
3130	belong&0xfeffffff	0x1a034465 BTS archive data
314# ELI 5750
3150	string	Ora\  ELI 5750 archive data
316# QFC
3170	string	\x1aFC\x1a QFC archive data
3180	string	\x1aQF\x1a QFC archive data
319# PRO-PACK
3200	string	RNC PRO-PACK archive data
321# 777
3220	string	777 777 archive data
323# LZS221
3240	string	sTaC LZS221 archive data
325# HPA
3260	string	HPA HPA archive data
327# Arhangel
3280	string	LG Arhangel archive data
329# EXP1, uses bzip2
3300	string	0123456789012345BZh EXP1 archive data
331# IMP
3320	string	IMP\xa IMP archive data
333# NRV
3340	string	\x00\x9E\x6E\x72\x76\xFF NRV archive data
335# Squish
3360	string	\x73\xb2\x90\xf4 Squish archive data
337# Par
3380	string	PHILIPP Par archive data
3390	string	PAR Par archive data
340# HIT
3410	string	UB HIT archive data
342# SBX
3430	belong&0xfffff000	0x53423000 SBX archive data
344# NaShrink
3450	string	NSK NaShrink archive data
346# SAPCAR
3470	string	#\ CAR\ archive\ header SAPCAR archive data
3480	string	CAR\ 2.00RG SAPCAR archive data
349# Disintegrator
3500	string	DST Disintegrator archive data
351# ASD
3520	string	ASD ASD archive data
353# InstallShield CAB
3540	string	ISc( InstallShield CAB
355# TOP4
3560	string	T4\x1a TOP4 archive data
357# BatComp left out: sig looks like COM executable
358# so TODO: get real 4dos batcomp file and find sig
359# BlakHole
3600	string	BH\5\7 BlakHole archive data
361# BIX
3620	string	BIX0 BIX archive data
363# ChiefLZA
3640	string	ChfLZ ChiefLZA archive data
365# Blink
3660	string	Blink Blink archive data
367# Logitech Compress
3680	string	\xda\xfa Logitech Compress archive data
369# ARS-Sfx (FIXME: really a SFX? then goto COM/EXE)
3701	string	(C)\ STEPANYUK ARS-Sfx archive data
371# AKT/AKT32
3720	string	AKT32 AKT32 archive data
3730	string	AKT AKT archive data
374# NPack
3750	string	MSTSM NPack archive data
376# PFT
3770	string	\0\x50\0\x14 PFT archive data
378# SemOne
3790	string	SEM SemOne archive data
380# PPMD
3810	string	\x8f\xaf\xac\x84 PPMD archive data
382# FIZ
3830	string	FIZ FIZ archive data
384# MSXiE
3850	belong&0xfffff0f0	0x4d530000 MSXiE archive data
386# DeepFreezer
3870	belong&0xfffffff0	0x797a3030 DeepFreezer archive data
388# DC
3890	string	=<DC- DC archive data
390# TPac
3910	string	\4TPAC\3 TPac archive data
392# Ai
3930	string	Ai\1\1\0 Ai archive data
3940	string	Ai\1\0\0 Ai archive data
395# Ai32
3960	string	Ai\2\0 Ai32 archive data
3970	string	Ai\2\1 Ai32 archive data
398# SBC
3990	string	SBC SBC archive data
400# Ybs
4010	string	YBS Ybs archive data
402# DitPack
4030	string	\x9e\0\0 DitPack archive data
404# DMS
4050	string	DMS! DMS archive data
406# EPC
4070	string	\x8f\xaf\xac\x8c EPC archive data
408# VSARC
4090	string	VS\x1a VSARC archive data
410# PDZ
4110	string	PDZ PDZ archive data
412# ReDuq
4130	string	rdqx ReDuq archive data
414# GCA
4150	string	GCAX GCA archive data
416# PPMN
4170	string	pN PPMN archive data
418# WinImage
4193	string	WINIMAGE WinImage archive data
420# Compressia
4210	string	CMP0CMP Compressia archive data
422# UHBC
4230	string	UHB UHBC archive data
424# WinHKI
4250	string	\x61\x5C\x04\x05 WinHKI archive data
426# WWPack data file
4270	string	WWP WWPack archive data
428# BSN (BSA, PTS-DOS)
4290	string	\xffBSG BSN archive data
4301	string	\xffBSG BSN archive data
4313	string	\xffBSG BSN archive data
4321	string	\0\xae\2 BSN archive data
4331	string	\0\xae\3 BSN archive data
4341	string	\0\xae\7 BSN archive data
435# AIN
4360	string	\x33\x18 AIN archive data
4370	string	\x33\x17 AIN archive data
438# XPA32 test moved and merged with XPA by Joerg Jenderek at Sep 2015
439# SZip (TODO: doesn't catch all versions)
4400	string	SZ\x0a\4 SZip archive data
441# XPack DiskImage
442# *.XDI updated by Joerg Jenderek Sep 2015
443# ftp://ftp.sac.sk/pub/sac/pack/0index.txt
444# GRR: this test is still too general as it catches also text files starting with jm
4450	string	jm
446# only examples with these 2 additional characteristic bytes have been found
447>2	string	\x2\x4	Xpack DiskImage archive data
448#!:ext xdi
449# XPack Data
450# *.xpa updated by Joerg Jenderek Sep 2015
451# ftp://ftp.elf.stuba.sk/pub/pc/pack/
4520	string	xpa	XPA
453!:ext	xpa
454# XPA32
455# ftp://ftp.elf.stuba.sk/pub/pc/pack/xpa32.zip
456# created by XPA32.EXE version 1.0.2 for Windows
457>0	string	xpa\0\1 \b32 archive data
458# created by XPACK.COM version 1.67m or 1.67r with short 0x1800
459>3	ubeshort	!0x0001	\bck archive data
460# XPack Single Data
461# changed back by Joerg Jenderek (Sep 2015) to match version 5.12
462# letter 'I'+ acute accent is equivalent to \xcd
4630	string	\xcd\ jm	Xpack single archive data
464#!:mime	application/x-xpa-compressed
465!:ext xpa
466
467# TODO: missing due to unknown magic/magic at end of file:
468#DWC
469#ARG
470#ZAR
471#PC/3270
472#InstallIt
473#RKive
474#RK
475#XPack Diskimage
476
477# These were inspired by idarc, but actually verified
478# Dzip archiver (.dz)
4790	string	DZ Dzip archive data
480>2	byte	x \b, version %i
481>3	byte	x \b.%i
482# ZZip archiver (.zz)
4830	string	ZZ\ \0\0 ZZip archive data
4840	string	ZZ0 ZZip archive data
485# PAQ archiver (.paq)
4860	string	\xaa\x40\x5f\x77\x1f\xe5\x82\x0d PAQ archive data
4870	string	PAQ PAQ archive data
488>3	byte&0xf0	0x30
489>>3	byte	x (v%c)
490# JAR archiver (.j), this is the successor to ARJ, not Java's JAR (which is essentially ZIP)
4910xe	string	\x1aJar\x1b JAR (ARJ Software, Inc.) archive data
4920	string	JARCS JAR (ARJ Software, Inc.) archive data
493
494# ARJ archiver (jason@jarthur.Claremont.EDU)
4950	leshort		0xea60		ARJ archive data
496!:mime	application/x-arj
497>5	byte		x		\b, v%d,
498>8	byte		&0x04		multi-volume,
499>8	byte		&0x10		slash-switched,
500>8	byte		&0x20		backup,
501>34	string		x		original name: %s,
502>7	byte		0		os: MS-DOS
503>7	byte		1		os: PRIMOS
504>7	byte		2		os: Unix
505>7	byte		3		os: Amiga
506>7	byte		4		os: Macintosh
507>7	byte		5		os: OS/2
508>7	byte		6		os: Apple ][ GS
509>7	byte		7		os: Atari ST
510>7	byte		8		os: NeXT
511>7	byte		9		os: VAX/VMS
512>3	byte		>0		%d]
513# [JW] idarc says this is also possible
5142	leshort		0xea60		ARJ archive data
515
516# HA archiver (Greg Roelofs, newt@uchicago.edu)
517# This is a really bad format. A file containing HAWAII will match this...
518#0	string		HA		HA archive data,
519#>2	leshort		=1		1 file,
520#>2	leshort		>1		%hu files,
521#>4	byte&0x0f	=0		first is type CPY
522#>4	byte&0x0f	=1		first is type ASC
523#>4	byte&0x0f	=2		first is type HSC
524#>4	byte&0x0f	=0x0e		first is type DIR
525#>4	byte&0x0f	=0x0f		first is type SPECIAL
526# suggestion: at least identify small archives (<1024 files)
5270  belong&0xffff00fc 0x48410000 HA archive data
528>2	leshort		=1		1 file,
529>2	leshort		>1		%u files,
530>4	byte&0x0f	=0		first is type CPY
531>4	byte&0x0f	=1		first is type ASC
532>4	byte&0x0f	=2		first is type HSC
533>4	byte&0x0f	=0x0e		first is type DIR
534>4	byte&0x0f	=0x0f		first is type SPECIAL
535
536# HPACK archiver (Peter Gutmann, pgut1@cs.aukuni.ac.nz)
# Disabled: "0 string HPAK" is already tested by the idarc-derived
# "HPack (.hpk)" entry earlier in this file, so this second copy of the
# same test is redundant.
# NOTE(review): assumes the earlier entry wins when two tests are identical
# - confirm against the tie-breaking of the file(1) implementation in use.
537#0	string		HPAK		HPACK archive data
538
539# JAM Archive volume format, by Dmitry.Kohmanyuk@UA.net
5400	string		\351,\001JAM\ 		JAM archive,
541>7	string		>\0			version %.4s
542>0x26	byte		=0x27			-
543>>0x2b	string          >\0			label %.11s,
544>>0x27	lelong		x			serial %08x,
545>>0x36	string		>\0			fstype %.8s
546
547# LHARC/LHA archiver (Greg Roelofs, newt@uchicago.edu)
5482	string		-lh0-		LHarc 1.x/ARX archive data [lh0]
549!:mime	application/x-lharc
5502	string		-lh1-		LHarc 1.x/ARX archive data [lh1]
551!:mime	application/x-lharc
5522	string		-lz4-		LHarc 1.x archive data [lz4]
553!:mime	application/x-lharc
5542	string		-lz5-		LHarc 1.x archive data [lz5]
555!:mime	application/x-lharc
556#	[never seen any but the last; -lh4- reported in comp.compression:]
5572	string		-lzs-		LHa/LZS archive data [lzs]
558!:mime	application/x-lha
5592	string		-lh\40-		LHa 2.x? archive data [lh ]
560!:mime	application/x-lha
5612	string		-lhd-		LHa 2.x? archive data [lhd]
562!:mime	application/x-lha
5632	string		-lh2-		LHa 2.x? archive data [lh2]
564!:mime	application/x-lha
5652	string		-lh3-		LHa 2.x? archive data [lh3]
566!:mime	application/x-lha
5672	string		-lh4-		LHa (2.x) archive data [lh4]
568!:mime	application/x-lha
5692	string		-lh5-		LHa (2.x) archive data [lh5]
570!:mime	application/x-lha
5712	string		-lh6-		LHa (2.x) archive data [lh6]
572!:mime	application/x-lha
5732	string		-lh7-		LHa (2.x)/LHark archive data [lh7]
574!:mime	application/x-lha
575>20	byte		x		- header level %d
576# taken from idarc [JW]
5772   string      -lZ         PUT archive data
5782   string      -lz         LZS archive data
5792   string      -sw1-       Swag archive data
580
581# RAR archiver (Greg Roelofs, newt@uchicago.edu)
5820	string		Rar!		RAR archive data,
583!:mime	application/x-rar
584>44	byte		x		v%0x,
585>10	byte		>0		flags:
586>>10	byte		&0x01		Archive volume,
587>>10	byte		&0x02		Commented,
588>>10	byte		&0x04		Locked,
589>>10	byte		&0x08		Solid,
590>>10	byte		&0x20		Authenticated,
591>35	byte		0		os: MS-DOS
592>35	byte		1		os: OS/2
593>35	byte		2		os: Win32
594>35	byte		3		os: Unix
595# some old version? idarc says:
5960   string      RE\x7e\x5e  RAR archive data
597
598# SQUISH archiver (Greg Roelofs, newt@uchicago.edu)
5990	string		SQSH		squished archive data (Acorn RISCOS)
600
601# UC2 archiver (Greg Roelofs, newt@uchicago.edu)
602# [JW] see exe section for self-extracting version
6030	string		UC2\x1a		UC2 archive data
604
605# PKZIP multi-volume archive
6060	string		PK\x07\x08PK\x03\x04	Zip multi-volume archive data, at least PKZIP v2.50 to extract
607!:mime	application/zip
608
609# Zip archives (Greg Roelofs, c/o zip-bugs@wkuvx1.wku.edu)
6100	string		PK\005\006	Zip archive data (empty)
6110	string		PK\003\004
612
613# Specialised zip formats which start with a member named 'mimetype'
614# (stored uncompressed, with no 'extra field') containing the file's MIME type.
615# Check for an 8-byte name, a 0-byte extra field, the name "mimetype", and
616#  contents starting with "application/":
617>26	string		\x8\0\0\0mimetypeapplication/
618
619#  KOffice / OpenOffice & StarOffice / OpenDocument formats
620#    From: Abel Cheung <abel@oaka.org>
621
622#   KOffice (1.2 or above) formats
623#    (mimetype contains "application/vnd.kde.<SUBTYPE>")
624>>50	string	vnd.kde.		KOffice (>=1.2)
625>>>58	string	karbon			Karbon document
626>>>58	string	kchart			KChart document
627>>>58	string	kformula		KFormula document
628>>>58	string	kivio			Kivio document
629>>>58	string	kontour			Kontour document
630>>>58	string	kpresenter		KPresenter document
631>>>58	string	kspread			KSpread document
632>>>58	string	kword			KWord document
633
634#   OpenOffice formats (for OpenOffice 1.x / StarOffice 6/7)
635#    (mimetype contains "application/vnd.sun.xml.<SUBTYPE>")
636>>50	string	vnd.sun.xml.		OpenOffice.org 1.x
637>>>62	string	writer			Writer
638>>>>68	byte	!0x2e			document
639>>>>68	string	.template		template
640>>>>68	string	.global			global document
641>>>62	string	calc			Calc
642>>>>66	byte	!0x2e			spreadsheet
643>>>>66	string	.template		template
644>>>62	string	draw			Draw
645>>>>66	byte	!0x2e			document
646>>>>66	string	.template		template
647>>>62	string	impress			Impress
648>>>>69	byte	!0x2e			presentation
649>>>>69	string	.template		template
650>>>62	string	math			Math document
651>>>62	string	base			Database file
652
653#   OpenDocument formats (for OpenOffice 2.x / StarOffice >= 8)
654#    http://lists.oasis-open.org/archives/office/200505/msg00006.html
655#    (mimetype contains "application/vnd.oasis.opendocument.<SUBTYPE>")
656>>50	string	vnd.oasis.opendocument.	OpenDocument
657>>>73	string	text
658>>>>77	byte	!0x2d			Text
659!:mime	application/vnd.oasis.opendocument.text
660>>>>77	string	-template		Text Template
661!:mime	application/vnd.oasis.opendocument.text-template
662>>>>77	string	-web			HTML Document Template
663!:mime	application/vnd.oasis.opendocument.text-web
664>>>>77	string	-master			Master Document
665!:mime	application/vnd.oasis.opendocument.text-master
666>>>73	string	graphics
667>>>>81	byte	!0x2d			Drawing
668!:mime	application/vnd.oasis.opendocument.graphics
669>>>>81	string	-template		Template
670!:mime	application/vnd.oasis.opendocument.graphics-template
671>>>73	string	presentation
672>>>>85	byte	!0x2d			Presentation
673!:mime	application/vnd.oasis.opendocument.presentation
674>>>>85	string	-template		Template
675!:mime	application/vnd.oasis.opendocument.presentation-template
676>>>73	string	spreadsheet
677>>>>84	byte	!0x2d			Spreadsheet
678!:mime	application/vnd.oasis.opendocument.spreadsheet
679>>>>84	string	-template		Template
680!:mime	application/vnd.oasis.opendocument.spreadsheet-template
681>>>73	string	chart
682>>>>78	byte	!0x2d			Chart
683!:mime	application/vnd.oasis.opendocument.chart
684>>>>78	string	-template		Template
685!:mime	application/vnd.oasis.opendocument.chart-template
686>>>73	string	formula
687>>>>80	byte	!0x2d			Formula
688!:mime	application/vnd.oasis.opendocument.formula
689>>>>80	string	-template		Template
690!:mime	application/vnd.oasis.opendocument.formula-template
691>>>73	string	database		Database
692!:mime	application/vnd.oasis.opendocument.database
693>>>73	string	image
694>>>>78	byte	!0x2d			Image
695!:mime	application/vnd.oasis.opendocument.image
696>>>>78	string	-template		Template
697!:mime	application/vnd.oasis.opendocument.image-template
698
699#  EPUB (OEBPS) books using OCF (OEBPS Container Format)
700#    http://www.idpf.org/ocf/ocf1.0/download/ocf10.htm, section 4.
701#    From: Ralf Brown <ralf.brown@gmail.com>
702>>50	string	epub+zip	EPUB document
703!:mime application/epub+zip
704
705#  Catch other ZIP-with-mimetype formats
706#	In a ZIP file, the bytes immediately after a member's contents are
707#	always "PK". The 2 regex rules here print the "mimetype" member's
708#	contents up to the first 'P'. Luckily, most MIME types don't contain
709#	any capital 'P's. This is a kludge.
710#    (mimetype contains "application/<OTHER>")
711>>50		string	!epub+zip
712>>>50		string	!vnd.oasis.opendocument.
713>>>>50		string	!vnd.sun.xml.
714>>>>>50		string	!vnd.kde.
715>>>>>>38	regex	[!-OQ-~]+		Zip data (MIME type "%s"?)
716!:mime	application/zip
717#    (mimetype contents other than "application/*")
718>26		string	\x8\0\0\0mimetype
719>>38		string	!application/
720>>>38		regex	[!-OQ-~]+		Zip data (MIME type "%s"?)
721!:mime	application/zip
722
723# Java Jar files
#   (26.s+30) is an indirect offset: the little-endian short stored at
#   offset 26 (the same field the "mimetype" rules above match as an 8-byte
#   name length) plus 30, i.e. the first byte after the first member's name.
#   NOTE(review): 0xcafe there is presumably the marker the jar tool puts in
#   the first member's extra field - confirm against the JAR specification.
724>(26.s+30)	leshort	0xcafe		Java archive data (JAR)
725!:mime	application/java-archive
726
727# Generic zip archives (Greg Roelofs, c/o zip-bugs@wkuvx1.wku.edu)
728#   Next line excludes specialized formats:
729>(26.s+30)	leshort	!0xcafe
730>>26    string          !\x8\0\0\0mimetype	Zip archive data
731!:mime	application/zip
732>>>4	byte		0x09		\b, at least v0.9 to extract
733>>>4	byte		0x0a		\b, at least v1.0 to extract
734>>>4	byte		0x0b		\b, at least v1.1 to extract
735>>>4	byte		0x14		\b, at least v2.0 to extract
736>>>4	byte		0x2d		\b, at least v4.5 to extract
737>>>0x161	string		WINZIP		\b, WinZIP self-extracting
738
739# StarView Metafile
740# From Pierre Ducroquet <pinaraf@pinaraf.info>
7410	string	VCLMTF	StarView MetaFile
742>6	beshort	x	\b, version %d
743>8	belong	x	\b, size %d
744
745# Zoo archiver
74620	lelong		0xfdc4a7dc	Zoo archive data
747!:mime	application/x-zoo
748>4	byte		>48		\b, v%c.
749>>6	byte		>47		\b%c
750>>>7	byte		>47		\b%c
751>32	byte		>0		\b, modify: v%d
752>>33	byte		x		\b.%d+
753>42	lelong		0xfdc4a7dc	\b,
754>>70	byte		>0		extract: v%d
755>>>71	byte		x		\b.%d+
756
757# Shell archives
75810	string		#\ This\ is\ a\ shell\ archive	shell archive text
759!:mime	application/octet-stream
760
761#
762# LBR. NB: May conflict with the questionable
763#          "binary Computer Graphics Metafile" format.
764#
7650       string  \0\ \ \ \ \ \ \ \ \ \ \ \0\0    LBR archive data
766#
767# PMA (CP/M derivative of LHA)
768#
7692       string          -pm0-           PMarc archive data [pm0]
7702       string          -pm1-           PMarc archive data [pm1]
7712       string          -pm2-           PMarc archive data [pm2]
7722       string          -pms-           PMarc SFX archive (CP/M, DOS)
7735       string          -pc1-           PopCom compressed executable (CP/M)
774
775# From Rafael Laboissiere <rafael@laboissiere.net>
776# The Project Revision Control System (see
777# http://prcs.sourceforge.net) generates a packaged project
778# file which is recognized by the following entry:
7790	leshort		0xeb81	PRCS packaged project
780
781# Microsoft cabinets
782# by David Necas (Yeti) <yeti@physics.muni.cz>
783#0	string	MSCF\0\0\0\0	Microsoft cabinet file data,
784#>25	byte	x		v%d
785#>24	byte	x		\b.%d
786# MPi: All CABs have version 1.3, so this is pointless.
787# Better magic in debian-additions.
788
789# GTKtalog catalogs
790# by David Necas (Yeti) <yeti@physics.muni.cz>
7914	string	gtktalog\ 	GTKtalog catalog data,
792>13	string	3		version 3
793>>14	beshort	0x677a		(gzipped)
794>>14	beshort	!0x677a		(not gzipped)
795>13	string	>3		version %s
796
797############################################################################
798# Parity archive reconstruction file, the 'par' file format now used on Usenet.
7990       string          PAR\0	PARity archive data
800>48	leshort		=0	- Index file
801>48	leshort		>0	- file number %d
802
803# Felix von Leitner <felix-file@fefe.de>
8040	string	d8:announce	BitTorrent file
805!:mime	application/x-bittorrent
806
807# Atari MSA archive - Teemu Hukkanen <tjhukkan@iki.fi>
8080	beshort 0x0e0f		Atari MSA archive data
809>2	beshort x		\b, %d sectors per track
810>4	beshort 0		\b, 1 sided
811>4	beshort 1		\b, 2 sided
812>6	beshort x		\b, starting track: %d
813>8	beshort x		\b, ending track: %d
814
815# Alternate ZIP string (amc@arwen.cs.berkeley.edu)
8160	string	PK00PK\003\004	Zip archive data
817
818# ACE archive (from http://www.wotsit.org/download.asp?f=ace)
819# by Stefan `Sec` Zehl <sec@42.org>
8207	string		**ACE**		ACE archive data
821>15	byte	>0		version %d
822>16	byte	=0x00		\b, from MS-DOS
823>16	byte	=0x01		\b, from OS/2
824>16	byte	=0x02		\b, from Win/32
825>16	byte	=0x03		\b, from Unix
826>16	byte	=0x04		\b, from MacOS
827>16	byte	=0x05		\b, from WinNT
828>16	byte	=0x06		\b, from Primos
829>16	byte	=0x07		\b, from AppleGS
830>16	byte	=0x08		\b, from Atari
831>16	byte	=0x09		\b, from Vax/VMS
832>16	byte	=0x0A		\b, from Amiga
833>16	byte	=0x0B		\b, from Next
834>14	byte	x		\b, version %d to extract
835>5	leshort &0x0080		\b, multiple volumes,
836>>17	byte	x		\b (part %d),
837>5	leshort &0x0002		\b, contains comment
838>5	leshort	&0x0200		\b, sfx
839>5	leshort	&0x0400		\b, small dictionary
840>5	leshort	&0x0800		\b, multi-volume
841>5	leshort	&0x1000		\b, contains AV-String
842>>30	string	\x16*UNREGISTERED\x20VERSION*	(unregistered)
843>5	leshort &0x2000		\b, with recovery record
844>5	leshort &0x4000		\b, locked
845>5	leshort &0x8000		\b, solid
846# Date in MS-DOS format (whatever that is)
847#>18	lelong	x		Created on
848
849# sfArk : compression program for Soundfonts (sf2) by Dirk Jagdmann
850# <doj@cubic.org>
8510x1A	string	sfArk		sfArk compressed Soundfont
852>0x15	string	2
853>>0x1	string	>\0		Version %s
854>>0x2A	string	>\0		: %s
855
856# DR-DOS 7.03 Packed File *.??_
8570	string	Packed\ File\ 	Personal NetWare Packed File
858>12	string	x		\b, was "%.12s"
859
860# EET archive
861# From: Tilman Sauerbeck <tilman@code-monkey.de>
8620	belong	0x1ee7ff00	EET archive
863!:mime	application/x-eet
864
865# rzip archives
8660	string	RZIP		rzip compressed data
867>4	byte	x		- version %d
868>5	byte	x		\b.%d
869>6	belong	x		(%d bytes)
870
871# From: "Robert Dale" <robdale@gmail.com>
8720	belong	123		dar archive,
873>4	belong	x		label "%.8x
874>>8	belong	x		%.8x
875>>>12	beshort	x		%.4x"
876>14	byte	0x54		end slice
877>14	beshort	0x4e4e		multi-part
878>14	beshort	0x4e53		multi-part, with -S
879
880# Symbian installation files
881#  http://www.thouky.co.uk/software/psifs/sis.html
882#  http://developer.symbian.com/main/downloads/papers/SymbianOSv91/softwareinstallsis.pdf
8838	lelong	0x10000419	Symbian installation file
884!:mime	application/vnd.symbian.install
885>4	lelong	0x1000006D	(EPOC release 3/4/5)
886>4	lelong	0x10003A12	(EPOC release 6)
8870	lelong	0x10201A7A	Symbian installation file (Symbian OS 9.x)
888!:mime	x-epoc/x-sisx-app
889
890# From "Nelson A. de Oliveira" <naoliv@gmail.com>
8910	string	MPQ\032		MoPaQ (MPQ) archive
892
893# From: Dirk Jagdmann <doj@cubic.org>
894# xar archive format: http://code.google.com/p/xar/
# Disabled: the identical "0 string xar!" test appears again in the
# "xar (eXtensible ARchiver) archive" entry further down, which prints the
# version and also the checksum type.  With two identical top-level tests
# only one is reported, so this less detailed copy is commented out.
895#0	string	xar!		xar archive
896#>6	beshort	x		- version %d
897
898# From: "Nelson A. de Oliveira" <naoliv@gmail.com>
899# .kgb
9000	string KGB_arch		KGB Archiver file
901>10	string x		with compression level %.1s
902
903# xar (eXtensible ARchiver) archive
904# From: "David Remahl" <dremahl@apple.com>
9050	string	xar!		xar archive
906#>4	beshort	x		header size %d
907>6	beshort	x		version %d,
908#>8	quad	x		compressed TOC: %d,
909#>16	quad	x		uncompressed TOC: %d,
910>24	belong	0		no checksum
911>24	belong	1		SHA-1 checksum
912>24	belong	2		MD5 checksum
913
914# Type: Parity Archive
915# From: Daniel van Eeden <daniel_e@dds.nl>
9160	string	PAR2		Parity Archive Volume Set
917
918# Bacula volume format. (Volumes always start with a block header.)
919# URL: http://bacula.org/3.0.x-manuals/en/developers/developers/Block_Header.html
920# From: Adam Buchbinder <adam.buchbinder@gmail.com>
92112	string	BB02		Bacula volume
922>20	bedate	x		\b, started %s
923
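924# ePub is XHTML + XML inside a ZIP archive.  The first member of the
925#   archive must be an uncompressed file called 'mimetype' with contents
926#   'application/epub+zip'.  The rule that detects this is the "epub+zip"
#   test among the ZIP-with-mimetype rules in the Zip archives section above.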
927
928
929# From: "Michael Gorny" <mgorny@gentoo.org>
930# ZPAQ: http://mattmahoney.net/dc/zpaq.html
9310	string	zPQ	ZPAQ stream
932>3	byte	x	\b, level %d
933# From: Barry Carter <carter.barry@gmail.com>
934# http://encode.ru/threads/456-zpaq-updates/page32
9350	string	7kSt	ZPAQ file
936
937# BBeB ebook, unencrypted (LRF format)
938# URL: http://www.sven.de/librie/Librie/LrfFormat
939# From: Adam Buchbinder <adam.buchbinder@gmail.com>
9400	string	L\0R\0F\0\0\0	BBeB ebook data, unencrypted
941>8	beshort	x		\b, version %d
942>36	byte	1		\b, front-to-back
943>36	byte	16		\b, back-to-front
944>42	beshort	x		\b, (%dx,
945>44	beshort	x		%d)
946
947# Symantec GHOST image by Joerg Jenderek at May 2014
948# http://us.norton.com/ghost/
949# http://www.garykessler.net/library/file_sigs.html
9500		ubelong&0xFFFFf7f0	0xFEEF0100	Norton GHost image
951# *.GHO
952>2		ubyte&0x08		0x00		\b, first file
953# *.GHS or *.[0-9] with cns program option
954>2		ubyte&0x08		0x08		\b, split file
955# part of split index interesting for *.ghs
956>>4		ubyte			x		id=0x%x
957# compression tag minus one equals numeric compression command line switch z[1-9]
958>3		ubyte			0		\b, no compression
959>3		ubyte			2		\b, fast compression (Z1)
960>3		ubyte			3		\b, medium compression (Z2)
961>3		ubyte			>3
962>>3		ubyte			<11		\b, compression (Z%d-1)
963>2		ubyte&0x08		0x00
964# ~ 30 byte password field only for *.gho
965>>12		ubequad			!0		\b, password protected
966>>44		ubyte			!1
967# 1~Image All, sector-by-sector only for *.gho
968>>>10		ubyte			1		\b, sector copy
969# 1~Image Boot track only for *.gho
970>>>43		ubyte			1		\b, boot track
971# 1~Image Disc only for *.gho implies Image Boot track and sector copy
972>>44		ubyte			1		\b, disc sector copy
973# optional image description only *.gho
974>>0xff		string			>\0		"%-.254s"
975# look for DOS sector end sequence
976>0xE08	search/7776		\x55\xAA
977# XXX indirect not supported yet
978#>>&-512	indirect		x		\b; contains
979
980