1package magic
2
3import (
4	"bytes"
5	"debug/macho"
6	"encoding/binary"
7)
8
9var (
10	// Lnk matches Microsoft lnk binary format.
11	Lnk = prefix([]byte{0x4C, 0x00, 0x00, 0x00, 0x01, 0x14, 0x02, 0x00})
12	// Wasm matches a web assembly File Format file.
13	Wasm = prefix([]byte{0x00, 0x61, 0x73, 0x6D})
14	// Exe matches a Windows/DOS executable file.
15	Exe = prefix([]byte{0x4D, 0x5A})
16	// Elf matches an Executable and Linkable Format file.
17	Elf = prefix([]byte{0x7F, 0x45, 0x4C, 0x46})
18	// Nes matches a Nintendo Entertainment system ROM file.
19	Nes = prefix([]byte{0x4E, 0x45, 0x53, 0x1A})
20	// TzIf matches a Time Zone Information Format (TZif) file.
21	TzIf = prefix([]byte("TZif"))
22)
23
// classOrMachOFat reports whether in starts with the 0xCAFEBABE magic number
// that Java class files and Mach-O fat binaries have in common.
// More info here https://github.com/threatstack/libmagic/blob/master/magic/Magdir/cafebabe
//
// Inputs shorter than 8 bytes are rejected even when the magic is present:
// callers tell the two formats apart by looking at the byte at index 7, so
// that byte has to exist.
func classOrMachOFat(in []byte) bool {
	magic := []byte{0xCA, 0xFE, 0xBA, 0xBE}
	return len(in) >= 8 && bytes.Equal(in[:len(magic)], magic)
}
35
36// Class matches a java class file.
37func Class(raw []byte, limit uint32) bool {
38	return classOrMachOFat(raw) && raw[7] > 30
39}
40
41// MachO matches Mach-O binaries format.
42func MachO(raw []byte, limit uint32) bool {
43	if classOrMachOFat(raw) && raw[7] < 20 {
44		return true
45	}
46
47	if len(raw) < 4 {
48		return false
49	}
50
51	be := binary.BigEndian.Uint32(raw)
52	le := binary.LittleEndian.Uint32(raw)
53
54	return be == macho.Magic32 ||
55		le == macho.Magic32 ||
56		be == macho.Magic64 ||
57		le == macho.Magic64
58}
59
// Swf matches an Adobe Flash swf file, i.e. input that begins with one of the
// three-byte signatures "CWS", "FWS" or "ZWS". The limit argument is not used.
func Swf(raw []byte, limit uint32) bool {
	for _, sig := range []string{"CWS", "FWS", "ZWS"} {
		if bytes.HasPrefix(raw, []byte(sig)) {
			return true
		}
	}
	return false
}
66
// Dbf matches a dBase file.
// https://www.dbase.com/Knowledgebase/INT/db7_file_fmt.htm
//
// Only the first four bytes are inspected: byte 0 must be one of the known
// dbf type codes, byte 2 must be a plausible month (1-12) and byte 3 a
// plausible day of month (1-31). The limit argument is not used.
func Dbf(raw []byte, limit uint32) bool {
	if len(raw) < 4 {
		return false
	}

	// The 3rd and 4th bytes hold the month and day of the last update.
	month, day := raw[2], raw[3]
	if month < 1 || month > 12 || day < 1 || day > 31 {
		return false
	}

	// The first byte dictates the dbf type.
	switch raw[0] {
	case 0x02, 0x03, 0x04, 0x05, 0x30, 0x31, 0x32, 0x42, 0x62, 0x7B, 0x82,
		0x83, 0x87, 0x8A, 0x8B, 0x8E, 0xB3, 0xCB, 0xE5, 0xF5, 0xF4, 0xFB:
		return true
	}
	return false
}
92
// ElfObj matches an object file. It looks only at the two bytes at offsets 16
// and 17 and accepts them when they encode the value 1 in either byte order;
// the ELF magic itself is not checked here. The limit argument is not used.
func ElfObj(raw []byte, limit uint32) bool {
	if len(raw) < 18 {
		return false
	}
	first, second := raw[16], raw[17]
	littleEndian := first == 0x01 && second == 0x00
	bigEndian := first == 0x00 && second == 0x01
	return littleEndian || bigEndian
}
98
// ElfExe matches an executable file. It looks only at the two bytes at
// offsets 16 and 17 and accepts them when they encode the value 2 in either
// byte order; the ELF magic itself is not checked here. The limit argument is
// not used.
func ElfExe(raw []byte, limit uint32) bool {
	if len(raw) < 18 {
		return false
	}
	first, second := raw[16], raw[17]
	littleEndian := first == 0x02 && second == 0x00
	bigEndian := first == 0x00 && second == 0x02
	return littleEndian || bigEndian
}
104
// ElfLib matches a shared library file. It looks only at the two bytes at
// offsets 16 and 17 and accepts them when they encode the value 3 in either
// byte order; the ELF magic itself is not checked here. The limit argument is
// not used.
func ElfLib(raw []byte, limit uint32) bool {
	if len(raw) < 18 {
		return false
	}
	first, second := raw[16], raw[17]
	littleEndian := first == 0x03 && second == 0x00
	bigEndian := first == 0x00 && second == 0x03
	return littleEndian || bigEndian
}
110
// ElfDump matches a core dump file. It looks only at the two bytes at offsets
// 16 and 17 and accepts them when they encode the value 4 in either byte
// order; the ELF magic itself is not checked here. The limit argument is not
// used.
func ElfDump(raw []byte, limit uint32) bool {
	if len(raw) < 18 {
		return false
	}
	first, second := raw[16], raw[17]
	littleEndian := first == 0x04 && second == 0x00
	bigEndian := first == 0x00 && second == 0x04
	return littleEndian || bigEndian
}
116
// Dcm matches a DICOM medical format file: the four bytes starting at offset
// 128 must spell "DICM". The limit argument is not used.
func Dcm(raw []byte, limit uint32) bool {
	const offset = 128
	magic := []byte{0x44, 0x49, 0x43, 0x4D} // "DICM"

	// The input has to be long enough to hold the complete marker.
	if len(raw) < offset+len(magic) {
		return false
	}
	return bytes.HasPrefix(raw[offset:], magic)
}
122
// Marc matches a MARC21 (MAchine-Readable Cataloging) file. All of the
// following must hold:
//
//   - the input is at least 24 bytes long (the size of the "leader" field);
//   - its first 5 bytes are ASCII digits;
//   - bytes 20 through 23 are the fixed text "4500";
//   - a field terminator byte (0x1E) occurs somewhere in the input.
//
// The limit argument is not used.
func Marc(raw []byte, limit uint32) bool {
	const leaderLen = 24
	if len(raw) < leaderLen {
		return false
	}

	// The leader opens with five ASCII digits.
	for _, c := range raw[:5] {
		if c < '0' || c > '9' {
			return false
		}
	}

	// The leader closes with fixed bytes at offset 20.
	if string(raw[20:leaderLen]) != "4500" {
		return false
	}

	// A field terminator has to be present.
	return bytes.IndexByte(raw, 0x1E) >= 0
}
145