1 /***********************************************************************
2 *                                                                      *
3 *               This software is part of the ast package               *
4 *          Copyright (c) 1989-2012 AT&T Intellectual Property          *
5 *                      and is licensed under the                       *
6 *                 Eclipse Public License, Version 1.0                  *
7 *                    by AT&T Intellectual Property                     *
8 *                                                                      *
9 *                A copy of the License is available at                 *
10 *          http://www.eclipse.org/org/documents/epl-v10.html           *
11 *         (with md5 checksum b35adb5213ca9657e911e9befb180842)         *
12 *                                                                      *
13 *              Information and Software Systems Research               *
14 *                            AT&T Research                             *
15 *                           Florham Park NJ                            *
16 *                                                                      *
17 *               Glenn Fowler <glenn.s.fowler@gmail.com>                *
18 *                                                                      *
19 ***********************************************************************/
20 #pragma prototyped
21 /*
22  * Glenn Fowler
23  * AT&T Research
24  *
25  * file -- determine file type
26  *
27  * the sum of the hacks {s5,v10,planix} is _____ than the parts
28  */
29 
30 static const char usage[] =
31 "[-?\n@(#)$Id: file (AT&T Research) 2011-08-01 $\n]"
32 USAGE_LICENSE
33 "[+NAME?file - determine file type]"
34 "[+DESCRIPTION?\bfile\b tests and attempts to classify each \afile\a argument."
35 "	Non-regular files are classified by their \bstat\b(2) types. Empty and"
36 "	non-readable regular files are classified as such. Otherwise a data"
37 "	block is read from \afile\a and this is used to match against the"
38 "	\amagic\a file(s) (see \bMAGIC FILE\b below). Files with less than 1024"
39 "	bytes of data are labelled \bsmall\b to note that the sample may"
40 "	be too small for an accurate classification. Failing a content match,"
41 "	the file name extension may be used to classify. As a last resort"
42 "	statistical sampling is done for a small range of languages and"
43 "	applications. Failed matches usually result in the less informative"
44 "	\bascii text\b or \bbinary data\b.]"
45 
46 "[a:all?List all magic table matches.]"
47 "[b:brief|no-filename?Suppress the output line file name prefix.]"
48 "[c:mime?List the \bmime\b(1) classification for each \afile\a. Although the"
49 "	default descriptions are fairly consistent, use \b--mime\b for"
50 "	precise classification matching.]"
51 "[d:default-magic?Equivalent to \b--magic=-\b.]"
52 "[f:files|file-list?\afile\a contains list of file names, one per line, that"
53 "	are classified.]:[file]"
54 "[i:ignore-magic?Equivalent to \b--magic=/dev/null\b.]"
55 "[l:list?The loaded \amagic\a files are listed and then \bfile\b exits.]"
56 "[M:magic?\afile\a is loaded as a \amagic\a file. More than one \b--magic\b"
57 "	option may be specified; the precedence is from left to right. The"
58 "	first \b--magic\b option causes the default system \amagic\a file to"
59 "	be ignored; the file \b-\b may then be specified to explicitly"
60 "	load the default system \amagic\a file. To ignore all magic files"
61 "	specify the file \b/dev/null\b and no others.]:[file]"
62 "[m:append-magic?\afile\a is loaded as a \amagic\a file. Equivalent to the"
63 "	\b--magic\b option, except that the default system \amagic\a file is"
64 "	still loaded last. If \b--magic\b is also specified then the default"
65 "	system \amagic\a is only loaded if explicity specified.]:[file]"
66 "[p:pattern|match?Only files with descriptions matching the \bsh\b(1)"
67 "	match \apattern\a are listed. \bfile\b exits with status 0 if any"
68 "	files match, 0 otherwise.]:[pattern]"
69 "[q:quiet|silent?Do not list matching \b--pattern\b files.]"
70 "[L:logical|dereference?Follow symbolic links.]"
71 "[P|h:physical?Don't follow symbolic links.]"
72 "[w:warn?Enable magic file parse warning messages.]"
73 
74 "\n"
75 "\n[ file ... ]\n"
76 "\n"
77 
78 "[+MAGIC FILE?A \amagic\a file specifies file content and name match"
79 "	expressions, descriptions, and \bmime\b(1) classifications. Each line"
80 "	in the file consists of five \btab\b separated fields:]{"
81 "	[+[op]]offset?\aoffset\a determines tha data location for the content"
82 "		test. \b(@\b\aexpression\a\b)\b specifies an indirect offset,"
83 "		i.e., the offset is the numeric contents of the data"
84 "		location at \aexpression\a. The default indirect numeric size"
85 "		is 4 bytes; a \bB\b suffix denotes 1 byte, \bH\b denotes 2"
86 "		bytes, and \bQ\b denotes 8 bytes. \aoffset\a may also be one"
87 "		of { \batime\b \bblocks\b \bctime\b \bfstype\b \bgid\b"
88 "		\bmode\b \bmtime\b \bname\b \bnlink\b \bsize\b \buid\b } to"
89 "		access \bstat\b(2) information for the current file. The"
90 "		optional \aop\a specifies relationships with surrounding"
91 "		\amagic\a lines:]{"
92 "		[++?previous fields in block match, current optional]"
93 "		[+&?previous and current fields in block match]"
94 "		[+|?previous fields in block do not match, subsequent skipped]"
95 "		[+{?start nesting block]"
96 "		[+}?end nesting block]"
97 "		[+c{?function declaration and call (1 char names)]"
98 "		[+}?function return]"
99 "		[+c()?function call]"
100 "	}"
101 "	[+type?The content data type:]{"
102 "		[+byte?1 byte integer]"
103 "		[+short?2 byte integer]"
104 "		[+long?4 byte integer]"
105 "		[+quad?8 byte integer]"
106 "		[+date?4 byte time_t]"
107 "		[+version?4 byte unsigned integer of the form \aYYYYMMDD\a"
108 "			for \aYYYY-MM-DD\a, 0x\aYYZZ\a for \aYY.ZZ\a, or"
109 "			0x\aWWXXYYZZ\a for \aWW.XX.YY.ZZ\a]"
110 "		[+edit?substitute operator for string data:"
111 "			%\aold\a%\anew\a%[glu]], where \b%\b is any delimiter]"
112 "		[+match?case insensitive \bsh\b(1) match pattern operator"
113 "			for string data]"
114 "	}"
115 "	[+[mask]]operator?\amask\a is an optional \b&\b\anumber\a that is"
116 "		masked (bit \band\b) with the content data before"
117 "		comparison. \aoperator\a is one of { \b< <= > >= != ==\b }."
118 "		Numeric values may be decimal, octal or hex.]"
119 "	[+description?The description text. Care was taken to maintain"
120 "		consistency between all descriptions, i.e., character case,"
121 "		grammatical parts placement, and punctuation, making"
122 "		description pattern matches feasible. \adescription\a may"
123 "		contain one \bprintf\b(3) format specification for the"
124 "		current data value at \aoffset\a.]"
125 "	[+mime?The \bmime\b(1) type/subtype. This provides a standard"
126 "		and consistent matching key space.]"
127 "}"
128 
129 "[+FILES]{"
130 "	[+lib/file/magic?Default magic file on \b$PATH\b.]"
131 "}"
132 
133 "[+SEE ALSO?\bfind\b(1), \bls\b(1), \bmime\b(1), \btw\b(1)]"
134 ;
135 
136 #include <ast.h>
137 #include <magic.h>
138 #include <ctype.h>
139 #include <error.h>
140 
/*
 * command-private flag bits kept in Magicdisc_t.flags next to the
 * library's own MAGIC_* bits; each one is a successive shift of
 * MAGIC_USER
 */

/* -b: type() omits the "file:" prefix from each output line */
#define MAGIC_BRIEF		(MAGIC_USER << 0)

/* -l: main() calls magiclist() instead of classifying files */
#define MAGIC_LIST		(MAGIC_USER << 1)

/* -d, -i, -M: main() skips its implicit default magic file load */
#define MAGIC_LOAD		(MAGIC_USER << 2)

/* -P, -h (cleared by -L): type() uses lstat() rather than stat() */
#define MAGIC_PHYSICAL		(MAGIC_USER << 3)

/* -q: with a pattern, type() does not print matching file names */
#define MAGIC_SILENT		(MAGIC_USER << 4)
146 
147 static int
type(Magic_t * mp,char * file,const char * pattern,register Magicdisc_t * disc)148 type(Magic_t* mp, char* file, const char* pattern, register Magicdisc_t* disc)
149 {
150 	char*		s;
151 	char*		e;
152 	Sfio_t*		fp;
153 	struct stat*	sp;
154 	struct stat	st;
155 
156 	sp = ((disc->flags & MAGIC_PHYSICAL) ? lstat(file, &st) : stat(file, &st)) ? (struct stat*)0 : &st;
157 	fp = (sp && S_ISREG(sp->st_mode)) ? sfopen(NiL, file, "r") : (Sfio_t*)0;
158 	s = magictype(mp, fp, file, sp);
159 	if (fp)
160 		sfclose(fp);
161 	e = pathcanon(file, 0, 0);
162 	if (!pattern)
163 	{
164 		if (!(disc->flags & MAGIC_BRIEF))
165 			sfprintf(sfstdout, "%s:\t%s", file, e - file > 6 ? "" : "\t");
166 		sfprintf(sfstdout, "%s\n", s);
167 		return 1;
168 	}
169 	else if (strmatch(s, pattern))
170 	{
171 		if (!(disc->flags & MAGIC_SILENT))
172 			sfprintf(sfstdout, "%s\n", file);
173 		return 1;
174 	}
175 	return 0;
176 }
177 
178 int
main(int argc,register char ** argv)179 main(int argc, register char** argv)
180 {
181 	register Magic_t*	mp;
182 	register char*		p;
183 	char*			pattern = 0;
184 	Sfio_t*			list = 0;
185 	int			hit;
186 	Magicdisc_t		disc;
187 
188 	NoP(argc);
189 	error_info.id = "file";
190 	disc.version = MAGIC_VERSION;
191 	disc.flags = 0;
192 	disc.errorf = errorf;
193 	if (!(mp = magicopen(&disc)))
194 		error(3, "out of space");
195 	for (;;)
196 	{
197 		switch (optget(argv, usage))
198 		{
199 		case 'a':
200 			disc.flags |= MAGIC_ALL;
201 			continue;
202 		case 'b':
203 			disc.flags |= MAGIC_BRIEF;
204 			continue;
205 		case 'c':
206 			disc.flags |= MAGIC_MIME;
207 			continue;
208 		case 'd':
209 			if (magicload(mp, NiL, 0))
210 				error(3, "cannot load default magic file");
211 			disc.flags |= MAGIC_LOAD;
212 			continue;
213 		case 'f':
214 			if (streq(opt_info.arg, "-") || streq(opt_info.arg, "/dev/stdin") || streq(opt_info.arg, "/dev/fd/0"))
215 				list = sfstdin;
216 			else if (!(list = sfopen(NiL, opt_info.arg, "r")))
217 				error(3, "cannot open %s", opt_info.arg);
218 			continue;
219 		case 'i':
220 			disc.flags |= MAGIC_LOAD;
221 			continue;
222 		case 'l':
223 			disc.flags |= MAGIC_LIST|MAGIC_VERBOSE;
224 			continue;
225 		case 'L':
226 			disc.flags &= ~MAGIC_PHYSICAL;
227 			continue;
228 		case 'm':
229 			if (magicload(mp, opt_info.arg, 0))
230 				error(3, "%s: cannot load magic file", opt_info.arg);
231 			continue;
232 		case 'M':
233 			if (magicload(mp, opt_info.arg, 0))
234 				error(3, "%s: cannot load magic file", opt_info.arg);
235 			disc.flags |= MAGIC_LOAD;
236 			continue;
237 		case 'p':
238 			pattern = opt_info.arg;
239 			continue;
240 		case 'P':
241 		case 'h':
242 			disc.flags |= MAGIC_PHYSICAL;
243 			continue;
244 		case 'q':
245 			disc.flags |= MAGIC_SILENT;
246 			continue;
247 		case 'w':
248 			disc.flags |= MAGIC_VERBOSE;
249 			continue;
250 		case '?':
251 			error(ERROR_USAGE|4, "%s", opt_info.arg);
252 			continue;
253 		case ':':
254 			error(2, "%s", opt_info.arg);
255 			continue;
256 		}
257 		break;
258 	}
259 	if (error_info.errors)
260 		error(ERROR_USAGE|4, "%s", optusage(NiL));
261 	argv += opt_info.index;
262 	if (!(disc.flags & MAGIC_LOAD) && magicload(mp, NiL, 0))
263 		error(3, "$%s,%s: cannot load default magic file", MAGIC_FILE_ENV, MAGIC_FILE);
264 	if (disc.flags & MAGIC_LIST)
265 	{
266 		magiclist(mp, sfstdout);
267 		hit = 1;
268 	}
269 	else
270 	{
271 		hit = 0;
272 		if (!list && !*argv)
273 			list = sfstdin;
274 		if (list)
275 			while (p = sfgetr(list, '\n', 1))
276 				if (*p)
277 					hit |= type(mp, p, pattern, &disc);
278 		while (p = *argv++)
279 			if (*p)
280 				hit |= type(mp, p, pattern, &disc);
281 	}
282 	return !hit;
283 }
284