xref: /minix/external/bsd/file/dist/src/readcdf.c (revision 0a6a1f1d)
1 /*	$NetBSD: readcdf.c,v 1.12 2015/01/02 21:15:32 christos Exp $	*/
2 
3 /*-
4  * Copyright (c) 2008 Christos Zoulas
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
17  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
20  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26  * POSSIBILITY OF SUCH DAMAGE.
27  */
28 #include "file.h"
29 
30 #ifndef lint
31 #if 0
32 FILE_RCSID("@(#)$File: readcdf.c,v 1.49 2014/12/04 15:56:46 christos Exp $")
33 #else
34 __RCSID("$NetBSD: readcdf.c,v 1.12 2015/01/02 21:15:32 christos Exp $");
35 #endif
36 #endif
37 
38 #include <assert.h>
39 #include <stdlib.h>
40 #include <unistd.h>
41 #include <string.h>
42 #include <time.h>
43 #include <ctype.h>
44 
45 #include "cdf.h"
46 #include "magic.h"
47 
48 #ifndef __arraycount
49 #define __arraycount(a) (sizeof(a) / sizeof(a[0]))
50 #endif
51 
52 #define NOTMIME(ms) (((ms)->flags & MAGIC_MIME) == 0)
53 
/*
 * Substring -> result lookup tables used by cdf_app_to_mime().  Each table
 * is terminated by an entry whose pattern is NULL.  The "mime" member holds
 * whatever string the table maps to: a MIME subtype for app2mime and
 * name2mime, a human-readable description for name2desc.
 */
struct nv {
	const char *pattern;	/* substring searched for, case-insensitively */
	const char *mime;	/* string returned when the pattern matches */
};

/* Matched against the "name of application" summary property. */
static const struct nv app2mime[] = {
	{ .pattern = "Word",			 .mime = "msword" },
	{ .pattern = "Excel",			 .mime = "vnd.ms-excel" },
	{ .pattern = "Powerpoint",		 .mime = "vnd.ms-powerpoint" },
	{ .pattern = "Crystal Reports",		 .mime = "x-rpt" },
	{ .pattern = "Advanced Installer",	 .mime = "vnd.ms-msi" },
	{ .pattern = "InstallShield",		 .mime = "vnd.ms-msi" },
	{ .pattern = "Microsoft Patch Compiler", .mime = "vnd.ms-msi" },
	{ .pattern = "NAnt",			 .mime = "vnd.ms-msi" },
	{ .pattern = "Windows Installer",	 .mime = "vnd.ms-msi" },
	{ .pattern = NULL,			 .mime = NULL },
};

/* Matched against directory entry names; yields a MIME subtype. */
static const struct nv name2mime[] = {
	{ .pattern = "WordDocument",	 .mime = "msword" },
	{ .pattern = "PowerPoint",	 .mime = "vnd.ms-powerpoint" },
	{ .pattern = "DigitalSignature", .mime = "vnd.ms-msi" },
	{ .pattern = NULL,		 .mime = NULL },
};

/* Matched against directory entry names; yields a description. */
static const struct nv name2desc[] = {
	{ .pattern = "WordDocument",	 .mime = "Microsoft Office Word" },
	{ .pattern = "PowerPoint",	 .mime = "Microsoft PowerPoint" },
	{ .pattern = "DigitalSignature", .mime = "Microsoft Installer" },
	{ .pattern = NULL,		 .mime = NULL },
};
79 
/*
 * CLSID lookup tables used by cdf_clsid_to_mime().  A CLSID is stored as two
 * 64-bit words that are compared word-for-word against the root storage's
 * d_storage_uuid.  Each table ends with an entry whose mime is NULL.
 * clsid2mime maps to a MIME subtype, clsid2desc to a description.
 */
struct cv {
	uint64_t clsid[2];
	const char *mime;
};

static const struct cv clsid2mime[] = {
	{
		.clsid = { 0x00000000000c1084ULL, 0x46000000000000c0ULL },
		.mime = "x-msi",
	},
	{
		.clsid = { 0, 0 },
		.mime = NULL,
	},
};

static const struct cv clsid2desc[] = {
	{
		.clsid = { 0x00000000000c1084ULL, 0x46000000000000c0ULL },
		.mime = "MSI Installer",
	},
	{
		.clsid = { 0, 0 },
		.mime = NULL,
	},
};
100 
101 private const char *
cdf_clsid_to_mime(const uint64_t clsid[2],const struct cv * cv)102 cdf_clsid_to_mime(const uint64_t clsid[2], const struct cv *cv)
103 {
104 	size_t i;
105 	for (i = 0; cv[i].mime != NULL; i++) {
106 		if (clsid[0] == cv[i].clsid[0] && clsid[1] == cv[i].clsid[1])
107 			return cv[i].mime;
108 	}
109 	return NULL;
110 }
111 
112 private const char *
cdf_app_to_mime(const char * vbuf,const struct nv * nv)113 cdf_app_to_mime(const char *vbuf, const struct nv *nv)
114 {
115 	size_t i;
116 	const char *rv = NULL;
117 #ifdef USE_C_LOCALE
118 	locale_t old_lc_ctype, c_lc_ctype;
119 
120 	c_lc_ctype = newlocale(LC_CTYPE_MASK, "C", 0);
121 	assert(c_lc_ctype != NULL);
122 	old_lc_ctype = uselocale(c_lc_ctype);
123 	assert(old_lc_ctype != NULL);
124 #endif
125 	for (i = 0; nv[i].pattern != NULL; i++)
126 		if (strcasestr(vbuf, nv[i].pattern) != NULL) {
127 			rv = nv[i].mime;
128 			break;
129 		}
130 #ifdef USE_C_LOCALE
131 	(void)uselocale(old_lc_ctype);
132 	freelocale(c_lc_ctype);
133 #endif
134 	return rv;
135 }
136 
137 private int
cdf_file_property_info(struct magic_set * ms,const cdf_property_info_t * info,size_t count,const cdf_directory_t * root_storage)138 cdf_file_property_info(struct magic_set *ms, const cdf_property_info_t *info,
139     size_t count, const cdf_directory_t *root_storage)
140 {
141         size_t i;
142         cdf_timestamp_t tp;
143         struct timespec ts;
144         char buf[64];
145         const char *str = NULL;
146         const char *s;
147         int len;
148 
149         if (!NOTMIME(ms) && root_storage)
150 		str = cdf_clsid_to_mime(root_storage->d_storage_uuid,
151 		    clsid2mime);
152 
153         for (i = 0; i < count; i++) {
154                 cdf_print_property_name(buf, sizeof(buf), info[i].pi_id);
155                 switch (info[i].pi_type) {
156                 case CDF_NULL:
157                         break;
158                 case CDF_SIGNED16:
159                         if (NOTMIME(ms) && file_printf(ms, ", %s: %hd", buf,
160                             info[i].pi_s16) == -1)
161                                 return -1;
162                         break;
163                 case CDF_SIGNED32:
164                         if (NOTMIME(ms) && file_printf(ms, ", %s: %d", buf,
165                             info[i].pi_s32) == -1)
166                                 return -1;
167                         break;
168                 case CDF_UNSIGNED32:
169                         if (NOTMIME(ms) && file_printf(ms, ", %s: %u", buf,
170                             info[i].pi_u32) == -1)
171                                 return -1;
172                         break;
173                 case CDF_FLOAT:
174                         if (NOTMIME(ms) && file_printf(ms, ", %s: %g", buf,
175                             info[i].pi_f) == -1)
176                                 return -1;
177                         break;
178                 case CDF_DOUBLE:
179                         if (NOTMIME(ms) && file_printf(ms, ", %s: %g", buf,
180                             info[i].pi_d) == -1)
181                                 return -1;
182                         break;
183                 case CDF_LENGTH32_STRING:
184                 case CDF_LENGTH32_WSTRING:
185                         len = info[i].pi_str.s_len;
186                         if (len > 1) {
187                                 char vbuf[1024];
188                                 size_t j, k = 1;
189 
190                                 if (info[i].pi_type == CDF_LENGTH32_WSTRING)
191                                     k++;
192                                 s = info[i].pi_str.s_buf;
193                                 for (j = 0; j < sizeof(vbuf) && len--; s += k) {
194                                         if (*s == '\0')
195                                                 break;
196                                         if (isprint((unsigned char)*s))
197                                                 vbuf[j++] = *s;
198                                 }
199                                 if (j == sizeof(vbuf))
200                                         --j;
201                                 vbuf[j] = '\0';
202                                 if (NOTMIME(ms)) {
203                                         if (vbuf[0]) {
204                                                 if (file_printf(ms, ", %s: %s",
205                                                     buf, vbuf) == -1)
206                                                         return -1;
207                                         }
208                                 } else if (str == NULL && info[i].pi_id ==
209 				    CDF_PROPERTY_NAME_OF_APPLICATION) {
210 					str = cdf_app_to_mime(vbuf, app2mime);
211 				}
212 			}
213                         break;
214                 case CDF_FILETIME:
215                         tp = info[i].pi_tp;
216                         if (tp != 0) {
217 				char tbuf[64];
218                                 if (tp < 1000000000000000LL) {
219                                         cdf_print_elapsed_time(tbuf,
220                                             sizeof(tbuf), tp);
221                                         if (NOTMIME(ms) && file_printf(ms,
222                                             ", %s: %s", buf, tbuf) == -1)
223                                                 return -1;
224                                 } else {
225                                         char *c, *ec;
226                                         cdf_timestamp_to_timespec(&ts, tp);
227                                         c = cdf_ctime(&ts.tv_sec, tbuf);
228                                         if (c != NULL &&
229 					    (ec = strchr(c, '\n')) != NULL)
230 						*ec = '\0';
231 
232                                         if (NOTMIME(ms) && file_printf(ms,
233                                             ", %s: %s", buf, c) == -1)
234                                                 return -1;
235                                 }
236                         }
237                         break;
238                 case CDF_CLIPBOARD:
239                         break;
240                 default:
241                         return -1;
242                 }
243         }
244         if (!NOTMIME(ms)) {
245 		if (str == NULL)
246 			return 0;
247                 if (file_printf(ms, "application/%s", str) == -1)
248                         return -1;
249         }
250         return 1;
251 }
252 
253 private int
cdf_file_catalog(struct magic_set * ms,const cdf_header_t * h,const cdf_stream_t * sst)254 cdf_file_catalog(struct magic_set *ms, const cdf_header_t *h,
255     const cdf_stream_t *sst)
256 {
257 	cdf_catalog_t *cat;
258 	size_t i;
259 	char buf[256];
260 	cdf_catalog_entry_t *ce;
261 
262         if (NOTMIME(ms)) {
263 		if (file_printf(ms, "Microsoft Thumbs.db [") == -1)
264 			return -1;
265 		if (cdf_unpack_catalog(h, sst, &cat) == -1)
266 			return -1;
267 		ce = cat->cat_e;
268 		/* skip first entry since it has a , or paren */
269 		for (i = 1; i < cat->cat_num; i++)
270 			if (file_printf(ms, "%s%s",
271 			    cdf_u16tos8(buf, ce[i].ce_namlen, ce[i].ce_name),
272 			    i == cat->cat_num - 1 ? "]" : ", ") == -1) {
273 				free(cat);
274 				return -1;
275 			}
276 		free(cat);
277 	} else {
278 		if (file_printf(ms, "application/CDFV2") == -1)
279 			return -1;
280 	}
281 	return 1;
282 }
283 
284 private int
cdf_file_summary_info(struct magic_set * ms,const cdf_header_t * h,const cdf_stream_t * sst,const cdf_directory_t * root_storage)285 cdf_file_summary_info(struct magic_set *ms, const cdf_header_t *h,
286     const cdf_stream_t *sst, const cdf_directory_t *root_storage)
287 {
288         cdf_summary_info_header_t si;
289         cdf_property_info_t *info;
290         size_t count;
291         int m;
292 
293         if (cdf_unpack_summary_info(sst, h, &si, &info, &count) == -1)
294                 return -1;
295 
296         if (NOTMIME(ms)) {
297 		const char *str;
298 
299                 if (file_printf(ms, "Composite Document File V2 Document")
300 		    == -1)
301                         return -1;
302 
303                 if (file_printf(ms, ", %s Endian",
304                     si.si_byte_order == 0xfffe ?  "Little" : "Big") == -1)
305                         return -2;
306                 switch (si.si_os) {
307                 case 2:
308                         if (file_printf(ms, ", Os: Windows, Version %d.%d",
309                             si.si_os_version & 0xff,
310                             (uint32_t)si.si_os_version >> 8) == -1)
311                                 return -2;
312                         break;
313                 case 1:
314                         if (file_printf(ms, ", Os: MacOS, Version %d.%d",
315                             (uint32_t)si.si_os_version >> 8,
316                             si.si_os_version & 0xff) == -1)
317                                 return -2;
318                         break;
319                 default:
320                         if (file_printf(ms, ", Os %d, Version: %d.%d", si.si_os,
321                             si.si_os_version & 0xff,
322                             (uint32_t)si.si_os_version >> 8) == -1)
323                                 return -2;
324                         break;
325                 }
326 		if (root_storage) {
327 			str = cdf_clsid_to_mime(root_storage->d_storage_uuid,
328 			    clsid2desc);
329 			if (str) {
330 				if (file_printf(ms, ", %s", str) == -1)
331 					return -2;
332 			}
333 		}
334 	}
335 
336         m = cdf_file_property_info(ms, info, count, root_storage);
337         free(info);
338 
339         return m == -1 ? -2 : m;
340 }
341 
342 #ifdef notdef
343 private char *
format_clsid(char * buf,size_t len,const uint64_t uuid[2])344 format_clsid(char *buf, size_t len, const uint64_t uuid[2]) {
345 	snprintf(buf, len, "%.8" PRIx64 "-%.4" PRIx64 "-%.4" PRIx64 "-%.4"
346 	    PRIx64 "-%.12" PRIx64,
347 	    (uuid[0] >> 32) & (uint64_t)0x000000000ffffffffULL,
348 	    (uuid[0] >> 16) & (uint64_t)0x0000000000000ffffULL,
349 	    (uuid[0] >>  0) & (uint64_t)0x0000000000000ffffULL,
350 	    (uuid[1] >> 48) & (uint64_t)0x0000000000000ffffULL,
351 	    (uuid[1] >>  0) & (uint64_t)0x0000fffffffffffffULL);
352 	return buf;
353 }
354 #endif
355 
356 protected int
file_trycdf(struct magic_set * ms,int fd,const unsigned char * buf,size_t nbytes)357 file_trycdf(struct magic_set *ms, int fd, const unsigned char *buf,
358     size_t nbytes)
359 {
360         cdf_info_t info;
361         cdf_header_t h;
362         cdf_sat_t sat, ssat;
363         cdf_stream_t sst, scn;
364         cdf_dir_t dir;
365         int i;
366         const char *expn = "";
367         const char *corrupt = "corrupt: ";
368         const cdf_directory_t *root_storage;
369 
370         info.i_fd = fd;
371         info.i_buf = buf;
372         info.i_len = nbytes;
373         if (ms->flags & MAGIC_APPLE)
374                 return 0;
375         if (cdf_read_header(&info, &h) == -1)
376                 return 0;
377 #ifdef CDF_DEBUG
378         cdf_dump_header(&h);
379 #endif
380 
381         if ((i = cdf_read_sat(&info, &h, &sat)) == -1) {
382                 expn = "Can't read SAT";
383                 goto out0;
384         }
385 #ifdef CDF_DEBUG
386         cdf_dump_sat("SAT", &sat, CDF_SEC_SIZE(&h));
387 #endif
388 
389         if ((i = cdf_read_ssat(&info, &h, &sat, &ssat)) == -1) {
390                 expn = "Can't read SSAT";
391                 goto out1;
392         }
393 #ifdef CDF_DEBUG
394         cdf_dump_sat("SSAT", &ssat, CDF_SHORT_SEC_SIZE(&h));
395 #endif
396 
397         if ((i = cdf_read_dir(&info, &h, &sat, &dir)) == -1) {
398                 expn = "Can't read directory";
399                 goto out2;
400         }
401 
402         if ((i = cdf_read_short_stream(&info, &h, &sat, &dir, &sst,
403 	    &root_storage)) == -1) {
404                 expn = "Cannot read short stream";
405                 goto out3;
406         }
407 #ifdef CDF_DEBUG
408         cdf_dump_dir(&info, &h, &sat, &ssat, &sst, &dir);
409 #endif
410 #ifdef notdef
411 	if (root_storage) {
412 		if (NOTMIME(ms)) {
413 			char clsbuf[128];
414 			if (file_printf(ms, "CLSID %s, ",
415 			    format_clsid(clsbuf, sizeof(clsbuf),
416 			    root_storage->d_storage_uuid)) == -1)
417 				return -1;
418 		}
419 	}
420 #endif
421 
422 	if ((i = cdf_read_user_stream(&info, &h, &sat, &ssat, &sst, &dir,
423 	    "FileHeader", &scn)) != -1) {
424 #define HWP5_SIGNATURE "HWP Document File"
425 		if (scn.sst_dirlen >= sizeof(HWP5_SIGNATURE) - 1
426 		    && memcmp(scn.sst_tab, HWP5_SIGNATURE,
427 		    sizeof(HWP5_SIGNATURE) - 1) == 0) {
428 		    if (NOTMIME(ms)) {
429 			if (file_printf(ms,
430 			    "Hangul (Korean) Word Processor File 5.x") == -1)
431 			    return -1;
432 		    } else {
433 			if (file_printf(ms, "application/x-hwp") == -1)
434 			    return -1;
435 		    }
436 		    i = 1;
437 		    goto out5;
438 		} else {
439 		    free(scn.sst_tab);
440 		    scn.sst_tab = NULL;
441 		    scn.sst_len = 0;
442 		    scn.sst_dirlen = 0;
443 		}
444 	}
445 
446         if ((i = cdf_read_summary_info(&info, &h, &sat, &ssat, &sst, &dir,
447             &scn)) == -1) {
448                 if (errno == ESRCH) {
449 			if ((i = cdf_read_catalog(&info, &h, &sat, &ssat, &sst,
450 			    &dir, &scn)) == -1) {
451 				corrupt = expn;
452 				if ((i = cdf_read_encrypted_package(&info, &h,
453 				    &sat, &ssat, &sst, &dir, &scn)) == -1)
454 					expn = "No summary info";
455 				else {
456 					expn = "Encrypted";
457 					i = -1;
458 				}
459 				goto out4;
460 			}
461 #ifdef CDF_DEBUG
462 			cdf_dump_catalog(&h, &scn);
463 #endif
464 			if ((i = cdf_file_catalog(ms, &h, &scn))
465 			    < 0)
466 				expn = "Can't expand catalog";
467                 } else {
468                         expn = "Cannot read summary info";
469                 }
470                 goto out4;
471         }
472 #ifdef CDF_DEBUG
473         cdf_dump_summary_info(&h, &scn);
474 #endif
475         if ((i = cdf_file_summary_info(ms, &h, &scn, root_storage)) < 0)
476             expn = "Can't expand summary_info";
477 
478 	if (i == 0) {
479 		const char *str = NULL;
480 		cdf_directory_t *d;
481 		char name[__arraycount(d->d_name)];
482 		size_t j, k;
483 
484 		for (j = 0; str == NULL && j < dir.dir_len; j++) {
485 			d = &dir.dir_tab[j];
486 			for (k = 0; k < sizeof(name); k++)
487 				name[k] = (char)cdf_tole2(d->d_name[k]);
488 			str = cdf_app_to_mime(name,
489 			    NOTMIME(ms) ? name2desc : name2mime);
490 		}
491 		if (NOTMIME(ms)) {
492 			if (str != NULL) {
493 				if (file_printf(ms, "%s", str) == -1)
494 					return -1;
495 				i = 1;
496 			}
497 		} else {
498 			if (str == NULL)
499 				str = "vnd.ms-office";
500 			if (file_printf(ms, "application/%s", str) == -1)
501 				return -1;
502 			i = 1;
503 		}
504 	}
505 out5:
506         free(scn.sst_tab);
507 out4:
508         free(sst.sst_tab);
509 out3:
510         free(dir.dir_tab);
511 out2:
512         free(ssat.sat_tab);
513 out1:
514         free(sat.sat_tab);
515 out0:
516 	if (i == -1) {
517 	    if (NOTMIME(ms)) {
518 		if (file_printf(ms,
519 		    "Composite Document File V2 Document") == -1)
520 		    return -1;
521 		if (*expn)
522 		    if (file_printf(ms, ", %s%s", corrupt, expn) == -1)
523 			return -1;
524 	    } else {
525 		if (file_printf(ms, "application/CDFV2-%s",
526 		    *corrupt ? "corrupt" : "encrypted") == -1)
527 		    return -1;
528 	    }
529 	    i = 1;
530 	}
531         return i;
532 }
533