1 /*
2  * zip2john processes input ZIP files into a format suitable for use with JtR.
3  *
4  * This software is Copyright (c) 2011, Dhiru Kholia <dhiru.kholia at gmail.com>,
5  * and it is hereby released to the general public under the following terms:
6  * Redistribution and use in source and binary forms, with or without modification,
7  * are permitted.
8  *
9  * Updated in Aug 2011 by JimF.  Added PKZIP 'old' encryption.  The signature on the
10  * pkzip will be $pkzip$ and does not look like the AES version written by Dhiru
11  * Also fixed some porting issues, such as variables needing declared at top of blocks.
12  *
13  * References:
14  *
15  * 1. http://www.winzip.com/aes_info.htm
16  * 2. http://www.winzip.com/aes_tips.htm
17  * 4. ftp://ftp.info-zip.org/pub/infozip/doc/appnote-iz-latest.zip
18  * 5. Nathan Moinvaziri's work in extending minizip to support AES.
19  * 6. http://oldhome.schmorp.de/marc/fcrackzip.html (coding hints)
20  * 7. http://www.pkware.com/documents/casestudies/APPNOTE.TXT
21  * 8. http://gladman.plushost.co.uk/oldsite/cryptography_technology/fileencrypt/index.php
22  *   (borrowed files have "gladman_" prepended to them). This gladman code has been removed from JtR source tree.
23  *
24  * Usage:
25  *
26  * 1. Run zip2john on zip file(s) as "zip2john [zip files]".
27  *    Output is written to standard output.
28  * 2. Run JtR on the output generated by zip2john as "john [output file]".
29  *
30  * Output Line Format:
31  *
32  * For type = 0, for ZIP files encrypted using AES
 * filename:$zip$*type*hex(CRC)*encryption_strength*hex(salt)*hex(password_verification_value):hex(authentication_code)
34  *
 * For original pkzip encryption:  (JimF, with longer explanation of fields)
36  * filename:$pkzip$C*B*[DT*MT{CL*UL*CR*OF*OX}*CT*DL*CS*DA]*$/pkzip$   (deprecated)
37  * filename:$pkzip2$C*B*[DT*MT{CL*UL*CR*OF*OX}*CT*DL*CS*TC*DA]*$/pkzip2$   (new format, with 2 checksums)
38  * All numeric and 'binary data' fields are stored in hex.
39  *
40  * C   is the count of hashes present (the array of items, inside the []  C can be 1 to 3.).
41  * B   is number of valid bytes in the checksum (1 or 2).  Unix zip is 2 bytes, all others are 1 (NOTE, some can be 0)
42  * ARRAY of data starts here
43  *   DT  is a "Data Type enum".  This will be 1 2 or 3.  1 is 'partial'. 2 and 3 are full file data (2 is inline, 3 is load from file).
44  *   MT  Magic Type enum.  0 is no 'type'.  255 is 'text'. Other types (like MS Doc, GIF, etc), see source.
45  *     NOTE, CL, DL, CRC, OFF are only present if DT != 1
46  *     CL  Compressed length of file blob data (includes 12 byte IV).
47  *     UL  Uncompressed length of the file.
48  *     CR  CRC32 of the 'final' file.
49  *     OF  Offset to the PK\x3\x4 record for this file data. If DT == 2, then this will be a 0, as it is not needed, all of the data is already included in the line.
50  *     OX  Additional offset (past OF), to get to the zip data within the file.
51  *     END OF 'optional' fields.
 *   CT  Compression type  (0 or 8)  0 is stored, 8 is deflated.
53  *   DL  Length of the DA data.
54  *   CS  2 bytes of checksum data.
 *   TC  2 bytes of checksum data (from timestamp)
56  *   DA  This is the 'data'.  It will be hex data if DT == 1 or 2. If DT == 3, then it is a filename (name of the .zip file).
57  * END of array item.  There will be C (count) array items.
 * The format string will end with $/pkzip$ (deprecated format) or $/pkzip2$ (new format)
59  *
60  * The AES-zip format redone by JimF, Summer 2014.  Spent some time to understand the AES authentication code,
61  * and now have placed code to do this. However, this required format change.  The old AES format was:
62  *
63  *    For type = 0, for ZIP files encrypted using AES
 *    filename:$zip$*type*hex(CRC)*encryption_strength*hex(salt)*hex(password_verification_value):hex(authentication_code)
65  *     NOTE, the authentication code was NOT part of this, even though documented in this file. nor is hex(CRC) a part.
66  *
67  * The new format is:  (and the $zip$ is deprecated)
68  *
69  *    filename:$zip2$*Ty*Mo*Ma*Sa*Va*Le*DF*Au*$/zip2$
70  *    Ty = type (0) and ignored.
71  *    Mo = mode (1 2 3 for 128/192/256 bit)
 *    Ma = magic (file magic).  This is reserved for now.  See pkzip_fmt_plug.c or zip2john.c for information.
73  *         For now, this must be a '0'
74  *    Sa = salt(hex).   8, 12 or 16 bytes of salt (depends on mode)
75  *    Va = Verification bytes(hex) (2 byte quick checker)
76  *    Le = real compr len (hex) length of compressed/encrypted data (field DF)
77  *    DF = compressed data DF can be Le*2 hex bytes, and if so, then it is the ENTIRE file blob written 'inline'.
78  *         However, if the data blob is too long, then a .zip ZIPDATA_FILE_PTR_RECORD structure will be the 'contents' of DF
79  *    Au = Authentication code (hex) a 10 byte hex value that is the hmac-sha1 of data over DF. This is the binary() value
80  *
81  *  ZIPDATA_FILE_PTR_RECORD  (this can be the 'DF' of this above hash line).
82  *      *ZFILE*Fn*Oh*Ob*  (Note, the leading and trailing * are the * that 'wrap' the DF object.
83  *  ZFILE This is the literal string ZFILE
84  *  Fn    This is the name of the .zip file.  NOTE the user will need to keep the .zip file in proper locations (same as
85  *        was seen when running zip2john. If the file is removed, this hash line will no longer be valid.
86  *  Oh    Offset to the zip central header record for this blob.
87  *  Ob    Offset to the start of the blob data
88  *
89  *
90  * The new format for PKWARE's Strong Encryption Specification is:
91  *
92  *    filename:$zip3$*Ty*Al*Bi*Ma*Sa*Erd*Le*DF*Au*Fn
93  *    Ty = type (0) and ignored.
94  *    Al = algorithm (1 for AES)
95  *    Bi = bit length (128/192/256 bit)
96  *    Ma = magic (file magic), reserved, must be '0' now
97  *    Sa = salt(hex), 12 or 16 bytes of IV data
98  *    Erd = encrypted random data (max. 256 bytes)
99  *    Le = real compr len (hex) length of compressed/encrypted data (field DF), unused currently
100  *    DF = compressed data DF can be Le*2 hex bytes, and if so, then it is the ENTIRE file blob written 'inline', unused currently
101  *    Au = authentication code, a 8 byte hex value that contains a CRC32 checksum, unused currently
102  *    Fn = filename within zip file
103  */
104 
105 #include <stdint.h>
106 #include <stdio.h>
107 #include <stdlib.h>
108 #include "arch.h"
109 #if !AC_BUILT || HAVE_LIMITS_H
110 #include <limits.h>
111 #endif
112 #include <errno.h>
113 #include <string.h>
114 #include <assert.h>
115 #include <ctype.h>
116 #if (!AC_BUILT || HAVE_UNISTD_H) && !_MSC_VER
117 #include <unistd.h>
118 #endif
119 
120 #include "common.h"
121 #include "jumbo.h"
122 #include "formats.h"
123 #include "memory.h"
124 #include "pkzip.h"
125 #ifdef _MSC_VER
126 #include "missing_getopt.h"
127 #endif
128 #include "johnswap.h"
129 
/*
 * Option switches read by the functions below (they are assigned outside
 * the code shown here -- presumably by command-line parsing; TODO confirm):
 *   checksum_only          process_old_zip() emits only the short 'partial'
 *                          (DT == 1) records and skips the full inline record.
 *   use_magic              when zero, magic_type() always returns 0.
 *   force_2_byte_checksum  LoadZipBlob() forces check_bytes to 2.
 */
static int checksum_only = 0, use_magic = 0;
static int force_2_byte_checksum = 0;
/*
 * ascii_fname: member name for which magic_type() returns 255.
 * only_fname:  when set, LoadZipBlob() loads only the member with this name.
 */
static char *ascii_fname, *only_fname;

/*
 * Parallel tables used by magic_type(): MagicTypes[i] is an upper-case file
 * extension and MagicToEnum[i] is the magic value returned for it.  Index 0
 * is a placeholder (the lookup starts at index 1) and the NULL entry ends
 * the list.
 */
static char *MagicTypes[] = { "", "DOC", "XLS", "DOT", "XLT", "EXE", "DLL", "ZIP", "BMP", "DIB", "GIF", "PDF", "GZ", "TGZ", "BZ2", "TZ2", "FLV", "SWF", "MP3", NULL };
static int  MagicToEnum[] = {  0,   1,     1,     1,     1,     2,     2,     3,     4,     4,     5,     6,     7,    7,     8,     8,     9,     10,    11,  0};
136 
/*
 * Write the first len bytes at str to stdout as two lowercase hex digits
 * each, with no separator and no terminator.  Nothing is written when len
 * is zero or negative.
 */
static void print_hex_inline(unsigned char *str, int len)
{
	const unsigned char *byte = str;
	int remaining;

	for (remaining = len; remaining > 0; --remaining)
		printf("%02x", *byte++);
}
143 
144 static void process_old_zip(const char *fname);
process_file(const char * fname)145 static void process_file(const char *fname)
146 {
147 	unsigned char filename[1024];
148 	FILE *fp;
149 	uint64_t i;
150 	char *cur = NULL, *cp;
151 	uint64_t best_len = 0xffffffff;
152 
153 
154 	if (!(fp = fopen(fname, "rb"))) {
155 		fprintf(stderr, "! %s : %s\n", fname, strerror(errno));
156 		return;
157 	}
158 
159 	while (!feof(fp)) {
160 		uint32_t id = fget32LE(fp);
161 		uint32_t store = 0;
162 
163 		if (id == 0x04034b50UL) {	/* local header */
164 			uint16_t version = fget16LE(fp);
165 			uint16_t flags = fget16LE(fp);
166 			uint16_t compression_method = fget16LE(fp);
167 			uint16_t lastmod_time = fget16LE(fp);
168 			uint16_t lastmod_date = fget16LE(fp);
169 			uint32_t crc = fget32LE(fp);
170 			uint64_t compressed_size = fget32LE(fp);
171 			uint64_t uncompressed_size = fget32LE(fp);
172 			uint16_t filename_length = fget16LE(fp);
173 			uint16_t extrafield_length = fget16LE(fp);
174 			/* unused variables */
175 			(void) version;
176 			(void) lastmod_time;
177 			(void) lastmod_date;
178 			(void) crc;
179 			(void) uncompressed_size;
180 
181 			if (filename_length > 250) {
182 				fprintf(stderr, "! %s: Invalid zip file, filename length too long!\n", fname);
183 				return;
184 			}
185 			if (fread(filename, 1, filename_length, fp) != filename_length) {
186 				fprintf(stderr, "Error, in fread of file data!\n");
187 				goto cleanup;
188 			}
189 			filename[filename_length] = 0;
190 
191 			if (compression_method == 99) {	/* AES encryption */
192 #define AES_EXTRA_DATA_LENGTH 11  // http://www.winzip.com/aes_info.htm#authentication-code
193 				uint64_t real_cmpr_len;
194 				uint16_t efh_id;
195 				uint16_t efh_datasize;
196 				uint16_t efh_vendor_version = 0;
197 				uint16_t efh_vendor_id = 0;
198 				char efh_aes_strength = 0;
199 				uint16_t actual_compression_method = 0;
200 				unsigned char salt[16], d;
201 				char *bname;
202 				int found = 0;
203 				int magic_enum = 0;  // reserved at 0 for now, we are not computing this (yet).
204 
205 				// There could be multiple extra fields, so need to process them all.
206 				while (!ferror(fp)  && extrafield_length > 0) {
207 					efh_id = fget16LE(fp);
208 					efh_datasize = fget16LE(fp);
209 
210 					// Adjust the bytes processed for id, size and acutal data so the
211 					// file pointer is moved on correctly,
212 					// - 2 bytes for the efh_id
213 					// - 2 bytes for the efh_datasize
214 					extrafield_length = extrafield_length - 2 - 2 - efh_datasize;
215 					if (efh_id != 0x9901) {
216 #if DEBUG
217 						fprintf(stderr, "[DEBUG] Skipping over efh_id (%x) with size %d.\n", efh_id, efh_datasize);
218 #endif
219 						fseek(fp, efh_datasize, SEEK_CUR);
220 					} else {
221 						found = 1;
222 						// Data size: this value is currently 7, but because it is possible that this
223 						// specification will be modified in the future to store additional data in
224 						// this extra field, vendors should not assume that it will always remain 7.
225 						if (efh_datasize != 7) {
226 							fprintf(stderr, "AES_EXTRA_DATA_LENGTH is not 11 for %s, please report this to us!\n", fname);
227 							goto cleanup;
228 						}
229 						efh_vendor_version = fget16LE(fp);
230 						efh_vendor_id = fget16LE(fp);
231 						efh_aes_strength = fgetc(fp);
232 						actual_compression_method = fget16LE(fp);
233 					}
234 				}
235 				if (!found)
236 					goto cleanup;
237 
238 				bname = jtr_basename(fname);
239 				cp = cur;
240 				if (best_len < compressed_size) {
241 #if DEBUG
242 					fprintf(stderr, "This buffer not used, it is not 'best' size\n");
243 #endif
244 				} else {
245 					store = 1;
246 					best_len = compressed_size;
247 					MEM_FREE(cur);
248 					cur = mem_alloc(compressed_size * 2 + 400);
249 					cp = cur;
250 				}
251 
252 #if DEBUG
253 				fprintf(stderr,
254 				    "%s/%s is using AES encryption, extrafield_length is %d\n",
255 				    bname, filename, extrafield_length);
256 #endif
257 				/* unused variables */
258 				(void) efh_id;
259 				(void) efh_datasize;
260 				(void) efh_vendor_version;
261 				(void) efh_vendor_id;
262 				(void) actual_compression_method; /* we need this!! */
263 
264 				if (store)
265 					cp += sprintf(cp, "%s/%s:$zip2$*0*%x*%x*",
266 					              bname, filename, efh_aes_strength,
267 					              magic_enum);
268 				if (sizeof(salt) < 4 + 4 * efh_aes_strength ||
269 					fread(salt, 1, 4+4*efh_aes_strength, fp) != 4+4*efh_aes_strength) {
270 						fprintf(stderr, "Error, in fread of file data!\n");
271 						goto cleanup;
272 				}
273 
274 				for (i = 0; i < 4+4*efh_aes_strength; i++) {
275 					if (store)
276 						cp += sprintf(cp, "%c%c",
277 						              itoa16[ARCH_INDEX(salt[i] >> 4)],
278 						              itoa16[ARCH_INDEX(salt[i] & 0x0f)]);
279 				}
280 				if (store)
281 					cp += sprintf(cp, "*");
282 				// since in the format we read/compare this one, we do it char by
283 				// char, so there is no endianity swapping needed. (validator)
284 				for (i = 0; i < 2; i++) {
285 					d = fgetc(fp);
286 					if (store)
287 						cp += sprintf(cp, "%c%c",
288 						              itoa16[ARCH_INDEX(d >> 4)],
289 						              itoa16[ARCH_INDEX(d & 0x0f)]);
290 				}
291 				// Password verification value -> 2 bytes, Salt value -> (4 + 4 * efh_aes_strength)
292 				real_cmpr_len = compressed_size - 2 - (4 + 4 * efh_aes_strength) - AES_EXTRA_DATA_LENGTH;
293 				// not quite sure why the real_cmpr_len is 'off by 1' ????
294 				++real_cmpr_len;
295 				if (store)
296 					cp += sprintf(cp, "*%"PRIx64"*", real_cmpr_len);
297 
298 				for (i = 0; i < real_cmpr_len; i++) {
299 					d = fgetc(fp);
300 					if (store)
301 						cp += sprintf(cp, "%c%c",
302 						              itoa16[ARCH_INDEX(d >> 4)],
303 						              itoa16[ARCH_INDEX(d & 0x0f)]);
304 				}
305 				if (store)
306 					cp += sprintf(cp, "*");
307 				for (i = 0; i < 10; i++) {
308 					d = fgetc(fp);
309 					if (store)
310 						cp += sprintf(cp, "%c%c",
311 						              itoa16[ARCH_INDEX(d >> 4)],
312 						              itoa16[ARCH_INDEX(d & 0x0f)]);
313 				}
314 				for (d = ' ' + 1; d < '~'; ++d) {
315 					if (!strchr(fname, d) && d != ':' && !isxdigit(d))
316 						break;
317 				}
318 				if (store)
319 					cp += sprintf(cp, "*$/zip2$:%s:%s:%s\n",
320 					              filename, bname, fname);
321 				if (cur) {
322 					printf("%s", cur);
323 					MEM_FREE(cur);  // dirty hack to avoid printing of last hash twice
324 				}
325 			} else if (flags & 1 && (version == 51 || version == 52 || version >= 61)) {	/* Strong Encryption?, APPNOTE-6.3.4.TXT, bit 6 check doesn't really work */
326 				// fseek(fp, filename_length, SEEK_CUR);
327 				// fseek(fp, extrafield_length, SEEK_CUR);
328 				// continue;
329 				unsigned char iv[16];
330 				unsigned char Erd[256];
331 				uint32_t Size;
332 				uint32_t Format;
333 				uint16_t AlgId;
334 				uint16_t Bitlen;
335 				uint16_t Flags;
336 				uint16_t ErdSize;
337 				uint32_t Reserved1;
338 				uint16_t VSize;
339 				uint16_t IVSize;
340 				char *bname;
341 				long previous_position;
342 
343 				// unused
344 				(void) Flags;
345 				(void) Bitlen;
346 				(void) Reserved1;
347 				(void) Size;
348 
349 				bname = jtr_basename(fname);
350 				previous_position = ftell(fp);
351 				IVSize = fget16LE(fp);
352 				if (IVSize > sizeof(iv))
353 					goto bail;
354 				if (fread(iv, 1, IVSize, fp) != IVSize)
355 					goto bail;
356 				Size = fget32LE(fp);
357 				Format = fget16LE(fp);
358 				if (Format != 3) {
359 					goto bail;
360 				}
361 				AlgId = fget16LE(fp);
362 				if (AlgId == 0x660E || AlgId == 0x660F || AlgId ==  0x6610)
363 					AlgId = 1;
364 				else if (AlgId == 0x6603 || AlgId == 0x6609 || AlgId == 0x6720 || AlgId == 0x6721 || AlgId == 0x6801) {
365 					fprintf(stderr, "AlgId (%x) is currently unsupported, please report this to us!\n", AlgId);
366 					goto bail;
367 				} else
368 					goto bail;
369 				if (IVSize == 0) {
370 					memset(iv, 0, 16);
371 #if !ARCH_LITTLE_ENDIAN
372 					crc = JOHNSWAP(crc);
373 					uncompressed_size = JOHNSWAP64(uncompressed_size);
374 #endif
375 					memcpy(iv, &crc, 4);
376 					memcpy(iv + 4, &uncompressed_size, 8);
377 					IVSize = 12;
378 				}
379 				Bitlen = fget16LE(fp);
380 				Flags = fget16LE(fp);
381 				ErdSize = fget16LE(fp);
382 				if (ErdSize > sizeof(Erd))
383 					goto bail;
384 				if (fread(Erd, 1, ErdSize, fp) != ErdSize)
385 					goto bail;
386 				Reserved1 = fget32LE(fp);
387 				if (Reserved1 != 0) {
388 					fprintf(stderr, "Reserved1 is %u (non-zero), please report this bug to us!\n", Reserved1);
389 					goto bail;
390 				}
391 				VSize = fget16LE(fp);
392 				fseek(fp, VSize, SEEK_CUR);
393 
394 				printf("%s:$zip3$*%d*%d*%d*%d*", bname, 0, AlgId, Bitlen, 0);
395 				print_hex_inline(iv, IVSize);  // getting this right isn't important ;)
396 				printf("*");
397 				print_hex_inline(Erd, ErdSize);
398 				printf("*0*0*0*%s\n", filename);
399 				continue;
400 bail:
401 				fseek(fp, previous_position, SEEK_SET);
402 				fseek(fp, filename_length, SEEK_CUR);
403 				fseek(fp, extrafield_length, SEEK_CUR);
404 				fseek(fp, compressed_size, SEEK_CUR);
405 			}
406 			if (flags & 1) {	/* old encryption */
407 				fclose(fp);
408 				fp = 0;
409 				process_old_zip(fname);
410 				return;
411 			} else {
412 				fprintf(stderr, "%s/%s is not encrypted!\n",
413 				        jtr_basename(fname), filename);
414 				fseek(fp, extrafield_length, SEEK_CUR);
415 				fseek(fp, compressed_size, SEEK_CUR);
416 			}
417 		} else if (id == 0x08074b50UL) {	/* data descriptor */
418 			fseek(fp, 12, SEEK_CUR);
419 		} else if (id == 0x02014b50UL) {	/* central directory structures */
420 			/* uint16_t version_maker = fget16LE(fp);
421 			uint16_t version_needed = fget16LE(fp);
422 			uint16_t filename_length;
423 			uint16_t extrafield_length;
424 			uint16_t comment_length;
425 			(void) fget16LE(fp);
426 			(void) fget16LE(fp);
427 			(void) fget16LE(fp);
428 			(void) fget16LE(fp);
429 			(void) fget32LE(fp);
430 			(void) fget32LE(fp);
431 			(void) fget32LE(fp);
432 			filename_length = fget16LE(fp);
433 			extrafield_length = fget16LE(fp);
434 			comment_length = fget16LE(fp);
435 			(void) fget16LE(fp);
436 			(void) fget16LE(fp);
437 			(void) fget32LE(fp);
438 			(void) fget32LE(fp);
439 			(void) version_maker;
440 			(void) version_needed;
441 
442 			if (fread(filename, 1, filename_length, fp) != filename_length) {
443 				fprintf(stderr, "Error, in fread of file data!\n");
444 				goto cleanup;
445 			}
446 			filename[filename_length] = 0;
447 			fseek(fp, extrafield_length, SEEK_CUR);
448 			fseek(fp, comment_length, SEEK_CUR); */
449 			goto cleanup;
450 		} else if (id == 0x06054b50UL) { /* end of central dir  */
451 			goto cleanup;
452 		}
453 	}
454 
455 cleanup:
456 	if (cur)
457 		printf("%s", cur);
458 	MEM_FREE(cur);
459 	fclose(fp);
460 }
461 
/* Instead of reusing anything from process_file, as soon as process_file detects an entry
 * encrypted with the old-style (PKZIP) scheme it closes the file and calls process_old_zip.
 * That function handles the older pkzip encryption, while process_file itself only handles
 * the WinZip AES and PKWARE Strong Encryption entries.
 */
/*
 * One archive member as collected by LoadZipBlob() for the old PKZIP format.
 *
 *   hash_data   heap buffer with the cmp_len bytes of member data read from the file
 *   file_name   heap copy of the member's name
 *   crc         CRC32 from the local header (or as found by scan_for_eod())
 *   offset      file offset of the member's PK\3\4 signature
 *   offex       30 + file name length + extra field length
 *   cmp_len     compressed length
 *   decomp_len  uncompressed length
 *   magic_type  value returned by magic_type() for the member name
 *   cmptype     compression method field of the local header
 *   chksum      the two top bytes of crc as 4 hex digits plus NUL
 *   chksum2     the two bytes of the last-modified time as 4 hex digits plus NUL
 */
typedef struct _zip_ptr
{
	char         *hash_data;
	char         *file_name;
	uint32_t      crc;
	uint64_t      offset, offex;
	uint64_t      cmp_len, decomp_len;
	uint16_t      magic_type, cmptype;
	char          chksum[5];
	char          chksum2[5];
} zip_ptr;
477 
/*
 * Per-archive checksum settings shared by process_old_zip() and LoadZipBlob().
 *
 *   unix_made     set to 1 once an Info-ZIP extra field id is seen
 *   check_in_crc  starts at 1; cleared when an Info-ZIP extra field id is seen
 *                 (the log line then reports "TS_chk")
 *   check_bytes   number of valid checksum bytes written to the hash line:
 *                 starts at 1, becomes 2 for Info-ZIP archives or when
 *                 force_2_byte_checksum is set
 */
typedef struct _zip_file
{
	int unix_made;
	int check_in_crc;
	int check_bytes;
} zip_file;
484 
magic_type(const char * filename)485 static int magic_type(const char *filename) {
486 	char *Buf = str_alloc_copy((char*)filename), *cp;
487 	int i;
488 
489 	if (!use_magic)
490 		return 0;
491 
492 	strupr(Buf);
493 	if (ascii_fname && !strcasecmp(Buf, ascii_fname))
494 		return 255;
495 
496 	cp = strrchr(Buf, '.');
497 	if (!cp)
498 		return 0;
499 	++cp;
500 	for (i = 1; MagicTypes[i]; ++i)
501 		if (!strcmp(cp, MagicTypes[i]))
502 			return MagicToEnum[i];
503 	return 0;
504 }
505 
/*
 * Write len bytes starting at p to stdout as two lowercase hex digits each,
 * followed by a single '*' field separator (written even when len is 0).
 */
static void print_hex(unsigned char *p, uint64_t len) {
	uint64_t idx;

	for (idx = 0; idx < len; idx++)
		printf("%02x", p[idx]);
	printf("*");
}
511 
512 // If archive was created from a non-seekable stream, we need to find CRC and
513 // sizes AFTER file data which means we're in a hen-and-egg situation since we
514 // don't know the size... I think the below is enough but there may be edge
515 // cases where we need to also recognize some other kind of start-of-whatever
516 // and seek back 16 bytes.
scan_for_eod(FILE ** fp,zip_ptr * p,int size64)517 static void scan_for_eod (FILE **fp, zip_ptr *p, int size64)
518 {
519 	long saved_pos = ftell(*fp);
520 
521 	fprintf(stderr, "Scanning for EOD... ");
522 	while (!feof(*fp)) {
523 		if (fgetc(*fp) == 0x50) {
524 			if (fgetc(*fp) == 0x4b) {
525 				if (fgetc(*fp) == 0x07) {
526 					if (fgetc(*fp) == 0x08) {
527 						fprintf(stderr, "FOUND Extended local header\n");
528 						p->crc = fget32LE(*fp);
529 						if (size64) {
530 							p->cmp_len = fget64LE(*fp);
531 							p->decomp_len = fget64LE(*fp);
532 						} else {
533 							p->cmp_len = fget32LE(*fp);
534 							p->decomp_len = fget32LE(*fp);
535 						}
536 						break;
537 					}
538 				}
539 				else if (fgetc(*fp) == 0x03) {
540 					if (fgetc(*fp) == 0x04) {
541 						fprintf(stderr, "FOUND next Local file header\n");
542 						if (size64)
543 							fseek(*fp, -24, SEEK_CUR);
544 						else
545 							fseek(*fp, -16, SEEK_CUR);
546 						p->crc = fget32LE(*fp);
547 						if (size64) {
548 							p->cmp_len = fget64LE(*fp);
549 							p->decomp_len = fget64LE(*fp);
550 						} else {
551 							p->cmp_len = fget32LE(*fp);
552 							p->decomp_len = fget32LE(*fp);
553 						}
554 						break;
555 					}
556 				}
557 				else if (fgetc(*fp) == 0x01) {
558 					if (fgetc(*fp) == 0x02) {
559 						fprintf(stderr, "FOUND Central directory\n");
560 						if (size64)
561 							fseek(*fp, -24, SEEK_CUR);
562 						else
563 							fseek(*fp, -16, SEEK_CUR);
564 						p->crc = fget32LE(*fp);
565 						if (size64) {
566 							p->cmp_len = fget64LE(*fp);
567 							p->decomp_len = fget64LE(*fp);
568 						} else {
569 							p->cmp_len = fget32LE(*fp);
570 							p->decomp_len = fget32LE(*fp);
571 						}
572 						break;
573 					}
574 				}
575 			}
576 		}
577 	}
578 
579 	fseek(*fp, saved_pos, SEEK_SET);
580 }
581 
/*
 * Parse the local file header whose 4-byte signature has just been read
 * from fp and, if the member is one we handle, load its data.
 *
 *   fp         archive, positioned right after a PK\3\4 signature
 *   p          receives the member description; it is zeroed first
 *   zfp        per-archive checksum settings, updated when an Info-ZIP
 *              extra field is seen or force_2_byte_checksum is set
 *   zip_fname  archive name, used in messages only
 *
 * Returns 1 when the member was loaded; p->hash_data and p->file_name are
 * then heap buffers owned by the caller.  Returns 0 otherwise (read error,
 * member filtered out by only_fname, not encrypted, or unhandled version);
 * in that case p owns no memory and, where the size is known, the member's
 * data has been skipped.
 */
static int LoadZipBlob(FILE *fp, zip_ptr *p, zip_file *zfp, const char *zip_fname)
{
	uint16_t version,flags,lastmod_time,lastmod_date,filename_length,extrafield_length;
	unsigned char filename[1024];
	int size64 = 0;

	memset(p, 0, sizeof(*p));

	p->offset = ftell(fp)-4;
	version = fget16LE(fp);
	flags = fget16LE(fp);
	p->cmptype = fget16LE(fp);
	lastmod_time = fget16LE(fp);
	lastmod_date = fget16LE(fp);
	p->crc = fget32LE(fp);
	p->cmp_len = fget32LE(fp);
	p->decomp_len = fget32LE(fp);
	filename_length = fget16LE(fp);
	extrafield_length = fget16LE(fp);
	p->hash_data = NULL;
	p->file_name = NULL;
	/* unused variables */
	(void) lastmod_date;

	/* ">=" leaves room for the terminating NUL stored below */
	if (filename_length >= sizeof(filename) ||
		fread(filename, 1, filename_length, fp) != filename_length) {
		fprintf(stderr, "Error, fread could not read the data from the file: %s\n", zip_fname);
		return 0;
	}
	filename[filename_length] = 0;
	p->magic_type = magic_type((char*)filename);

	p->offex = 30 + filename_length + extrafield_length;

	if (!only_fname || !strcmp(only_fname, (char*)filename))
		fprintf(stderr, "ver %d.%d ", version / 10, version % 10);

	// Only members flagged as encrypted with one of these version values are handled.
	// 0x314 (788) was seen at 2012 CMIYC ?? I have to look into that one.
	if ( (flags & 1) &&
	     (version == 10 || version == 20 || version == 45 || version == 788)) {
		/* wider than the 16-bit lengths it sums, so it cannot wrap */
		uint32_t extra_len_used = 0;

		if (flags & (1 << 3)) {
			while (extra_len_used < extrafield_length) {
				uint16_t efh_id = fget16LE(fp);
				uint16_t efh_datasize = fget16LE(fp);

				if (!only_fname || !strcmp(only_fname, (char*)filename))
					fprintf(stderr, "efh %04x ", efh_id);

				/* only read the two 64-bit sizes if the field really holds them */
				if (efh_id == 0x0001 && efh_datasize >= 16) {
					size64 = 1;
					p->decomp_len = fget64LE(fp);
					p->cmp_len = fget64LE(fp);
					extra_len_used += 16;
					efh_datasize -= 16;
				}
				fseek(fp, efh_datasize, SEEK_CUR);

				extra_len_used += 4 + efh_datasize;
				if (efh_id == 0x07c8 ||  // Info-ZIP Macintosh (old, J. Lee)
					efh_id == 0x334d ||  // Info-ZIP Macintosh (new, D. Haase's 'Mac3' field)
					efh_id == 0x4d49 ||  // Info-ZIP OpenVMS (obsolete)
					efh_id == 0x5855 ||  // Info-ZIP UNIX (original; also OS/2, NT, etc.)
					efh_id == 0x6375 ||  // Info-ZIP UTF-8 comment field
					efh_id == 0x7075 ||  // Info-ZIP UTF-8 name field
					efh_id == 0x7855 ||  // Info-ZIP UNIX (16-bit UID/GID info)
					efh_id == 0x7875)    // Info-ZIP UNIX 3rd generation (generic UID/GID, ...)

					// 7zip ALSO is 2 byte checksum, but I have no way to find them.  NOTE, it is 2 bytes of CRC, not timestamp like InfoZip.
					// OLD winzip (I think 8.01 or before), is also supposed to be 2 byte.
					// old v1 pkzip (the DOS builds) are 2 byte checksums.
				{
					zfp->unix_made = 1;
					zfp->check_bytes = 2;
					zfp->check_in_crc = 0;
				}
			}
		}
		else if (extrafield_length)
			fseek(fp, extrafield_length, SEEK_CUR);

		/* sizes not in the header: look for them after the data */
		if (p->cmp_len == 0 && p->decomp_len == 0)
			scan_for_eod(&fp, p, size64);

		if (only_fname && strcmp(only_fname, (char*)filename)) {
			fseek(fp, p->cmp_len, SEEK_CUR);
			return 0;
		}

		if (force_2_byte_checksum)
			zfp->check_bytes = 2;

		fprintf(stderr,
		        "%s/%s PKZIP%s Encr:%s%s cmplen=%"PRIu64", decmplen=%"PRIu64", crc=%X\n",
		        jtr_basename(zip_fname), filename,
		        size64 ? "64" : "",
		        zfp->check_bytes == 2 ? " 2b chk," : "",
		        zfp->check_in_crc ? "" : " TS_chk,",
		        p->cmp_len, p->decomp_len, p->crc);

		MEM_FREE(p->hash_data);
		MEM_FREE(p->file_name);
		p->hash_data = mem_alloc(p->cmp_len + 1);
		p->file_name = mem_alloc(strlen((char*)filename) + 1);
		strcpy(p->file_name, (char*)filename);
		if (fread(p->hash_data, 1, p->cmp_len, fp) != p->cmp_len) {
			fprintf(stderr, "Error, fread could not read the data from the file: %s\n", zip_fname);
			/* the caller ignores p on failure, so release the buffers here */
			MEM_FREE(p->hash_data);
			MEM_FREE(p->file_name);
			return 0;
		}

		// Ok, now set checksum bytes.  This will depend upon if from crc, or from timestamp
		sprintf(p->chksum, "%02x%02x", (p->crc>>24)&0xFF, (p->crc>>16)&0xFF);
		sprintf(p->chksum2, "%02x%02x", lastmod_time>>8, lastmod_time&0xFF);

		return 1;
	}

	if (p->cmp_len == 0 && p->decomp_len == 0 && flags & (1 << 3))
		scan_for_eod(&fp, p, version >= 45);

	fprintf(stderr, "%s/%s is not encrypted, or stored with non-handled compression type\n", zip_fname, filename);
	fseek(fp, extrafield_length, SEEK_CUR);
	fseek(fp, p->cmp_len, SEEK_CUR);

	return 0;
}
710 
process_old_zip(const char * fname)711 static void process_old_zip(const char *fname)
712 {
713 	FILE *fp;
714 	int count_of_hashes = 0;
715 	zip_ptr hashes[3], curzip;
716 	zip_file zfp;
717 
718 	memset(hashes, 0, sizeof(hashes));
719 
720 	zfp.check_in_crc = 1;
721 	zfp.check_bytes = 1;
722 	zfp.unix_made = 0;
723 
724 	if (!(fp = fopen(fname, "rb"))) {
725 		fprintf(stderr, "! %s : %s\n", fname, strerror(errno));
726 		return;
727 	}
728 
729 	while (!feof(fp)) {
730 		uint32_t id = fget32LE(fp);
731 
732 		if (id == 0x04034b50UL) {	/* local header */
733 			if (LoadZipBlob(fp, &curzip, &zfp, fname) && curzip.decomp_len > 3) {
734 				if (!count_of_hashes)
735 					memcpy(&(hashes[count_of_hashes++]), &curzip, sizeof(curzip));
736 				else {
737 					if (count_of_hashes == 1) {
738 						if (curzip.cmp_len < hashes[0].cmp_len) {
739 							memcpy(&(hashes[count_of_hashes++]), &(hashes[0]), sizeof(curzip));
740 							memcpy(&(hashes[0]), &curzip, sizeof(curzip));
741 						} else
742 							memcpy(&(hashes[count_of_hashes++]), &curzip, sizeof(curzip));
743 					}
744 					else if (count_of_hashes == 2) {
745 						if (curzip.cmp_len < hashes[0].cmp_len) {
746 							memcpy(&(hashes[count_of_hashes++]), &(hashes[1]), sizeof(curzip));
747 							memcpy(&(hashes[1]), &(hashes[0]), sizeof(curzip));
748 							memcpy(&(hashes[0]), &curzip, sizeof(curzip));
749 						} else if (curzip.cmp_len < hashes[1].cmp_len) {
750 							memcpy(&(hashes[count_of_hashes++]), &(hashes[1]), sizeof(curzip));
751 							memcpy(&(hashes[1]), &curzip, sizeof(curzip));
752 						} else
753 							memcpy(&(hashes[count_of_hashes++]), &curzip, sizeof(curzip));
754 					}
755 					else {
756 						int done = 0;
757 						if (curzip.magic_type && curzip.cmp_len > hashes[0].cmp_len) {
758 							// if we have a magic type, we will replace any NON magic type, for the 2nd and 3rd largest, without caring about
759 							// the size.
760 							if (hashes[1].magic_type == 0) {
761 								if (hashes[2].cmp_len < curzip.cmp_len) {
762 									MEM_FREE(hashes[1].hash_data);
763 									memcpy(&(hashes[1]), &(hashes[2]), sizeof(curzip));
764 									memcpy(&(hashes[2]), &curzip, sizeof(curzip));
765 									done = 1;
766 								} else {
767 									MEM_FREE(hashes[1].hash_data);
768 									memcpy(&(hashes[1]), &curzip, sizeof(curzip));
769 									done = 1;
770 								}
771 							} else if (hashes[2].magic_type == 0) {
772 								if (hashes[1].cmp_len < curzip.cmp_len) {
773 									MEM_FREE(hashes[2].hash_data);
774 									memcpy(&(hashes[2]), &curzip, sizeof(curzip));
775 									done = 1;
776 								} else {
777 									MEM_FREE(hashes[2].hash_data);
778 									memcpy(&(hashes[2]), &(hashes[1]), sizeof(curzip));
779 									memcpy(&(hashes[1]), &curzip, sizeof(curzip));
780 									done = 1;
781 								}
782 							}
783 						}
784 						if (!done && curzip.cmp_len < hashes[0].cmp_len) {
785 							// we 'only' replace the smallest zip, and always keep as many any other magic as possible.
786 							if (hashes[0].magic_type == 0) {
787 								MEM_FREE(hashes[0].hash_data);
788 								memcpy(&(hashes[0]), &curzip, sizeof(curzip));
789 							} else {
790 								// Ok, the 1st is a magic, we WILL keep it.
791 								if (hashes[1].magic_type) {  // Ok, we found our 2
792 									MEM_FREE(hashes[2].hash_data);
793 									memcpy(&(hashes[2]), &(hashes[1]), sizeof(curzip));
794 									memcpy(&(hashes[1]), &(hashes[0]), sizeof(curzip));
795 									memcpy(&(hashes[0]), &curzip, sizeof(curzip));
796 								} else if (hashes[2].magic_type) {  // Ok, we found our 2
797 									MEM_FREE(hashes[1].hash_data);
798 									memcpy(&(hashes[1]), &(hashes[0]), sizeof(curzip));
799 									memcpy(&(hashes[0]), &curzip, sizeof(curzip));
800 								} else {
801 									// found none.  So we will simply roll them down (like when #1 was a magic also).
802 									MEM_FREE(hashes[2].hash_data);
803 									memcpy(&(hashes[2]), &(hashes[1]), sizeof(curzip));
804 									memcpy(&(hashes[1]), &(hashes[0]), sizeof(curzip));
805 									memcpy(&(hashes[0]), &curzip, sizeof(curzip));
806 								}
807 							}
808 						}
809 					}
810 				}
811 			}
812 		} else if (id == 0x08074b50UL) {	/* data descriptor */
813 			fseek(fp, 12, SEEK_CUR);
814 		} else if (id == 0x02014b50UL || id == 0x06054b50UL) {	/* central directory structures */
815 			goto print_and_cleanup;
816 		}
817 	}
818 
819 print_and_cleanup:;
820 	if (count_of_hashes) {
821 		int i = 1;
822 		char *bname;
823 		static int once;
824 		char *filenames = strdup(hashes[0].file_name);
825 
826 		bname = jtr_basename(fname);
827 
828 		printf("%s%s%s:$pkzip2$%x*%x*", bname,
829 		       count_of_hashes == 1 ? "/" : "",
830 		       count_of_hashes == 1 ? hashes[0].file_name : "",
831 		       count_of_hashes, zfp.check_bytes);
832 		if (checksum_only)
833 			i = 0;
834 		for (; i < count_of_hashes; ++i) {
835 			uint64_t len = 12+24;
836 
837 			if (i) {
838 				filenames = mem_realloc(filenames,
839 				                        strlen(filenames) +
840 				                        strlen(hashes[i].file_name) + 3);
841 				strcat(filenames, ", ");
842 				strcat(filenames, hashes[i].file_name);
843 			}
844 			if (hashes[i].magic_type)
845 				len = 12+180;
846 			if (len > hashes[i].cmp_len)
847 				len = hashes[i].cmp_len; // even though we 'could' output a '2', we do not.  We only need one full inflate CRC check file.
848 			printf("1*%x*%x*%"PRIx64"*%s*%s*", hashes[i].magic_type, hashes[i].cmptype, (uint64_t)len, hashes[i].chksum, hashes[i].chksum2);
849 			print_hex((unsigned char*)hashes[i].hash_data, len);
850 		}
851 		// Ok, now output the 'little' one (the first).
852 		if (!checksum_only) {
853 			printf("%x*%x*%"PRIx64"*%"PRIx64"*%x*%"PRIx64"*%"PRIx64"*%x*", 2, hashes[0].magic_type, hashes[0].cmp_len, hashes[0].decomp_len, hashes[0].crc, hashes[0].offset, hashes[0].offex, hashes[0].cmptype);
854 			printf("%"PRIx64"*%s*%s*", hashes[0].cmp_len, hashes[0].chksum, hashes[0].chksum2);
855 			print_hex((unsigned char*)hashes[0].hash_data, hashes[0].cmp_len);
856 		}
857 		if (count_of_hashes > 1)
858 			printf("$/pkzip2$::%s:%s:%s\n", bname, filenames, fname);
859 		else
860 			printf("$/pkzip2$:%s:%s::%s\n", filenames, bname, fname);
861 
862 		if (count_of_hashes > 1 && !once++)
863 			fprintf(stderr,
864 "NOTE: It is assumed that all files in each archive have the same password.\n"
865 "If that is not the case, the hash may be uncrackable. To avoid this, use\n"
866 "option -o to pick a file at a time.\n");
867 
868 		for (i = 0; i < count_of_hashes; ++i) {
869 			MEM_FREE(hashes[i].hash_data);
870 			MEM_FREE(hashes[i].file_name);
871 		}
872 		MEM_FREE(filenames);
873 	}
874 	fclose(fp);
875 }
876 
/*
 * Print the command-line help text for this tool to stderr.
 *
 * name: program name substituted into the "Usage:" line (the caller in this
 *       file passes argv[0]).
 *
 * Always returns EXIT_FAILURE, so a caller can bail out with
 * "return usage(...)" in a single statement.
 */
static int usage(char *name)
{
	/* Synopsis line; the only part of the text that depends on the argument. */
	fprintf(stderr, "Usage: %s [options] [zip file(s)]\n", name);

	/* Per-option help. */
	fprintf(stderr, "Options for 'old' PKZIP encrypted files only:\n");
	fprintf(stderr, " -a <filename>   This is a 'known' ASCII file. This can be faster, IF all\n");
	fprintf(stderr, "    files are larger, and you KNOW that at least one of them starts out as\n");
	fprintf(stderr, "    'pure' ASCII data.\n");
	fprintf(stderr, " -o <filename>   Only use this file from the .zip file.\n");
	fprintf(stderr, " -c This will create a 'checksum only' hash.  If there are many encrypted\n");
	fprintf(stderr, "    files in the .zip file, then this may be an option, and there will be\n");
	fprintf(stderr, "    enough data that false possitives will not be seen.  If the .zip is 2\n");
	fprintf(stderr, "    byte checksums, and there are 3 or more of them, then we have 48 bits\n");
	fprintf(stderr, "    knowledge, which 'may' be enough to crack the password, without having\n");
	fprintf(stderr, "    to force the user to have the .zip file present.\n");
	fprintf(stderr, " -m Use \"file magic\" as known-plain if applicable. This can be faster but\n");
	fprintf(stderr, "    not 100%% safe in all situations.\n");
	fprintf(stderr, " -2 Force 2 byte checksum computation.\n");

	/* Closing note about archives whose members use different passwords. */
	fprintf(stderr, "\nNOTE: By default it is assumed that all files in each archive have the same\n");
	fprintf(stderr, "password. If that's not the case, the produced hash may be uncrackable.\n");
	fprintf(stderr, "To avoid this, use -o option to pick a file at a time.\n");

	return EXIT_FAILURE;
}
900 
zip2john(int argc,char ** argv)901 int zip2john(int argc, char **argv)
902 {
903 	int c;
904 
905 	/* Parse command line */
906 	while ((c = getopt(argc, argv, "a:o:cm2")) != -1) {
907 		switch (c) {
908 		case 'a':
909 			ascii_fname = optarg;
910 			fprintf(stderr, "Using file %s as an 'ASCII' quick check file\n", ascii_fname);
911 			break;
912 		case 'o':
913 			only_fname = optarg;
914 			fprintf(stderr, "Using file %s as only file to check\n", only_fname);
915 			break;
916 		case 'c':
917 			checksum_only = 1;
918 			fprintf(stderr, "Outputing hashes that are 'checksum ONLY' hashes\n");
919 			break;
920 		case 'm':
921 			use_magic = 1;
922 			fprintf(stderr, "Using file 'magic' signatures if applicable (not 100%% safe)\n");
923 			break;
924 		case '2':
925 			force_2_byte_checksum = 1;
926 			fprintf(stderr, "Forcing a 2 byte checksum detection\n");
927 			break;
928 		case '?':
929 		default:
930 			return usage(argv[0]);
931 		}
932 	}
933 	argc -= optind;
934 	if (argc == 0)
935 		return usage(argv[0]);
936 	argv += optind;
937 
938 	while(argc--)
939 		process_file(*argv++);
940 
941 	cleanup_tiny_memory();
942 
943 	return EXIT_SUCCESS;
944 }
945