1 /*
2 
3 Copyright (c) 2011, 2012, Simon Howard
4 
5 Permission to use, copy, modify, and/or distribute this software
6 for any purpose with or without fee is hereby granted, provided
7 that the above copyright notice and this permission notice appear
8 in all copies.
9 
10 THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
11 WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
12 WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
13 AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
14 CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
15 LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
16 NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
17 CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18 
19  */
20 
21 #include <stdlib.h>
22 #include <string.h>
23 #include <ctype.h>
24 #include <time.h>
25 
26 #include "lha_endian.h"
27 #include "lha_file_header.h"
28 #include "ext_header.h"
29 #include "crc16.h"
30 
// Number of bytes read from the stream before the header level is
// known (see lha_file_header_read).
#define COMMON_HEADER_LEN 22 /* bytes */

// Minimum length of a level 0 header (with zero-length filename).
#define LEVEL_0_MIN_HEADER_LEN 22 /* bytes */

// Minimum length of a level 1 base header (with zero-length filename).
#define LEVEL_1_MIN_HEADER_LEN 25 /* bytes */

// Length of a level 2 base header.
#define LEVEL_2_HEADER_LEN 26 /* bytes */

// Length of a level 3 base header.
#define LEVEL_3_HEADER_LEN 32 /* bytes */

// Maximum length of a level 3 header (including extended headers).
#define LEVEL_3_MAX_HEADER_LEN (1024 * 1024) /* 1 MB */

// Length of a level 0 Unix extended area.
#define LEVEL_0_UNIX_EXTENDED_LEN 12 /* bytes */

// Length of a level 0 OS-9 extended area.
#define LEVEL_0_OS9_EXTENDED_LEN 22 /* bytes */

// Accessors for the raw header bytes. Both take a pointer to a header
// pointer (LHAFileHeader **). The arguments are fully parenthesised so
// that any expression may be passed safely.
#define RAW_DATA(hdr_ptr, off)  ((*(hdr_ptr))->raw_data[(off)])
#define RAW_DATA_LEN(hdr_ptr)   ((*(hdr_ptr))->raw_data_len)
56 
// Build a newly allocated string consisting of the header's path
// followed by its filename. A NULL path or filename is treated as an
// empty string.
//
// Returns NULL if the allocation fails. Otherwise the result comes from
// malloc() and must be released by the caller with free().

char *lha_file_header_full_path(LHAFileHeader *header)
{
	const char *dir;
	const char *name;
	size_t dir_len;
	size_t name_len;
	char *joined;

	dir = (header->path != NULL) ? header->path : "";
	name = (header->filename != NULL) ? header->filename : "";

	dir_len = strlen(dir);
	name_len = strlen(name);

	joined = malloc(dir_len + name_len + 1);

	if (joined == NULL) {
		return NULL;
	}

	// Plain copies of the already-measured strings: no format parsing
	// and no dependency on <stdio.h>. The second copy includes the
	// terminating NUL of 'name'.

	memcpy(joined, dir, dir_len);
	memcpy(joined + dir_len, name, name_len + 1);

	return joined;
}
85 
86 /**
87  * Given a file header with the filename set, split it into separate
88  * path and filename components, if necessary.
89  *
90  * @param header         Point to the file header structure.
91  * @return               Non-zero for success, or zero for failure.
92  */
93 
split_header_filename(LHAFileHeader * header)94 static int split_header_filename(LHAFileHeader *header)
95 {
96 	char *sep;
97 	char *new_filename;
98 
99 	// Is there a directory separator in the path?  If so, we need to
100 	// split into directory name and filename.
101 
102 	sep = strrchr(header->filename, '/');
103 
104 	if (sep != NULL) {
105 		new_filename = strdup(sep + 1);
106 
107 		if (new_filename == NULL) {
108 			return 0;
109 		}
110 
111 		*(sep + 1) = '\0';
112 		header->path = header->filename;
113 		header->filename = new_filename;
114 	}
115 
116 	return 1;
117 }
118 
// Verify the additive header checksum: the low eight bits of the sum
// of the 'header_len' bytes at 'header' must equal 'csum'.
// Returns non-zero if the checksum matches, zero otherwise.

static int check_l0_checksum(uint8_t *header, size_t header_len, size_t csum)
{
	unsigned int sum = 0;
	unsigned int pos = 0;

	while (pos < header_len) {
		sum += header[pos];
		++pos;
	}

	return csum == (sum & 0xff);
}
134 
135 // Perform full-header CRC check, based on CRC from "common" extended header.
136 
check_common_crc(LHAFileHeader * header)137 static int check_common_crc(LHAFileHeader *header)
138 {
139 	uint16_t crc;
140 
141 	crc = 0;
142 	lha_crc16_buf(&crc, header->raw_data, header->raw_data_len);
143 
144 	return crc == header->common_crc;
145 }
146 
// Decode an MS-DOS timestamp stored as a 32-bit value at 'buf'.
//
// Returns 0 if the stored value is zero; otherwise returns the result
// of mktime() for the decoded date and time, converted to unsigned int.

static unsigned int decode_ftime(uint8_t *buf)
{
	uint32_t raw;
	struct tm datetime;

	// Keep the packed value unsigned: shifting a signed value whose
	// top bit is set is undefined (left shift) or
	// implementation-defined (right shift).

	raw = lha_decode_uint32(buf);

	if (raw == 0) {
		return 0;
	}

	// Deconstruct the contents of the MS-DOS time value and populate the
	// 'datetime' structure. Note that 'mktime' generates a timestamp for
	// the local time zone: this is unfortunate, but probably the best
	// that can be done, due to the limited data stored in MS-DOS time
	// values.

	memset(&datetime, 0, sizeof(struct tm));

	// Bits 0-4 hold the seconds divided by two.
	datetime.tm_sec = (int) ((raw & 0x1f) << 1);
	datetime.tm_min = (int) ((raw >> 5) & 0x3f);
	datetime.tm_hour = (int) ((raw >> 11) & 0x1f);
	datetime.tm_mday = (int) ((raw >> 16) & 0x1f);
	// Stored month is 1-based; struct tm months are 0-based.
	datetime.tm_mon = (int) ((raw >> 21) & 0xf) - 1;
	// Stored year counts from 1980; struct tm years count from 1900.
	datetime.tm_year = 80 + (int) ((raw >> 25) & 0x7f);
	datetime.tm_wday = 0;
	datetime.tm_yday = 0;
	// Let mktime() work out whether daylight saving time applies.
	datetime.tm_isdst = -1;

	return (unsigned int) mktime(&datetime);
}
180 
181 // MS-DOS archives (and archives from similar systems) may have paths and
182 // filenames that are in all-caps. Detect these and convert them to
183 // lower-case.
184 
fix_msdos_allcaps(LHAFileHeader * header)185 static void fix_msdos_allcaps(LHAFileHeader *header)
186 {
187 	unsigned int i;
188 	int is_allcaps;
189 
190 	// Check both path and filename to see if there are any lower-case
191 	// characters.
192 
193 	is_allcaps = 1;
194 
195 	if (header->path != NULL) {
196 		for (i = 0; header->path[i] != '\0'; ++i) {
197 			if (islower((unsigned) header->path[i])) {
198 				is_allcaps = 0;
199 				break;
200 			}
201 		}
202 	}
203 
204 	if (is_allcaps && header->filename != NULL) {
205 		for (i = 0; header->filename[i] != '\0'; ++i) {
206 			if (islower((unsigned) header->filename[i])) {
207 				is_allcaps = 0;
208 				break;
209 			}
210 		}
211 	}
212 
213 	// If both are all-caps, convert them all to lower-case.
214 
215 	if (is_allcaps) {
216 		if (header->path != NULL) {
217 			for (i = 0; header->path[i] != '\0'; ++i) {
218 				header->path[i]
219 				    = tolower((unsigned) header->path[i]);
220 			}
221 		}
222 		if (header->filename != NULL) {
223 			for (i = 0; header->filename[i] != '\0'; ++i) {
224 				header->filename[i]
225 				    = tolower((unsigned) header->filename[i]);
226 			}
227 		}
228 	}
229 }
230 
231 // Process the OS-9 permissions field and translate into the equivalent
232 // Unix permissions.
233 
os9_to_unix_permissions(LHAFileHeader * header)234 static void os9_to_unix_permissions(LHAFileHeader *header)
235 {
236 	unsigned int or, ow, oe, pr, pw, pe, d;
237 
238 	// Translate into equivalent Unix permissions. OS-9 just has
239 	// owner and public, so double up public for the owner field.
240 
241 	or = (header->os9_perms & 0x01) != 0;
242 	ow = (header->os9_perms & 0x02) != 0;
243 	oe = (header->os9_perms & 0x04) != 0;
244 	pr = (header->os9_perms & 0x08) != 0;
245 	pw = (header->os9_perms & 0x10) != 0;
246 	pe = (header->os9_perms & 0x20) != 0;
247 	d = (header->os9_perms & 0x80) != 0;
248 
249 	header->extra_flags |= LHA_FILE_UNIX_PERMS;
250 	header->unix_perms = (d << 14)
251 	                   | (or << 8) | (ow << 7) | (oe << 6)  // owner
252 	                   | (pr << 5) | (pw << 4) | (pe << 3)  // group
253 	                   | (pr << 2) | (pw << 1) | (pe << 0); // everyone
254 }
255 
256 // Parse a Unix symbolic link. These are stored in the format:
257 // filename = symlink|target
258 
parse_symlink(LHAFileHeader * header)259 static int parse_symlink(LHAFileHeader *header)
260 {
261 	char *fullpath;
262 	char *p;
263 
264 	// Although the format is always the same, some files have
265 	// symlink headers where the path is split between the path
266 	// and filename headers. For example:
267 	//    path = etc|../../
268 	//    filename = etc
269 
270 	fullpath = lha_file_header_full_path(header);
271 
272 	if (fullpath == NULL) {
273 		return 0;
274 	}
275 
276 	p = strchr(fullpath, '|');
277 
278 	if (p == NULL) {
279 		free(fullpath);
280 		return 0;
281 	}
282 
283 	header->symlink_target = strdup(p + 1);
284 
285 	if (header->symlink_target == NULL) {
286 		free(fullpath);
287 		return 0;
288 	}
289 
290 	// Cut the string in half at the separator. Keep the left side
291 	// as the value for filename.
292 
293 	*p = '\0';
294 
295 	free(header->path);
296 	free(header->filename);
297 	header->path = NULL;
298 	header->filename = fullpath;
299 
300 	// Having joined path and filename together during processing,
301 	// we now have the opposite problem: header->filename might
302 	// contain a full path rather than just a filename. Split back
303 	// into two again.
304 
305 	return split_header_filename(header);
306 }
307 
308 // Decode the path field in the header.
309 
process_level0_path(LHAFileHeader * header,uint8_t * data,size_t data_len)310 static int process_level0_path(LHAFileHeader *header, uint8_t *data,
311                                size_t data_len)
312 {
313 	unsigned int i;
314 
315 	// Zero-length filename probably means that this is a directory
316 	// entry. Leave the filename field as NULL - this makes us
317 	// consistent with level 2/3 headers.
318 
319 	if (data_len == 0) {
320 		return 1;
321 	}
322 
323 	header->filename = malloc(data_len + 1);
324 
325 	if (header->filename == NULL) {
326 		return 0;
327 	}
328 
329 	memcpy(header->filename, data, data_len);
330 	header->filename[data_len] = '\0';
331 
332 	// Convert MS-DOS path separators to Unix path separators.
333 
334 	for (i = 0; i < data_len; ++i) {
335 		if (header->filename[i] == '\\') {
336 			header->filename[i] = '/';
337 		}
338 	}
339 
340 	return split_header_filename(header);
341 }
342 
343 // Read some more data from the input stream, extending the raw_data
344 // array (and the size of the header).
345 
extend_raw_data(LHAFileHeader ** header,LHAInputStream * stream,size_t nbytes)346 static uint8_t *extend_raw_data(LHAFileHeader **header,
347                                 LHAInputStream *stream,
348                                 size_t nbytes)
349 {
350 	LHAFileHeader *new_header;
351 	size_t new_raw_len;
352 	uint8_t *result;
353 
354 	if (nbytes > LEVEL_3_MAX_HEADER_LEN) {
355 		return NULL;
356 	}
357 
358 	// Reallocate the header and raw_data area to be larger.
359 
360 	new_raw_len = RAW_DATA_LEN(header) + nbytes;
361 	new_header = realloc(*header, sizeof(LHAFileHeader) + new_raw_len);
362 
363 	if (new_header == NULL) {
364 		return NULL;
365 	}
366 
367 	// Update the header pointer to point to the new area.
368 
369 	*header = new_header;
370 	new_header->raw_data = (uint8_t *) (new_header + 1);
371 	result = new_header->raw_data + new_header->raw_data_len;
372 
373 	// Read data from stream into new area.
374 
375 	if (!lha_input_stream_read(stream, result, nbytes)) {
376 		return NULL;
377 	}
378 
379 	new_header->raw_data_len = new_raw_len;
380 
381 	return result;
382 }
383 
384 // Starting at the specified offset in the raw_data array, walk
385 // through the list of extended headers and parse them.
386 
decode_extended_headers(LHAFileHeader ** header,unsigned int offset)387 static int decode_extended_headers(LHAFileHeader **header,
388                                    unsigned int offset)
389 {
390 	unsigned int field_size;
391 	uint8_t *ext_header;
392 	size_t ext_header_len;
393 	size_t available_length;
394 
395 	// Level 3 headers use 32-bit length fields; all others use
396 	// 16-bit fields.
397 
398 	if ((*header)->header_level == 3) {
399 		field_size = 4;
400 	} else {
401 		field_size = 2;
402 	}
403 
404 	available_length = RAW_DATA_LEN(header) - offset - field_size;
405 
406 	while (offset <= RAW_DATA_LEN(header) - field_size) {
407 		ext_header = &RAW_DATA(header, offset + field_size);
408 
409 		if (field_size == 4) {
410 			ext_header_len
411 			    = lha_decode_uint32(&RAW_DATA(header, offset));
412 		} else {
413 			ext_header_len
414 			    = lha_decode_uint16(&RAW_DATA(header, offset));
415 		}
416 
417 		// Header length zero indicates end of chain. Otherwise, sanity
418 		// check the header length is valid.
419 
420 		if (ext_header_len == 0) {
421 			break;
422 		} else if (ext_header_len < field_size + 1
423 		        || ext_header_len > available_length) {
424 			return 0;
425 		}
426 
427 		// Process header:
428 
429 		lha_ext_header_decode(*header, ext_header[0], ext_header + 1,
430 		                      ext_header_len - field_size - 1);
431 
432 		// Advance to next header.
433 
434 		offset += ext_header_len;
435 		available_length -= ext_header_len;
436 	}
437 
438 	return 1;
439 }
440 
// Read the next extended header (if any) from the stream onto the end
// of the raw data.
//
// The last two bytes of the raw data read so far hold the length of
// the next extended header, which is stored in *ext_header_len. When
// that length is zero there are no more headers: *ext_header is set to
// NULL and the call succeeds. Otherwise *ext_header points at the
// newly read bytes.
//
// Returns non-zero for success, zero if the data could not be read.

static int read_next_ext_header(LHAFileHeader **header,
                                LHAInputStream *stream,
                                uint8_t **ext_header,
                                size_t *ext_header_len)
{
	size_t len_offset = RAW_DATA_LEN(header) - 2;
	size_t next_len = lha_decode_uint16(&RAW_DATA(header, len_offset));

	*ext_header_len = next_len;

	// No more headers?

	if (next_len == 0) {
		*ext_header = NULL;
		return 1;
	}

	*ext_header = extend_raw_data(header, stream, next_len);

	return (*ext_header == NULL) ? 0 : 1;
}
463 
464 // Read extended headers for a level 1 header, extending the
465 // raw_data block to include them.
466 
read_l1_extended_headers(LHAFileHeader ** header,LHAInputStream * stream)467 static int read_l1_extended_headers(LHAFileHeader **header,
468                                     LHAInputStream *stream)
469 {
470 	uint8_t *ext_header;
471 	size_t ext_header_len;
472 
473 	for (;;) {
474 		// Try to read the next header.
475 
476 		if (!read_next_ext_header(header, stream,
477 		                          &ext_header, &ext_header_len)) {
478 			return 0;
479 		}
480 
481 		// Last header?
482 
483 		if (ext_header_len == 0) {
484 			break;
485 		}
486 
487 		// For backwards compatibility with level 0 headers,
488 		// the compressed length field is actually "compressed
489 		// length + length of all extended headers":
490 
491 		if ((*header)->compressed_length < ext_header_len) {
492 			return 0;
493 		}
494 
495 		(*header)->compressed_length -= ext_header_len;
496 
497 		// Must be at least 3 bytes - 1 byte header type
498 		// + 2 bytes for next header length
499 
500 		if (ext_header_len < 3) {
501 			return 0;
502 		}
503 	}
504 
505 	return 1;
506 }
507 
508 // Process a level 0 Unix extended area.
509 
process_level0_unix_area(LHAFileHeader * header,uint8_t * data,size_t data_len)510 static void process_level0_unix_area(LHAFileHeader *header,
511                                      uint8_t *data, size_t data_len)
512 {
513 	// A typical Unix extended area:
514 	//
515 	// 00000000  55 00 00 3b 3d 4b 80 81  e8 03 e8 03
516 
517 	// Sanity check.
518 
519 	if (data_len < LEVEL_0_UNIX_EXTENDED_LEN || data[1] != 0x00) {
520 		return;
521 	}
522 
523 	// OS-9/68k generates an extended area that is broadly compatible
524 	// with the Unix one.
525 
526 	// Fill in the header fields from the data from the extended area.
527 	// There's one minor point to note here: OS-9/68k LHA includes the
528 	// timestamp twice - I have no idea why. In order to support both
529 	// variants, read the end fields from the end of the extended area.
530 
531 	header->os_type = data[0];
532 	header->timestamp = lha_decode_uint32(data + 2);
533 
534 	header->unix_perms = lha_decode_uint16(data + data_len - 6);
535 	header->unix_uid = lha_decode_uint16(data + data_len - 4);
536 	header->unix_gid = lha_decode_uint16(data + data_len - 2);
537 
538 	header->extra_flags |= LHA_FILE_UNIX_PERMS | LHA_FILE_UNIX_UID_GID;
539 }
540 
541 // Process a level 0 OS-9 extended area.
542 
process_level0_os9_area(LHAFileHeader * header,uint8_t * data,size_t data_len)543 static void process_level0_os9_area(LHAFileHeader *header,
544                                     uint8_t *data, size_t data_len)
545 {
546 	// A typical OS-9 extended area:
547 	//
548 	// 00000000  39 13 00 00 c3 16 00 0f  00 cc 18 07 09 03 01 16
549 	// 00000010  00 13 00 00 00 00
550 
551 	// Sanity checks:
552 
553 	if (data_len < LEVEL_0_OS9_EXTENDED_LEN
554 	 || data[9] != 0xcc || data[1] != data[17] || data[2] != data[18]) {
555 		return;
556 	}
557 
558 	// The contents resemble the contents of the OS-9 extended header.
559 	// We just want the permissions field.
560 
561 	header->os_type = LHA_OS_TYPE_OS9;
562 	header->os9_perms = lha_decode_uint16(data + 1);
563 	header->extra_flags |= LHA_FILE_OS9_PERMS;
564 }
565 
566 // Handling for level 0 extended areas.
567 
process_level0_extended_area(LHAFileHeader * header,uint8_t * data,size_t data_len)568 static void process_level0_extended_area(LHAFileHeader *header,
569                                          uint8_t *data, size_t data_len)
570 {
571 	// PMarc archives can include comments that are stored in the
572 	// extended area. It is possible that this could conflict with
573 	// the logic below, so specifically exclude them.
574 
575 	if (!strncmp(header->compress_method, "-pm", 3)) {
576 		return;
577 	}
578 
579 	// Different tools include different extended areas. Try to
580 	// identify which tool generated this one, based on the first
581 	// byte.
582 
583 	switch (data[0]) {
584 		case LHA_OS_TYPE_UNIX:
585 		case LHA_OS_TYPE_OS9_68K:
586 			process_level0_unix_area(header, data, data_len);
587 			break;
588 
589 		case LHA_OS_TYPE_OS9:
590 			process_level0_os9_area(header, data, data_len);
591 			break;
592 
593 		default:
594 			break;
595 	}
596 }
597 
598 // Decode a level 0 or 1 header.
599 
decode_level0_header(LHAFileHeader ** header,LHAInputStream * stream)600 static int decode_level0_header(LHAFileHeader **header, LHAInputStream *stream)
601 {
602 	uint8_t header_len;
603 	uint8_t header_csum;
604 	size_t path_len;
605 	size_t min_len;
606 
607 	header_len = RAW_DATA(header, 0);
608 	header_csum = RAW_DATA(header, 1);
609 
610 	// Sanity check header length.  This is the minimum header length
611 	// for a header that has a zero-length path.
612 
613 	switch ((*header)->header_level) {
614 		case 0:
615 			min_len = LEVEL_0_MIN_HEADER_LEN;
616 			break;
617 		case 1:
618 			min_len = LEVEL_1_MIN_HEADER_LEN;
619 			break;
620 
621 		default:
622 			return 0;
623 	}
624 
625 	if (header_len < min_len) {
626 		return 0;
627 	}
628 
629 	// We only have a partial header so far. Read the full header.
630 
631 	if (!extend_raw_data(header, stream,
632 	                     header_len + 2 - RAW_DATA_LEN(header))) {
633 		return 0;
634 	}
635 
636 	// Checksum the header.
637 
638 	if (!check_l0_checksum(&RAW_DATA(header, 2),
639 	                       RAW_DATA_LEN(header) - 2,
640 	                       header_csum)) {
641 		return 0;
642 	}
643 
644 	// Compression method:
645 
646 	memcpy((*header)->compress_method, &RAW_DATA(header, 2), 5);
647 	(*header)->compress_method[5] = '\0';
648 
649 	// File lengths:
650 
651 	(*header)->compressed_length = lha_decode_uint32(&RAW_DATA(header, 7));
652 	(*header)->length = lha_decode_uint32(&RAW_DATA(header, 11));
653 
654 	// Timestamp:
655 
656 	(*header)->timestamp = decode_ftime(&RAW_DATA(header, 15));
657 
658 	// Read path.  Check path length field - is the header long enough
659 	// to hold this full path?
660 
661 	path_len = RAW_DATA(header, 21);
662 
663 	if (min_len + path_len > header_len) {
664 		return 0;
665 	}
666 
667 	// OS type?
668 
669 	if ((*header)->header_level == 0) {
670 		(*header)->os_type = LHA_OS_TYPE_UNKNOWN;
671 	} else {
672 		(*header)->os_type = RAW_DATA(header, 24 + path_len);
673 	}
674 
675 	// Read filename field:
676 
677 	if (!process_level0_path(*header, &RAW_DATA(header, 22), path_len)) {
678 		return 0;
679 	}
680 
681 	// CRC field.
682 
683 	(*header)->crc = lha_decode_uint16(&RAW_DATA(header, 22 + path_len));
684 
685 	// Level 0 headers can contain extended data through different schemes
686 	// to the extended header system used in level 1+.
687 
688 	if ((*header)->header_level == 0
689 	 && header_len > LEVEL_0_MIN_HEADER_LEN + path_len) {
690 		process_level0_extended_area(*header,
691 		  &RAW_DATA(header, LEVEL_0_MIN_HEADER_LEN + 2 + path_len),
692 		  header_len - LEVEL_0_MIN_HEADER_LEN - path_len);
693 	}
694 
695 	return 1;
696 }
697 
// Decode a level 1 header: the base header shares its layout with
// level 0, and is followed by a chain of extended headers.
// Returns non-zero for success, zero for failure.

static int decode_level1_header(LHAFileHeader **header, LHAInputStream *stream)
{
	unsigned int first_ext_offset;

	if (!decode_level0_header(header, stream)) {
		return 0;
	}

	// The last two bytes of the base header hold the length of the
	// first extended header. Remember where they are before more
	// data is appended.

	first_ext_offset = RAW_DATA_LEN(header) - 2;

	// Pull in all extended headers, then parse them.

	if (!read_l1_extended_headers(header, stream)) {
		return 0;
	}

	return decode_extended_headers(header, first_ext_offset) ? 1 : 0;
}
717 
// Decode a level 2 header. The first two bytes give the length of the
// complete header, including all extended headers, so everything can
// be read in one go before parsing.
// Returns non-zero for success, zero for failure.

static int decode_level2_header(LHAFileHeader **header, LHAInputStream *stream)
{
	LHAFileHeader *hdr;
	uint8_t *raw;
	unsigned int total_len;

	total_len = lha_decode_uint16(&RAW_DATA(header, 0));

	if (total_len < LEVEL_2_HEADER_LEN) {
		return 0;
	}

	// Read the full header.

	if (!extend_raw_data(header, stream,
	                     total_len - RAW_DATA_LEN(header))) {
		return 0;
	}

	// Shortcuts for the fixed fields. These must not be used once
	// the header has been extended again further down.

	hdr = *header;
	raw = hdr->raw_data;

	// Compression method:

	memcpy(hdr->compress_method, raw + 2, 5);
	hdr->compress_method[5] = '\0';

	// File lengths:

	hdr->compressed_length = lha_decode_uint32(raw + 7);
	hdr->length = lha_decode_uint32(raw + 11);

	// Timestamp. Unlike level 0/1, this is a Unix-style timestamp.

	hdr->timestamp = lha_decode_uint32(raw + 15);

	// CRC.

	hdr->crc = lha_decode_uint16(raw + 21);

	// OS type:

	hdr->os_type = raw[23];

	// LHA for OS-9/68k generates broken level 2 archives: the header
	// length field is the length of the remainder of the header, not
	// the complete header length. As a result it's two bytes too
	// short. We can use the OS type field to detect these archives
	// and compensate by reading the two missing bytes.

	if (hdr->os_type == LHA_OS_TYPE_OS9_68K
	 && !extend_raw_data(header, stream, 2)) {
		return 0;
	}

	// The first extended header length field is at offset 24.

	return decode_extended_headers(header, 24) != 0;
}
775 
// Decode a level 3 header. The base header is LEVEL_3_HEADER_LEN bytes
// long and holds a 32-bit length for the complete header, including
// all extended headers.
// Returns non-zero for success, zero for failure.

static int decode_level3_header(LHAFileHeader **header, LHAInputStream *stream)
{
	LHAFileHeader *hdr;
	uint8_t *raw;
	unsigned int total_len;

	// The first field at the start of a level 3 header is supposed to
	// indicate word size, with the idea being that the header format
	// can be extended beyond 32-bit words in the future. In practice,
	// nothing supports anything other than 32-bit (4 bytes), and neither
	// do we.

	if (lha_decode_uint16(&RAW_DATA(header, 0)) != 4) {
		return 0;
	}

	// Read the rest of the base header.

	if (!extend_raw_data(header, stream,
	                     LEVEL_3_HEADER_LEN - RAW_DATA_LEN(header))) {
		return 0;
	}

	// Read the header length field (including extended headers), and
	// extend to this full length. Because this is a 32-bit value,
	// we must place a sensible limit on the amount of data that will
	// be read, to avoid possibly allocating gigabytes of memory. The
	// length also may not be smaller than what has already been read.

	total_len = lha_decode_uint32(&RAW_DATA(header, 24));

	if (total_len > LEVEL_3_MAX_HEADER_LEN) {
		return 0;
	}

	if (total_len < RAW_DATA_LEN(header)) {
		return 0;
	}

	if (!extend_raw_data(header, stream,
	                     total_len - RAW_DATA_LEN(header))) {
		return 0;
	}

	// The header is not extended again, so these shortcuts stay
	// valid from here on.

	hdr = *header;
	raw = hdr->raw_data;

	// Compression method:

	memcpy(hdr->compress_method, raw + 2, 5);
	hdr->compress_method[5] = '\0';

	// File lengths:

	hdr->compressed_length = lha_decode_uint32(raw + 7);
	hdr->length = lha_decode_uint32(raw + 11);

	// Unix-style timestamp.

	hdr->timestamp = lha_decode_uint32(raw + 15);

	// CRC.

	hdr->crc = lha_decode_uint16(raw + 21);

	// OS type:

	hdr->os_type = raw[23];

	// The first extended header length field is at offset 28.

	return decode_extended_headers(header, 28) != 0;
}
842 
843 
// "Collapse" a path down in place, by removing all instances of "."
// and ".." components, as well as empty components ("//"). This is to
// protect against malicious archives that might include ".." in a path
// to break out of the extract directory.
//
// Only components that are terminated by a '/' are examined; anything
// after the final '/' is copied through unchanged. A ".." with nothing
// before it is simply dropped.

static void collapse_path(char *filename)
{
	char *base;
	char *component;
	char *out;
	const char *in;
	unsigned int component_len;

	// A leading '/' marks an absolute path; keep it and work on the
	// remainder of the string only.

	base = (filename[0] == '/') ? filename + 1 : filename;

	// Copy characters from 'in' to 'out'. 'out' never overtakes
	// 'in', so the result is never longer than the original.
	// 'component' marks where the component currently being written
	// starts.

	component = base;
	out = base;

	for (in = base; *in != '\0'; ++in) {
		*out++ = *in;

		// Components are only inspected once their terminating
		// separator has been copied.

		if (*in != '/') {
			continue;
		}

		component_len = out - component - 1;

		if (component_len == 2
		 && component[0] == '.' && component[1] == '.') {

			// Parent directory: drop this component and the
			// one before it, without going past the start.

			if (component != base) {
				out = component - 1;

				while (out > base && *(out - 1) != '/') {
					--out;
				}

				component = out;
			} else {
				out = base;
			}

		} else if (component_len == 0
		        || (component_len == 1 && component[0] == '.')) {

			// Empty component or current directory: discard.

			out = component;

		} else {

			// Ordinary component: keep it and start a new one.

			component = out;
		}
	}

	*out = '\0';
}
917 
lha_file_header_read(LHAInputStream * stream)918 LHAFileHeader *lha_file_header_read(LHAInputStream *stream)
919 {
920 	LHAFileHeader *header;
921 	int success;
922 
923 	// We cannot decode the file header until we identify the
924 	// header level (as different header level formats are
925 	// decoded in different ways. The header level field is
926 	// located at byte offset 20 within the header, so we
927 	// must read the first 21 bytes to read it (actually this
928 	// reads one byte more, so that we get the filename length
929 	// byte for level 1 headers as well).
930 
931 	// Allocate result structure.
932 
933 	header = calloc(1, sizeof(LHAFileHeader) + COMMON_HEADER_LEN);
934 
935 	if (header == NULL) {
936 		return NULL;
937 	}
938 
939 	memset(header, 0, sizeof(LHAFileHeader));
940 
941 	header->_refcount = 1;
942 
943 	// Read first chunk of header.
944 
945 	header->raw_data = (uint8_t *) (header + 1);
946 	header->raw_data_len = COMMON_HEADER_LEN;
947 
948 	if (!lha_input_stream_read(stream, header->raw_data,
949 	                           header->raw_data_len)) {
950 		goto fail;
951 	}
952 
953 	// Identify header level, and decode header depending on
954 	// the value encountered.
955 
956 	header->header_level = header->raw_data[20];
957 
958 	switch (header->header_level) {
959 		case 0:
960 			success = decode_level0_header(&header, stream);
961 			break;
962 
963 		case 1:
964 			success = decode_level1_header(&header, stream);
965 			break;
966 
967 		case 2:
968 			success = decode_level2_header(&header, stream);
969 			break;
970 
971 		case 3:
972 			success = decode_level3_header(&header, stream);
973 			break;
974 
975 		default:
976 			success = 0;
977 			break;
978 	}
979 
980 	if (!success) {
981 		goto fail;
982 	}
983 
984 	// Sanity check that we got some headers, at least.
985 	// Directory entries must have a path, and files must have a
986 	// filename. Symlinks are stored using the same compression method
987 	// field string (-lhd-) as directories.
988 
989 	if (strcmp(header->compress_method, LHA_COMPRESS_TYPE_DIR) != 0) {
990 		if (header->filename == NULL) {
991 			goto fail;
992 		}
993 	} else if (!strcmp(header->compress_method, LHA_COMPRESS_TYPE_DIR)
994 	        && LHA_FILE_HAVE_EXTRA(header, LHA_FILE_UNIX_PERMS)
995 		&& (header->path != NULL || header->filename != NULL)
996 		&& (header->unix_perms & 0170000) == 0120000) {
997 
998 		if (!parse_symlink(header)) {
999 			goto fail;
1000 		}
1001 
1002 	} else {
1003 		if (header->path == NULL) {
1004 			goto fail;
1005 		}
1006 	}
1007 
1008 	// Is the path an all-caps filename?  If so, it is a DOS path that
1009 	// should be translated to lower case.
1010 
1011 	if (header->os_type == LHA_OS_TYPE_UNKNOWN
1012 	 || header->os_type == LHA_OS_TYPE_MSDOS
1013 	 || header->os_type == LHA_OS_TYPE_ATARI
1014 	 || header->os_type == LHA_OS_TYPE_OS2) {
1015 		fix_msdos_allcaps(header);
1016 	}
1017 
1018 	// Collapse special directory paths to ensure the path is clean.
1019 
1020 	if (header->path != NULL) {
1021 		collapse_path(header->path);
1022 	}
1023 
1024 	// Is this header generated by OS-9/68k LHA? If so, any Unix
1025 	// permissions are actually OS-9 permissions.
1026 
1027 	if (header->os_type == LHA_OS_TYPE_OS9_68K
1028 	 && LHA_FILE_HAVE_EXTRA(header, LHA_FILE_UNIX_PERMS)) {
1029 		header->os9_perms = header->unix_perms;
1030 		header->extra_flags |= LHA_FILE_OS9_PERMS;
1031 	}
1032 
1033 	// If OS-9 permissions were read, translate into Unix permissions.
1034 
1035 	if (LHA_FILE_HAVE_EXTRA(header, LHA_FILE_OS9_PERMS)) {
1036 		os9_to_unix_permissions(header);
1037 	}
1038 
1039 	// Was the "common" extended header read, which contains a CRC of
1040 	// the full header? If so, perform a CRC check now.
1041 
1042 	if (LHA_FILE_HAVE_EXTRA(header, LHA_FILE_COMMON_CRC)
1043 	 && !check_common_crc(header)) {
1044 		goto fail;
1045 	}
1046 
1047 	return header;
1048 fail:
1049 	lha_file_header_free(header);
1050 	return NULL;
1051 }
1052 
// Drop one reference to a file header. When the last reference is
// removed, the header and all strings it owns are freed.
//
// Passing NULL is a no-op, as with free(). A header whose reference
// count is already zero is left alone.

void lha_file_header_free(LHAFileHeader *header)
{
	// Sanity checks:

	if (header == NULL || header->_refcount == 0) {
		return;
	}

	// Count down references and only free when all have been removed.

	--header->_refcount;

	if (header->_refcount > 0) {
		return;
	}

	free(header->filename);
	free(header->path);
	free(header->symlink_target);
	free(header->unix_username);
	free(header->unix_group);

	// The raw data is stored in the same allocation as the header
	// itself (see lha_file_header_read), so this frees both.

	free(header);
}
1076 
// Take an additional reference to a file header. Each reference must
// eventually be released with lha_file_header_free().

void lha_file_header_add_ref(LHAFileHeader *header)
{
	header->_refcount += 1;
}
1081 
1082