1 /*
2
3 Copyright (c) 2011, 2012, Simon Howard
4
5 Permission to use, copy, modify, and/or distribute this software
6 for any purpose with or without fee is hereby granted, provided
7 that the above copyright notice and this permission notice appear
8 in all copies.
9
10 THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
11 WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
12 WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
13 AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
14 CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
15 LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
16 NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
17 CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18
19 */
20
21 #include <stdlib.h>
22 #include <string.h>
23 #include <ctype.h>
24 #include <time.h>
25
26 #include "lha_endian.h"
27 #include "lha_file_header.h"
28 #include "ext_header.h"
29 #include "crc16.h"
30
// Number of bytes read up front for every header, before the header
// level is known.
#define COMMON_HEADER_LEN 22 /* bytes */

// Minimum length of a level 0 header (with zero-length filename).
#define LEVEL_0_MIN_HEADER_LEN 22 /* bytes */

// Minimum length of a level 1 base header (with zero-length filename).
#define LEVEL_1_MIN_HEADER_LEN 25 /* bytes */

// Length of a level 2 base header.
#define LEVEL_2_HEADER_LEN 26 /* bytes */

// Length of a level 3 base header.
#define LEVEL_3_HEADER_LEN 32 /* bytes */

// Maximum length of a level 3 header (including extended headers).
#define LEVEL_3_MAX_HEADER_LEN (1024 * 1024) /* 1 MB */

// Length of a level 0 Unix extended area.
#define LEVEL_0_UNIX_EXTENDED_LEN 12 /* bytes */

// Length of a level 0 OS-9 extended area.
#define LEVEL_0_OS9_EXTENDED_LEN 22 /* bytes */

// Accessors for the raw header bytes. 'hdr_ptr' is a pointer to a header
// pointer; it is dereferenced on every use. Both macro arguments are
// fully parenthesized so that arbitrary expressions can be passed safely.
#define RAW_DATA(hdr_ptr, off) ((*(hdr_ptr))->raw_data[(off)])
#define RAW_DATA_LEN(hdr_ptr) ((*(hdr_ptr))->raw_data_len)
56
/**
 * Build the full path of an archive entry by concatenating the path
 * and filename fields of its header. A field that is NULL is treated
 * as an empty string.
 *
 * @param header         Pointer to the file header structure.
 * @return               Newly-allocated string (allocated with malloc,
 *                       to be released with free), or NULL if the
 *                       allocation failed.
 */

char *lha_file_header_full_path(LHAFileHeader *header)
{
	const char *path;
	const char *filename;
	size_t path_len;
	size_t filename_len;
	char *result;

	path = (header->path != NULL) ? header->path : "";
	filename = (header->filename != NULL) ? header->filename : "";

	// Measure each part once; the lengths are reused for the copy.

	path_len = strlen(path);
	filename_len = strlen(filename);

	result = malloc(path_len + filename_len + 1);

	if (result == NULL) {
		return NULL;
	}

	// Join the two parts with memcpy() from <string.h> rather than
	// sprintf(), for which this file includes no declaration. The
	// second copy includes the terminating NUL of the filename.

	memcpy(result, path, path_len);
	memcpy(result + path_len, filename, filename_len + 1);

	return result;
}
85
86 /**
87 * Given a file header with the filename set, split it into separate
88 * path and filename components, if necessary.
89 *
90 * @param header Point to the file header structure.
91 * @return Non-zero for success, or zero for failure.
92 */
93
split_header_filename(LHAFileHeader * header)94 static int split_header_filename(LHAFileHeader *header)
95 {
96 char *sep;
97 char *new_filename;
98
99 // Is there a directory separator in the path? If so, we need to
100 // split into directory name and filename.
101
102 sep = strrchr(header->filename, '/');
103
104 if (sep != NULL) {
105 new_filename = strdup(sep + 1);
106
107 if (new_filename == NULL) {
108 return 0;
109 }
110
111 *(sep + 1) = '\0';
112 header->path = header->filename;
113 header->filename = new_filename;
114 }
115
116 return 1;
117 }
118
// Check the simple additive checksum of a header: the sum of all
// 'header_len' bytes, reduced to its low eight bits, must equal 'csum'.
// Returns non-zero if the checksum matches.

static int check_l0_checksum(uint8_t *header, size_t header_len, size_t csum)
{
	unsigned int sum = 0;
	unsigned int pos = 0;

	while (pos < header_len) {
		sum += header[pos];
		++pos;
	}

	return csum == (sum & 0xff);
}
134
135 // Perform full-header CRC check, based on CRC from "common" extended header.
136
check_common_crc(LHAFileHeader * header)137 static int check_common_crc(LHAFileHeader *header)
138 {
139 uint16_t crc;
140
141 crc = 0;
142 lha_crc16_buf(&crc, header->raw_data, header->raw_data_len);
143
144 return crc == header->common_crc;
145 }
146
// Decode an MS-DOS timestamp into the value returned by mktime().
//
// 'buf' points to the four bytes of the packed timestamp, which are
// decoded with lha_decode_uint32(). A raw value of zero yields zero.
//
// The raw value is kept in an unsigned 32-bit variable: shifting it is
// then always well-defined, whereas left-shifting a signed int that has
// its high bits set is undefined behaviour.

static unsigned int decode_ftime(uint8_t *buf)
{
	uint32_t raw;
	struct tm datetime;

	raw = lha_decode_uint32(buf);

	if (raw == 0) {
		return 0;
	}

	// Deconstruct the contents of the MS-DOS time value and populate the
	// 'datetime' structure. Note that 'mktime' generates a timestamp for
	// the local time zone: this is unfortunate, but probably the best
	// that can be done, due to the limited data stored in MS-DOS time
	// values.

	memset(&datetime, 0, sizeof(struct tm));

	// Seconds are stored divided by two in the low five bits.

	datetime.tm_sec = (int) ((raw << 1) & 0x3e);
	datetime.tm_min = (int) ((raw >> 5) & 0x3f);
	datetime.tm_hour = (int) ((raw >> 11) & 0x1f);
	datetime.tm_mday = (int) ((raw >> 16) & 0x1f);

	// Months are stored 1-based; struct tm wants them 0-based.

	datetime.tm_mon = (int) ((raw >> 21) & 0xf) - 1;

	// Years are stored relative to 1980; struct tm counts from 1900.

	datetime.tm_year = 80 + (int) ((raw >> 25) & 0x7f);
	datetime.tm_wday = 0;
	datetime.tm_yday = 0;

	// Let mktime() work out whether daylight saving time applies.

	datetime.tm_isdst = -1;

	return (unsigned int) mktime(&datetime);
}
180
181 // MS-DOS archives (and archives from similar systems) may have paths and
182 // filenames that are in all-caps. Detect these and convert them to
183 // lower-case.
184
fix_msdos_allcaps(LHAFileHeader * header)185 static void fix_msdos_allcaps(LHAFileHeader *header)
186 {
187 unsigned int i;
188 int is_allcaps;
189
190 // Check both path and filename to see if there are any lower-case
191 // characters.
192
193 is_allcaps = 1;
194
195 if (header->path != NULL) {
196 for (i = 0; header->path[i] != '\0'; ++i) {
197 if (islower((unsigned) header->path[i])) {
198 is_allcaps = 0;
199 break;
200 }
201 }
202 }
203
204 if (is_allcaps && header->filename != NULL) {
205 for (i = 0; header->filename[i] != '\0'; ++i) {
206 if (islower((unsigned) header->filename[i])) {
207 is_allcaps = 0;
208 break;
209 }
210 }
211 }
212
213 // If both are all-caps, convert them all to lower-case.
214
215 if (is_allcaps) {
216 if (header->path != NULL) {
217 for (i = 0; header->path[i] != '\0'; ++i) {
218 header->path[i]
219 = tolower((unsigned) header->path[i]);
220 }
221 }
222 if (header->filename != NULL) {
223 for (i = 0; header->filename[i] != '\0'; ++i) {
224 header->filename[i]
225 = tolower((unsigned) header->filename[i]);
226 }
227 }
228 }
229 }
230
231 // Process the OS-9 permissions field and translate into the equivalent
232 // Unix permissions.
233
os9_to_unix_permissions(LHAFileHeader * header)234 static void os9_to_unix_permissions(LHAFileHeader *header)
235 {
236 unsigned int or, ow, oe, pr, pw, pe, d;
237
238 // Translate into equivalent Unix permissions. OS-9 just has
239 // owner and public, so double up public for the owner field.
240
241 or = (header->os9_perms & 0x01) != 0;
242 ow = (header->os9_perms & 0x02) != 0;
243 oe = (header->os9_perms & 0x04) != 0;
244 pr = (header->os9_perms & 0x08) != 0;
245 pw = (header->os9_perms & 0x10) != 0;
246 pe = (header->os9_perms & 0x20) != 0;
247 d = (header->os9_perms & 0x80) != 0;
248
249 header->extra_flags |= LHA_FILE_UNIX_PERMS;
250 header->unix_perms = (d << 14)
251 | (or << 8) | (ow << 7) | (oe << 6) // owner
252 | (pr << 5) | (pw << 4) | (pe << 3) // group
253 | (pr << 2) | (pw << 1) | (pe << 0); // everyone
254 }
255
256 // Parse a Unix symbolic link. These are stored in the format:
257 // filename = symlink|target
258
parse_symlink(LHAFileHeader * header)259 static int parse_symlink(LHAFileHeader *header)
260 {
261 char *fullpath;
262 char *p;
263
264 // Although the format is always the same, some files have
265 // symlink headers where the path is split between the path
266 // and filename headers. For example:
267 // path = etc|../../
268 // filename = etc
269
270 fullpath = lha_file_header_full_path(header);
271
272 if (fullpath == NULL) {
273 return 0;
274 }
275
276 p = strchr(fullpath, '|');
277
278 if (p == NULL) {
279 free(fullpath);
280 return 0;
281 }
282
283 header->symlink_target = strdup(p + 1);
284
285 if (header->symlink_target == NULL) {
286 free(fullpath);
287 return 0;
288 }
289
290 // Cut the string in half at the separator. Keep the left side
291 // as the value for filename.
292
293 *p = '\0';
294
295 free(header->path);
296 free(header->filename);
297 header->path = NULL;
298 header->filename = fullpath;
299
300 // Having joined path and filename together during processing,
301 // we now have the opposite problem: header->filename might
302 // contain a full path rather than just a filename. Split back
303 // into two again.
304
305 return split_header_filename(header);
306 }
307
308 // Decode the path field in the header.
309
process_level0_path(LHAFileHeader * header,uint8_t * data,size_t data_len)310 static int process_level0_path(LHAFileHeader *header, uint8_t *data,
311 size_t data_len)
312 {
313 unsigned int i;
314
315 // Zero-length filename probably means that this is a directory
316 // entry. Leave the filename field as NULL - this makes us
317 // consistent with level 2/3 headers.
318
319 if (data_len == 0) {
320 return 1;
321 }
322
323 header->filename = malloc(data_len + 1);
324
325 if (header->filename == NULL) {
326 return 0;
327 }
328
329 memcpy(header->filename, data, data_len);
330 header->filename[data_len] = '\0';
331
332 // Convert MS-DOS path separators to Unix path separators.
333
334 for (i = 0; i < data_len; ++i) {
335 if (header->filename[i] == '\\') {
336 header->filename[i] = '/';
337 }
338 }
339
340 return split_header_filename(header);
341 }
342
343 // Read some more data from the input stream, extending the raw_data
344 // array (and the size of the header).
345
extend_raw_data(LHAFileHeader ** header,LHAInputStream * stream,size_t nbytes)346 static uint8_t *extend_raw_data(LHAFileHeader **header,
347 LHAInputStream *stream,
348 size_t nbytes)
349 {
350 LHAFileHeader *new_header;
351 size_t new_raw_len;
352 uint8_t *result;
353
354 if (nbytes > LEVEL_3_MAX_HEADER_LEN) {
355 return NULL;
356 }
357
358 // Reallocate the header and raw_data area to be larger.
359
360 new_raw_len = RAW_DATA_LEN(header) + nbytes;
361 new_header = realloc(*header, sizeof(LHAFileHeader) + new_raw_len);
362
363 if (new_header == NULL) {
364 return NULL;
365 }
366
367 // Update the header pointer to point to the new area.
368
369 *header = new_header;
370 new_header->raw_data = (uint8_t *) (new_header + 1);
371 result = new_header->raw_data + new_header->raw_data_len;
372
373 // Read data from stream into new area.
374
375 if (!lha_input_stream_read(stream, result, nbytes)) {
376 return NULL;
377 }
378
379 new_header->raw_data_len = new_raw_len;
380
381 return result;
382 }
383
384 // Starting at the specified offset in the raw_data array, walk
385 // through the list of extended headers and parse them.
386
decode_extended_headers(LHAFileHeader ** header,unsigned int offset)387 static int decode_extended_headers(LHAFileHeader **header,
388 unsigned int offset)
389 {
390 unsigned int field_size;
391 uint8_t *ext_header;
392 size_t ext_header_len;
393 size_t available_length;
394
395 // Level 3 headers use 32-bit length fields; all others use
396 // 16-bit fields.
397
398 if ((*header)->header_level == 3) {
399 field_size = 4;
400 } else {
401 field_size = 2;
402 }
403
404 available_length = RAW_DATA_LEN(header) - offset - field_size;
405
406 while (offset <= RAW_DATA_LEN(header) - field_size) {
407 ext_header = &RAW_DATA(header, offset + field_size);
408
409 if (field_size == 4) {
410 ext_header_len
411 = lha_decode_uint32(&RAW_DATA(header, offset));
412 } else {
413 ext_header_len
414 = lha_decode_uint16(&RAW_DATA(header, offset));
415 }
416
417 // Header length zero indicates end of chain. Otherwise, sanity
418 // check the header length is valid.
419
420 if (ext_header_len == 0) {
421 break;
422 } else if (ext_header_len < field_size + 1
423 || ext_header_len > available_length) {
424 return 0;
425 }
426
427 // Process header:
428
429 lha_ext_header_decode(*header, ext_header[0], ext_header + 1,
430 ext_header_len - field_size - 1);
431
432 // Advance to next header.
433
434 offset += ext_header_len;
435 available_length -= ext_header_len;
436 }
437
438 return 1;
439 }
440
// Read the next extended header of a chain from the stream, appending it
// to the header's raw data.
//
// The size of the next extended header is taken from the last two bytes
// of the raw data read so far, and is stored in '*ext_header_len'. If it
// is zero there are no more headers: '*ext_header' is set to NULL and
// the call succeeds. Otherwise '*ext_header' points to the data read.
// Returns non-zero for success, or zero if reading failed.

static int read_next_ext_header(LHAFileHeader **header,
                                LHAInputStream *stream,
                                uint8_t **ext_header,
                                size_t *ext_header_len)
{
	size_t next_len;

	next_len = lha_decode_uint16(
	    &RAW_DATA(header, RAW_DATA_LEN(header) - 2));

	*ext_header_len = next_len;

	// No more headers?

	if (next_len == 0) {
		*ext_header = NULL;
		return 1;
	}

	*ext_header = extend_raw_data(header, stream, next_len);

	return *ext_header != NULL;
}
463
464 // Read extended headers for a level 1 header, extending the
465 // raw_data block to include them.
466
read_l1_extended_headers(LHAFileHeader ** header,LHAInputStream * stream)467 static int read_l1_extended_headers(LHAFileHeader **header,
468 LHAInputStream *stream)
469 {
470 uint8_t *ext_header;
471 size_t ext_header_len;
472
473 for (;;) {
474 // Try to read the next header.
475
476 if (!read_next_ext_header(header, stream,
477 &ext_header, &ext_header_len)) {
478 return 0;
479 }
480
481 // Last header?
482
483 if (ext_header_len == 0) {
484 break;
485 }
486
487 // For backwards compatibility with level 0 headers,
488 // the compressed length field is actually "compressed
489 // length + length of all extended headers":
490
491 if ((*header)->compressed_length < ext_header_len) {
492 return 0;
493 }
494
495 (*header)->compressed_length -= ext_header_len;
496
497 // Must be at least 3 bytes - 1 byte header type
498 // + 2 bytes for next header length
499
500 if (ext_header_len < 3) {
501 return 0;
502 }
503 }
504
505 return 1;
506 }
507
508 // Process a level 0 Unix extended area.
509
process_level0_unix_area(LHAFileHeader * header,uint8_t * data,size_t data_len)510 static void process_level0_unix_area(LHAFileHeader *header,
511 uint8_t *data, size_t data_len)
512 {
513 // A typical Unix extended area:
514 //
515 // 00000000 55 00 00 3b 3d 4b 80 81 e8 03 e8 03
516
517 // Sanity check.
518
519 if (data_len < LEVEL_0_UNIX_EXTENDED_LEN || data[1] != 0x00) {
520 return;
521 }
522
523 // OS-9/68k generates an extended area that is broadly compatible
524 // with the Unix one.
525
526 // Fill in the header fields from the data from the extended area.
527 // There's one minor point to note here: OS-9/68k LHA includes the
528 // timestamp twice - I have no idea why. In order to support both
529 // variants, read the end fields from the end of the extended area.
530
531 header->os_type = data[0];
532 header->timestamp = lha_decode_uint32(data + 2);
533
534 header->unix_perms = lha_decode_uint16(data + data_len - 6);
535 header->unix_uid = lha_decode_uint16(data + data_len - 4);
536 header->unix_gid = lha_decode_uint16(data + data_len - 2);
537
538 header->extra_flags |= LHA_FILE_UNIX_PERMS | LHA_FILE_UNIX_UID_GID;
539 }
540
541 // Process a level 0 OS-9 extended area.
542
process_level0_os9_area(LHAFileHeader * header,uint8_t * data,size_t data_len)543 static void process_level0_os9_area(LHAFileHeader *header,
544 uint8_t *data, size_t data_len)
545 {
546 // A typical OS-9 extended area:
547 //
548 // 00000000 39 13 00 00 c3 16 00 0f 00 cc 18 07 09 03 01 16
549 // 00000010 00 13 00 00 00 00
550
551 // Sanity checks:
552
553 if (data_len < LEVEL_0_OS9_EXTENDED_LEN
554 || data[9] != 0xcc || data[1] != data[17] || data[2] != data[18]) {
555 return;
556 }
557
558 // The contents resemble the contents of the OS-9 extended header.
559 // We just want the permissions field.
560
561 header->os_type = LHA_OS_TYPE_OS9;
562 header->os9_perms = lha_decode_uint16(data + 1);
563 header->extra_flags |= LHA_FILE_OS9_PERMS;
564 }
565
566 // Handling for level 0 extended areas.
567
process_level0_extended_area(LHAFileHeader * header,uint8_t * data,size_t data_len)568 static void process_level0_extended_area(LHAFileHeader *header,
569 uint8_t *data, size_t data_len)
570 {
571 // PMarc archives can include comments that are stored in the
572 // extended area. It is possible that this could conflict with
573 // the logic below, so specifically exclude them.
574
575 if (!strncmp(header->compress_method, "-pm", 3)) {
576 return;
577 }
578
579 // Different tools include different extended areas. Try to
580 // identify which tool generated this one, based on the first
581 // byte.
582
583 switch (data[0]) {
584 case LHA_OS_TYPE_UNIX:
585 case LHA_OS_TYPE_OS9_68K:
586 process_level0_unix_area(header, data, data_len);
587 break;
588
589 case LHA_OS_TYPE_OS9:
590 process_level0_os9_area(header, data, data_len);
591 break;
592
593 default:
594 break;
595 }
596 }
597
598 // Decode a level 0 or 1 header.
599
decode_level0_header(LHAFileHeader ** header,LHAInputStream * stream)600 static int decode_level0_header(LHAFileHeader **header, LHAInputStream *stream)
601 {
602 uint8_t header_len;
603 uint8_t header_csum;
604 size_t path_len;
605 size_t min_len;
606
607 header_len = RAW_DATA(header, 0);
608 header_csum = RAW_DATA(header, 1);
609
610 // Sanity check header length. This is the minimum header length
611 // for a header that has a zero-length path.
612
613 switch ((*header)->header_level) {
614 case 0:
615 min_len = LEVEL_0_MIN_HEADER_LEN;
616 break;
617 case 1:
618 min_len = LEVEL_1_MIN_HEADER_LEN;
619 break;
620
621 default:
622 return 0;
623 }
624
625 if (header_len < min_len) {
626 return 0;
627 }
628
629 // We only have a partial header so far. Read the full header.
630
631 if (!extend_raw_data(header, stream,
632 header_len + 2 - RAW_DATA_LEN(header))) {
633 return 0;
634 }
635
636 // Checksum the header.
637
638 if (!check_l0_checksum(&RAW_DATA(header, 2),
639 RAW_DATA_LEN(header) - 2,
640 header_csum)) {
641 return 0;
642 }
643
644 // Compression method:
645
646 memcpy((*header)->compress_method, &RAW_DATA(header, 2), 5);
647 (*header)->compress_method[5] = '\0';
648
649 // File lengths:
650
651 (*header)->compressed_length = lha_decode_uint32(&RAW_DATA(header, 7));
652 (*header)->length = lha_decode_uint32(&RAW_DATA(header, 11));
653
654 // Timestamp:
655
656 (*header)->timestamp = decode_ftime(&RAW_DATA(header, 15));
657
658 // Read path. Check path length field - is the header long enough
659 // to hold this full path?
660
661 path_len = RAW_DATA(header, 21);
662
663 if (min_len + path_len > header_len) {
664 return 0;
665 }
666
667 // OS type?
668
669 if ((*header)->header_level == 0) {
670 (*header)->os_type = LHA_OS_TYPE_UNKNOWN;
671 } else {
672 (*header)->os_type = RAW_DATA(header, 24 + path_len);
673 }
674
675 // Read filename field:
676
677 if (!process_level0_path(*header, &RAW_DATA(header, 22), path_len)) {
678 return 0;
679 }
680
681 // CRC field.
682
683 (*header)->crc = lha_decode_uint16(&RAW_DATA(header, 22 + path_len));
684
685 // Level 0 headers can contain extended data through different schemes
686 // to the extended header system used in level 1+.
687
688 if ((*header)->header_level == 0
689 && header_len > LEVEL_0_MIN_HEADER_LEN + path_len) {
690 process_level0_extended_area(*header,
691 &RAW_DATA(header, LEVEL_0_MIN_HEADER_LEN + 2 + path_len),
692 header_len - LEVEL_0_MIN_HEADER_LEN - path_len);
693 }
694
695 return 1;
696 }
697
// Decode a level 1 header: the base header is decoded in the same way
// as a level 0 header, then the chain of extended headers that follows
// it is read from the stream and decoded.
// Returns non-zero for success, or zero for failure.

static int decode_level1_header(LHAFileHeader **header, LHAInputStream *stream)
{
	unsigned int chain_start;

	if (!decode_level0_header(header, stream)) {
		return 0;
	}

	// The last two bytes of the base header hold the length of the
	// first extended header; remember where they are before more
	// data is appended.

	chain_start = RAW_DATA_LEN(header) - 2;

	if (!read_l1_extended_headers(header, stream)) {
		return 0;
	}

	return decode_extended_headers(header, chain_start) != 0;
}
717
// Decode a level 2 header. The complete header (including extended
// headers) is read from the stream in one go, based on the 16-bit
// header length field at the start of the header.
// Returns non-zero for success, or zero for failure.

static int decode_level2_header(LHAFileHeader **header, LHAInputStream *stream)
{
	LHAFileHeader *hdr;
	uint8_t *raw;
	unsigned int total_len;

	total_len = lha_decode_uint16(&RAW_DATA(header, 0));

	if (total_len < LEVEL_2_HEADER_LEN) {
		return 0;
	}

	// Read the full header.

	if (!extend_raw_data(header, stream,
	                     total_len - RAW_DATA_LEN(header))) {
		return 0;
	}

	// These pointers are only valid until the next call to
	// extend_raw_data().

	hdr = *header;
	raw = hdr->raw_data;

	// Compression method:

	memcpy(hdr->compress_method, raw + 2, 5);
	hdr->compress_method[5] = '\0';

	// File lengths:

	hdr->compressed_length = lha_decode_uint32(raw + 7);
	hdr->length = lha_decode_uint32(raw + 11);

	// Timestamp. Unlike level 0/1, this is a Unix-style timestamp.

	hdr->timestamp = lha_decode_uint32(raw + 15);

	// CRC.

	hdr->crc = lha_decode_uint16(raw + 21);

	// OS type:

	hdr->os_type = raw[23];

	// LHA for OS-9/68k generates broken level 2 archives: the header
	// length field is the length of the remainder of the header, not
	// the complete header length. As a result it's two bytes too
	// short. We can use the OS type field to detect these archives
	// and compensate by reading the two missing bytes.

	if (hdr->os_type == LHA_OS_TYPE_OS9_68K
	 && !extend_raw_data(header, stream, 2)) {
		return 0;
	}

	// The first extended header length field is at offset 24.

	return decode_extended_headers(header, 24) != 0;
}
775
// Decode a level 3 header. The base header is read first to obtain the
// 32-bit total header length, then the rest (the extended headers) is
// read and decoded.
// Returns non-zero for success, or zero for failure.

static int decode_level3_header(LHAFileHeader **header, LHAInputStream *stream)
{
	LHAFileHeader *hdr;
	uint8_t *raw;
	unsigned int total_len;

	// The first field at the start of a level 3 header is supposed to
	// indicate word size, with the idea being that the header format
	// can be extended beyond 32-bit words in the future. In practice,
	// nothing supports anything other than 32-bit (4 bytes), and neither
	// do we.

	if (lha_decode_uint16(&RAW_DATA(header, 0)) != 4) {
		return 0;
	}

	// Read the rest of the base header.

	if (!extend_raw_data(header, stream,
	                     LEVEL_3_HEADER_LEN - RAW_DATA_LEN(header))) {
		return 0;
	}

	// Read the header length field (including extended headers), and
	// extend to this full length. Because this is a 32-bit value,
	// we must place a sensible limit on the amount of data that will
	// be read, to avoid possibly allocating gigabytes of memory. It
	// also cannot be smaller than what has been read already.

	total_len = lha_decode_uint32(&RAW_DATA(header, 24));

	if (total_len < RAW_DATA_LEN(header)
	 || total_len > LEVEL_3_MAX_HEADER_LEN) {
		return 0;
	}

	if (!extend_raw_data(header, stream,
	                     total_len - RAW_DATA_LEN(header))) {
		return 0;
	}

	// No further reads happen in this function, so direct pointers
	// to the header and its raw data can be used from here on.

	hdr = *header;
	raw = hdr->raw_data;

	// Compression method:

	memcpy(hdr->compress_method, raw + 2, 5);
	hdr->compress_method[5] = '\0';

	// File lengths:

	hdr->compressed_length = lha_decode_uint32(raw + 7);
	hdr->length = lha_decode_uint32(raw + 11);

	// Unix-style timestamp.

	hdr->timestamp = lha_decode_uint32(raw + 15);

	// CRC.

	hdr->crc = lha_decode_uint16(raw + 21);

	// OS type:

	hdr->os_type = raw[23];

	// The first extended header length field is at offset 28.

	return decode_extended_headers(header, 28) != 0;
}
842
843
// "Collapse" a path in place, removing empty components ("//"), "."
// components and ".." components (together with the component before
// them). This is to protect against malicious archives that might include
// ".." in a path to break out of the extract directory.
//
// Only components that are terminated by a '/' are examined; whatever
// follows the last '/' is copied through unchanged. A single leading '/'
// (absolute path) is preserved.

static void collapse_path(char *filename)
{
	char *base;
	char *component;
	char *src;
	char *dst;
	unsigned int component_len;

	// If the path starts with a /, it is an absolute path; skip over
	// that first character and don't remove it.

	base = (filename[0] == '/') ? filename + 1 : filename;

	// 'src' reads the original text while 'dst' writes the collapsed
	// text over it. 'dst' never overtakes 'src', so the result is the
	// same length as the original or shorter. 'component' marks where
	// the path component currently being written began.

	component = base;
	dst = base;

	for (src = base; *src != '\0'; ++src) {
		char c = *src;

		*dst++ = c;

		// A component is only complete once its separator has
		// been copied.

		if (c != '/') {
			continue;
		}

		// Length of the component just written, without the '/'.

		component_len = dst - component - 1;

		if (component_len == 0
		 || (component_len == 1 && component[0] == '.')) {

			// Empty path (//) or current directory (.):
			// discard what was just written.

			dst = component;

		} else if (component_len == 2
		        && component[0] == '.' && component[1] == '.') {

			// Parent directory (..): walk back up by one
			// directory, without going past the start of
			// the string.

			if (component == base) {
				dst = base;
			} else {
				dst = component - 1;

				while (dst > base && *(dst - 1) != '/') {
					--dst;
				}

				component = dst;
			}

		} else {

			// Ordinary component: keep it, and start the
			// next component after it.

			component = dst;
		}
	}

	*dst = '\0';
}
917
lha_file_header_read(LHAInputStream * stream)918 LHAFileHeader *lha_file_header_read(LHAInputStream *stream)
919 {
920 LHAFileHeader *header;
921 int success;
922
923 // We cannot decode the file header until we identify the
924 // header level (as different header level formats are
925 // decoded in different ways. The header level field is
926 // located at byte offset 20 within the header, so we
927 // must read the first 21 bytes to read it (actually this
928 // reads one byte more, so that we get the filename length
929 // byte for level 1 headers as well).
930
931 // Allocate result structure.
932
933 header = calloc(1, sizeof(LHAFileHeader) + COMMON_HEADER_LEN);
934
935 if (header == NULL) {
936 return NULL;
937 }
938
939 memset(header, 0, sizeof(LHAFileHeader));
940
941 header->_refcount = 1;
942
943 // Read first chunk of header.
944
945 header->raw_data = (uint8_t *) (header + 1);
946 header->raw_data_len = COMMON_HEADER_LEN;
947
948 if (!lha_input_stream_read(stream, header->raw_data,
949 header->raw_data_len)) {
950 goto fail;
951 }
952
953 // Identify header level, and decode header depending on
954 // the value encountered.
955
956 header->header_level = header->raw_data[20];
957
958 switch (header->header_level) {
959 case 0:
960 success = decode_level0_header(&header, stream);
961 break;
962
963 case 1:
964 success = decode_level1_header(&header, stream);
965 break;
966
967 case 2:
968 success = decode_level2_header(&header, stream);
969 break;
970
971 case 3:
972 success = decode_level3_header(&header, stream);
973 break;
974
975 default:
976 success = 0;
977 break;
978 }
979
980 if (!success) {
981 goto fail;
982 }
983
984 // Sanity check that we got some headers, at least.
985 // Directory entries must have a path, and files must have a
986 // filename. Symlinks are stored using the same compression method
987 // field string (-lhd-) as directories.
988
989 if (strcmp(header->compress_method, LHA_COMPRESS_TYPE_DIR) != 0) {
990 if (header->filename == NULL) {
991 goto fail;
992 }
993 } else if (!strcmp(header->compress_method, LHA_COMPRESS_TYPE_DIR)
994 && LHA_FILE_HAVE_EXTRA(header, LHA_FILE_UNIX_PERMS)
995 && (header->path != NULL || header->filename != NULL)
996 && (header->unix_perms & 0170000) == 0120000) {
997
998 if (!parse_symlink(header)) {
999 goto fail;
1000 }
1001
1002 } else {
1003 if (header->path == NULL) {
1004 goto fail;
1005 }
1006 }
1007
1008 // Is the path an all-caps filename? If so, it is a DOS path that
1009 // should be translated to lower case.
1010
1011 if (header->os_type == LHA_OS_TYPE_UNKNOWN
1012 || header->os_type == LHA_OS_TYPE_MSDOS
1013 || header->os_type == LHA_OS_TYPE_ATARI
1014 || header->os_type == LHA_OS_TYPE_OS2) {
1015 fix_msdos_allcaps(header);
1016 }
1017
1018 // Collapse special directory paths to ensure the path is clean.
1019
1020 if (header->path != NULL) {
1021 collapse_path(header->path);
1022 }
1023
1024 // Is this header generated by OS-9/68k LHA? If so, any Unix
1025 // permissions are actually OS-9 permissions.
1026
1027 if (header->os_type == LHA_OS_TYPE_OS9_68K
1028 && LHA_FILE_HAVE_EXTRA(header, LHA_FILE_UNIX_PERMS)) {
1029 header->os9_perms = header->unix_perms;
1030 header->extra_flags |= LHA_FILE_OS9_PERMS;
1031 }
1032
1033 // If OS-9 permissions were read, translate into Unix permissions.
1034
1035 if (LHA_FILE_HAVE_EXTRA(header, LHA_FILE_OS9_PERMS)) {
1036 os9_to_unix_permissions(header);
1037 }
1038
1039 // Was the "common" extended header read, which contains a CRC of
1040 // the full header? If so, perform a CRC check now.
1041
1042 if (LHA_FILE_HAVE_EXTRA(header, LHA_FILE_COMMON_CRC)
1043 && !check_common_crc(header)) {
1044 goto fail;
1045 }
1046
1047 return header;
1048 fail:
1049 lha_file_header_free(header);
1050 return NULL;
1051 }
1052
/**
 * Release a reference to a file header. The header and the strings it
 * owns are freed once the last reference has been released.
 *
 * @param header         Pointer to the file header structure. Passing
 *                       NULL is allowed and does nothing, as with free().
 */

void lha_file_header_free(LHAFileHeader *header)
{
	// Like free(), accept NULL so that callers need no special case.

	if (header == NULL) {
		return;
	}

	// Sanity check: never count down below zero.

	if (header->_refcount == 0) {
		return;
	}

	// Count down references and only free when all have been removed.

	--header->_refcount;

	if (header->_refcount > 0) {
		return;
	}

	// Free the separately allocated strings, then the header itself.

	free(header->filename);
	free(header->path);
	free(header->symlink_target);
	free(header->unix_username);
	free(header->unix_group);
	free(header);
}
1076
/**
 * Add a reference to a file header by incrementing its reference count.
 * Each added reference keeps the header alive until it is released again
 * with a matching call to lha_file_header_free.
 *
 * @param header         Pointer to the file header structure.
 */

void lha_file_header_add_ref(LHAFileHeader *header)
{
	header->_refcount += 1;
}
1081
1082