1 /* This file is part of libmspack.
2  * (C) 2003-2018 Stuart Caie.
3  *
4  * libmspack is free software; you can redistribute it and/or modify it under
5  * the terms of the GNU Lesser General Public License (LGPL) version 2.1
6  *
7  * For further details, see the file COPYING.LIB distributed with libmspack
8  */
9 
10 /* CHM decompression implementation */
11 
12 #include <system.h>
13 #include <chm.h>
14 
15 /* prototypes */
static struct mschmd_header *chmd_open(struct mschm_decompressor *base,
                                       const char *filename);
static struct mschmd_header *chmd_fast_open(struct mschm_decompressor *base,
                                            const char *filename);
static struct mschmd_header *chmd_real_open(struct mschm_decompressor *base,
                                            const char *filename, int entire);
static void chmd_close(struct mschm_decompressor *base,
                       struct mschmd_header *chm);
static int chmd_read_headers(struct mspack_system *sys, struct mspack_file *fh,
                             struct mschmd_header *chm, int entire);
static int chmd_fast_find(struct mschm_decompressor *base,
                          struct mschmd_header *chm, const char *filename,
                          struct mschmd_file *f_ptr, int f_size);
static unsigned char *read_chunk(struct mschm_decompressor_p *self,
                                 struct mschmd_header *chm,
                                 struct mspack_file *fh,
                                 unsigned int chunk_num);
static int search_chunk(struct mschmd_header *chm,
                        const unsigned char *chunk, const char *filename,
                        const unsigned char **result,
                        const unsigned char **result_end);
static inline int compare(const char *s1, const char *s2, int l1, int l2);
static int chmd_extract(struct mschm_decompressor *base,
                        struct mschmd_file *file, const char *filename);
static int chmd_sys_write(struct mspack_file *file, void *buffer, int bytes);
static int chmd_init_decomp(struct mschm_decompressor_p *self,
                            struct mschmd_file *file);
static int read_reset_table(struct mschm_decompressor_p *self,
                            struct mschmd_sec_mscompressed *sec,
                            unsigned int entry,
                            off_t *length_ptr, off_t *offset_ptr);
static int read_spaninfo(struct mschm_decompressor_p *self,
                         struct mschmd_sec_mscompressed *sec,
                         off_t *length_ptr);
static int find_sys_file(struct mschm_decompressor_p *self,
                         struct mschmd_sec_mscompressed *sec,
                         struct mschmd_file **f_ptr, const char *name);
static unsigned char *read_sys_file(struct mschm_decompressor_p *self,
                                    struct mschmd_file *file);
static int chmd_error(struct mschm_decompressor *base);
static int read_off64(off_t *var, unsigned char *mem,
                      struct mspack_system *sys, struct mspack_file *fh);
61 
62 /* filenames of the system files used for decompression.
63  * Content and ControlData are essential.
64  * ResetTable is preferred, but SpanInfo can be used if not available
65  */
/* Both the characters and the pointers are const: these names are fixed
 * lookup keys and must never be re-pointed at run time.
 * chmd_read_headers() matches them by hard-coded length (40, 44, 41 and
 * 105 bytes respectively), so keep those numbers in step with any edit here.
 */
static const char *const content_name  =
  "::DataSpace/Storage/MSCompressed/Content";
static const char *const control_name  =
  "::DataSpace/Storage/MSCompressed/ControlData";
static const char *const spaninfo_name =
  "::DataSpace/Storage/MSCompressed/SpanInfo";
static const char *const rtable_name   =
  "::DataSpace/Storage/MSCompressed/Transform/"
  "{7FC28940-9D31-11D0-9B27-00A0C91E9C7C}/InstanceData/ResetTable";
71 
72 /***************************************
73  * MSPACK_CREATE_CHM_DECOMPRESSOR
74  ***************************************
75  * constructor
76  */
77 struct mschm_decompressor *
mspack_create_chm_decompressor(struct mspack_system * sys)78   mspack_create_chm_decompressor(struct mspack_system *sys)
79 {
80   struct mschm_decompressor_p *self = NULL;
81 
82   if (!sys) sys = mspack_default_system;
83   if (!mspack_valid_system(sys)) return NULL;
84 
85   if ((self = (struct mschm_decompressor_p *) sys->alloc(sys, sizeof(struct mschm_decompressor_p)))) {
86     self->base.open       = &chmd_open;
87     self->base.close      = &chmd_close;
88     self->base.extract    = &chmd_extract;
89     self->base.last_error = &chmd_error;
90     self->base.fast_open  = &chmd_fast_open;
91     self->base.fast_find  = &chmd_fast_find;
92     self->system          = sys;
93     self->error           = MSPACK_ERR_OK;
94     self->d               = NULL;
95   }
96   return (struct mschm_decompressor *) self;
97 }
98 
99 /***************************************
 * MSPACK_DESTROY_CHM_DECOMPRESSOR
101  ***************************************
102  * destructor
103  */
mspack_destroy_chm_decompressor(struct mschm_decompressor * base)104 void mspack_destroy_chm_decompressor(struct mschm_decompressor *base) {
105   struct mschm_decompressor_p *self = (struct mschm_decompressor_p *) base;
106   if (self) {
107     struct mspack_system *sys = self->system;
108     if (self->d) {
109       if (self->d->infh)  sys->close(self->d->infh);
110       if (self->d->state) lzxd_free(self->d->state);
111       sys->free(self->d);
112     }
113     sys->free(self);
114   }
115 }
116 
117 /***************************************
118  * CHMD_OPEN
119  ***************************************
120  * opens a file and tries to read it as a CHM file.
121  * Calls chmd_real_open() with entire=1.
122  */
/* Full open: asks chmd_real_open() to read the headers and every file entry.
 * Returns whatever chmd_real_open() returns.
 */
static struct mschmd_header *
chmd_open(struct mschm_decompressor *base, const char *filename)
{
  const int entire = 1;

  return chmd_real_open(base, filename, entire);
}
128 
129 /***************************************
130  * CHMD_FAST_OPEN
131  ***************************************
132  * opens a file and tries to read it as a CHM file, but does not read
133  * the file headers. Calls chmd_real_open() with entire=0
134  */
/* Fast open: asks chmd_real_open() to read the headers only, skipping the
 * file entries. Returns whatever chmd_real_open() returns.
 */
static struct mschmd_header *
chmd_fast_open(struct mschm_decompressor *base, const char *filename)
{
  const int entire = 0;

  return chmd_real_open(base, filename, entire);
}
140 
141 /***************************************
142  * CHMD_REAL_OPEN
143  ***************************************
144  * the real implementation of chmd_open() and chmd_fast_open(). It simply
145  * passes the "entire" parameter to chmd_read_headers(), which will then
 * either read all headers, or a bare minimum.
147  */
chmd_real_open(struct mschm_decompressor * base,const char * filename,int entire)148 static struct mschmd_header *chmd_real_open(struct mschm_decompressor *base,
149                                             const char *filename, int entire)
150 {
151   struct mschm_decompressor_p *self = (struct mschm_decompressor_p *) base;
152   struct mschmd_header *chm = NULL;
153   struct mspack_system *sys;
154   struct mspack_file *fh;
155   int error;
156 
157   if (!base) return NULL;
158   sys = self->system;
159 
160   if ((fh = sys->open(sys, filename, MSPACK_SYS_OPEN_READ))) {
161     if ((chm = (struct mschmd_header *) sys->alloc(sys, sizeof(struct mschmd_header)))) {
162       chm->filename = filename;
163       error = chmd_read_headers(sys, fh, chm, entire);
164       if (error) {
165         /* if the error is DATAFORMAT, and there are some results, return
166          * partial results with a warning, rather than nothing */
167         if (error == MSPACK_ERR_DATAFORMAT && (chm->files || chm->sysfiles)) {
168           sys->message(fh, "WARNING; contents are corrupt");
169           error = MSPACK_ERR_OK;
170         }
171         else {
172           chmd_close(base, chm);
173           chm = NULL;
174         }
175       }
176       self->error = error;
177     }
178     else {
179       self->error = MSPACK_ERR_NOMEMORY;
180     }
181     sys->close(fh);
182   }
183   else {
184     self->error = MSPACK_ERR_OPEN;
185   }
186   return chm;
187 }
188 
189 /***************************************
190  * CHMD_CLOSE
191  ***************************************
192  * frees all memory associated with a given mschmd_header
193  */
chmd_close(struct mschm_decompressor * base,struct mschmd_header * chm)194 static void chmd_close(struct mschm_decompressor *base,
195                        struct mschmd_header *chm)
196 {
197   struct mschm_decompressor_p *self = (struct mschm_decompressor_p *) base;
198   struct mschmd_file *fi, *nfi;
199   struct mspack_system *sys;
200   unsigned int i;
201 
202   if (!base) return;
203   sys = self->system;
204 
205   self->error = MSPACK_ERR_OK;
206 
207   /* free files */
208   for (fi = chm->files; fi; fi = nfi) {
209     nfi = fi->next;
210     sys->free(fi);
211   }
212   for (fi = chm->sysfiles; fi; fi = nfi) {
213     nfi = fi->next;
214     sys->free(fi);
215   }
216 
217   /* if this CHM was being decompressed, free decompression state */
218   if (self->d && (self->d->chm == chm)) {
219     if (self->d->infh) sys->close(self->d->infh);
220     if (self->d->state) lzxd_free(self->d->state);
221     sys->free(self->d);
222     self->d = NULL;
223   }
224 
225   /* if this CHM had a chunk cache, free it and contents */
226   if (chm->chunk_cache) {
227       for (i = 0; i < chm->num_chunks; i++) sys->free(chm->chunk_cache[i]);
228       sys->free(chm->chunk_cache);
229   }
230 
231   sys->free(chm);
232 }
233 
234 /***************************************
235  * CHMD_READ_HEADERS
236  ***************************************
237  * reads the basic CHM file headers. If the "entire" parameter is
238  * non-zero, all file entries will also be read. fills out a pre-existing
239  * mschmd_header structure, allocates memory for files as necessary
240  */
241 
242 /* The GUIDs found in CHM headers */
/* Two 16-byte GUIDs stored back to back, in the byte order they have on
 * disk (the first three fields appear byte-reversed relative to the text
 * form). They differ only in their first byte.
 */
static const unsigned char guids[32] = {
  /* {7C01FD10-7BAA-11D0-9E0C-00A0-C922-E6EC} */
  0x10, 0xFD, 0x01, 0x7C,
  0xAA, 0x7B,
  0xD0, 0x11,
  0x9E, 0x0C, 0x00, 0xA0, 0xC9, 0x22, 0xE6, 0xEC,

  /* {7C01FD11-7BAA-11D0-9E0C-00A0-C922-E6EC} */
  0x11, 0xFD, 0x01, 0x7C,
  0xAA, 0x7B,
  0xD0, 0x11,
  0x9E, 0x0C, 0x00, 0xA0, 0xC9, 0x22, 0xE6, 0xEC
};
251 
252 /* reads an encoded integer into a variable; 7 bits of data per byte,
253  * the high bit is used to indicate that there is another byte */
/* Expects the enclosing function to provide a read cursor "p", a limit
 * pointer "end" and a label "chunk_end". Control jumps to chunk_end, with p
 * left where it was, if the integer is not complete before p reaches end.
 * On success p points just past the last byte of the integer.
 */
#define READ_ENCINT(var) do {                                   \
    unsigned char encint_byte_;                                 \
    (var) = 0;                                                  \
    do {                                                        \
        if (p >= end) goto chunk_end;                           \
        encint_byte_ = *p++;                                    \
        (var) = ((var) << 7) | (encint_byte_ & 0x7F);           \
    } while (encint_byte_ & 0x80);                              \
} while (0)
261 
chmd_read_headers(struct mspack_system * sys,struct mspack_file * fh,struct mschmd_header * chm,int entire)262 static int chmd_read_headers(struct mspack_system *sys, struct mspack_file *fh,
263                              struct mschmd_header *chm, int entire)
264 {
265   unsigned int section, name_len, x, errors, num_chunks;
266   unsigned char buf[0x54], *chunk = NULL, *name, *p, *end;
267   struct mschmd_file *fi, *link = NULL;
268   off_t offset, length;
269   int num_entries;
270 
271   /* initialise pointers */
272   chm->files         = NULL;
273   chm->sysfiles      = NULL;
274   chm->chunk_cache   = NULL;
275   chm->sec0.base.chm = chm;
276   chm->sec0.base.id  = 0;
277   chm->sec1.base.chm = chm;
278   chm->sec1.base.id  = 1;
279   chm->sec1.content  = NULL;
280   chm->sec1.control  = NULL;
281   chm->sec1.spaninfo = NULL;
282   chm->sec1.rtable   = NULL;
283 
284   /* read the first header */
285   if (sys->read(fh, &buf[0], chmhead_SIZEOF) != chmhead_SIZEOF) {
286     return MSPACK_ERR_READ;
287   }
288 
289   /* check ITSF signature */
290   if (EndGetI32(&buf[chmhead_Signature]) != 0x46535449) {
291     return MSPACK_ERR_SIGNATURE;
292   }
293 
294   /* check both header GUIDs */
295   if (memcmp(&buf[chmhead_GUID1], &guids[0], 32L) != 0) {
296     D(("incorrect GUIDs"))
297     return MSPACK_ERR_SIGNATURE;
298   }
299 
300   chm->version   = EndGetI32(&buf[chmhead_Version]);
301   chm->timestamp = EndGetM32(&buf[chmhead_Timestamp]);
302   chm->language  = EndGetI32(&buf[chmhead_LanguageID]);
303   if (chm->version > 3) {
304     sys->message(fh, "WARNING; CHM version > 3");
305   }
306 
307   /* read the header section table */
308   if (sys->read(fh, &buf[0], chmhst3_SIZEOF) != chmhst3_SIZEOF) {
309     return MSPACK_ERR_READ;
310   }
311 
312   /* chmhst3_OffsetCS0 does not exist in version 1 or 2 CHM files.
313    * The offset will be corrected later, once HS1 is read.
314    */
315   if (read_off64(&offset,           &buf[chmhst_OffsetHS0],  sys, fh) ||
316       read_off64(&chm->dir_offset,  &buf[chmhst_OffsetHS1],  sys, fh) ||
317       read_off64(&chm->sec0.offset, &buf[chmhst3_OffsetCS0], sys, fh))
318   {
319     return MSPACK_ERR_DATAFORMAT;
320   }
321 
322   /* seek to header section 0 */
323   if (sys->seek(fh, offset, MSPACK_SYS_SEEK_START)) {
324     return MSPACK_ERR_SEEK;
325   }
326 
327   /* read header section 0 */
328   if (sys->read(fh, &buf[0], chmhs0_SIZEOF) != chmhs0_SIZEOF) {
329     return MSPACK_ERR_READ;
330   }
331   if (read_off64(&chm->length, &buf[chmhs0_FileLen], sys, fh)) {
332     return MSPACK_ERR_DATAFORMAT;
333   }
334 
335   /* seek to header section 1 */
336   if (sys->seek(fh, chm->dir_offset, MSPACK_SYS_SEEK_START)) {
337     return MSPACK_ERR_SEEK;
338   }
339 
340   /* read header section 1 */
341   if (sys->read(fh, &buf[0], chmhs1_SIZEOF) != chmhs1_SIZEOF) {
342     return MSPACK_ERR_READ;
343   }
344 
345   chm->dir_offset = sys->tell(fh);
346   chm->chunk_size = EndGetI32(&buf[chmhs1_ChunkSize]);
347   chm->density    = EndGetI32(&buf[chmhs1_Density]);
348   chm->depth      = EndGetI32(&buf[chmhs1_Depth]);
349   chm->index_root = EndGetI32(&buf[chmhs1_IndexRoot]);
350   chm->num_chunks = EndGetI32(&buf[chmhs1_NumChunks]);
351   chm->first_pmgl = EndGetI32(&buf[chmhs1_FirstPMGL]);
352   chm->last_pmgl  = EndGetI32(&buf[chmhs1_LastPMGL]);
353 
354   if (chm->version < 3) {
355     /* versions before 3 don't have chmhst3_OffsetCS0 */
356     chm->sec0.offset = chm->dir_offset + (chm->chunk_size * chm->num_chunks);
357   }
358 
359   /* check if content offset or file size is wrong */
360   if (chm->sec0.offset > chm->length) {
361     D(("content section begins after file has ended"))
362     return MSPACK_ERR_DATAFORMAT;
363   }
364 
365   /* ensure there are chunks and that chunk size is
366    * large enough for signature and num_entries */
367   if (chm->chunk_size < (pmgl_Entries + 2)) {
368     D(("chunk size not large enough"))
369     return MSPACK_ERR_DATAFORMAT;
370   }
371   if (chm->num_chunks == 0) {
372     D(("no chunks"))
373     return MSPACK_ERR_DATAFORMAT;
374   }
375 
376   /* The chunk_cache data structure is not great; large values for num_chunks
377    * or num_chunks*chunk_size can exhaust all memory. Until a better chunk
378    * cache is implemented, put arbitrary limits on num_chunks and chunk size.
379    */
380   if (chm->num_chunks > 100000) {
381     D(("more than 100,000 chunks"))
382     return MSPACK_ERR_DATAFORMAT;
383   }
384   if (chm->chunk_size > 8192) {
385     D(("chunk size over 8192 (get in touch if this is valid)"))
386     return MSPACK_ERR_DATAFORMAT;
387   }
388   if ((off_t)chm->chunk_size * (off_t)chm->num_chunks > chm->length) {
389     D(("chunks larger than entire file"))
390     return MSPACK_ERR_DATAFORMAT;
391   }
392 
393   /* common sense checks on header section 1 fields */
394   if (chm->chunk_size != 4096) {
395     sys->message(fh, "WARNING; chunk size is not 4096");
396   }
397   if (chm->first_pmgl != 0) {
398     sys->message(fh, "WARNING; first PMGL chunk is not zero");
399   }
400   if (chm->first_pmgl > chm->last_pmgl) {
401     D(("first pmgl chunk is after last pmgl chunk"))
402     return MSPACK_ERR_DATAFORMAT;
403   }
404   if (chm->index_root != 0xFFFFFFFF && chm->index_root >= chm->num_chunks) {
405     D(("index_root outside valid range"))
406     return MSPACK_ERR_DATAFORMAT;
407   }
408 
409   /* if we are doing a quick read, stop here! */
410   if (!entire) {
411     return MSPACK_ERR_OK;
412   }
413 
414   /* seek to the first PMGL chunk, and reduce the number of chunks to read */
415   if ((x = chm->first_pmgl) != 0) {
416     if (sys->seek(fh,(off_t) (x * chm->chunk_size), MSPACK_SYS_SEEK_CUR)) {
417       return MSPACK_ERR_SEEK;
418     }
419   }
420   num_chunks = chm->last_pmgl - x + 1;
421 
422   if (!(chunk = (unsigned char *) sys->alloc(sys, (size_t)chm->chunk_size))) {
423     return MSPACK_ERR_NOMEMORY;
424   }
425 
426   /* read and process all chunks from FirstPMGL to LastPMGL */
427   errors = 0;
428   while (num_chunks--) {
429     /* read next chunk */
430     if (sys->read(fh, chunk, (int)chm->chunk_size) != (int)chm->chunk_size) {
431       sys->free(chunk);
432       return MSPACK_ERR_READ;
433     }
434 
435     /* process only directory (PMGL) chunks */
436     if (EndGetI32(&chunk[pmgl_Signature]) != 0x4C474D50) continue;
437 
438     if (EndGetI32(&chunk[pmgl_QuickRefSize]) < 2) {
439       sys->message(fh, "WARNING; PMGL quickref area is too small");
440     }
441     if (EndGetI32(&chunk[pmgl_QuickRefSize]) >
442         (chm->chunk_size - pmgl_Entries))
443     {
444       sys->message(fh, "WARNING; PMGL quickref area is too large");
445     }
446 
447     p = &chunk[pmgl_Entries];
448     end = &chunk[chm->chunk_size - 2];
449     num_entries = EndGetI16(end);
450 
451     while (num_entries--) {
452       READ_ENCINT(name_len);
453       if (name_len > (unsigned int) (end - p)) goto chunk_end;
454       name = p; p += name_len;
455       READ_ENCINT(section);
456       READ_ENCINT(offset);
457       READ_ENCINT(length);
458 
459       /* ignore blank or one-char (e.g. "/") filenames we'd return as blank */
460       if (name_len < 2 || !name[0] || !name[1]) continue;
461 
462       /* empty files and directory names are stored as a file entry at
463        * offset 0 with length 0. We want to keep empty files, but not
464        * directory names, which end with a "/" */
465       if ((offset == 0) && (length == 0)) {
466         if ((name_len > 0) && (name[name_len-1] == '/')) continue;
467       }
468 
469       if (section > 1) {
470         sys->message(fh, "invalid section number '%u'.", section);
471         continue;
472       }
473 
474       if (!(fi = (struct mschmd_file *) sys->alloc(sys, sizeof(struct mschmd_file) + name_len + 1))) {
475         sys->free(chunk);
476         return MSPACK_ERR_NOMEMORY;
477       }
478 
479       fi->next     = NULL;
480       fi->filename = (char *) &fi[1];
481       fi->section  = ((section == 0) ? (struct mschmd_section *) (&chm->sec0)
482                                      : (struct mschmd_section *) (&chm->sec1));
483       fi->offset   = offset;
484       fi->length   = length;
485       sys->copy(name, fi->filename, (size_t) name_len);
486       fi->filename[name_len] = '\0';
487 
488       if (name[0] == ':' && name[1] == ':') {
489         /* system file */
490         if (name_len == 40 && memcmp(name, content_name, 40) == 0) {
491           chm->sec1.content = fi;
492         }
493         else if (name_len == 44 && memcmp(name, control_name, 44) == 0) {
494           chm->sec1.control = fi;
495         }
496         else if (name_len == 41 && memcmp(name, spaninfo_name, 41) == 0) {
497           chm->sec1.spaninfo = fi;
498         }
499         else if (name_len == 105 && memcmp(name, rtable_name, 105) == 0) {
500           chm->sec1.rtable = fi;
501         }
502         fi->next = chm->sysfiles;
503         chm->sysfiles = fi;
504       }
505       else {
506         /* normal file */
507         if (link) link->next = fi; else chm->files = fi;
508         link = fi;
509       }
510     }
511 
512     /* this is reached either when num_entries runs out, or if
513      * reading data from the chunk reached a premature end of chunk */
514   chunk_end:
515     if (num_entries >= 0) {
516       D(("chunk ended before all entries could be read"))
517       errors++;
518     }
519 
520   }
521   sys->free(chunk);
522   return (errors > 0) ? MSPACK_ERR_DATAFORMAT : MSPACK_ERR_OK;
523 }
524 
525 /***************************************
526  * CHMD_FAST_FIND
527  ***************************************
528  * uses PMGI index chunks and quickref data to quickly locate a file
529  * directly from the on-disk index.
530  *
 * TODO: protect against infinite loops in chunks (where pmgl_NextChunk
 * or a PMGI index entry points to an already visited chunk)
533  */
chmd_fast_find(struct mschm_decompressor * base,struct mschmd_header * chm,const char * filename,struct mschmd_file * f_ptr,int f_size)534 static int chmd_fast_find(struct mschm_decompressor *base,
535                           struct mschmd_header *chm, const char *filename,
536                           struct mschmd_file *f_ptr, int f_size)
537 {
538     struct mschm_decompressor_p *self = (struct mschm_decompressor_p *) base;
539     struct mspack_system *sys;
540     struct mspack_file *fh;
541     /* p and end are initialised to prevent MSVC warning about "potentially"
542      * uninitialised usage. This is provably untrue, but MS won't fix:
543      * https://developercommunity.visualstudio.com/content/problem/363489/c4701-false-positive-warning.html */
544     const unsigned char *chunk, *p = NULL, *end = NULL;
545     int err = MSPACK_ERR_OK, result = -1;
546     unsigned int n, sec;
547 
548     if (!self || !chm || !f_ptr || (f_size != sizeof(struct mschmd_file))) {
549         return MSPACK_ERR_ARGS;
550     }
551     sys = self->system;
552 
553     /* clear the results structure */
554     memset(f_ptr, 0, f_size);
555 
556     if (!(fh = sys->open(sys, chm->filename, MSPACK_SYS_OPEN_READ))) {
557         return MSPACK_ERR_OPEN;
558     }
559 
560     /* go through PMGI chunk hierarchy to reach PMGL chunk */
561     if (chm->index_root < chm->num_chunks) {
562         n = chm->index_root;
563         for (;;) {
564             if (!(chunk = read_chunk(self, chm, fh, n))) {
565                 sys->close(fh);
566                 return self->error;
567             }
568 
569             /* search PMGI/PMGL chunk. exit early if no entry found */
570             if ((result = search_chunk(chm, chunk, filename, &p, &end)) <= 0) {
571                 break;
572             }
573 
574             /* found result. loop around for next chunk if this is PMGI */
575             if (chunk[3] == 0x4C) break; else READ_ENCINT(n);
576         }
577     }
578     else {
579         /* PMGL chunks only, search from first_pmgl to last_pmgl */
580         for (n = chm->first_pmgl; n <= chm->last_pmgl;
581              n = EndGetI32(&chunk[pmgl_NextChunk]))
582         {
583             if (!(chunk = read_chunk(self, chm, fh, n))) {
584                 err = self->error;
585                 break;
586             }
587 
588             /* search PMGL chunk. exit if file found */
589             if ((result = search_chunk(chm, chunk, filename, &p, &end)) > 0) {
590                 break;
591             }
592 
593             /* stop simple infinite loops: can't visit the same chunk twice */
594             if (n == EndGetI32(&chunk[pmgl_NextChunk])) {
595                 break;
596             }
597         }
598     }
599 
600     /* if we found a file, read it */
601     if (result > 0) {
602         READ_ENCINT(sec);
603         f_ptr->section  = (sec == 0) ? (struct mschmd_section *) &chm->sec0
604                                      : (struct mschmd_section *) &chm->sec1;
605         READ_ENCINT(f_ptr->offset);
606         READ_ENCINT(f_ptr->length);
607     }
608     else if (result < 0) {
609         err = MSPACK_ERR_DATAFORMAT;
610     }
611 
612     sys->close(fh);
613     return self->error = err;
614 
615  chunk_end:
616     D(("read beyond end of chunk entries"))
617     sys->close(fh);
618     return self->error = MSPACK_ERR_DATAFORMAT;
619 }
620 
621 /* reads the given chunk into memory, storing it in a chunk cache
622  * so it doesn't need to be read from disk more than once
623  */
read_chunk(struct mschm_decompressor_p * self,struct mschmd_header * chm,struct mspack_file * fh,unsigned int chunk_num)624 static unsigned char *read_chunk(struct mschm_decompressor_p *self,
625                                  struct mschmd_header *chm,
626                                  struct mspack_file *fh,
627                                  unsigned int chunk_num)
628 {
629     struct mspack_system *sys = self->system;
630     unsigned char *buf;
631 
632     /* check arguments - most are already checked by chmd_fast_find */
633     if (chunk_num >= chm->num_chunks) return NULL;
634 
635     /* ensure chunk cache is available */
636     if (!chm->chunk_cache) {
637         size_t size = sizeof(unsigned char *) * chm->num_chunks;
638         if (!(chm->chunk_cache = (unsigned char **) sys->alloc(sys, size))) {
639             self->error = MSPACK_ERR_NOMEMORY;
640             return NULL;
641         }
642         memset(chm->chunk_cache, 0, size);
643     }
644 
645     /* try to answer out of chunk cache */
646     if (chm->chunk_cache[chunk_num]) return chm->chunk_cache[chunk_num];
647 
648     /* need to read chunk - allocate memory for it */
649     if (!(buf = (unsigned char *) sys->alloc(sys, chm->chunk_size))) {
650         self->error = MSPACK_ERR_NOMEMORY;
651         return NULL;
652     }
653 
654     /* seek to block and read it */
655     if (sys->seek(fh, (off_t) (chm->dir_offset + (chunk_num * chm->chunk_size)),
656                       MSPACK_SYS_SEEK_START))
657     {
658         self->error = MSPACK_ERR_SEEK;
659         sys->free(buf);
660         return NULL;
661     }
662     if (sys->read(fh, buf, (int)chm->chunk_size) != (int)chm->chunk_size) {
663         self->error = MSPACK_ERR_READ;
664         sys->free(buf);
665         return NULL;
666     }
667 
668     /* check the signature. Is is PMGL or PMGI? */
669     if (!((buf[0] == 0x50) && (buf[1] == 0x4D) && (buf[2] == 0x47) &&
670           ((buf[3] == 0x4C) || (buf[3] == 0x49))))
671     {
672         self->error = MSPACK_ERR_SEEK;
673         sys->free(buf);
674         return NULL;
675     }
676 
677     /* all OK. Store chunk in cache and return it */
678     return chm->chunk_cache[chunk_num] = buf;
679 }
680 
681 /* searches a PMGI/PMGL chunk for a given filename entry. Returns -1 on
682  * data format error, 0 if entry definitely not found, 1 if entry
683  * found. In the latter case, *result and *result_end are set pointing
684  * to that entry's data (either the "next chunk" ENCINT for a PMGI or
685  * the section, offset and length ENCINTs for a PMGL).
686  *
687  * In the case of PMGL chunks, the entry has definitely been
688  * found. In the case of PMGI chunks, the entry which points to the
689  * chunk that may eventually contain that entry has been found.
690  */
search_chunk(struct mschmd_header * chm,const unsigned char * chunk,const char * filename,const unsigned char ** result,const unsigned char ** result_end)691 static int search_chunk(struct mschmd_header *chm,
692                         const unsigned char *chunk,
693                         const char *filename,
694                         const unsigned char **result,
695                         const unsigned char **result_end)
696 {
697     const unsigned char *start, *end, *p;
698     unsigned int qr_size, num_entries, qr_entries, qr_density, name_len;
699     unsigned int L, R, M, fname_len, entries_off, is_pmgl;
700     int cmp;
701 
702     fname_len = strlen(filename);
703 
704     /* PMGL chunk or PMGI chunk? (note: read_chunk() has already
705      * checked the rest of the characters in the chunk signature) */
706     if (chunk[3] == 0x4C) {
707         is_pmgl = 1;
708         entries_off = pmgl_Entries;
709     }
710     else {
711         is_pmgl = 0;
712         entries_off = pmgi_Entries;
713     }
714 
715     /*  Step 1: binary search first filename of each QR entry
716      *  - target filename == entry
717      *    found file
718      *  - target filename < all entries
719      *    file not found
720      *  - target filename > all entries
721      *    proceed to step 2 using final entry
722      *  - target filename between two searched entries
723      *    proceed to step 2
724      */
725     qr_size     = EndGetI32(&chunk[pmgl_QuickRefSize]);
726     start       = &chunk[chm->chunk_size - 2];
727     end         = &chunk[chm->chunk_size - qr_size];
728     num_entries = EndGetI16(start);
729     qr_density  = 1 + (1 << chm->density);
730     qr_entries  = (num_entries + qr_density-1) / qr_density;
731 
732     if (num_entries == 0) {
733         D(("chunk has no entries"))
734         return -1;
735     }
736 
737     if (qr_size > chm->chunk_size) {
738         D(("quickref size > chunk size"))
739         return -1;
740     }
741 
742     *result_end = end;
743 
744     if (((int)qr_entries * 2) > (start - end)) {
745         D(("WARNING; more quickrefs than quickref space"))
746         qr_entries = 0; /* but we can live with it */
747     }
748 
749     if (qr_entries > 0) {
750         L = 0;
751         R = qr_entries - 1;
752         do {
753             /* pick new midpoint */
754             M = (L + R) >> 1;
755 
756             /* compare filename with entry QR points to */
757             p = &chunk[entries_off + (M ? EndGetI16(start - (M << 1)) : 0)];
758             READ_ENCINT(name_len);
759             if (name_len > (unsigned int) (end - p)) goto chunk_end;
760             cmp = compare(filename, (char *)p, fname_len, name_len);
761 
762             if (cmp == 0) break;
763             else if (cmp < 0) { if (M) R = M - 1; else return 0; }
764             else if (cmp > 0) L = M + 1;
765         } while (L <= R);
766         M = (L + R) >> 1;
767 
768         if (cmp == 0) {
769             /* exact match! */
770             p += name_len;
771             *result = p;
772             return 1;
773         }
774 
775         /* otherwise, read the group of entries for QR entry M */
776         p = &chunk[entries_off + (M ? EndGetI16(start - (M << 1)) : 0)];
777         num_entries -= (M * qr_density);
778         if (num_entries > qr_density) num_entries = qr_density;
779     }
780     else {
781         p = &chunk[entries_off];
782     }
783 
784     /* Step 2: linear search through the set of entries reached in step 1.
785      * - filename == any entry
786      *   found entry
787      * - filename < all entries (PMGI) or any entry (PMGL)
788      *   entry not found, stop now
789      * - filename > all entries
790      *   entry not found (PMGL) / maybe found (PMGI)
791      * -
792      */
793     *result = NULL;
794     while (num_entries-- > 0) {
795         READ_ENCINT(name_len);
796         if (name_len > (unsigned int) (end - p)) goto chunk_end;
797         cmp = compare(filename, (char *)p, fname_len, name_len);
798         p += name_len;
799 
800         if (cmp == 0) {
801             /* entry found */
802             *result = p;
803             return 1;
804         }
805 
806         if (cmp < 0) {
807             /* entry not found (PMGL) / maybe found (PMGI) */
808             break;
809         }
810 
811         /* read and ignore the rest of this entry */
812         if (is_pmgl) {
813             READ_ENCINT(R); /* skip section */
814             READ_ENCINT(R); /* skip offset */
815             READ_ENCINT(R); /* skip length */
816         }
817         else {
818             *result = p; /* store potential final result */
819             READ_ENCINT(R); /* skip chunk number */
820         }
821     }
822 
823      /* PMGL? not found. PMGI? maybe found */
824      return (is_pmgl) ? 0 : (*result ? 1 : 0);
825 
826  chunk_end:
827     D(("reached end of chunk data while searching"))
828     return -1;
829 }
830 
/* TOLOWER(x): lower-cases one character. Uses towlower() when the build
 * defines HAVE_TOWLOWER to a non-zero value, and tolower() otherwise. */
#if !HAVE_TOWLOWER
# include <ctype.h>
# define TOLOWER(x) tolower(x)
#else
# include <wctype.h>
# define TOLOWER(x) towlower(x)
#endif
838 
/* decodes a UTF-8 character from s[] into c. Will not read past e.
 * doesn't test that extension bytes are %10xxxxxx.
 * allows some overlong encodings.
 *
 * s - modifiable pointer to unsigned char; must be < e on entry. It is
 *     advanced past the bytes consumed (only the lead byte is consumed
 *     when the sequence is invalid or truncated).
 * e - pointer to one past the last readable byte.
 * c - integer lvalue that receives the code point, or 0xFFFD for an
 *     invalid lead byte, a truncated sequence, or a value > 0x10FFFF.
 *
 * The remaining length is tested as (e) - (s) so that no pointer beyond
 * one-past-the-end is ever formed. Arguments are evaluated several times.
 */
#define GET_UTF8_CHAR(s, e, c) do {                                     \
    unsigned char utf8_lead_ = *(s)++;                                  \
    if (utf8_lead_ < 0x80) (c) = utf8_lead_;                            \
    else if (utf8_lead_ >= 0xC2 && utf8_lead_ < 0xE0 &&                 \
             (e) - (s) >= 1) {                                          \
        (c) = ((utf8_lead_ & 0x1F) << 6) | (*(s)++ & 0x3F);             \
    }                                                                   \
    else if (utf8_lead_ >= 0xE0 && utf8_lead_ < 0xF0 &&                 \
             (e) - (s) >= 2) {                                          \
        (c) = ((utf8_lead_ & 0x0F) << 12) | (((s)[0] & 0x3F) << 6) |    \
              ((s)[1] & 0x3F);                                          \
        (s) += 2;                                                       \
    }                                                                   \
    else if (utf8_lead_ >= 0xF0 && utf8_lead_ <= 0xF5 &&                \
             (e) - (s) >= 3) {                                          \
        (c) = ((utf8_lead_ & 0x07) << 18) | (((s)[0] & 0x3F) << 12) |   \
              (((s)[1] & 0x3F) << 6) | ((s)[2] & 0x3F);                 \
        if ((c) > 0x10FFFF) (c) = 0xFFFD;                               \
        (s) += 3;                                                       \
    }                                                                   \
    else (c) = 0xFFFD;                                                  \
} while (0)
861 
/* Case-insensitive ordering of two UTF-8 encoded strings.
 *
 * Both strings are given with explicit byte lengths (l1, l2); a zero byte
 * is ordinary data, not a terminator. Characters are decoded pairwise with
 * GET_UTF8_CHAR and, when they differ, compared again after TOLOWER.
 *
 * Returns the difference of the first pair of characters that still differ
 * after lower-casing, or l1 - l2 once either string has been used up.
 */
static inline int compare(const char *s1, const char *s2, int l1, int l2) {
    const unsigned char *a = (const unsigned char *) s1;
    const unsigned char *b = (const unsigned char *) s2;
    const unsigned char *a_end = a + l1;
    const unsigned char *b_end = b + l2;
    int ca, cb;

    for (; a < a_end && b < b_end; ) {
        GET_UTF8_CHAR(a, a_end, ca);
        GET_UTF8_CHAR(b, b_end, cb);
        if (ca != cb) {
            /* not identical: they may still match ignoring case */
            ca = TOLOWER(ca);
            cb = TOLOWER(cb);
            if (ca != cb) return ca - cb;
        }
    }

    /* one string is a prefix of the other (or they match): order by length */
    return l1 - l2;
}
880 
881 
882 /***************************************
883  * CHMD_EXTRACT
884  ***************************************
885  * extracts a file from a CHM helpfile
886  */
chmd_extract(struct mschm_decompressor * base,struct mschmd_file * file,const char * filename)887 static int chmd_extract(struct mschm_decompressor *base,
888                         struct mschmd_file *file, const char *filename)
889 {
890   struct mschm_decompressor_p *self = (struct mschm_decompressor_p *) base;
891   struct mspack_system *sys;
892   struct mschmd_header *chm;
893   struct mspack_file *fh;
894   off_t bytes;
895 
896   if (!self) return MSPACK_ERR_ARGS;
897   if (!file || !file->section) return self->error = MSPACK_ERR_ARGS;
898   sys = self->system;
899   chm = file->section->chm;
900 
901   /* create decompression state if it doesn't exist */
902   if (!self->d) {
903     self->d = (struct mschmd_decompress_state *) sys->alloc(sys, sizeof(struct mschmd_decompress_state));
904     if (!self->d) return self->error = MSPACK_ERR_NOMEMORY;
905     self->d->chm       = chm;
906     self->d->offset    = 0;
907     self->d->state     = NULL;
908     self->d->sys       = *sys;
909     self->d->sys.write = &chmd_sys_write;
910     self->d->infh      = NULL;
911     self->d->outfh     = NULL;
912   }
913 
914   /* open input chm file if not open, or the open one is a different chm */
915   if (!self->d->infh || (self->d->chm != chm)) {
916     if (self->d->infh)  sys->close(self->d->infh);
917     if (self->d->state) lzxd_free(self->d->state);
918     self->d->chm    = chm;
919     self->d->offset = 0;
920     self->d->state  = NULL;
921     self->d->infh   = sys->open(sys, chm->filename, MSPACK_SYS_OPEN_READ);
922     if (!self->d->infh) return self->error = MSPACK_ERR_OPEN;
923   }
924 
925   /* open file for output */
926   if (!(fh = sys->open(sys, filename, MSPACK_SYS_OPEN_WRITE))) {
927     return self->error = MSPACK_ERR_OPEN;
928   }
929 
930   /* if file is empty, simply creating it is enough */
931   if (!file->length) {
932     sys->close(fh);
933     return self->error = MSPACK_ERR_OK;
934   }
935 
936   self->error = MSPACK_ERR_OK;
937 
938   switch (file->section->id) {
939   case 0: /* Uncompressed section file */
940     /* simple seek + copy */
941     if (sys->seek(self->d->infh, file->section->chm->sec0.offset
942                   + file->offset, MSPACK_SYS_SEEK_START))
943     {
944       self->error = MSPACK_ERR_SEEK;
945     }
946     else {
947       unsigned char buf[512];
948       off_t length = file->length;
949       while (length > 0) {
950         int run = sizeof(buf);
951         if ((off_t)run > length) run = (int)length;
952         if (sys->read(self->d->infh, &buf[0], run) != run) {
953           self->error = MSPACK_ERR_READ;
954           break;
955         }
956         if (sys->write(fh, &buf[0], run) != run) {
957           self->error = MSPACK_ERR_WRITE;
958           break;
959         }
960         length -= run;
961       }
962     }
963     break;
964 
965   case 1: /* MSCompressed section file */
966     /* (re)initialise compression state if we it is not yet initialised,
967      * or we have advanced too far and have to backtrack
968      */
969     if (!self->d->state || (file->offset < self->d->offset)) {
970       if (self->d->state) {
971         lzxd_free(self->d->state);
972         self->d->state = NULL;
973       }
974       if (chmd_init_decomp(self, file)) break;
975     }
976 
977     /* seek to input data */
978     if (sys->seek(self->d->infh, self->d->inoffset, MSPACK_SYS_SEEK_START)) {
979       self->error = MSPACK_ERR_SEEK;
980       break;
981     }
982 
983     /* get to correct offset. */
984     self->d->outfh = NULL;
985     if ((bytes = file->offset - self->d->offset)) {
986       self->error = lzxd_decompress(self->d->state, bytes);
987     }
988 
989     /* if getting to the correct offset was error free, unpack file */
990     if (!self->error) {
991       self->d->outfh = fh;
992       self->error = lzxd_decompress(self->d->state, file->length);
993     }
994 
995     /* save offset in input source stream, in case there is a section 0
996      * file between now and the next section 1 file extracted */
997     self->d->inoffset = sys->tell(self->d->infh);
998 
999     /* if an LZX error occured, the LZX decompressor is now useless */
1000     if (self->error) {
1001       if (self->d->state) lzxd_free(self->d->state);
1002       self->d->state = NULL;
1003     }
1004     break;
1005   }
1006 
1007   sys->close(fh);
1008   return self->error;
1009 }
1010 
1011 /***************************************
1012  * CHMD_SYS_WRITE
1013  ***************************************
1014  * chmd_sys_write is the internal writer function which the decompressor
1015  * uses. If either writes data to disk (self->d->outfh) with the real
1016  * sys->write() function, or does nothing with the data when
1017  * self->d->outfh == NULL. advances self->d->offset.
1018  */
chmd_sys_write(struct mspack_file * file,void * buffer,int bytes)1019 static int chmd_sys_write(struct mspack_file *file, void *buffer, int bytes) {
1020   struct mschm_decompressor_p *self = (struct mschm_decompressor_p *) file;
1021   self->d->offset += bytes;
1022   if (self->d->outfh) {
1023     return self->system->write(self->d->outfh, buffer, bytes);
1024   }
1025   return bytes;
1026 }
1027 
1028 /***************************************
1029  * CHMD_INIT_DECOMP
1030  ***************************************
1031  * Initialises the LZX decompressor to decompress the compressed stream,
1032  * from the nearest reset offset and length that is needed for the given
1033  * file.
1034  */
chmd_init_decomp(struct mschm_decompressor_p * self,struct mschmd_file * file)1035 static int chmd_init_decomp(struct mschm_decompressor_p *self,
1036                             struct mschmd_file *file)
1037 {
1038   int window_size, window_bits, reset_interval, entry, err;
1039   struct mspack_system *sys = self->system;
1040   struct mschmd_sec_mscompressed *sec;
1041   unsigned char *data;
1042   off_t length, offset;
1043 
1044   sec = (struct mschmd_sec_mscompressed *) file->section;
1045 
1046   /* ensure we have a mscompressed content section */
1047   err = find_sys_file(self, sec, &sec->content, content_name);
1048   if (err) return self->error = err;
1049 
1050   /* ensure we have a ControlData file */
1051   err = find_sys_file(self, sec, &sec->control, control_name);
1052   if (err) return self->error = err;
1053 
1054   /* read ControlData */
1055   if (sec->control->length < lzxcd_SIZEOF) {
1056     D(("ControlData file is too short"))
1057     return self->error = MSPACK_ERR_DATAFORMAT;
1058   }
1059   if (!(data = read_sys_file(self, sec->control))) {
1060     D(("can't read mscompressed control data file"))
1061     return self->error;
1062   }
1063 
1064   /* check LZXC signature */
1065   if (EndGetI32(&data[lzxcd_Signature]) != 0x43585A4C) {
1066     sys->free(data);
1067     return self->error = MSPACK_ERR_SIGNATURE;
1068   }
1069 
1070   /* read reset_interval and window_size and validate version number */
1071   switch (EndGetI32(&data[lzxcd_Version])) {
1072   case 1:
1073     reset_interval = EndGetI32(&data[lzxcd_ResetInterval]);
1074     window_size    = EndGetI32(&data[lzxcd_WindowSize]);
1075     break;
1076   case 2:
1077     reset_interval = EndGetI32(&data[lzxcd_ResetInterval]) * LZX_FRAME_SIZE;
1078     window_size    = EndGetI32(&data[lzxcd_WindowSize])    * LZX_FRAME_SIZE;
1079     break;
1080   default:
1081     D(("bad controldata version"))
1082     sys->free(data);
1083     return self->error = MSPACK_ERR_DATAFORMAT;
1084   }
1085 
1086   /* free ControlData */
1087   sys->free(data);
1088 
1089   /* find window_bits from window_size */
1090   switch (window_size) {
1091   case 0x008000: window_bits = 15; break;
1092   case 0x010000: window_bits = 16; break;
1093   case 0x020000: window_bits = 17; break;
1094   case 0x040000: window_bits = 18; break;
1095   case 0x080000: window_bits = 19; break;
1096   case 0x100000: window_bits = 20; break;
1097   case 0x200000: window_bits = 21; break;
1098   default:
1099     D(("bad controldata window size"))
1100     return self->error = MSPACK_ERR_DATAFORMAT;
1101   }
1102 
1103   /* validate reset_interval */
1104   if (reset_interval == 0 || reset_interval % LZX_FRAME_SIZE) {
1105     D(("bad controldata reset interval"))
1106     return self->error = MSPACK_ERR_DATAFORMAT;
1107   }
1108 
1109   /* which reset table entry would we like? */
1110   entry = file->offset / reset_interval;
1111   /* convert from reset interval multiple (usually 64k) to 32k frames */
1112   entry *= reset_interval / LZX_FRAME_SIZE;
1113 
1114   /* read the reset table entry */
1115   if (read_reset_table(self, sec, entry, &length, &offset)) {
1116     /* the uncompressed length given in the reset table is dishonest.
1117      * the uncompressed data is always padded out from the given
1118      * uncompressed length up to the next reset interval */
1119     length += reset_interval - 1;
1120     length &= -reset_interval;
1121   }
1122   else {
1123     /* if we can't read the reset table entry, just start from
1124      * the beginning. Use spaninfo to get the uncompressed length */
1125     entry = 0;
1126     offset = 0;
1127     err = read_spaninfo(self, sec, &length);
1128   }
1129   if (err) return self->error = err;
1130 
1131   /* get offset of compressed data stream:
1132    * = offset of uncompressed section from start of file
1133    * + offset of compressed stream from start of uncompressed section
1134    * + offset of chosen reset interval from start of compressed stream */
1135   self->d->inoffset = file->section->chm->sec0.offset + sec->content->offset + offset;
1136 
1137   /* set start offset and overall remaining stream length */
1138   self->d->offset = entry * LZX_FRAME_SIZE;
1139   length -= self->d->offset;
1140 
1141   /* initialise LZX stream */
1142   self->d->state = lzxd_init(&self->d->sys, self->d->infh,
1143                              (struct mspack_file *) self, window_bits,
1144                              reset_interval / LZX_FRAME_SIZE,
1145                              4096, length, 0);
1146   if (!self->d->state) self->error = MSPACK_ERR_NOMEMORY;
1147   return self->error;
1148 }
1149 
1150 /***************************************
1151  * READ_RESET_TABLE
1152  ***************************************
1153  * Reads one entry out of the reset table. Also reads the uncompressed
1154  * data length. Writes these to offset_ptr and length_ptr respectively.
1155  * Returns non-zero for success, zero for failure.
1156  */
read_reset_table(struct mschm_decompressor_p * self,struct mschmd_sec_mscompressed * sec,unsigned int entry,off_t * length_ptr,off_t * offset_ptr)1157 static int read_reset_table(struct mschm_decompressor_p *self,
1158                             struct mschmd_sec_mscompressed *sec,
1159                             unsigned int entry,
1160                             off_t *length_ptr, off_t *offset_ptr)
1161 {
1162     struct mspack_system *sys = self->system;
1163     unsigned char *data;
1164     unsigned int pos, entrysize;
1165 
1166     /* do we have a ResetTable file? */
1167     int err = find_sys_file(self, sec, &sec->rtable, rtable_name);
1168     if (err) return 0;
1169 
1170     /* read ResetTable file */
1171     if (sec->rtable->length < lzxrt_headerSIZEOF) {
1172         D(("ResetTable file is too short"))
1173         return 0;
1174     }
1175     if (!(data = read_sys_file(self, sec->rtable))) {
1176         D(("can't read reset table"))
1177         return 0;
1178     }
1179 
1180     /* check sanity of reset table */
1181     if (EndGetI32(&data[lzxrt_FrameLen]) != LZX_FRAME_SIZE) {
1182         D(("bad reset table frame length"))
1183         sys->free(data);
1184         return 0;
1185     }
1186 
1187     /* get the uncompressed length of the LZX stream */
1188     if (read_off64(length_ptr, &data[lzxrt_UncompLen], sys, self->d->infh)) {
1189         sys->free(data);
1190         return 0;
1191     }
1192 
1193     entrysize = EndGetI32(&data[lzxrt_EntrySize]);
1194     pos = EndGetI32(&data[lzxrt_TableOffset]) + (entry * entrysize);
1195 
1196     /* ensure reset table entry for this offset exists */
1197     if (entry < EndGetI32(&data[lzxrt_NumEntries]) &&
1198         pos <= (sec->rtable->length - entrysize))
1199     {
1200         switch (entrysize) {
1201         case 4:
1202             *offset_ptr = EndGetI32(&data[pos]);
1203             err = 0;
1204             break;
1205         case 8:
1206             err = read_off64(offset_ptr, &data[pos], sys, self->d->infh);
1207             break;
1208         default:
1209             D(("reset table entry size neither 4 nor 8"))
1210             err = 1;
1211             break;
1212         }
1213     }
1214     else {
1215         D(("bad reset interval"))
1216         err = 1;
1217     }
1218 
1219     /* free the reset table */
1220     sys->free(data);
1221 
1222     /* return success */
1223     return (err == 0);
1224 }
1225 
1226 /***************************************
1227  * READ_SPANINFO
1228  ***************************************
1229  * Reads the uncompressed data length from the spaninfo file.
1230  * Returns zero for success or a non-zero error code for failure.
1231  */
read_spaninfo(struct mschm_decompressor_p * self,struct mschmd_sec_mscompressed * sec,off_t * length_ptr)1232 static int read_spaninfo(struct mschm_decompressor_p *self,
1233                          struct mschmd_sec_mscompressed *sec,
1234                          off_t *length_ptr)
1235 {
1236     struct mspack_system *sys = self->system;
1237     unsigned char *data;
1238 
1239     /* find SpanInfo file */
1240     int err = find_sys_file(self, sec, &sec->spaninfo, spaninfo_name);
1241     if (err) return MSPACK_ERR_DATAFORMAT;
1242 
1243     /* check it's large enough */
1244     if (sec->spaninfo->length != 8) {
1245         D(("SpanInfo file is wrong size"))
1246         return MSPACK_ERR_DATAFORMAT;
1247     }
1248 
1249     /* read the SpanInfo file */
1250     if (!(data = read_sys_file(self, sec->spaninfo))) {
1251         D(("can't read SpanInfo file"))
1252         return self->error;
1253     }
1254 
1255     /* get the uncompressed length of the LZX stream */
1256     err = read_off64(length_ptr, data, sys, self->d->infh);
1257     sys->free(data);
1258     if (err) return MSPACK_ERR_DATAFORMAT;
1259 
1260     if (*length_ptr <= 0) {
1261         D(("output length is invalid"))
1262         return MSPACK_ERR_DATAFORMAT;
1263     }
1264 
1265     return MSPACK_ERR_OK;
1266 }
1267 
1268 /***************************************
1269  * FIND_SYS_FILE
1270  ***************************************
1271  * Uses chmd_fast_find to locate a system file, and fills out that system
1272  * file's entry and links it into the list of system files. Returns zero
1273  * for success, non-zero for both failure and the file not existing.
1274  */
find_sys_file(struct mschm_decompressor_p * self,struct mschmd_sec_mscompressed * sec,struct mschmd_file ** f_ptr,const char * name)1275 static int find_sys_file(struct mschm_decompressor_p *self,
1276                          struct mschmd_sec_mscompressed *sec,
1277                          struct mschmd_file **f_ptr, const char *name)
1278 {
1279     struct mspack_system *sys = self->system;
1280     struct mschmd_file result;
1281 
1282     /* already loaded */
1283     if (*f_ptr) return MSPACK_ERR_OK;
1284 
1285     /* try using fast_find to find the file - return DATAFORMAT error if
1286      * it fails, or successfully doesn't find the file */
1287     if (chmd_fast_find((struct mschm_decompressor *) self, sec->base.chm,
1288                        name, &result, (int)sizeof(result)) || !result.section)
1289     {
1290         return MSPACK_ERR_DATAFORMAT;
1291     }
1292 
1293     if (!(*f_ptr = (struct mschmd_file *) sys->alloc(sys, sizeof(result)))) {
1294         return MSPACK_ERR_NOMEMORY;
1295     }
1296 
1297     /* copy result */
1298     *(*f_ptr) = result;
1299     (*f_ptr)->filename = (char *) name;
1300 
1301     /* link file into sysfiles list */
1302     (*f_ptr)->next = sec->base.chm->sysfiles;
1303     sec->base.chm->sysfiles = *f_ptr;
1304     return MSPACK_ERR_OK;
1305 }
1306 
1307 /***************************************
1308  * READ_SYS_FILE
1309  ***************************************
1310  * Allocates memory for a section 0 (uncompressed) file and reads it into
1311  * memory.
1312  */
read_sys_file(struct mschm_decompressor_p * self,struct mschmd_file * file)1313 static unsigned char *read_sys_file(struct mschm_decompressor_p *self,
1314                                     struct mschmd_file *file)
1315 {
1316   struct mspack_system *sys = self->system;
1317   unsigned char *data = NULL;
1318   int len;
1319 
1320   if (!file || !file->section || (file->section->id != 0)) {
1321     self->error = MSPACK_ERR_DATAFORMAT;
1322     return NULL;
1323   }
1324 
1325   len = (int) file->length;
1326 
1327   if (!(data = (unsigned char *) sys->alloc(sys, (size_t) len))) {
1328     self->error = MSPACK_ERR_NOMEMORY;
1329     return NULL;
1330   }
1331   if (sys->seek(self->d->infh, file->section->chm->sec0.offset
1332                 + file->offset, MSPACK_SYS_SEEK_START))
1333   {
1334     self->error = MSPACK_ERR_SEEK;
1335     sys->free(data);
1336     return NULL;
1337   }
1338   if (sys->read(self->d->infh, data, len) != len) {
1339     self->error = MSPACK_ERR_READ;
1340     sys->free(data);
1341     return NULL;
1342   }
1343   return data;
1344 }
1345 
1346 /***************************************
1347  * CHMD_ERROR
1348  ***************************************
1349  * returns the last error that occurred
1350  */
chmd_error(struct mschm_decompressor * base)1351 static int chmd_error(struct mschm_decompressor *base) {
1352   struct mschm_decompressor_p *self = (struct mschm_decompressor_p *) base;
1353   return (self) ? self->error : MSPACK_ERR_ARGS;
1354 }
1355 
1356 /***************************************
1357  * READ_OFF64
1358  ***************************************
1359  * Reads a 64-bit signed integer from memory in Intel byte order.
1360  * If running on a system with a 64-bit off_t, this is simply done.
1361  * If running on a system with a 32-bit off_t, offsets up to 0x7FFFFFFF
1362  * are accepted, offsets beyond that cause an error message.
1363  */
read_off64(off_t * var,unsigned char * mem,struct mspack_system * sys,struct mspack_file * fh)1364 static int read_off64(off_t *var, unsigned char *mem,
1365                       struct mspack_system *sys, struct mspack_file *fh)
1366 {
1367 #if LARGEFILE_SUPPORT
1368     *var = EndGetI64(mem);
1369 #else
1370     *var = EndGetI32(mem);
1371     if ((*var & 0x80000000) || EndGetI32(mem+4)) {
1372         sys->message(fh, (char *)largefile_msg);
1373         return 1;
1374     }
1375 #endif
1376     return 0;
1377 }
1378