1 /*
2  * winchm.c: direct output of .CHM files.
3  */
4 
5 #include <assert.h>
6 #include <stdio.h>
7 
8 #include "halibut.h"
9 #include "tree234.h"
10 #include "lzx.h"
11 
/*
 * Byte-store helpers.
 *
 * Each macro writes the low 32 (or 16) bits of 'value' into the bytes
 * starting at 'cp', in the stated byte order. Both arguments are
 * evaluated exactly once, and 'cp' is parenthesised before the cast so
 * that any pointer expression (including arithmetic on a non-char
 * pointer) is fully evaluated first and only then addressed bytewise.
 * 'value' is converted to unsigned long before shifting, so negative
 * inputs are stored as their two's-complement low bits without relying
 * on implementation-defined right shifts of signed values.
 */

/* Store a 32-bit value at cp, least significant byte first. */
#define PUT_32BIT_LSB_FIRST(cp, value) do { \
  unsigned char *put_cp_ = (unsigned char *)(cp); \
  unsigned long put_val_ = (unsigned long)(value); \
  put_cp_[0] = (unsigned char)(put_val_ & 0xFF); \
  put_cp_[1] = (unsigned char)((put_val_ >> 8) & 0xFF); \
  put_cp_[2] = (unsigned char)((put_val_ >> 16) & 0xFF); \
  put_cp_[3] = (unsigned char)((put_val_ >> 24) & 0xFF); } while (0)

/* Store a 32-bit value at cp, most significant byte first. */
#define PUT_32BIT_MSB_FIRST(cp, value) do { \
  unsigned char *put_cp_ = (unsigned char *)(cp); \
  unsigned long put_val_ = (unsigned long)(value); \
  put_cp_[3] = (unsigned char)(put_val_ & 0xFF); \
  put_cp_[2] = (unsigned char)((put_val_ >> 8) & 0xFF); \
  put_cp_[1] = (unsigned char)((put_val_ >> 16) & 0xFF); \
  put_cp_[0] = (unsigned char)((put_val_ >> 24) & 0xFF); } while (0)

/* Store a 16-bit value at cp, least significant byte first. */
#define PUT_16BIT_LSB_FIRST(cp, value) do { \
  unsigned char *put_cp_ = (unsigned char *)(cp); \
  unsigned long put_val_ = (unsigned long)(value); \
  put_cp_[0] = (unsigned char)(put_val_ & 0xFF); \
  put_cp_[1] = (unsigned char)((put_val_ >> 8) & 0xFF); } while (0)
27 
/*
 * Append helpers: encode 'value' into a small stack buffer using the
 * matching PUT_* macro, then append those bytes to the rdstringc 'rs'
 * via rdaddsn().
 */

/* Append a 32-bit value, least significant byte first. */
#define RDADD_32BIT_LSB_FIRST(rs, value) do { \
        unsigned char rdadd_buf_[4]; \
        PUT_32BIT_LSB_FIRST(rdadd_buf_, value); \
        rdaddsn(rs, (void *)rdadd_buf_, sizeof rdadd_buf_); \
    } while (0)

/* Append a 32-bit value, most significant byte first. */
#define RDADD_32BIT_MSB_FIRST(rs, value) do { \
        unsigned char rdadd_buf_[4]; \
        PUT_32BIT_MSB_FIRST(rdadd_buf_, value); \
        rdaddsn(rs, (void *)rdadd_buf_, sizeof rdadd_buf_); \
    } while (0)

/* Append a 16-bit value, least significant byte first. */
#define RDADD_16BIT_LSB_FIRST(rs, value) do { \
        unsigned char rdadd_buf_[2]; \
        PUT_16BIT_LSB_FIRST(rdadd_buf_, value); \
        rdaddsn(rs, (void *)rdadd_buf_, sizeof rdadd_buf_); \
    } while (0)
45 
/*
 * Append a 16-byte GUID to 'rs': one 32-bit word and two 16-bit
 * halfwords, each least significant byte first, followed by the eight
 * trailing bytes b0..b7 in the order given.
 */
static void guid(rdstringc *rs, unsigned long w0,
                 unsigned short h0, unsigned short h1,
                 unsigned char b0, unsigned char b1,
                 unsigned char b2, unsigned char b3,
                 unsigned char b4, unsigned char b5,
                 unsigned char b6, unsigned char b7)
{
    unsigned char tail[8];
    int k;

    tail[0] = b0;
    tail[1] = b1;
    tail[2] = b2;
    tail[3] = b3;
    tail[4] = b4;
    tail[5] = b5;
    tail[6] = b6;
    tail[7] = b7;

    RDADD_32BIT_LSB_FIRST(rs, w0);
    RDADD_16BIT_LSB_FIRST(rs, h0);
    RDADD_16BIT_LSB_FIRST(rs, h1);

    for (k = 0; k < 8; k++)
        rdaddc(rs, tail[k]);
}
65 
/*
 * Write the outermost CHM container into 'rs': the ITSF main header,
 * the small 'header section', then the already-built 'directory' blob
 * followed by the 'content0' blob.
 *
 * Several header fields hold offsets or sizes that are only known
 * after later data has been emitted. Those are written as zero first;
 * the position of each such field is remembered in an at_* variable
 * and the real value is patched in place once it is known.
 */
static void itsf(rdstringc *rs,
                 const rdstringc *directory, const rdstringc *content0)
{
    int at_hdr_size;                   /* main header size field */
    int at_sect_off, at_sect_size;     /* header section offset/size fields */
    int at_dir_off, at_c0_off;         /* directory / content 0 offset fields */
    int at_file_size;                  /* total file size field */
    int sect_start;                    /* where the header section begins */

    /* ---- Main file header ---- */
    rdaddsc(rs, "ITSF");                   /* magic number */
    RDADD_32BIT_LSB_FIRST(rs, 3);          /* file format version */

    at_hdr_size = rs->pos;
    RDADD_32BIT_LSB_FIRST(rs, 0);          /* main header size (patched) */

    RDADD_32BIT_LSB_FIRST(rs, 1);          /* unknown, always observed to be 1 */
    RDADD_32BIT_MSB_FIRST(rs, 0x12345678); /* timestamp (FIXME) */
    RDADD_32BIT_LSB_FIRST(rs, 0x809);      /* language code (FIXME: configurable) */
    guid(rs,0x7C01FD10,0x7BAA,0x11D0,0x9E,0x0C,0x00,0xA0,0xC9,0x22,0xE6,0xEC);
    guid(rs,0x7C01FD11,0x7BAA,0x11D0,0x9E,0x0C,0x00,0xA0,0xC9,0x22,0xE6,0xEC);

    /* Each of the following is a 64-bit field: low word, then a zero
     * high word. */
    at_sect_off = rs->pos;
    RDADD_32BIT_LSB_FIRST(rs, 0);          /* header section offset (patched) */
    RDADD_32BIT_LSB_FIRST(rs, 0);          /* high word */

    at_sect_size = rs->pos;
    RDADD_32BIT_LSB_FIRST(rs, 0);          /* header section size (patched) */
    RDADD_32BIT_LSB_FIRST(rs, 0);          /* high word */

    at_dir_off = rs->pos;
    RDADD_32BIT_LSB_FIRST(rs, 0);          /* directory offset (patched) */
    RDADD_32BIT_LSB_FIRST(rs, 0);          /* high word */

    RDADD_32BIT_LSB_FIRST(rs, directory->pos); /* directory length */
    RDADD_32BIT_LSB_FIRST(rs, 0);          /* high word */

    at_c0_off = rs->pos;
    RDADD_32BIT_LSB_FIRST(rs, 0);          /* content section 0 offset (patched) */
    RDADD_32BIT_LSB_FIRST(rs, 0);          /* high word */

    /* The main header ends here, so its size is now known. */
    PUT_32BIT_LSB_FIRST(rs->text + at_hdr_size, rs->pos);

    /* ---- 'Header section' ---- */
    sect_start = rs->pos;
    PUT_32BIT_LSB_FIRST(rs->text + at_sect_off, rs->pos);

    RDADD_32BIT_LSB_FIRST(rs, 0x1FE);      /* magic number */
    RDADD_32BIT_LSB_FIRST(rs, 0);          /* unknown, always observed to be 0 */

    at_file_size = rs->pos;
    RDADD_32BIT_LSB_FIRST(rs, 0);          /* file size (patched) */
    RDADD_32BIT_LSB_FIRST(rs, 0);          /* high word */

    RDADD_32BIT_LSB_FIRST(rs, 0);          /* unknown, always observed to be 0 */
    RDADD_32BIT_LSB_FIRST(rs, 0);          /* unknown, always observed to be 0 */

    PUT_32BIT_LSB_FIRST(rs->text + at_sect_size,
                        rs->pos - sect_start);

    /* ---- Directory ---- */
    PUT_32BIT_LSB_FIRST(rs->text + at_dir_off, rs->pos);
    rdaddsn(rs, directory->text, directory->pos);

    /* ---- Content section 0 ---- */
    PUT_32BIT_LSB_FIRST(rs->text + at_c0_off, rs->pos);
    rdaddsn(rs, content0->text, content0->pos);

    /* Everything has been appended; record the final length. */
    PUT_32BIT_LSB_FIRST(rs->text + at_file_size, rs->pos);
}
120 
/*
 * Append 'val' to 'rs' as a CHM ENCINT: a sequence of 7-bit groups,
 * most significant group first, where every byte except the final one
 * has its top bit set.
 *
 * The groups are produced least-significant first (which is the
 * natural order when repeatedly shifting), and the freshly written
 * run of bytes is then reversed in place.
 */
static void encint(rdstringc *rs, unsigned val)
{
    int lo = rs->pos;                  /* index of first byte written */
    int hi;                            /* index of last byte written */
    int continuation = 0;              /* 0 for the first group emitted,
                                        * which ends up last in the output */

    do {
        rdaddc(rs, (val & 0x7F) | continuation);
        val >>= 7;
        continuation = 0x80;
    } while (val != 0);

    /* Reverse the bytes just written so the encoding is big-endian. */
    for (hi = rs->pos - 1; hi > lo; lo++, hi--) {
        char swap = rs->text[lo];
        rs->text[lo] = rs->text[hi];
        rs->text[hi] = swap;
    }
}
148 
/*
 * One file stored in the CHM, as recorded in the directory. Entries
 * live in the 'files' tree of struct chm and are written out by
 * directory().
 */
struct chm_directory_entry {
    char *filename;                    /* free this when done */
    int which_content_section;         /* content section number holding the data */
    int offset_in_content_section;     /* position of the data within that section */
    int file_size;                     /* length of the data in bytes */
};
155 
/*
 * Compare two filenames using the CHM directory ordering, which
 * appears to be case-insensitive and based on the _lowercased_ text
 * (so, in particular, '_' sorts before any alphabetic character).
 *
 * Only the ASCII letters 'A'..'Z' are folded. Returns -1, 0 or +1.
 */
static int strcmp_chm(const char *a, const char *b)
{
    for (;; a++, b++) {
        char la = *a;
        char lb = *b;

        /* Both strings ended together: they are equal. */
        if (la == '\0' && lb == '\0')
            return 0;

        if ('A' <= la && la <= 'Z')
            la += 'a'-'A';
        if ('A' <= lb && lb <= 'Z')
            lb += 'a'-'A';

        if (la < lb)
            return -1;
        if (la != lb)
            return +1;
    }
}
175 
chm_directory_entry_cmp(void * av,void * bv)176 int chm_directory_entry_cmp(void *av, void *bv)
177 {
178     const struct chm_directory_entry
179         *a = (const struct chm_directory_entry *)av,
180         *b = (const struct chm_directory_entry *)bv;
181     return strcmp_chm(a->filename, b->filename);
182 }
183 
chm_directory_entry_find(void * av,void * bv)184 int chm_directory_entry_find(void *av, void *bv)
185 {
186     const char *a = (const char *)av;
187     const struct chm_directory_entry
188         *b = (const struct chm_directory_entry *)bv;
189     return strcmp_chm(a, b->filename);
190 }
191 
/*
 * One entry in a temporary per-level index built by directory():
 * identifies an already-written chunk by its index and by the first
 * filename it covers.
 */
struct chm_index_entry {
    char *first_filename; /* shared pointer with some chm_directory_entry */
    int chunk_index;      /* index of the chunk this entry refers to */
};
196 
/*
 * Build the CHM directory into 'rs' from the tree of
 * struct chm_directory_entry in 'files'.
 *
 * 'rs' must be empty on entry and 'files' must contain at least one
 * entry (both are asserted).
 *
 * The output consists of:
 *  - an "ITSP" directory header, several fields of which are written
 *    as zero and patched at the end once their values are known;
 *  - a run of fixed-size "PMGL" chunks holding the directory entries
 *    in tree order;
 *  - zero or more layers of "PMGI" index chunks, each layer indexing
 *    the chunks of the layer before it, repeated until a layer
 *    consists of a single chunk.
 *
 * Every chunk is exactly 'chunksize' bytes: entries grow from the
 * front, a quick-reference table sits at the very end, and the gap
 * between them is zero-padded.
 */
static void directory(rdstringc *rs, tree234 *files)
{
    const int chunksize = 4096;
    const int encoded_density = 2;
    /* A quick-reference pointer is emitted once every useful_density
     * entries within a chunk. */
    const int useful_density = 1 + (1 << encoded_density);
    int dirhdr_size_field, dirhdr_size2_field, dirhdr_depth_field;
    int dirhdr_root_field, dirhdr_tail_field, dirhdr_nchunks_field;
    int curr_chunk, depth, filename_index;
    tree234 *index;

    assert(rs->pos == 0);
    assert(count234(files) > 0);

    /* Directory header */
    rdaddsc(rs, "ITSP");               /* directory header magic number */
    RDADD_32BIT_LSB_FIRST(rs, 1);      /* format version */
    dirhdr_size_field = rs->pos;
    RDADD_32BIT_LSB_FIRST(rs, 0); /* directory header size; fill in later */
    RDADD_32BIT_LSB_FIRST(rs, 10);     /* unknown; observed to be 10 */
    RDADD_32BIT_LSB_FIRST(rs, chunksize);
    RDADD_32BIT_LSB_FIRST(rs, encoded_density);
    dirhdr_depth_field = rs->pos;
    RDADD_32BIT_LSB_FIRST(rs, 0); /* B-tree depth; fill in later */
    dirhdr_root_field = rs->pos;
    RDADD_32BIT_LSB_FIRST(rs, 0); /* root chunk index; fill in later */
    RDADD_32BIT_LSB_FIRST(rs, 0); /* head of PMGL chunk list; always 0 here */
    dirhdr_tail_field = rs->pos;
    RDADD_32BIT_LSB_FIRST(rs, 0); /* tail of PMGL chunk list; fill in later */
    RDADD_32BIT_LSB_FIRST(rs, 0xFFFFFFFFU); /* unknown; observed to be -1 */
    dirhdr_nchunks_field = rs->pos;
    RDADD_32BIT_LSB_FIRST(rs, 0); /* total number of chunks; fill in later */
    RDADD_32BIT_LSB_FIRST(rs, 0x409);  /* language (FIXME) */
    guid(rs,0x5D02926A,0x212E,0x11D0,0x9D,0xF9,0x00,0xA0,0xC9,0x22,0xE6,0xEC);
    dirhdr_size2_field = rs->pos;
    RDADD_32BIT_LSB_FIRST(rs, 0); /* directory header size; fill in later */
    RDADD_32BIT_LSB_FIRST(rs, 0xFFFFFFFFU); /* unknown; observed to be -1 */
    RDADD_32BIT_LSB_FIRST(rs, 0xFFFFFFFFU); /* unknown; observed to be -1 */
    RDADD_32BIT_LSB_FIRST(rs, 0xFFFFFFFFU); /* unknown; observed to be -1 */
    /* The header is complete, so both copies of its size can be set. */
    PUT_32BIT_LSB_FIRST(rs->text + dirhdr_size_field, rs->pos);
    PUT_32BIT_LSB_FIRST(rs->text + dirhdr_size2_field, rs->pos);

    /* 'index' collects one chm_index_entry per chunk written at the
     * current level; it is unsorted and appended to in output order. */
    index = newtree234(NULL);
    curr_chunk = 0;
    depth = 1;
    /* Write out lowest-level PMGL chunks full of actual directory entries */
    filename_index = 0;
    while (filename_index < count234(files)) {
        rdstringc chunk = {0, 0, NULL};
        rdstringc reversed_quickref = {0, 0, NULL};
        int chunk_endlen_field, chunk_nextptr_field;
        int n_entries, offset_of_first_entry;
        int saved_pos, saved_rq_pos, i;

        rdaddsc(&chunk, "PMGL");
        chunk_endlen_field = chunk.pos;
        RDADD_32BIT_LSB_FIRST(&chunk, 0); /* space at end; fill in later */
        RDADD_32BIT_LSB_FIRST(&chunk, 0); /* unknown; observed to be 0 */
        if (curr_chunk == 0) {
            RDADD_32BIT_LSB_FIRST(&chunk, 0xFFFFFFFF); /* 'null' prev ptr */
        } else {
            RDADD_32BIT_LSB_FIRST(&chunk, curr_chunk - 1);
        }
        chunk_nextptr_field = chunk.pos; /* may overwrite 'next' ptr later */
        RDADD_32BIT_LSB_FIRST(&chunk, curr_chunk + 1);

        /* Enter this chunk in our index for the next level of the
         * B-tree (if we end up needing one). */
        {
            struct chm_directory_entry *ent = (struct chm_directory_entry *)
                index234(files, filename_index);
            struct chm_index_entry *ient = snew(struct chm_index_entry);
            assert(ent);
            ient->first_filename = ent->filename;
            ient->chunk_index = curr_chunk;
            addpos234(index, ient, count234(index));
        }

        /* Start accumulating the quick-reference index at the end of this
         * chunk. We'll build it up backwards, and reverse it halfwordwise
         * when we copy it into the end of our output chunk. */
        RDADD_16BIT_LSB_FIRST(&reversed_quickref, 0);
        offset_of_first_entry = chunk.pos;

        n_entries = 0;
        /* Write filenames into this chunk until it's full, or until
         * we run out of filenames. */
        while (1) {
            struct chm_directory_entry *ent = (struct chm_directory_entry *)
                index234(files, filename_index++);
            if (!ent) {
                /* Run out of filenames, so this is the last PMGL chunk.
                 * Reset its 'next' pointer to the 'null' -1 value. */
                PUT_32BIT_LSB_FIRST(chunk.text + chunk_nextptr_field,
                                    0xFFFFFFFFU);
                /* And point the directory header's tail pointer at
                 * this chunk. */
                PUT_32BIT_LSB_FIRST(rs->text + dirhdr_tail_field, curr_chunk);
                break;
            }

            /* Save the sizes of stuff in this chunk, so we can put
             * them back if this entry turns out to overflow. */
            saved_pos = chunk.pos;
            saved_rq_pos = reversed_quickref.pos;

            if (n_entries > 0 && n_entries % useful_density == 0) {
                /* Add a quick-reference index pointer. */
                RDADD_16BIT_LSB_FIRST(&reversed_quickref, chunk.pos -
                                      offset_of_first_entry);
            }

            encint(&chunk, strlen(ent->filename));
            rdaddsc(&chunk, ent->filename);
            encint(&chunk, ent->which_content_section);
            encint(&chunk, ent->offset_in_content_section);
            encint(&chunk, ent->file_size);
            if (chunk.pos + reversed_quickref.pos > chunksize) {
                /* Overflow: un-consume this entry so the next chunk
                 * starts with it, and discard what we just appended by
                 * winding both write positions back. */
                filename_index--;
                chunk.pos = saved_pos;
                reversed_quickref.pos = saved_rq_pos;
                break;
            }

            /* If we didn't overflow, then commit to this entry and
             * loop round for the next one. */
            n_entries++;
        }

        /* Finalise the chunk. */
        assert(chunk.pos + reversed_quickref.pos <= chunksize);
        PUT_32BIT_LSB_FIRST(chunk.text + chunk_endlen_field,
                            chunksize - chunk.pos);
        /* The first halfword of reversed_quickref (written as 0 above)
         * becomes the entry count; after reversal it is the last
         * halfword of the chunk. */
        PUT_16BIT_LSB_FIRST(reversed_quickref.text, n_entries);
        while (chunk.pos + reversed_quickref.pos < chunksize)
            rdaddc(&chunk, 0);         /* zero-pad */
        /* Copy the quick-reference halfwords in reverse order. */
        for (i = reversed_quickref.pos - 2; i >= 0; i -= 2)
            rdaddsn(&chunk, reversed_quickref.text+i, 2);

        assert(chunk.pos == chunksize);
        rdaddsn(rs, chunk.text, chunk.pos);
        sfree(chunk.text);
        sfree(reversed_quickref.text);
        curr_chunk++;
    }

    /* Write out as many layers of PMGI index chunks as it takes to
     * reduce the total number of chunks at the current level to 1. */
    while (count234(index) > 1) {
        tree234 *prev_index;
        int index_index = 0;

        prev_index = index;
        index = newtree234(NULL);
        depth++;

        while (index_index < count234(prev_index)) {
            rdstringc chunk = {0, 0, NULL};
            rdstringc reversed_quickref = {0, 0, NULL};
            int chunk_endlen_field;
            int n_entries, offset_of_first_entry;
            int saved_pos, saved_rq_pos, i;

            rdaddsc(&chunk, "PMGI");
            chunk_endlen_field = chunk.pos;
            RDADD_32BIT_LSB_FIRST(&chunk, 0); /* space at end; fill in later */

            /* Enter this chunk in our index for the next level of the
             * B-tree (if we end up needing one). */
            {
                struct chm_index_entry *ent = (struct chm_index_entry *)
                    index234(prev_index, index_index);
                struct chm_index_entry *ient = snew(struct chm_index_entry);
                assert(ent);
                ient->first_filename = ent->first_filename;
                ient->chunk_index = curr_chunk;
                addpos234(index, ient, count234(index));
            }

            /* Start accumulating the quick-reference index at the end
             * of this chunk, as above. */
            RDADD_16BIT_LSB_FIRST(&reversed_quickref, 0);
            offset_of_first_entry = chunk.pos;

            n_entries = 0;
            /* Write index entries into this chunk until it's full, or
             * until we run out of chunks at the previous level. */
            while (1) {
                struct chm_index_entry *ent = (struct chm_index_entry *)
                index234(prev_index, index_index++);
                if (!ent)
                    break;

                /* Save the sizes of stuff in this chunk, so we can put
                 * them back if this entry turns out to overflow. */
                saved_pos = chunk.pos;
                saved_rq_pos = reversed_quickref.pos;

                if (n_entries > 0 && n_entries % useful_density == 0) {
                    /* Add a quick-reference index pointer. */
                    RDADD_16BIT_LSB_FIRST(&reversed_quickref, chunk.pos -
                                          offset_of_first_entry);
                }

                encint(&chunk, strlen(ent->first_filename));
                rdaddsc(&chunk, ent->first_filename);
                encint(&chunk, ent->chunk_index);
                if (chunk.pos + reversed_quickref.pos > chunksize) {
                    /* Overflow: roll back exactly as in the PMGL case. */
                    index_index--;
                    chunk.pos = saved_pos;
                    reversed_quickref.pos = saved_rq_pos;
                    break;
                }

                /* If we didn't overflow, then commit to this entry and
                 * loop round for the next one. */
                n_entries++;
            }

            /* Finalise the chunk. */
            assert(chunk.pos + reversed_quickref.pos <= chunksize);
            PUT_32BIT_LSB_FIRST(chunk.text + chunk_endlen_field,
                                chunksize - chunk.pos);
            PUT_16BIT_LSB_FIRST(reversed_quickref.text, n_entries);
            while (chunk.pos + reversed_quickref.pos < chunksize)
                rdaddc(&chunk, 0);         /* zero-pad */
            for (i = reversed_quickref.pos - 2; i >= 0; i -= 2)
                rdaddsn(&chunk, reversed_quickref.text+i, 2);

            assert(chunk.pos == chunksize);
            rdaddsn(rs, chunk.text, chunk.pos);
            sfree(chunk.text);
            sfree(reversed_quickref.text);
            curr_chunk++;
        }

        /*
         * Now free the old index.
         */
        while (1) {
            struct chm_index_entry *ent = (struct chm_index_entry *)
                delpos234(prev_index, 0);
            if (!ent)
                break;
            sfree(ent);
        }
        freetree234(prev_index);
    }

    /*
     * Finished! We've reduced to a single chunk. Free the remaining
     * index (which must have size 1).
     */
    assert(count234(index) == 1);
    sfree(delpos234(index, 0));
    freetree234(index);

    /* Fill in the deferred fields in the main header. */
    PUT_32BIT_LSB_FIRST(rs->text + dirhdr_depth_field, depth);
    /* The root is the last chunk written: the only chunk of the top
     * level. */
    PUT_32BIT_LSB_FIRST(rs->text + dirhdr_root_field, curr_chunk-1);
    PUT_32BIT_LSB_FIRST(rs->text + dirhdr_nchunks_field, curr_chunk);
}
458 
/*
 * Begin a record in 'rs' with the given 16-bit 'code', followed by a
 * 16-bit length placeholder of zero. Returns the offset at which the
 * record starts, to be handed to sys_end() once the payload has been
 * appended.
 */
static int sys_start(rdstringc *rs, int code)
{
    const int record_offset = rs->pos;

    RDADD_16BIT_LSB_FIRST(rs, code);
    RDADD_16BIT_LSB_FIRST(rs, 0);      /* length placeholder; patched by sys_end */

    return record_offset;
}
/*
 * Close the record that sys_start() opened at offset 'recstart' by
 * patching its length field with the number of payload bytes that
 * have been appended since the 4-byte record header.
 */
static void sys_end(rdstringc *rs, int recstart)
{
    const int length_field = recstart+2;
    const int payload_start = recstart+4;

    PUT_16BIT_LSB_FIRST(rs->text + length_field, rs->pos - payload_start);
}
470 
/*
 * Description of one help window, as registered by chm_add_window().
 * All strings are private copies owned by this structure and are
 * released in chm_free().
 */
struct chm_window {
    char *name;
    char *title;
    char *contentsfile;                /* may be NULL */
    char *indexfile;                   /* may be NULL */
    char *rootfile;
    int navpaneflags;
    int toolbarflags;
};
480 
/*
 * Top-level state for a CHM file under construction. Created by
 * chm_new() and destroyed, with everything it owns, by chm_free().
 */
struct chm {
    tree234 *files;     /* struct chm_directory_entry, sorted by
                         * chm_directory_entry_cmp */
    tree234 *windows;   /* struct chm_window, unsorted, in order added */
    tree234 *stringtab; /* struct chm_stringtab_entry, sorted by
                         * chm_stringtab_cmp */
    rdstringc content0;                /* outer uncompressed container */
    rdstringc content1;                /* compressed subfile */
    rdstringc outfile;
    rdstringc stringsfile;  /* interned NUL-terminated strings; see
                             * chm_intern_string() */
    /* Optional settings; each is NULL until set, and an owned copy
     * afterwards. */
    char *title, *contents_filename, *index_filename, *default_topic;
    char *default_window;
    /* Top-level sections, linked through 'nextsibling'. */
    struct chm_section *rootsecthead, *rootsecttail;
    /* Every section, in order of creation, linked through 'next'. */
    struct chm_section *allsecthead, *allsecttail;
};
494 
/*
 * One section (topic) in the contents hierarchy, created by
 * chm_add_section().
 */
struct chm_section {
    /* Logical links within the section tree structure */
    struct chm_section *firstchild, *lastchild, *nextsibling, *parent;
    /* Link all chm_sections together into one big list, in a
     * topological order (i.e. every section comes after its
     * parent) */
    struct chm_section *next;

    /* Owned copies of the strings passed to chm_add_section();
     * released in chm_free(). */
    char *title, *url;
    /* The fields below are not initialised by chm_add_section().
     * NOTE(review): presumably assigned while the output files are
     * being generated -- confirm against chm_build(). */
    int tocidx_offset_1, tocidx_offset_2;
    int topic_index, urltbl_offset, urlstr_offset;
};
507 
/*
 * Index entry for one interned string. The string text itself lives
 * in chm->stringsfile; this record only says where.
 */
struct chm_stringtab_entry {
    struct chm *chm;    /* owner, so comparators can reach stringsfile */
    int strtab_offset;  /* offset of the NUL-terminated string within
                         * chm->stringsfile */
};
512 
chm_stringtab_cmp(void * av,void * bv)513 static int chm_stringtab_cmp(void *av, void *bv)
514 {
515     const struct chm_stringtab_entry
516         *a = (const struct chm_stringtab_entry *)av,
517         *b = (const struct chm_stringtab_entry *)bv;
518     return strcmp(a->chm->stringsfile.text + a->strtab_offset,
519                   b->chm->stringsfile.text + b->strtab_offset);
520 }
521 
chm_stringtab_find(void * av,void * bv)522 static int chm_stringtab_find(void *av, void *bv)
523 {
524     const char *a = (const char *)av;
525     const struct chm_stringtab_entry
526         *b = (const struct chm_stringtab_entry *)bv;
527     return strcmp(a, b->chm->stringsfile.text + b->strtab_offset);
528 }
529 
chm_intern_string(struct chm * chm,const char * string)530 int chm_intern_string(struct chm *chm, const char *string)
531 {
532     struct chm_stringtab_entry *ent;
533     int size;
534 
535     if (!string)
536         return 0;
537 
538     if ((ent = (struct chm_stringtab_entry *)find234(
539              chm->stringtab, (void *)string, chm_stringtab_find)) == NULL) {
540         ent = snew(struct chm_stringtab_entry);
541         ent->chm = chm;
542 
543         /* Pad to ensure the string doesn't cross a page boundary. */
544         size = strlen(string) + 1;  /* include the NUL terminator */
545         assert(size < 0x1000);  /* avoid really serious trouble */
546         while ((chm->stringsfile.pos ^ (chm->stringsfile.pos + size-1)) >> 12)
547             rdaddc(&chm->stringsfile, 0);
548 
549         ent->strtab_offset = chm->stringsfile.pos;
550         rdaddsc(&chm->stringsfile, string);
551         rdaddc(&chm->stringsfile, '\0');
552         add234(chm->stringtab, ent);
553     }
554     return ent->strtab_offset;
555 }
556 
chm_new(void)557 struct chm *chm_new(void)
558 {
559     struct chm *chm = snew(struct chm);
560     chm->files = newtree234(chm_directory_entry_cmp);
561     chm->windows = newtree234(NULL);
562     chm->stringtab = newtree234(chm_stringtab_cmp);
563     chm->content0 = empty_rdstringc;
564     chm->content1 = empty_rdstringc;
565     chm->outfile = empty_rdstringc;
566     chm->stringsfile = empty_rdstringc;
567     chm->title = NULL;
568     chm->contents_filename = NULL;
569     chm->index_filename = NULL;
570     chm->default_topic = NULL;
571     chm->default_window = NULL;
572     chm->rootsecthead = chm->rootsecttail = NULL;
573     chm->allsecthead = chm->allsecttail = NULL;
574     chm_intern_string(chm, "");        /* preinitialise the strings table */
575     return chm;
576 }
577 
chm_free(struct chm * chm)578 void chm_free(struct chm *chm)
579 {
580     struct chm_directory_entry *ent;
581     struct chm_window *win;
582     struct chm_stringtab_entry *str;
583     struct chm_section *sect;
584 
585     while ((ent = delpos234(chm->files, 0)) != NULL) {
586         sfree(ent->filename);
587         sfree(ent);
588     }
589     freetree234(chm->files);
590 
591     while ((win = delpos234(chm->windows, 0)) != NULL) {
592         sfree(win->name);
593         sfree(win->title);
594         sfree(win->contentsfile);
595         sfree(win->indexfile);
596         sfree(win->rootfile);
597         sfree(win);
598     }
599     freetree234(chm->windows);
600 
601     while ((str = delpos234(chm->stringtab, 0)) != NULL) {
602         sfree(str);
603     }
604     freetree234(chm->stringtab);
605 
606     for (sect = chm->allsecthead; sect ;) {
607         struct chm_section *tmp = sect->next;
608         sfree(sect->title);
609         sfree(sect->url);
610         sfree(sect);
611         sect = tmp;
612     }
613 
614     sfree(chm->content0.text);
615     sfree(chm->content1.text);
616     sfree(chm->outfile.text);
617     sfree(chm->stringsfile.text);
618 
619     sfree(chm->title);
620     sfree(chm->contents_filename);
621     sfree(chm->index_filename);
622     sfree(chm->default_topic);
623     sfree(chm->default_window);
624 
625     sfree(chm);
626 }
627 
/*
 * Record a file called 'name' (copied) in the directory and append
 * its 'len' bytes of 'data' to the content buffer 'sect'.
 * 'which_sect' is the content section number stored in the directory
 * entry for that buffer.
 */
static void chm_add_file_internal(struct chm *chm, const char *name,
                                  const char *data, int len,
                                  rdstringc *sect, int which_sect)
{
    struct chm_directory_entry *entry = snew(struct chm_directory_entry);

    entry->filename = dupstr(name);
    entry->file_size = len;
    entry->which_content_section = which_sect;
    /* The data will start wherever the section currently ends. */
    entry->offset_in_content_section = sect->pos;

    add234(chm->files, entry);
    rdaddsn(sect, data, len);
}
640 
chm_find_file(struct chm * chm,const char * name)641 static struct chm_directory_entry *chm_find_file(
642     struct chm *chm, const char *name)
643 {
644     return find234(chm->files, (void *)name, chm_directory_entry_find);
645 }
646 
add_leading_slash(const char * str)647 static char *add_leading_slash(const char *str)
648 {
649     char *toret = snewn(2 + strlen(str), char);
650     toret[0] = '/';
651     strcpy(toret+1, str);
652     return toret;
653 }
654 
chm_add_file(struct chm * chm,const char * name,const char * data,int len)655 void chm_add_file(struct chm *chm, const char *name, const char *data, int len)
656 {
657     char *name_with_slash = add_leading_slash(name);
658     chm_add_file_internal(chm, name_with_slash, data, len, &chm->content1, 1);
659     sfree(name_with_slash);
660 }
661 
chm_title(struct chm * chm,const char * title)662 void chm_title(struct chm *chm, const char *title)
663 {
664     chm->title = dupstr(title);
665 }
666 
chm_contents_filename(struct chm * chm,const char * name)667 void chm_contents_filename(struct chm *chm, const char *name)
668 {
669     chm->contents_filename = dupstr(name);
670 }
671 
chm_index_filename(struct chm * chm,const char * name)672 void chm_index_filename(struct chm *chm, const char *name)
673 {
674     chm->index_filename = dupstr(name);
675 }
676 
chm_default_topic(struct chm * chm,const char * name)677 void chm_default_topic(struct chm *chm, const char *name)
678 {
679     chm->default_topic = dupstr(name);
680 }
681 
chm_default_window(struct chm * chm,const char * name)682 void chm_default_window(struct chm *chm, const char *name)
683 {
684     chm->default_window = dupstr(name);
685 }
686 
chm_add_window(struct chm * chm,const char * winname,const char * title,const char * contentsfile,const char * indexfile,const char * rootfile,int navpaneflags,int toolbarflags)687 void chm_add_window(struct chm *chm, const char *winname, const char *title,
688                     const char *contentsfile, const char *indexfile,
689                     const char *rootfile, int navpaneflags, int toolbarflags)
690 {
691     struct chm_window *win = snew(struct chm_window);
692     win->name = dupstr(winname);
693     win->title = dupstr(title);
694     win->contentsfile = contentsfile ? dupstr(contentsfile) : NULL;
695     win->indexfile = indexfile ? dupstr(indexfile) : NULL;
696     win->rootfile = dupstr(rootfile);
697     win->navpaneflags = navpaneflags;
698     win->toolbarflags = toolbarflags;
699     addpos234(chm->windows, win, count234(chm->windows));
700 }
701 
chm_add_section(struct chm * chm,struct chm_section * parent,const char * title,const char * url)702 struct chm_section *chm_add_section(struct chm *chm,
703                                     struct chm_section *parent,
704                                     const char *title, const char *url)
705 {
706     struct chm_section *sect = snew(struct chm_section);
707     sect->title = dupstr(title);
708     sect->url = dupstr(url);
709     sect->firstchild = sect->lastchild = sect->nextsibling = sect->next = NULL;
710     if (parent) {
711         sect->parent = parent;
712         if (parent->lastchild) {
713             parent->lastchild->nextsibling = sect;
714         } else {
715             parent->firstchild = sect;
716         }
717         parent->lastchild = sect;
718     } else {
719         sect->parent = NULL;
720         if (chm->rootsecttail) {
721             chm->rootsecttail->nextsibling = sect;
722         } else {
723             chm->rootsecthead = sect;
724         }
725         chm->rootsecttail = sect;
726     }
727     if (chm->allsecttail) {
728         chm->allsecttail->next = sect;
729     } else {
730         chm->allsecthead = sect;
731     }
732     chm->allsecttail = sect;
733     return sect;
734 }
735 
struct chm_urltbl_entry {
    /*
     * Records of #URLTBL, before their order is finalised.
     *
     * The first word of this record is listed as 'unknown, perhaps
     * some kind of unique ID' in chmspec. But my observation in HTML
     * Help Workshop's output is that it's actually a hash of the
     * target URL, and the file is sorted by them. chm_url_hash()
     * below implements the hash algorithm.
     */
    unsigned long hash;
    int topic_index;
    int urlstr_pos;
    int topics_offset_to_update;
};

/*
 * tree234 ordering function for struct chm_urltbl_entry: orders by
 * hash first, and by topic_index among entries with equal hashes.
 *
 * Returns -1 if a sorts before b, +1 if a sorts after b, and 0 if
 * both keys are equal. The result is antisymmetric (cmp(a,b) is the
 * negation of cmp(b,a)), which a sorted container needs in order to
 * keep a consistent ordering.
 */
int chm_urltbl_entry_cmp(void *av, void *bv)
{
    const struct chm_urltbl_entry
        *a = (const struct chm_urltbl_entry *)av,
        *b = (const struct chm_urltbl_entry *)bv;
    if (a->hash < b->hash) return -1;
    if (a->hash > b->hash) return +1;
    if (a->topic_index < b->topic_index) return -1;
    if (a->topic_index > b->topic_index) return +1;
    return 0;
}
763 
/*
 * Compute the 32-bit hash of a URL that appears as the first word of
 * each #URLTBL record.
 *
 * For every byte of 'str' the running hash is multiplied by 43 and a
 * per-byte mapping value (which may be negative) is added, with all
 * arithmetic taken modulo 2^32. The result is explicitly reduced to
 * 32 bits after each step, so it is the same whether unsigned long
 * is 32 bits wide or wider. A final hash of 0 is replaced by 1.
 */
static unsigned long chm_url_hash(const char *str)
{
    const unsigned long mask32 = 0xFFFFFFFFUL;
    unsigned long hash = 0;
    const char *p = str;

    while (*p) {
        /* Bytes are interpreted as signed for the mapping below. */
        int c = (signed char)*p;
        int val;

        /*
         * Translation rule determined by getting hhc.exe to hash
         * a lot of strings and analysing the results. It was
         * confirmed for all byte values except NUL, CR, LF, ^Z and
         * backslash: the first four could not be got into a URL via
         * hhc, and the last is automatically translated into '/',
         * presumably for reasons of Windows vs URI path syntax
         * normalisation.
         */
        if (c == '/')
            val = 0x2c;
        else if (c <= 'Z')
            val = c-0x30;
        else
            val = c-0x50;

        /* hash = hash * 43 + val, modulo 2^32. Converting a negative
         * val to unsigned long and then masking gives the correct
         * residue. */
        hash = (hash * 43UL) & mask32;
        hash = (hash + (unsigned long)val) & mask32;

        p++;
    }

    /*
     * Special case: an output hash of 0 is turned into 1,
     * conjecturally so that 0 can be reserved to mean something like
     * 'null' or 'no hash value available' in some context.
     */
    return hash == 0 ? 1 : hash;
}
822 
chm_build(struct chm * chm,int * outlen)823 const char *chm_build(struct chm *chm, int *outlen)
824 {
825     rdstringc dir = {0, 0, NULL};
826     rdstringc sysfile = {0, 0, NULL};
827     struct LZXEncodedFile *ef;
828     int rec;
829 
830     chm_add_file_internal(chm, "/", "", 0, &chm->content0, 0);
831 
832     RDADD_32BIT_LSB_FIRST(&sysfile, 3); /* #SYSTEM file version */
833 
834     rec = sys_start(&sysfile, 9);  /* identify CHM-producing tool */
835     rdaddsc(&sysfile, "Halibut, ");
836     rdaddsc(&sysfile, version);
837     rdaddc(&sysfile, '\0');
838     sys_end(&sysfile, rec);
839 
840     rec = sys_start(&sysfile, 12);  /* number of 'information types' */
841     RDADD_32BIT_LSB_FIRST(&sysfile, 0);
842     sys_end(&sysfile, rec);
843     rec = sys_start(&sysfile, 15);  /* checksum of 'information types' */
844     RDADD_32BIT_LSB_FIRST(&sysfile, 0);
845     sys_end(&sysfile, rec);
846     /* actual section of 'information types', whatever those might be */
847     chm_add_file_internal(chm, "/#ITBITS", "", 0, &chm->content0, 0);
848 
849     if (chm->title) {
850         rec = sys_start(&sysfile, 3);  /* document title */
851         rdaddsc(&sysfile, chm->title);
852         rdaddc(&sysfile, '\0');
853         sys_end(&sysfile, rec);
854     }
855 
856     if (chm->default_topic) {
857         rec = sys_start(&sysfile, 2);
858         rdaddsc(&sysfile, chm->default_topic);
859         rdaddc(&sysfile, '\0');
860         sys_end(&sysfile, rec);
861     }
862 
863     if (chm->contents_filename) {
864         rec = sys_start(&sysfile, 0);
865         rdaddsc(&sysfile, chm->contents_filename);
866         rdaddc(&sysfile, '\0');
867         sys_end(&sysfile, rec);
868     }
869 
870     if (chm->index_filename) {
871         rec = sys_start(&sysfile, 1);
872         rdaddsc(&sysfile, chm->index_filename);
873         rdaddc(&sysfile, '\0');
874         sys_end(&sysfile, rec);
875     }
876 
877     if (chm->default_window) {
878         rec = sys_start(&sysfile, 5);
879         rdaddsc(&sysfile, chm->default_window);
880         rdaddc(&sysfile, '\0');
881         sys_end(&sysfile, rec);
882     }
883 
884     rec = sys_start(&sysfile, 4);
885     RDADD_32BIT_LSB_FIRST(&sysfile, 0x809); /* language again (FIXME) */
886     RDADD_32BIT_LSB_FIRST(&sysfile, 0);     /* DBCS: off */
887     RDADD_32BIT_LSB_FIRST(&sysfile, 1);     /* full-text search: on */
888     RDADD_32BIT_LSB_FIRST(&sysfile, 0); /* no KLinks (whatever they are) */
889     RDADD_32BIT_LSB_FIRST(&sysfile, 0); /* no ALinks (whatever they are) */
890     RDADD_32BIT_LSB_FIRST(&sysfile, 0x11223344); /* timestamp LSW (FIXME) */
891     RDADD_32BIT_LSB_FIRST(&sysfile, 0x55667788); /* timestamp MSW (FIXME) */
892     RDADD_32BIT_LSB_FIRST(&sysfile, 0);          /* unknown */
893     RDADD_32BIT_LSB_FIRST(&sysfile, 0);          /* unknown */
894     sys_end(&sysfile, rec);
895 
896     {
897         rdstringc winfile = {0, 0, NULL};
898         int i, j, s;
899         struct chm_window *win;
900 
901         RDADD_32BIT_LSB_FIRST(&winfile, count234(chm->windows));
902         RDADD_32BIT_LSB_FIRST(&winfile, 196); /* size of each entry */
903         for (i = 0;
904              (win = (struct chm_window *)index234(chm->windows, i)) != NULL;
905              i++) {
906             RDADD_32BIT_LSB_FIRST(&winfile, 196); /* size of entry */
907             RDADD_32BIT_LSB_FIRST(&winfile, 0); /* not Unicode */
908             s = chm_intern_string(chm, win->name);
909             RDADD_32BIT_LSB_FIRST(&winfile, s);
910             /* Bitmap of which fields are used: 2 means nav pane
911              * style, 0x200 means whether nav pane is initially
912              * closed, 0x400 means tab position */
913             RDADD_32BIT_LSB_FIRST(&winfile, 0x502);
914             /* Nav pane styles:
915              *  0x40000 = user can control window size/pos
916              *  0x20000 = advanced full-text search UI
917              *  0x00400 = include a search tab
918              *  0x00100 = keep contents/index in sync with current topic
919              *  0x00020 = three-pane window */
920             RDADD_32BIT_LSB_FIRST(&winfile, win->navpaneflags);
921             s = chm_intern_string(chm, win->title);
922             RDADD_32BIT_LSB_FIRST(&winfile, s);
923             RDADD_32BIT_LSB_FIRST(&winfile, 0); /* window styles */
924             RDADD_32BIT_LSB_FIRST(&winfile, 0); /* window ex styles */
925             RDADD_32BIT_LSB_FIRST(&winfile, 0); /* window rect.left */
926             RDADD_32BIT_LSB_FIRST(&winfile, 0); /* window rect.top */
927             RDADD_32BIT_LSB_FIRST(&winfile, 0); /* window rect.right */
928             RDADD_32BIT_LSB_FIRST(&winfile, 0); /* window rect.bottom */
929             RDADD_32BIT_LSB_FIRST(&winfile, 0); /* window show state */
930             RDADD_32BIT_LSB_FIRST(&winfile, 0); /* only used at runtime */
931             RDADD_32BIT_LSB_FIRST(&winfile, 0); /* only used at runtime */
932             RDADD_32BIT_LSB_FIRST(&winfile, 0); /* only used at runtime */
933             RDADD_32BIT_LSB_FIRST(&winfile, 0); /* only used at runtime */
934             RDADD_32BIT_LSB_FIRST(&winfile, 0); /* only used at runtime */
935             RDADD_32BIT_LSB_FIRST(&winfile, 0); /* only used at runtime */
936             RDADD_32BIT_LSB_FIRST(&winfile, 0); /* nav pane width */
937             RDADD_32BIT_LSB_FIRST(&winfile, 0); /* topic rect.left */
938             RDADD_32BIT_LSB_FIRST(&winfile, 0); /* topic rect.top */
939             RDADD_32BIT_LSB_FIRST(&winfile, 0); /* topic rect.right */
940             RDADD_32BIT_LSB_FIRST(&winfile, 0); /* topic rect.bottom */
941             s = chm_intern_string(chm, win->contentsfile);
942             RDADD_32BIT_LSB_FIRST(&winfile, s);
943             s = chm_intern_string(chm, win->indexfile);
944             RDADD_32BIT_LSB_FIRST(&winfile, s);
945             s = chm_intern_string(chm, win->rootfile);
946             RDADD_32BIT_LSB_FIRST(&winfile, s);
947             RDADD_32BIT_LSB_FIRST(&winfile, 0); /* no Home button target */
948             RDADD_32BIT_LSB_FIRST(&winfile, win->toolbarflags);
949             RDADD_32BIT_LSB_FIRST(&winfile, 0); /* nav pane initially open */
950             RDADD_32BIT_LSB_FIRST(&winfile, 0); /* default nav pane = TOC */
951             RDADD_32BIT_LSB_FIRST(&winfile, 0); /* nav pane tabs at top */
952             RDADD_32BIT_LSB_FIRST(&winfile, 0); /* WM_NOTIFY id */
953             for (j = 0; j < 20; j++)
954                 rdaddc(&winfile, 0);            /* tab order block */
955             RDADD_32BIT_LSB_FIRST(&winfile, 0); /* history to keep */
956             RDADD_32BIT_LSB_FIRST(&winfile, 0); /* no Jump 1 button target */
957             RDADD_32BIT_LSB_FIRST(&winfile, 0); /* no Jump 2 button target */
958             RDADD_32BIT_LSB_FIRST(&winfile, 0); /* no Jump 1 button text */
959             RDADD_32BIT_LSB_FIRST(&winfile, 0); /* no Jump 2 button text */
960             RDADD_32BIT_LSB_FIRST(&winfile, 0); /* window min rect.left */
961             RDADD_32BIT_LSB_FIRST(&winfile, 0); /* window min rect.top */
962             RDADD_32BIT_LSB_FIRST(&winfile, 0); /* window min rect.right */
963             RDADD_32BIT_LSB_FIRST(&winfile, 0); /* window min rect.bottom */
964             RDADD_32BIT_LSB_FIRST(&winfile, 0); /* no information types */
965             RDADD_32BIT_LSB_FIRST(&winfile, 0); /* no custom tabs */
966         }
967         assert(winfile.pos == 8 + 196 * count234(chm->windows));
968         chm_add_file_internal(chm, "/#WINDOWS", winfile.text, winfile.pos,
969                               &chm->content1, 1);
970         sfree(winfile.text);
971     }
972 
973     {
974         struct chm_section *sect;
975         rdstringc tocidx = {0, 0, NULL};
976         rdstringc topics = {0, 0, NULL};
977         rdstringc urltbl = {0, 0, NULL};
978         rdstringc urlstr = {0, 0, NULL};
979         int i, index, s, n_tocidx_3;
980         struct chm_directory_entry *contentsfile = NULL, *indexfile = NULL;
981         tree234 *urltbl_pre;
982         struct chm_urltbl_entry *urltbl_entry;
983 
984         urltbl_pre = newtree234(chm_urltbl_entry_cmp);
985 
986         for (i = 0; i < 0x1000; i++)
987             rdaddc(&tocidx, 0);
988 
989         /* Write a header of one zero byte at the start of #URLSTR.
990          * chmspec says this doesn't always appear, and is unclear on
991          * what this is for, but I suspect it serves the same purpose
992          * as the zero byte at the start of #STRINGS, namely that it
993          * arranges that an absent string in the following records can
994          * be represented by an offset of zero which will
995          * automatically point to this byte and hence indicate the
996          * empty string. */
997         rdaddc(&urlstr, 0);
998 
999         if (chm->contents_filename) {
1000             char *withslash = add_leading_slash(chm->contents_filename);
1001             contentsfile = chm_find_file(chm, withslash);
1002             sfree(withslash);
1003             assert(contentsfile);
1004         }
1005         if (chm->index_filename) {
1006             char *withslash = add_leading_slash(chm->index_filename);
1007             indexfile = chm_find_file(chm, withslash);
1008             sfree(withslash);
1009             assert(indexfile);
1010         }
1011 
1012         index = 0;
1013 
1014         /* #TOCIDX header field pointing at start of type-1 records */
1015         PUT_32BIT_LSB_FIRST(tocidx.text + 0, tocidx.pos);
1016 
1017         /*
1018          * First pass over the section structure, generating in
1019          * parallel one of the multiple structure types in #TOCIDX and
1020          * the sole record in all the other files.
1021          */
1022         for (sect = chm->allsecthead; sect; sect = sect->next) {
1023             /* Size of the first kind of #TOCIDX record varies between
1024              * leaf and internal nodes */
1025             int tocidx_size_1 = (sect->firstchild ? 0x1c : 0x14);
1026 
1027             /*
1028              * Flags:
1029              *  - 8 means there's a local filename, which in _our_ CHM
1030              *    files there always is. If you unset this flag, you
1031              *    get a node in the contents treeview which doesn't
1032              *    open any page when clicked, and exists solely to
1033              *    contain children; in that situation the topic index
1034              *    field at position 0x08 in this record also stops
1035              *    being an index into #TOPICS and instead becomes an
1036              *    index into #STRINGS giving the node's title.
1037              *  - 4 apparently means the node should have the 'book'
1038              *    rather than 'page' icon in the TOC tree view in the
1039              *    help viewer
1040              *  - 1 means the node has a subtree in the tree view,
1041              *    which I take to mean (contrary to chmspec) that
1042              *    _this_ is the flag that means this node is a
1043              *    non-leaf node and hence has the two extra fields for
1044              *    first-child and whatever the other one means
1045              */
1046             unsigned tocidx_1_flags = (sect->firstchild ? 0x5 : 0) | 8;
1047 
1048             int urlstr_size;
1049 
1050             /* Pad to ensure the record isn't split between
1051              * 0x1000-byte pages of the file */
1052             while ((tocidx.pos ^ (tocidx.pos + tocidx_size_1 - 1)) >> 12)
1053                 RDADD_32BIT_LSB_FIRST(&tocidx, 0);
1054 
1055             sect->topic_index = index++;
1056 
1057             /* Write the type-1 record in #TOCIDX */
1058             sect->tocidx_offset_1 = tocidx.pos;
1059             RDADD_16BIT_LSB_FIRST(&tocidx, 0); /* unknown */
1060             /* chmspec thinks this 16-bit field is 'unknown', but in
1061              * my observations it appears to be the index of an entry
1062              * in the #TOCIDX type-3 region. But I still don't know
1063              * what those are really for. */
1064             RDADD_16BIT_LSB_FIRST(&tocidx, sect->topic_index);
1065             RDADD_32BIT_LSB_FIRST(&tocidx, tocidx_1_flags);
1066             RDADD_32BIT_LSB_FIRST(&tocidx, sect->topic_index);
1067             RDADD_32BIT_LSB_FIRST(&tocidx, sect->parent ?
1068                                   sect->parent->tocidx_offset_1 : 0);
1069             RDADD_32BIT_LSB_FIRST(&tocidx, 0); /* 'next' ptr; fill in later */
1070             if (sect->firstchild) {
1071                 RDADD_32BIT_LSB_FIRST(&tocidx, 0); /* child; fill in later */
1072                 RDADD_32BIT_LSB_FIRST(&tocidx, 0); /* unknown */
1073             }
1074             assert(tocidx.pos == sect->tocidx_offset_1 + tocidx_size_1);
1075 
1076             /* Figure out our offset in #URLSTR, by ensuring we're not
1077              * going to overrun a page boundary (as usual). For this
1078              * we need our record length, which is two 32-bit fields
1079              * plus a NUL-terminated copy of the target file name / URL. */
1080             urlstr_size = 8 + strlen(sect->url) + 1;
1081             assert(urlstr_size < 0x1000); /* must _fit_ in a page! */
1082             while ((urlstr.pos ^ (urlstr.pos + urlstr_size - 1)) >> 12)
1083                 rdaddc(&urlstr, 0);
1084 
1085             /*
1086              * Save everything we know so far about the #URLTBL record
1087              * we'll need to write.
1088              */
1089             urltbl_entry = snew(struct chm_urltbl_entry);
1090             urltbl_entry->hash = chm_url_hash(sect->url);
1091             urltbl_entry->topic_index = sect->topic_index;
1092             urltbl_entry->urlstr_pos = urlstr.pos;
1093             add234(urltbl_pre, urltbl_entry);
1094 
1095             /* Write the #TOPICS entry */
1096             RDADD_32BIT_LSB_FIRST(&topics, sect->tocidx_offset_1);
1097             s = chm_intern_string(chm, sect->title);
1098             RDADD_32BIT_LSB_FIRST(&topics, s);
1099             urltbl_entry->topics_offset_to_update = topics.pos;
1100             RDADD_32BIT_LSB_FIRST(&topics, 0); /* fill in later */
1101             RDADD_16BIT_LSB_FIRST(&topics, 6); /* flag as 'in contents' */
1102             RDADD_16BIT_LSB_FIRST(&topics, 0); /* unknown */
1103 
1104             /*
1105              * Write the #URLSTR entry.
1106              */
1107             RDADD_32BIT_LSB_FIRST(&urlstr, 0); /* URL string (null) */
1108             RDADD_32BIT_LSB_FIRST(&urlstr, 0); /* FrameName location (null) */
1109             rdaddsc(&urlstr, sect->url);       /* 'Local' */
1110             rdaddc(&urlstr, '\0');
1111         }
1112 
1113         /*
1114          * Add entries in #URLTBL, #URLSTR and #TOPICS for the
1115          * contents and index files. They don't form part of the tree
1116          * in #TOCIDX, though.
1117          */
1118         if (chm->contents_filename) {
1119             urltbl_entry = snew(struct chm_urltbl_entry);
1120             urltbl_entry->hash = chm_url_hash(chm->contents_filename);
1121             urltbl_entry->topic_index = index;
1122             urltbl_entry->urlstr_pos = urlstr.pos;
1123             add234(urltbl_pre, urltbl_entry);
1124 
1125             /* #TOPICS entry */
1126             RDADD_32BIT_LSB_FIRST(&topics, 0); /* no #TOCIDX entry */
1127             RDADD_32BIT_LSB_FIRST(&topics, 0xFFFFFFFFU); /* no title either */
1128             urltbl_entry->topics_offset_to_update = topics.pos;
1129             RDADD_32BIT_LSB_FIRST(&topics, 0); /* fill in later */
1130             RDADD_16BIT_LSB_FIRST(&topics, 2); /* flag as 'not in contents' */
1131             RDADD_16BIT_LSB_FIRST(&topics, 0); /* unknown */
1132 
1133             /* #URLSTR entry */
1134             RDADD_32BIT_LSB_FIRST(&urlstr, 0); /* URL string (null) */
1135             RDADD_32BIT_LSB_FIRST(&urlstr, 0); /* FrameName location (null) */
1136             rdaddsc(&urlstr, chm->contents_filename); /* 'Local' */
1137             rdaddc(&urlstr, '\0');
1138 
1139             /* And add the entry in #SYSTEM that cites the hash of the
1140              * #URLTBL entry. */
1141             rec = sys_start(&sysfile, 11);
1142             RDADD_32BIT_LSB_FIRST(&sysfile, urltbl_entry->hash);
1143             sys_end(&sysfile, rec);
1144 
1145             index++;
1146         }
1147         if (chm->index_filename) {
1148             urltbl_entry = snew(struct chm_urltbl_entry);
1149             urltbl_entry->hash = chm_url_hash(chm->index_filename);
1150             urltbl_entry->topic_index = index;
1151             urltbl_entry->urlstr_pos = urlstr.pos;
1152             add234(urltbl_pre, urltbl_entry);
1153 
1154             /* #TOPICS entry */
1155             RDADD_32BIT_LSB_FIRST(&topics, 0); /* no #TOCIDX entry */
1156             RDADD_32BIT_LSB_FIRST(&topics, 0xFFFFFFFFU); /* no title either */
1157             urltbl_entry->topics_offset_to_update = topics.pos;
1158             RDADD_32BIT_LSB_FIRST(&topics, 0); /* fill in later */
1159             RDADD_16BIT_LSB_FIRST(&topics, 2); /* flag as 'not in contents' */
1160             RDADD_16BIT_LSB_FIRST(&topics, 0); /* unknown */
1161 
1162             /* #URLSTR entry */
1163             RDADD_32BIT_LSB_FIRST(&urlstr, 0); /* URL string (null) */
1164             RDADD_32BIT_LSB_FIRST(&urlstr, 0); /* FrameName location (null) */
1165             rdaddsc(&urlstr, chm->index_filename); /* 'Local' */
1166             rdaddc(&urlstr, '\0');
1167 
1168             /* And add the entry in #SYSTEM that cites the hash of the
1169              * #URLTBL entry. */
1170             rec = sys_start(&sysfile, 7);
1171             RDADD_32BIT_LSB_FIRST(&sysfile, urltbl_entry->hash);
1172             sys_end(&sysfile, rec);
1173 
1174             index++;
1175         }
1176 
1177         /*
1178          * Now we've got all our #URLTBL entries, so we can write out
1179          * #URLTBL itself.
1180          */
1181         while ((urltbl_entry = delpos234(urltbl_pre, 0)) != NULL) {
1182             /* Pad #URLTBL to the beginning of this section's entry.
1183              * Entries are all 12 bytes long, but again there's some
1184              * padding to ensure that they don't cross a page
1185              * boundary. */
1186             while ((urltbl.pos ^ (urltbl.pos + 12 - 1)) >> 12)
1187                 RDADD_32BIT_LSB_FIRST(&urltbl, 0);
1188 
1189             /* Fill in the link from #TOPICS to this entry's offset */
1190             PUT_32BIT_LSB_FIRST(topics.text +
1191                                 urltbl_entry->topics_offset_to_update,
1192                                 urltbl.pos);
1193 
1194             /* Write the entry itself. */
1195             RDADD_32BIT_LSB_FIRST(&urltbl, urltbl_entry->hash);
1196             RDADD_32BIT_LSB_FIRST(&urltbl, urltbl_entry->topic_index);
1197             RDADD_32BIT_LSB_FIRST(&urltbl, urltbl_entry->urlstr_pos);
1198 
1199             sfree(urltbl_entry);
1200         }
1201         freetree234(urltbl_pre);
1202 
1203         /*
1204          * Small follow-up pass filling in forward-pointing offset
1205          * fields in the #TOCIDX type-1 records which the previous
1206          * pass didn't know yet.
1207          */
1208         for (sect = chm->allsecthead; sect; sect = sect->next) {
1209             if (sect->nextsibling)
1210                 PUT_32BIT_LSB_FIRST(tocidx.text + sect->tocidx_offset_1 + 0x10,
1211                                     sect->nextsibling->tocidx_offset_1);
1212             if (sect->firstchild)
1213                 PUT_32BIT_LSB_FIRST(tocidx.text + sect->tocidx_offset_1 + 0x14,
1214                                     sect->firstchild->tocidx_offset_1);
1215         }
1216 
1217         /* #TOCIDX header field pointing at start of type-2 records */
1218         PUT_32BIT_LSB_FIRST(tocidx.text + 0xC, tocidx.pos);
1219 
1220         /*
1221          * Write the #TOCIDX type-2 records, which are just 4 bytes
1222          * long and just contain another copy of each topic's index,
1223          * but we need to have them there so that the type-3 records
1224          * can refer to them by offset.
1225          */
1226         for (sect = chm->allsecthead; sect; sect = sect->next) {
1227             sect->tocidx_offset_2 = tocidx.pos;
1228             RDADD_32BIT_LSB_FIRST(&tocidx, sect->topic_index);
1229         }
1230 
1231         /* Align the current #TOCIDX offset to 16 bytes */
1232         while (tocidx.pos & 0xF)
1233             rdaddc(&tocidx, 0);
1234 
1235         /* #TOCIDX header field pointing at start of type-3 records */
1236         PUT_32BIT_LSB_FIRST(tocidx.text + 0x4, tocidx.pos);
1237 
1238         /*
1239          * Write the #TOCIDX type-3 records.
1240          *
1241          * In help files I've examined, there are fewer of these than
1242          * you might expect; apparently not all sections rate one for
1243          * some reason. For the moment I'm just writing out one for
1244          * every section.
1245          */
1246         n_tocidx_3 = 0;
1247         for (sect = chm->allsecthead; sect; sect = sect->next) {
1248             RDADD_32BIT_LSB_FIRST(&tocidx, sect->tocidx_offset_1);
1249             RDADD_32BIT_LSB_FIRST(&tocidx, sect->topic_index + 666); /* ?! */
1250             RDADD_32BIT_LSB_FIRST(&tocidx, sect->tocidx_offset_2);
1251             RDADD_32BIT_LSB_FIRST(&tocidx, sect->topic_index);
1252             n_tocidx_3++;
1253         }
1254 
1255         /* #TOCIDX header field giving number of type-3 records */
1256         PUT_32BIT_LSB_FIRST(tocidx.text + 0x8, n_tocidx_3);
1257 
1258         chm_add_file_internal(chm, "/#TOCIDX", tocidx.text, tocidx.pos,
1259                               &chm->content1, 1);
1260         chm_add_file_internal(chm, "/#TOPICS", topics.text, topics.pos,
1261                               &chm->content1, 1);
1262         chm_add_file_internal(chm, "/#URLTBL", urltbl.text, urltbl.pos,
1263                               &chm->content1, 1);
1264         chm_add_file_internal(chm, "/#URLSTR", urlstr.text, urlstr.pos,
1265                               &chm->content1, 1);
1266 
1267         /*
1268          * Write #IDXHDR (and its mirror in #SYSTEM), which we
1269          * couldn't do until we knew how many topic nodes there were.
1270          */
1271         {
1272             int idxhdr_start;
1273 
1274             rec = sys_start(&sysfile, 13);
1275             idxhdr_start = sysfile.pos;
1276 
1277             rdaddsc(&sysfile, "T#SM");     /* #IDXHDR magic */
1278             RDADD_32BIT_LSB_FIRST(&sysfile, 0x12345678); /* checksum? FIXME */
1279             RDADD_32BIT_LSB_FIRST(&sysfile, 1); /* unknown */
1280             RDADD_32BIT_LSB_FIRST(&sysfile, index); /* number of topic nodes */
1281             RDADD_32BIT_LSB_FIRST(&sysfile, 0); /* unknown */
1282             RDADD_32BIT_LSB_FIRST(&sysfile, 0xFFFFFFFFU); /* no image list */
1283             RDADD_32BIT_LSB_FIRST(&sysfile, 0); /* unknown */
1284             RDADD_32BIT_LSB_FIRST(&sysfile, 0); /* top-level node is
1285                                                  * not a folder */
1286             RDADD_32BIT_LSB_FIRST(&sysfile, 0xFFFFFFFFU); /* no bg colour */
1287             RDADD_32BIT_LSB_FIRST(&sysfile, 0xFFFFFFFFU); /* no fg colour */
1288             RDADD_32BIT_LSB_FIRST(&sysfile, 0xFFFFFFFFU); /* no font spec */
1289             RDADD_32BIT_LSB_FIRST(&sysfile, 0xFFFFFFFFU); /* no window style */
1290             RDADD_32BIT_LSB_FIRST(&sysfile, 0xFFFFFFFFU); /* no ex win style */
1291             RDADD_32BIT_LSB_FIRST(&sysfile, 0xFFFFFFFFU); /* unknown */
1292             RDADD_32BIT_LSB_FIRST(&sysfile, 0xFFFFFFFFU); /* no frame name */
1293             RDADD_32BIT_LSB_FIRST(&sysfile, 0xFFFFFFFFU); /* no window name */
1294             RDADD_32BIT_LSB_FIRST(&sysfile, 0); /* no information types */
1295             RDADD_32BIT_LSB_FIRST(&sysfile, 1); /* unknown */
1296             RDADD_32BIT_LSB_FIRST(&sysfile, 0); /* no merge files */
1297             RDADD_32BIT_LSB_FIRST(&sysfile, 0); /* unknown */
1298             while (sysfile.pos - idxhdr_start < 4096)
1299                 rdaddc(&sysfile, 0);
1300 
1301             chm_add_file_internal(chm, "/#IDXHDR", sysfile.text + idxhdr_start,
1302                                   sysfile.pos - idxhdr_start,
1303                                   &chm->content1, 1);
1304             sys_end(&sysfile, rec);
1305         }
1306 
1307         sfree(tocidx.text);
1308         sfree(topics.text);
1309         sfree(urltbl.text);
1310         sfree(urlstr.text);
1311     }
1312 
1313     /* Missing from #SYSTEM: */
1314     /* 10 (4-byte timestamp) */
1315     /* 6 (logical file name) */
1316 
1317     chm_add_file_internal(chm, "/#SYSTEM", sysfile.text, sysfile.pos,
1318                           &chm->content0, 0);
1319     sfree(sysfile.text);
1320 
1321     chm_add_file_internal(chm, "/#STRINGS", chm->stringsfile.text,
1322                           chm->stringsfile.pos, &chm->content1, 1);
1323 
1324     /*
1325      * ::DataSpace/NameList, giving the names of the two content sections.
1326      */
1327     {
1328         rdstringc dsnl = {0, 0, NULL};
1329         const char *p;
1330         int stringstart;
1331 
1332         RDADD_16BIT_LSB_FIRST(&dsnl, 0); /* total file size; fill in later */
1333         RDADD_16BIT_LSB_FIRST(&dsnl, 2); /* number of names */
1334 
1335         RDADD_16BIT_LSB_FIRST(&dsnl, 0); /* string length; fill in later */
1336         stringstart = dsnl.pos;
1337         for (p = "Uncompressed"; *p; p++)
1338             RDADD_16BIT_LSB_FIRST(&dsnl, *p);
1339         PUT_16BIT_LSB_FIRST(dsnl.text + stringstart - 2,
1340                             (dsnl.pos - stringstart) / 2);
1341         RDADD_16BIT_LSB_FIRST(&dsnl, 0); /* NUL terminator */
1342 
1343         RDADD_16BIT_LSB_FIRST(&dsnl, 0); /* string length; fill in later */
1344         stringstart = dsnl.pos;
1345         for (p = "MSCompressed"; *p; p++)
1346             RDADD_16BIT_LSB_FIRST(&dsnl, *p);
1347         PUT_16BIT_LSB_FIRST(dsnl.text + stringstart - 2,
1348                             (dsnl.pos - stringstart) / 2);
1349         RDADD_16BIT_LSB_FIRST(&dsnl, 0); /* NUL terminator */
1350 
1351         PUT_16BIT_LSB_FIRST(dsnl.text, dsnl.pos / 2);
1352 
1353         chm_add_file_internal(chm, "::DataSpace/NameList", dsnl.text, dsnl.pos,
1354                               &chm->content0, 0);
1355 
1356         sfree(dsnl.text);
1357     }
1358 
1359     /*
1360      * Actually compress the compressed-data section, load the
1361      * compressed version of it into the containing uncompressed
1362      * section, and write the auxiliary files describing it.
1363      */
1364     {
1365         rdstringc rs = {0, 0, NULL};
1366         const char *p;
1367         int orig_decomp_size = chm->content1.pos;
1368         size_t i;
1369 
1370         while (chm->content1.pos & 0x7FFF)
1371             rdaddc(&chm->content1, 0); /* pad to a realign-interval boundary */
1372         ef = lzx(chm->content1.text, chm->content1.pos, 0x8000, 0x10000);
1373         chm_add_file_internal(
1374             chm, "::DataSpace/Storage/MSCompressed/Content",
1375             (char *)ef->data, ef->data_len, &chm->content0, 0);
1376 
1377         for (p = "{7FC28940-9D31-11D0-9B27-00A0C91E9C7C}"; *p; p++)
1378             RDADD_16BIT_LSB_FIRST(&rs, *p);
1379         rs.pos = 0x26; /* this file is always written truncated :-) */
1380         chm_add_file_internal(
1381             chm, "::DataSpace/Storage/MSCompressed/Transform/List",
1382             rs.text, rs.pos, &chm->content0, 0);
1383         rs.pos = 0;
1384 
1385         RDADD_32BIT_LSB_FIRST(&rs, orig_decomp_size);
1386         RDADD_32BIT_LSB_FIRST(&rs, 0); /* high word of 64-bit size */
1387         chm_add_file_internal(
1388             chm, "::DataSpace/Storage/MSCompressed/SpanInfo",
1389             rs.text, rs.pos, &chm->content0, 0);
1390         rs.pos = 0;
1391 
1392         RDADD_32BIT_LSB_FIRST(&rs, 6); /* file size */
1393         rdaddsc(&rs, "LZXC");          /* compression type identifier */
1394         RDADD_32BIT_LSB_FIRST(&rs, 2); /* version */
1395         RDADD_32BIT_LSB_FIRST(&rs, 2); /* reset interval in units of 2^15 */
1396         RDADD_32BIT_LSB_FIRST(&rs, 2); /* window size in units of 2^15 */
1397         RDADD_32BIT_LSB_FIRST(&rs, 1); /* reset interval multiplier */
1398         RDADD_32BIT_LSB_FIRST(&rs, 0); /* unknown */
1399         chm_add_file_internal(
1400             chm, "::DataSpace/Storage/MSCompressed/ControlData",
1401             rs.text, rs.pos, &chm->content0, 0);
1402         rs.pos = 0;
1403 
1404         RDADD_32BIT_LSB_FIRST(&rs, 2); /* unknown (version number?) */
1405         RDADD_32BIT_LSB_FIRST(&rs, ef->n_resets); /* reset table length */
1406         RDADD_32BIT_LSB_FIRST(&rs, 8); /* reset table entry size */
1407         RDADD_32BIT_LSB_FIRST(&rs, 0x28); /* reset table offset */
1408         RDADD_32BIT_LSB_FIRST(&rs, orig_decomp_size); /* uncompressed len */
1409         RDADD_32BIT_LSB_FIRST(&rs, 0); /* MSW */
1410         RDADD_32BIT_LSB_FIRST(&rs, ef->data_len); /* compressed len */
1411         RDADD_32BIT_LSB_FIRST(&rs, 0); /* MSW */
1412         RDADD_32BIT_LSB_FIRST(&rs, 0x8000); /* realign interval */
1413         RDADD_32BIT_LSB_FIRST(&rs, 0); /* MSW */
1414         for (i = 0; i < ef->n_resets; i++) {
1415             RDADD_32BIT_LSB_FIRST(&rs, ef->reset_byte_offsets[i]);
1416             RDADD_32BIT_LSB_FIRST(&rs, 0); /* MSW */
1417         }
1418         chm_add_file_internal(
1419             chm, "::DataSpace/Storage/MSCompressed/Transform/"
1420             "{7FC28940-9D31-11D0-9B27-00A0C91E9C7C}/InstanceData/ResetTable",
1421             rs.text, rs.pos, &chm->content0, 0);
1422         rs.pos = 0;
1423     }
1424 
1425     sfree(ef->data);
1426     sfree(ef->reset_byte_offsets);
1427     sfree(ef);
1428 
1429     directory(&dir, chm->files);
1430     itsf(&chm->outfile, &dir, &chm->content0);
1431     sfree(dir.text);
1432 
1433     assert(outlen);
1434     *outlen = chm->outfile.pos;
1435     return chm->outfile.text;
1436 }
1437