1 /*
2 * winchm.c: direct output of .CHM files.
3 */
4
5 #include <assert.h>
6 #include <stdio.h>
7
8 #include "halibut.h"
9 #include "tree234.h"
10 #include "lzx.h"
11
/*
 * Byte-order helpers.
 *
 * PUT_*(cp, value) store `value' into the bytes starting at address
 * `cp', least- or most-significant byte first. `cp' is parenthesised
 * before being cast, so an arbitrary pointer expression (e.g.
 * `base + n' on a non-char pointer) is evaluated in full before the
 * conversion to `unsigned char *'. Note that `value' is evaluated
 * several times, so it must not have side effects.
 *
 * RDADD_*(rs, value) append the same encodings to the growable string
 * `rs' via rdaddsn(). They declare a temporary called `out', so the
 * `value' expression must not itself refer to a variable of that name.
 */
#define PUT_32BIT_LSB_FIRST(cp, value) do { \
    ((unsigned char *)(cp))[0] = 0xFF & (value); \
    ((unsigned char *)(cp))[1] = 0xFF & ((value) >> 8); \
    ((unsigned char *)(cp))[2] = 0xFF & ((value) >> 16); \
    ((unsigned char *)(cp))[3] = 0xFF & ((value) >> 24); } while (0)

#define PUT_32BIT_MSB_FIRST(cp, value) do { \
    ((unsigned char *)(cp))[3] = 0xFF & (value); \
    ((unsigned char *)(cp))[2] = 0xFF & ((value) >> 8); \
    ((unsigned char *)(cp))[1] = 0xFF & ((value) >> 16); \
    ((unsigned char *)(cp))[0] = 0xFF & ((value) >> 24); } while (0)

#define PUT_16BIT_LSB_FIRST(cp, value) do { \
    ((unsigned char *)(cp))[0] = 0xFF & (value); \
    ((unsigned char *)(cp))[1] = 0xFF & ((value) >> 8); } while (0)

#define RDADD_32BIT_LSB_FIRST(rs, value) do { \
    unsigned char out[4]; \
    PUT_32BIT_LSB_FIRST(out, value); \
    rdaddsn((rs), (void *)out, sizeof(out)); \
} while (0)

#define RDADD_32BIT_MSB_FIRST(rs, value) do { \
    unsigned char out[4]; \
    PUT_32BIT_MSB_FIRST(out, value); \
    rdaddsn((rs), (void *)out, sizeof(out)); \
} while (0)

#define RDADD_16BIT_LSB_FIRST(rs, value) do { \
    unsigned char out[2]; \
    PUT_16BIT_LSB_FIRST(out, value); \
    rdaddsn((rs), (void *)out, sizeof(out)); \
} while (0)
45
/*
 * Append a 16-byte GUID to `rs': one 32-bit word and two 16-bit
 * halfwords, each written least-significant byte first, followed by
 * eight individual bytes in the order given.
 */
static void guid(rdstringc *rs, unsigned long w0,
                 unsigned short h0, unsigned short h1,
                 unsigned char b0, unsigned char b1,
                 unsigned char b2, unsigned char b3,
                 unsigned char b4, unsigned char b5,
                 unsigned char b6, unsigned char b7)
{
    unsigned char trailer[8];
    int k;

    /* Gather the byte-wise tail so it can be emitted in a loop. */
    trailer[0] = b0;
    trailer[1] = b1;
    trailer[2] = b2;
    trailer[3] = b3;
    trailer[4] = b4;
    trailer[5] = b5;
    trailer[6] = b6;
    trailer[7] = b7;

    RDADD_32BIT_LSB_FIRST(rs, w0);
    RDADD_16BIT_LSB_FIRST(rs, h0);
    RDADD_16BIT_LSB_FIRST(rs, h1);

    for (k = 0; k < 8; k++)
        rdaddc(rs, trailer[k]);
}
65
/*
 * Write the outermost CHM container into `rs': the ITSF main header,
 * the small 'header section', then the already-built `directory' and
 * `content0' blobs, in that order. Sizes and offsets that are not
 * known when their field is first emitted are written as zero and
 * patched once the relevant position in `rs' has been reached.
 */
static void itsf(rdstringc *rs,
                 const rdstringc *directory, const rdstringc *content0)
{
    /* Offsets into rs->text of the placeholder fields to patch. */
    int at_hdr_len;
    int at_sect_off, at_sect_len;
    int at_dir_off, at_c0_off;
    int at_file_len;
    /* Offset at which the header section begins. */
    int sect_begin;

    /*
     * Main file header.
     */
    rdaddsc(rs, "ITSF");                     /* magic number */
    RDADD_32BIT_LSB_FIRST(rs, 3);            /* format version */
    at_hdr_len = rs->pos;
    RDADD_32BIT_LSB_FIRST(rs, 0);            /* main header size (patched) */
    RDADD_32BIT_LSB_FIRST(rs, 1);            /* unknown; always seen as 1 */
    RDADD_32BIT_MSB_FIRST(rs, 0x12345678);   /* timestamp (FIXME) */
    RDADD_32BIT_LSB_FIRST(rs, 0x809);        /* language (FIXME: configurable) */
    guid(rs,0x7C01FD10,0x7BAA,0x11D0,0x9E,0x0C,0x00,0xA0,0xC9,0x22,0xE6,0xEC);
    guid(rs,0x7C01FD11,0x7BAA,0x11D0,0x9E,0x0C,0x00,0xA0,0xC9,0x22,0xE6,0xEC);

    /* Each of the following is a 64-bit field: low word, then a zero
     * high word. */
    at_sect_off = rs->pos;
    RDADD_32BIT_LSB_FIRST(rs, 0);            /* header section offset (patched) */
    RDADD_32BIT_LSB_FIRST(rs, 0);
    at_sect_len = rs->pos;
    RDADD_32BIT_LSB_FIRST(rs, 0);            /* header section size (patched) */
    RDADD_32BIT_LSB_FIRST(rs, 0);
    at_dir_off = rs->pos;
    RDADD_32BIT_LSB_FIRST(rs, 0);            /* directory offset (patched) */
    RDADD_32BIT_LSB_FIRST(rs, 0);
    RDADD_32BIT_LSB_FIRST(rs, directory->pos); /* directory length */
    RDADD_32BIT_LSB_FIRST(rs, 0);
    at_c0_off = rs->pos;
    RDADD_32BIT_LSB_FIRST(rs, 0);            /* content section 0 offset (patched) */
    RDADD_32BIT_LSB_FIRST(rs, 0);

    /* The main header ends here, so its size is now known. */
    PUT_32BIT_LSB_FIRST(rs->text + at_hdr_len, rs->pos);

    /*
     * 'Header section'.
     */
    sect_begin = rs->pos;
    PUT_32BIT_LSB_FIRST(rs->text + at_sect_off, rs->pos);
    RDADD_32BIT_LSB_FIRST(rs, 0x1FE);        /* magic number */
    RDADD_32BIT_LSB_FIRST(rs, 0);            /* unknown; always seen as 0 */
    at_file_len = rs->pos;
    RDADD_32BIT_LSB_FIRST(rs, 0);            /* total file size (patched) */
    RDADD_32BIT_LSB_FIRST(rs, 0);            /* high word of the above */
    RDADD_32BIT_LSB_FIRST(rs, 0);            /* unknown; always seen as 0 */
    RDADD_32BIT_LSB_FIRST(rs, 0);            /* unknown; always seen as 0 */
    PUT_32BIT_LSB_FIRST(rs->text + at_sect_len,
                        rs->pos - sect_begin);

    /*
     * Directory, then content section 0, each preceded by the patch
     * that records where it starts.
     */
    PUT_32BIT_LSB_FIRST(rs->text + at_dir_off, rs->pos);
    rdaddsn(rs, directory->text, directory->pos);

    PUT_32BIT_LSB_FIRST(rs->text + at_c0_off, rs->pos);
    rdaddsn(rs, content0->text, content0->pos);

    /* Everything has been written: record the final size. */
    PUT_32BIT_LSB_FIRST(rs->text + at_file_len, rs->pos);
}
120
/*
 * Append `val' to `rs' as a CHM ENCINT: base-128 digits, most
 * significant first, with the top bit set on every byte except the
 * final one.
 *
 * The digits are produced least-significant first (which is the
 * natural order to peel them off) and the freshly written span is
 * then reversed in place.
 */
static void encint(rdstringc *rs, unsigned val)
{
    int lo = rs->pos;              /* index of first byte we write */
    int hi;                        /* index of last byte we write */
    int continuation = 0;          /* 0x80 for all but the first-emitted digit */

    for (; val >= 0x80; val >>= 7) {
        rdaddc(rs, (val & 0x7F) | continuation);
        continuation = 0x80;
    }

    hi = rs->pos;
    rdaddc(rs, val | continuation);

    /* Reverse the bytes in [lo, hi] to make the encoding big-endian. */
    for (; lo < hi; lo++, hi--) {
        char held = rs->text[lo];
        rs->text[lo] = rs->text[hi];
        rs->text[hi] = held;
    }
}
148
/*
 * One file recorded in the CHM directory listing.
 */
struct chm_directory_entry {
    /* Name of the file; dynamically allocated, free it when done. */
    char *filename;
    /* Number of the content section holding the file's data. */
    int which_content_section;
    /* Where the file's data starts within that content section. */
    int offset_in_content_section;
    /* Length of the file's data. */
    int file_size;
};
155
/*
 * Compare two file names using the ordering observed in CHM
 * directories: case-insensitive, comparing the text as if the ASCII
 * letters A-Z had been lowercased. (So '_', for instance, sorts
 * before every letter.)
 *
 * Returns -1, 0 or +1.
 */
static int strcmp_chm(const char *a, const char *b)
{
    for (;; a++, b++) {
        char ca = *a, cb = *b;

        if (!ca && !cb)
            return 0;              /* both strings ended together */

        if ('A' <= ca && ca <= 'Z')
            ca = ca - 'A' + 'a';
        if ('A' <= cb && cb <= 'Z')
            cb = cb - 'A' + 'a';

        if (ca < cb)
            return -1;
        if (ca > cb)
            return +1;
    }
}
175
chm_directory_entry_cmp(void * av,void * bv)176 int chm_directory_entry_cmp(void *av, void *bv)
177 {
178 const struct chm_directory_entry
179 *a = (const struct chm_directory_entry *)av,
180 *b = (const struct chm_directory_entry *)bv;
181 return strcmp_chm(a->filename, b->filename);
182 }
183
chm_directory_entry_find(void * av,void * bv)184 int chm_directory_entry_find(void *av, void *bv)
185 {
186 const char *a = (const char *)av;
187 const struct chm_directory_entry
188 *b = (const struct chm_directory_entry *)bv;
189 return strcmp_chm(a, b->filename);
190 }
191
/*
 * Describes one chunk at some level of the directory B-tree, for use
 * when building the level above it.
 */
struct chm_index_entry {
    /* First file name covered by the chunk. Not owned: the pointer is
     * shared with some chm_directory_entry. */
    char *first_filename;
    /* Index of the chunk within the directory. */
    int chunk_index;
};
196
/*
 * Complete one directory chunk (PMGL or PMGI) and append it to `rs'.
 *
 * `chunk' holds the chunk header and entries written so far;
 * `reversed_quickref' holds the quick-reference index in reverse
 * halfword order, with its first halfword reserved for the entry
 * count. This fills in the 'space at end' field at offset
 * `chunk_endlen_field', stores `n_entries', zero-pads the chunk, and
 * copies the quick-reference index on to the end with its halfwords
 * reversed, so that the result is exactly `chunksize' bytes. Both
 * temporary buffers are freed before returning.
 */
static void directory_emit_chunk(rdstringc *rs, rdstringc *chunk,
                                 rdstringc *reversed_quickref,
                                 int chunk_endlen_field, int n_entries,
                                 int chunksize)
{
    int i;

    assert(chunk->pos + reversed_quickref->pos <= chunksize);
    PUT_32BIT_LSB_FIRST(chunk->text + chunk_endlen_field,
                        chunksize - chunk->pos);
    PUT_16BIT_LSB_FIRST(reversed_quickref->text, n_entries);
    while (chunk->pos + reversed_quickref->pos < chunksize)
        rdaddc(chunk, 0);              /* zero-pad */
    for (i = reversed_quickref->pos - 2; i >= 0; i -= 2)
        rdaddsn(chunk, reversed_quickref->text + i, 2);

    assert(chunk->pos == chunksize);
    rdaddsn(rs, chunk->text, chunk->pos);
    sfree(chunk->text);
    sfree(reversed_quickref->text);
}

/*
 * Free every chm_index_entry held in `index', then the tree itself.
 * The file names the entries point at are not freed, because they
 * are shared with the chm_directory_entry records.
 */
static void directory_free_index(tree234 *index)
{
    struct chm_index_entry *ent;

    while ((ent = (struct chm_index_entry *)delpos234(index, 0)) != NULL)
        sfree(ent);
    freetree234(index);
}

/*
 * Build the CHM directory (the "ITSP" structure) into `rs', which
 * must be empty on entry, from the non-empty tree `files' of
 * chm_directory_entry records.
 *
 * The output is the directory header, then a sequence of PMGL chunks
 * containing the directory entries in tree order, then as many
 * layers of PMGI index chunks as are needed to reduce the number of
 * chunks at the top level to one. Header fields that depend on the
 * final layout are patched at the end.
 */
static void directory(rdstringc *rs, tree234 *files)
{
    const int chunksize = 4096;
    const int encoded_density = 2;
    const int useful_density = 1 + (1 << encoded_density);
    int dirhdr_size_field, dirhdr_size2_field, dirhdr_depth_field;
    int dirhdr_root_field, dirhdr_tail_field, dirhdr_nchunks_field;
    int curr_chunk, depth, filename_index;
    tree234 *index;

    assert(rs->pos == 0);
    assert(count234(files) > 0);

    /* Directory header */
    rdaddsc(rs, "ITSP");           /* directory header magic number */
    RDADD_32BIT_LSB_FIRST(rs, 1);  /* format version */
    dirhdr_size_field = rs->pos;
    RDADD_32BIT_LSB_FIRST(rs, 0);  /* directory header size; fill in later */
    RDADD_32BIT_LSB_FIRST(rs, 10); /* unknown; observed to be 10 */
    RDADD_32BIT_LSB_FIRST(rs, chunksize);
    RDADD_32BIT_LSB_FIRST(rs, encoded_density);
    dirhdr_depth_field = rs->pos;
    RDADD_32BIT_LSB_FIRST(rs, 0);  /* B-tree depth; fill in later */
    dirhdr_root_field = rs->pos;
    RDADD_32BIT_LSB_FIRST(rs, 0);  /* root chunk index; fill in later */
    RDADD_32BIT_LSB_FIRST(rs, 0);  /* head of PMGL chunk list; always 0 here */
    dirhdr_tail_field = rs->pos;
    RDADD_32BIT_LSB_FIRST(rs, 0);  /* tail of PMGL chunk list; fill in later */
    RDADD_32BIT_LSB_FIRST(rs, 0xFFFFFFFFU); /* unknown; observed to be -1 */
    dirhdr_nchunks_field = rs->pos;
    RDADD_32BIT_LSB_FIRST(rs, 0);  /* total number of chunks; fill in later */
    RDADD_32BIT_LSB_FIRST(rs, 0x409); /* language (FIXME) */
    guid(rs,0x5D02926A,0x212E,0x11D0,0x9D,0xF9,0x00,0xA0,0xC9,0x22,0xE6,0xEC);
    dirhdr_size2_field = rs->pos;
    RDADD_32BIT_LSB_FIRST(rs, 0);  /* directory header size; fill in later */
    RDADD_32BIT_LSB_FIRST(rs, 0xFFFFFFFFU); /* unknown; observed to be -1 */
    RDADD_32BIT_LSB_FIRST(rs, 0xFFFFFFFFU); /* unknown; observed to be -1 */
    RDADD_32BIT_LSB_FIRST(rs, 0xFFFFFFFFU); /* unknown; observed to be -1 */
    PUT_32BIT_LSB_FIRST(rs->text + dirhdr_size_field, rs->pos);
    PUT_32BIT_LSB_FIRST(rs->text + dirhdr_size2_field, rs->pos);

    index = newtree234(NULL);
    curr_chunk = 0;
    depth = 1;
    /* Write out lowest-level PMGL chunks full of actual directory entries */
    filename_index = 0;
    while (filename_index < count234(files)) {
        rdstringc chunk = {0, 0, NULL};
        rdstringc reversed_quickref = {0, 0, NULL};
        int chunk_endlen_field, chunk_nextptr_field;
        int n_entries, offset_of_first_entry;
        int saved_pos, saved_rq_pos;

        rdaddsc(&chunk, "PMGL");
        chunk_endlen_field = chunk.pos;
        RDADD_32BIT_LSB_FIRST(&chunk, 0); /* space at end; fill in later */
        RDADD_32BIT_LSB_FIRST(&chunk, 0); /* unknown; observed to be 0 */
        if (curr_chunk == 0) {
            RDADD_32BIT_LSB_FIRST(&chunk, 0xFFFFFFFF); /* 'null' prev ptr */
        } else {
            RDADD_32BIT_LSB_FIRST(&chunk, curr_chunk - 1);
        }
        chunk_nextptr_field = chunk.pos; /* may overwrite 'next' ptr later */
        RDADD_32BIT_LSB_FIRST(&chunk, curr_chunk + 1);

        /* Enter this chunk in our index for the next level of the
         * B-tree (if we end up needing one). */
        {
            struct chm_directory_entry *ent = (struct chm_directory_entry *)
                index234(files, filename_index);
            struct chm_index_entry *ient = snew(struct chm_index_entry);
            assert(ent);
            ient->first_filename = ent->filename;
            ient->chunk_index = curr_chunk;
            addpos234(index, ient, count234(index));
        }

        /* Start accumulating the quick-reference index at the end of this
         * chunk. We'll build it up backwards, and reverse it halfwordwise
         * when we copy it into the end of our output chunk. */
        RDADD_16BIT_LSB_FIRST(&reversed_quickref, 0);
        offset_of_first_entry = chunk.pos;

        n_entries = 0;
        /* Write filenames into this chunk until it's full, or until
         * we run out of filenames. */
        while (1) {
            struct chm_directory_entry *ent = (struct chm_directory_entry *)
                index234(files, filename_index++);
            if (!ent) {
                /* Run out of filenames, so this is the last PMGL chunk.
                 * Reset its 'next' pointer to the 'null' -1 value. */
                PUT_32BIT_LSB_FIRST(chunk.text + chunk_nextptr_field,
                                    0xFFFFFFFFU);
                /* And point the directory header's tail pointer at
                 * this chunk. */
                PUT_32BIT_LSB_FIRST(rs->text + dirhdr_tail_field, curr_chunk);
                break;
            }

            /* Save the sizes of stuff in this chunk, so we can put
             * them back if this entry turns out to overflow. */
            saved_pos = chunk.pos;
            saved_rq_pos = reversed_quickref.pos;

            if (n_entries > 0 && n_entries % useful_density == 0) {
                /* Add a quick-reference index pointer. */
                RDADD_16BIT_LSB_FIRST(&reversed_quickref, chunk.pos -
                                      offset_of_first_entry);
            }

            encint(&chunk, strlen(ent->filename));
            rdaddsc(&chunk, ent->filename);
            encint(&chunk, ent->which_content_section);
            encint(&chunk, ent->offset_in_content_section);
            encint(&chunk, ent->file_size);
            if (chunk.pos + reversed_quickref.pos > chunksize) {
                /* An entry too big to fit even in an otherwise empty
                 * chunk can never be placed: without this check we
                 * would go on emitting empty chunks for ever. */
                assert(n_entries > 0);
                filename_index--;
                chunk.pos = saved_pos;
                reversed_quickref.pos = saved_rq_pos;
                break;
            }

            /* If we didn't overflow, then commit to this entry and
             * loop round for the next one. */
            n_entries++;
        }

        directory_emit_chunk(rs, &chunk, &reversed_quickref,
                             chunk_endlen_field, n_entries, chunksize);
        curr_chunk++;
    }

    /* Write out as many layers of PMGI index chunks as it takes to
     * reduce the total number of chunks at the current level to 1. */
    while (count234(index) > 1) {
        tree234 *prev_index;
        int index_index = 0;

        prev_index = index;
        index = newtree234(NULL);
        depth++;

        while (index_index < count234(prev_index)) {
            rdstringc chunk = {0, 0, NULL};
            rdstringc reversed_quickref = {0, 0, NULL};
            int chunk_endlen_field;
            int n_entries, offset_of_first_entry;
            int saved_pos, saved_rq_pos;

            rdaddsc(&chunk, "PMGI");
            chunk_endlen_field = chunk.pos;
            RDADD_32BIT_LSB_FIRST(&chunk, 0); /* space at end; fill in later */

            /* Enter this chunk in our index for the next level of the
             * B-tree (if we end up needing one). */
            {
                struct chm_index_entry *ent = (struct chm_index_entry *)
                    index234(prev_index, index_index);
                struct chm_index_entry *ient = snew(struct chm_index_entry);
                assert(ent);
                ient->first_filename = ent->first_filename;
                ient->chunk_index = curr_chunk;
                addpos234(index, ient, count234(index));
            }

            /* Start accumulating the quick-reference index at the end
             * of this chunk, as above. */
            RDADD_16BIT_LSB_FIRST(&reversed_quickref, 0);
            offset_of_first_entry = chunk.pos;

            n_entries = 0;
            /* Write index entries into this chunk until it's full, or
             * until we run out of chunks at the previous level. */
            while (1) {
                struct chm_index_entry *ent = (struct chm_index_entry *)
                    index234(prev_index, index_index++);
                if (!ent)
                    break;

                /* Save the sizes of stuff in this chunk, so we can put
                 * them back if this entry turns out to overflow. */
                saved_pos = chunk.pos;
                saved_rq_pos = reversed_quickref.pos;

                if (n_entries > 0 && n_entries % useful_density == 0) {
                    /* Add a quick-reference index pointer. */
                    RDADD_16BIT_LSB_FIRST(&reversed_quickref, chunk.pos -
                                          offset_of_first_entry);
                }

                encint(&chunk, strlen(ent->first_filename));
                rdaddsc(&chunk, ent->first_filename);
                encint(&chunk, ent->chunk_index);
                if (chunk.pos + reversed_quickref.pos > chunksize) {
                    /* Same termination guard as for PMGL chunks. */
                    assert(n_entries > 0);
                    index_index--;
                    chunk.pos = saved_pos;
                    reversed_quickref.pos = saved_rq_pos;
                    break;
                }

                /* If we didn't overflow, then commit to this entry and
                 * loop round for the next one. */
                n_entries++;
            }

            directory_emit_chunk(rs, &chunk, &reversed_quickref,
                                 chunk_endlen_field, n_entries, chunksize);
            curr_chunk++;
        }

        /* Now free the old index. */
        directory_free_index(prev_index);
    }

    /*
     * Finished! We've reduced to a single chunk. Free the remaining
     * index (which must have size 1).
     */
    assert(count234(index) == 1);
    directory_free_index(index);

    /* Fill in the deferred fields in the main header. */
    PUT_32BIT_LSB_FIRST(rs->text + dirhdr_depth_field, depth);
    PUT_32BIT_LSB_FIRST(rs->text + dirhdr_root_field, curr_chunk-1);
    PUT_32BIT_LSB_FIRST(rs->text + dirhdr_nchunks_field, curr_chunk);
}
458
/*
 * Begin a record in `rs': a 16-bit record code followed by a 16-bit
 * length, both least-significant byte first. The length is written
 * as zero for now. Returns the offset at which the record starts, to
 * be handed to sys_end() once the payload has been appended.
 */
static int sys_start(rdstringc *rs, int code)
{
    const int record_offset = rs->pos;

    RDADD_16BIT_LSB_FIRST(rs, code);
    RDADD_16BIT_LSB_FIRST(rs, 0);      /* length placeholder */

    return record_offset;
}
/*
 * Close the record in `rs' that begins at offset `recstart': store,
 * in the 16-bit field two bytes into the record, the number of bytes
 * appended since the end of the record's four-byte header.
 */
static void sys_end(rdstringc *rs, int recstart)
{
    const int length_field = recstart + 2;
    const int payload_begin = recstart + 4;

    PUT_16BIT_LSB_FIRST(rs->text + length_field, rs->pos - payload_begin);
}
470
/*
 * Description of one help window. All the strings are separately
 * allocated copies; contentsfile and indexfile may be NULL.
 */
struct chm_window {
    char *name;                    /* window name */
    char *title;                   /* window title */
    char *contentsfile;            /* contents file name, or NULL */
    char *indexfile;               /* index file name, or NULL */
    char *rootfile;                /* root file name */
    int navpaneflags;              /* navigation pane style bits */
    int toolbarflags;              /* toolbar flag bits */
};
480
481 struct chm {
482 tree234 *files;
483 tree234 *windows;
484 tree234 *stringtab;
485 rdstringc content0; /* outer uncompressed container */
486 rdstringc content1; /* compressed subfile */
487 rdstringc outfile;
488 rdstringc stringsfile;
489 char *title, *contents_filename, *index_filename, *default_topic;
490 char *default_window;
491 struct chm_section *rootsecthead, *rootsecttail;
492 struct chm_section *allsecthead, *allsecttail;
493 };
494
/*
 * One node in the tree of document sections.
 */
struct chm_section {
    /* Logical links within the section tree structure. */
    struct chm_section *firstchild;
    struct chm_section *lastchild;
    struct chm_section *nextsibling;
    struct chm_section *parent;

    /* Threads every chm_section on to one big list, in a topological
     * order (i.e. each section appears after its parent). */
    struct chm_section *next;

    /* Separately allocated copies of the section's title and URL. */
    char *title;
    char *url;

    /* Per-section integer bookkeeping; not set when the section is
     * created. */
    int tocidx_offset_1;
    int tocidx_offset_2;
    int topic_index;
    int urltbl_offset;
    int urlstr_offset;
};
507
/*
 * One interned string. The text itself is not stored here: it lives
 * in the owning chm's stringsfile buffer, at the given offset.
 */
struct chm_stringtab_entry {
    struct chm *chm;               /* whose stringsfile holds the text */
    int strtab_offset;             /* where in stringsfile it starts */
};
512
chm_stringtab_cmp(void * av,void * bv)513 static int chm_stringtab_cmp(void *av, void *bv)
514 {
515 const struct chm_stringtab_entry
516 *a = (const struct chm_stringtab_entry *)av,
517 *b = (const struct chm_stringtab_entry *)bv;
518 return strcmp(a->chm->stringsfile.text + a->strtab_offset,
519 b->chm->stringsfile.text + b->strtab_offset);
520 }
521
chm_stringtab_find(void * av,void * bv)522 static int chm_stringtab_find(void *av, void *bv)
523 {
524 const char *a = (const char *)av;
525 const struct chm_stringtab_entry
526 *b = (const struct chm_stringtab_entry *)bv;
527 return strcmp(a, b->chm->stringsfile.text + b->strtab_offset);
528 }
529
chm_intern_string(struct chm * chm,const char * string)530 int chm_intern_string(struct chm *chm, const char *string)
531 {
532 struct chm_stringtab_entry *ent;
533 int size;
534
535 if (!string)
536 return 0;
537
538 if ((ent = (struct chm_stringtab_entry *)find234(
539 chm->stringtab, (void *)string, chm_stringtab_find)) == NULL) {
540 ent = snew(struct chm_stringtab_entry);
541 ent->chm = chm;
542
543 /* Pad to ensure the string doesn't cross a page boundary. */
544 size = strlen(string) + 1; /* include the NUL terminator */
545 assert(size < 0x1000); /* avoid really serious trouble */
546 while ((chm->stringsfile.pos ^ (chm->stringsfile.pos + size-1)) >> 12)
547 rdaddc(&chm->stringsfile, 0);
548
549 ent->strtab_offset = chm->stringsfile.pos;
550 rdaddsc(&chm->stringsfile, string);
551 rdaddc(&chm->stringsfile, '\0');
552 add234(chm->stringtab, ent);
553 }
554 return ent->strtab_offset;
555 }
556
chm_new(void)557 struct chm *chm_new(void)
558 {
559 struct chm *chm = snew(struct chm);
560 chm->files = newtree234(chm_directory_entry_cmp);
561 chm->windows = newtree234(NULL);
562 chm->stringtab = newtree234(chm_stringtab_cmp);
563 chm->content0 = empty_rdstringc;
564 chm->content1 = empty_rdstringc;
565 chm->outfile = empty_rdstringc;
566 chm->stringsfile = empty_rdstringc;
567 chm->title = NULL;
568 chm->contents_filename = NULL;
569 chm->index_filename = NULL;
570 chm->default_topic = NULL;
571 chm->default_window = NULL;
572 chm->rootsecthead = chm->rootsecttail = NULL;
573 chm->allsecthead = chm->allsecttail = NULL;
574 chm_intern_string(chm, ""); /* preinitialise the strings table */
575 return chm;
576 }
577
chm_free(struct chm * chm)578 void chm_free(struct chm *chm)
579 {
580 struct chm_directory_entry *ent;
581 struct chm_window *win;
582 struct chm_stringtab_entry *str;
583 struct chm_section *sect;
584
585 while ((ent = delpos234(chm->files, 0)) != NULL) {
586 sfree(ent->filename);
587 sfree(ent);
588 }
589 freetree234(chm->files);
590
591 while ((win = delpos234(chm->windows, 0)) != NULL) {
592 sfree(win->name);
593 sfree(win->title);
594 sfree(win->contentsfile);
595 sfree(win->indexfile);
596 sfree(win->rootfile);
597 sfree(win);
598 }
599 freetree234(chm->windows);
600
601 while ((str = delpos234(chm->stringtab, 0)) != NULL) {
602 sfree(str);
603 }
604 freetree234(chm->stringtab);
605
606 for (sect = chm->allsecthead; sect ;) {
607 struct chm_section *tmp = sect->next;
608 sfree(sect->title);
609 sfree(sect->url);
610 sfree(sect);
611 sect = tmp;
612 }
613
614 sfree(chm->content0.text);
615 sfree(chm->content1.text);
616 sfree(chm->outfile.text);
617 sfree(chm->stringsfile.text);
618
619 sfree(chm->title);
620 sfree(chm->contents_filename);
621 sfree(chm->index_filename);
622 sfree(chm->default_topic);
623 sfree(chm->default_window);
624
625 sfree(chm);
626 }
627
/*
 * Record a file called `name' in the directory of `chm', and append
 * its `len' bytes of `data' to the content buffer `sect'.
 * `which_sect' is the content section number stored in the directory
 * entry. The name is copied.
 */
static void chm_add_file_internal(struct chm *chm, const char *name,
                                  const char *data, int len,
                                  rdstringc *sect, int which_sect)
{
    /* The file's data will begin wherever the section currently ends. */
    const int data_start = sect->pos;
    struct chm_directory_entry *ent;

    ent = snew(struct chm_directory_entry);
    ent->file_size = len;
    ent->offset_in_content_section = data_start;
    ent->which_content_section = which_sect;
    ent->filename = dupstr(name);

    add234(chm->files, ent);
    rdaddsn(sect, data, len);
}
640
chm_find_file(struct chm * chm,const char * name)641 static struct chm_directory_entry *chm_find_file(
642 struct chm *chm, const char *name)
643 {
644 return find234(chm->files, (void *)name, chm_directory_entry_find);
645 }
646
add_leading_slash(const char * str)647 static char *add_leading_slash(const char *str)
648 {
649 char *toret = snewn(2 + strlen(str), char);
650 toret[0] = '/';
651 strcpy(toret+1, str);
652 return toret;
653 }
654
chm_add_file(struct chm * chm,const char * name,const char * data,int len)655 void chm_add_file(struct chm *chm, const char *name, const char *data, int len)
656 {
657 char *name_with_slash = add_leading_slash(name);
658 chm_add_file_internal(chm, name_with_slash, data, len, &chm->content1, 1);
659 sfree(name_with_slash);
660 }
661
chm_title(struct chm * chm,const char * title)662 void chm_title(struct chm *chm, const char *title)
663 {
664 chm->title = dupstr(title);
665 }
666
chm_contents_filename(struct chm * chm,const char * name)667 void chm_contents_filename(struct chm *chm, const char *name)
668 {
669 chm->contents_filename = dupstr(name);
670 }
671
chm_index_filename(struct chm * chm,const char * name)672 void chm_index_filename(struct chm *chm, const char *name)
673 {
674 chm->index_filename = dupstr(name);
675 }
676
chm_default_topic(struct chm * chm,const char * name)677 void chm_default_topic(struct chm *chm, const char *name)
678 {
679 chm->default_topic = dupstr(name);
680 }
681
chm_default_window(struct chm * chm,const char * name)682 void chm_default_window(struct chm *chm, const char *name)
683 {
684 chm->default_window = dupstr(name);
685 }
686
chm_add_window(struct chm * chm,const char * winname,const char * title,const char * contentsfile,const char * indexfile,const char * rootfile,int navpaneflags,int toolbarflags)687 void chm_add_window(struct chm *chm, const char *winname, const char *title,
688 const char *contentsfile, const char *indexfile,
689 const char *rootfile, int navpaneflags, int toolbarflags)
690 {
691 struct chm_window *win = snew(struct chm_window);
692 win->name = dupstr(winname);
693 win->title = dupstr(title);
694 win->contentsfile = contentsfile ? dupstr(contentsfile) : NULL;
695 win->indexfile = indexfile ? dupstr(indexfile) : NULL;
696 win->rootfile = dupstr(rootfile);
697 win->navpaneflags = navpaneflags;
698 win->toolbarflags = toolbarflags;
699 addpos234(chm->windows, win, count234(chm->windows));
700 }
701
chm_add_section(struct chm * chm,struct chm_section * parent,const char * title,const char * url)702 struct chm_section *chm_add_section(struct chm *chm,
703 struct chm_section *parent,
704 const char *title, const char *url)
705 {
706 struct chm_section *sect = snew(struct chm_section);
707 sect->title = dupstr(title);
708 sect->url = dupstr(url);
709 sect->firstchild = sect->lastchild = sect->nextsibling = sect->next = NULL;
710 if (parent) {
711 sect->parent = parent;
712 if (parent->lastchild) {
713 parent->lastchild->nextsibling = sect;
714 } else {
715 parent->firstchild = sect;
716 }
717 parent->lastchild = sect;
718 } else {
719 sect->parent = NULL;
720 if (chm->rootsecttail) {
721 chm->rootsecttail->nextsibling = sect;
722 } else {
723 chm->rootsecthead = sect;
724 }
725 chm->rootsecttail = sect;
726 }
727 if (chm->allsecttail) {
728 chm->allsecttail->next = sect;
729 } else {
730 chm->allsecthead = sect;
731 }
732 chm->allsecttail = sect;
733 return sect;
734 }
735
struct chm_urltbl_entry {
    /*
     * Records of #URLTBL, before their order is finalised.
     *
     * The first word of this record is listed as 'unknown, perhaps
     * some kind of unique ID' in chmspec. But my observation in HTML
     * Help Workshop's output is that it's actually a hash of the
     * target URL, and the file is sorted by them. chm_url_hash()
     * below implements the hash algorithm.
     */
    unsigned long hash;
    int topic_index;
    int urlstr_pos;
    int topics_offset_to_update;
};

/*
 * Ordering function for two chm_urltbl_entry records: primarily by
 * hash, with ties broken by topic_index. Returns -1, 0 or +1.
 *
 * The result is antisymmetric: swapping the arguments negates it.
 */
int chm_urltbl_entry_cmp(void *av, void *bv)
{
    const struct chm_urltbl_entry
        *a = (const struct chm_urltbl_entry *)av,
        *b = (const struct chm_urltbl_entry *)bv;
    if (a->hash < b->hash) return -1;
    if (a->hash > b->hash) return +1;
    if (a->topic_index < b->topic_index) return -1;
    if (a->topic_index > b->topic_index) return +1;
    return 0;
}
763
/*
 * Hash a URL string to a 32-bit value.
 *
 * For each byte in turn, the running hash is multiplied by 43 and a
 * value derived from the byte is added, with all arithmetic reduced
 * modulo 2^32. A final result of 0 is replaced by 1.
 */
static unsigned long chm_url_hash(const char *str)
{
    unsigned long hash = 0;

    while (*str) {
        /* Bytes are treated as signed for the purposes of the mapping. */
        int c = (signed char)*str++;
        int val;

        /*
         * Byte-to-value mapping, worked out by getting hhc.exe to
         * hash a lot of strings and analysing the results. It was
         * confirmed for every byte value except NUL, CR, LF, ^Z and
         * backslash: no way was found to get hhc to put the first
         * four into a URL, and backslash is automatically translated
         * into '/' (presumably to normalise Windows path syntax to
         * URI syntax).
         */
        if (c == '/')
            val = 0x2c;
        else if (c <= 'Z')
            val = c - 0x30;
        else
            val = c - 0x50;

        /*
         * hash = hash * 43 + val (mod 2^32). Converting a negative
         * val to unsigned long and adding it is the same as
         * subtracting modulo a power of two; the mask then brings
         * the result back down to 32 bits on platforms where
         * unsigned long is wider than that.
         */
        hash = (hash * 43UL + (unsigned long)val) & 0xFFFFFFFFUL;
    }

    /*
     * Special case: an output hash of 0 is turned into 1. The
     * conjecture is that this lets 0 be reserved in some context to
     * mean something like 'null' or 'no hash value available'.
     */
    return hash ? hash : 1;
}
822
chm_build(struct chm * chm,int * outlen)823 const char *chm_build(struct chm *chm, int *outlen)
824 {
825 rdstringc dir = {0, 0, NULL};
826 rdstringc sysfile = {0, 0, NULL};
827 struct LZXEncodedFile *ef;
828 int rec;
829
830 chm_add_file_internal(chm, "/", "", 0, &chm->content0, 0);
831
832 RDADD_32BIT_LSB_FIRST(&sysfile, 3); /* #SYSTEM file version */
833
834 rec = sys_start(&sysfile, 9); /* identify CHM-producing tool */
835 rdaddsc(&sysfile, "Halibut, ");
836 rdaddsc(&sysfile, version);
837 rdaddc(&sysfile, '\0');
838 sys_end(&sysfile, rec);
839
840 rec = sys_start(&sysfile, 12); /* number of 'information types' */
841 RDADD_32BIT_LSB_FIRST(&sysfile, 0);
842 sys_end(&sysfile, rec);
843 rec = sys_start(&sysfile, 15); /* checksum of 'information types' */
844 RDADD_32BIT_LSB_FIRST(&sysfile, 0);
845 sys_end(&sysfile, rec);
846 /* actual section of 'information types', whatever those might be */
847 chm_add_file_internal(chm, "/#ITBITS", "", 0, &chm->content0, 0);
848
849 if (chm->title) {
850 rec = sys_start(&sysfile, 3); /* document title */
851 rdaddsc(&sysfile, chm->title);
852 rdaddc(&sysfile, '\0');
853 sys_end(&sysfile, rec);
854 }
855
856 if (chm->default_topic) {
857 rec = sys_start(&sysfile, 2);
858 rdaddsc(&sysfile, chm->default_topic);
859 rdaddc(&sysfile, '\0');
860 sys_end(&sysfile, rec);
861 }
862
863 if (chm->contents_filename) {
864 rec = sys_start(&sysfile, 0);
865 rdaddsc(&sysfile, chm->contents_filename);
866 rdaddc(&sysfile, '\0');
867 sys_end(&sysfile, rec);
868 }
869
870 if (chm->index_filename) {
871 rec = sys_start(&sysfile, 1);
872 rdaddsc(&sysfile, chm->index_filename);
873 rdaddc(&sysfile, '\0');
874 sys_end(&sysfile, rec);
875 }
876
877 if (chm->default_window) {
878 rec = sys_start(&sysfile, 5);
879 rdaddsc(&sysfile, chm->default_window);
880 rdaddc(&sysfile, '\0');
881 sys_end(&sysfile, rec);
882 }
883
884 rec = sys_start(&sysfile, 4);
885 RDADD_32BIT_LSB_FIRST(&sysfile, 0x809); /* language again (FIXME) */
886 RDADD_32BIT_LSB_FIRST(&sysfile, 0); /* DBCS: off */
887 RDADD_32BIT_LSB_FIRST(&sysfile, 1); /* full-text search: on */
888 RDADD_32BIT_LSB_FIRST(&sysfile, 0); /* no KLinks (whatever they are) */
889 RDADD_32BIT_LSB_FIRST(&sysfile, 0); /* no ALinks (whatever they are) */
890 RDADD_32BIT_LSB_FIRST(&sysfile, 0x11223344); /* timestamp LSW (FIXME) */
891 RDADD_32BIT_LSB_FIRST(&sysfile, 0x55667788); /* timestamp MSW (FIXME) */
892 RDADD_32BIT_LSB_FIRST(&sysfile, 0); /* unknown */
893 RDADD_32BIT_LSB_FIRST(&sysfile, 0); /* unknown */
894 sys_end(&sysfile, rec);
895
896 {
897 rdstringc winfile = {0, 0, NULL};
898 int i, j, s;
899 struct chm_window *win;
900
901 RDADD_32BIT_LSB_FIRST(&winfile, count234(chm->windows));
902 RDADD_32BIT_LSB_FIRST(&winfile, 196); /* size of each entry */
903 for (i = 0;
904 (win = (struct chm_window *)index234(chm->windows, i)) != NULL;
905 i++) {
906 RDADD_32BIT_LSB_FIRST(&winfile, 196); /* size of entry */
907 RDADD_32BIT_LSB_FIRST(&winfile, 0); /* not Unicode */
908 s = chm_intern_string(chm, win->name);
909 RDADD_32BIT_LSB_FIRST(&winfile, s);
910 /* Bitmap of which fields are used: 2 means nav pane
911 * style, 0x200 means whether nav pane is initially
912 * closed, 0x400 means tab position */
913 RDADD_32BIT_LSB_FIRST(&winfile, 0x502);
914 /* Nav pane styles:
915 * 0x40000 = user can control window size/pos
916 * 0x20000 = advanced full-text search UI
917 * 0x00400 = include a search tab
918 * 0x00100 = keep contents/index in sync with current topic
919 * 0x00020 = three-pane window */
920 RDADD_32BIT_LSB_FIRST(&winfile, win->navpaneflags);
921 s = chm_intern_string(chm, win->title);
922 RDADD_32BIT_LSB_FIRST(&winfile, s);
923 RDADD_32BIT_LSB_FIRST(&winfile, 0); /* window styles */
924 RDADD_32BIT_LSB_FIRST(&winfile, 0); /* window ex styles */
925 RDADD_32BIT_LSB_FIRST(&winfile, 0); /* window rect.left */
926 RDADD_32BIT_LSB_FIRST(&winfile, 0); /* window rect.top */
927 RDADD_32BIT_LSB_FIRST(&winfile, 0); /* window rect.right */
928 RDADD_32BIT_LSB_FIRST(&winfile, 0); /* window rect.bottom */
929 RDADD_32BIT_LSB_FIRST(&winfile, 0); /* window show state */
930 RDADD_32BIT_LSB_FIRST(&winfile, 0); /* only used at runtime */
931 RDADD_32BIT_LSB_FIRST(&winfile, 0); /* only used at runtime */
932 RDADD_32BIT_LSB_FIRST(&winfile, 0); /* only used at runtime */
933 RDADD_32BIT_LSB_FIRST(&winfile, 0); /* only used at runtime */
934 RDADD_32BIT_LSB_FIRST(&winfile, 0); /* only used at runtime */
935 RDADD_32BIT_LSB_FIRST(&winfile, 0); /* only used at runtime */
936 RDADD_32BIT_LSB_FIRST(&winfile, 0); /* nav pane width */
937 RDADD_32BIT_LSB_FIRST(&winfile, 0); /* topic rect.left */
938 RDADD_32BIT_LSB_FIRST(&winfile, 0); /* topic rect.top */
939 RDADD_32BIT_LSB_FIRST(&winfile, 0); /* topic rect.right */
940 RDADD_32BIT_LSB_FIRST(&winfile, 0); /* topic rect.bottom */
941 s = chm_intern_string(chm, win->contentsfile);
942 RDADD_32BIT_LSB_FIRST(&winfile, s);
943 s = chm_intern_string(chm, win->indexfile);
944 RDADD_32BIT_LSB_FIRST(&winfile, s);
945 s = chm_intern_string(chm, win->rootfile);
946 RDADD_32BIT_LSB_FIRST(&winfile, s);
947 RDADD_32BIT_LSB_FIRST(&winfile, 0); /* no Home button target */
948 RDADD_32BIT_LSB_FIRST(&winfile, win->toolbarflags);
949 RDADD_32BIT_LSB_FIRST(&winfile, 0); /* nav pane initially open */
950 RDADD_32BIT_LSB_FIRST(&winfile, 0); /* default nav pane = TOC */
951 RDADD_32BIT_LSB_FIRST(&winfile, 0); /* nav pane tabs at top */
952 RDADD_32BIT_LSB_FIRST(&winfile, 0); /* WM_NOTIFY id */
953 for (j = 0; j < 20; j++)
954 rdaddc(&winfile, 0); /* tab order block */
955 RDADD_32BIT_LSB_FIRST(&winfile, 0); /* history to keep */
956 RDADD_32BIT_LSB_FIRST(&winfile, 0); /* no Jump 1 button target */
957 RDADD_32BIT_LSB_FIRST(&winfile, 0); /* no Jump 2 button target */
958 RDADD_32BIT_LSB_FIRST(&winfile, 0); /* no Jump 1 button text */
959 RDADD_32BIT_LSB_FIRST(&winfile, 0); /* no Jump 2 button text */
960 RDADD_32BIT_LSB_FIRST(&winfile, 0); /* window min rect.left */
961 RDADD_32BIT_LSB_FIRST(&winfile, 0); /* window min rect.top */
962 RDADD_32BIT_LSB_FIRST(&winfile, 0); /* window min rect.right */
963 RDADD_32BIT_LSB_FIRST(&winfile, 0); /* window min rect.bottom */
964 RDADD_32BIT_LSB_FIRST(&winfile, 0); /* no information types */
965 RDADD_32BIT_LSB_FIRST(&winfile, 0); /* no custom tabs */
966 }
967 assert(winfile.pos == 8 + 196 * count234(chm->windows));
968 chm_add_file_internal(chm, "/#WINDOWS", winfile.text, winfile.pos,
969 &chm->content1, 1);
970 sfree(winfile.text);
971 }
972
973 {
974 struct chm_section *sect;
975 rdstringc tocidx = {0, 0, NULL};
976 rdstringc topics = {0, 0, NULL};
977 rdstringc urltbl = {0, 0, NULL};
978 rdstringc urlstr = {0, 0, NULL};
979 int i, index, s, n_tocidx_3;
980 struct chm_directory_entry *contentsfile = NULL, *indexfile = NULL;
981 tree234 *urltbl_pre;
982 struct chm_urltbl_entry *urltbl_entry;
983
984 urltbl_pre = newtree234(chm_urltbl_entry_cmp);
985
986 for (i = 0; i < 0x1000; i++)
987 rdaddc(&tocidx, 0);
988
989 /* Write a header of one zero byte at the start of #URLSTR.
990 * chmspec says this doesn't always appear, and is unclear on
991 * what this is for, but I suspect it serves the same purpose
992 * as the zero byte at the start of #STRINGS, namely that it
993 * arranges that an absent string in the following records can
994 * be represented by an offset of zero which will
995 * automatically point to this byte and hence indicate the
996 * empty string. */
997 rdaddc(&urlstr, 0);
998
999 if (chm->contents_filename) {
1000 char *withslash = add_leading_slash(chm->contents_filename);
1001 contentsfile = chm_find_file(chm, withslash);
1002 sfree(withslash);
1003 assert(contentsfile);
1004 }
1005 if (chm->index_filename) {
1006 char *withslash = add_leading_slash(chm->index_filename);
1007 indexfile = chm_find_file(chm, withslash);
1008 sfree(withslash);
1009 assert(indexfile);
1010 }
1011
1012 index = 0;
1013
1014 /* #TOCIDX header field pointing at start of type-1 records */
1015 PUT_32BIT_LSB_FIRST(tocidx.text + 0, tocidx.pos);
1016
1017 /*
1018 * First pass over the section structure, generating in
1019 * parallel one of the multiple structure types in #TOCIDX and
1020 * the sole record in all the other files.
1021 */
1022 for (sect = chm->allsecthead; sect; sect = sect->next) {
1023 /* Size of the first kind of #TOCIDX record varies between
1024 * leaf and internal nodes */
1025 int tocidx_size_1 = (sect->firstchild ? 0x1c : 0x14);
1026
1027 /*
1028 * Flags:
1029 * - 8 means there's a local filename, which in _our_ CHM
1030 * files there always is. If you unset this flag, you
1031 * get a node in the contents treeview which doesn't
1032 * open any page when clicked, and exists solely to
1033 * contain children; in that situation the topic index
1034 * field at position 0x08 in this record also stops
1035 * being an index into #TOPICS and instead becomes an
1036 * index into #STRINGS giving the node's title.
1037 * - 4 apparently means the node should have the 'book'
1038 * rather than 'page' icon in the TOC tree view in the
1039 * help viewer
1040 * - 1 means the node has a subtree in the tree view,
1041 * which I take to mean (contrary to chmspec) that
1042 * _this_ is the flag that means this node is a
1043 * non-leaf node and hence has the two extra fields for
1044 * first-child and whatever the other one means
1045 */
1046 unsigned tocidx_1_flags = (sect->firstchild ? 0x5 : 0) | 8;
1047
1048 int urlstr_size;
1049
1050 /* Pad to ensure the record isn't split between
1051 * 0x1000-byte pages of the file */
1052 while ((tocidx.pos ^ (tocidx.pos + tocidx_size_1 - 1)) >> 12)
1053 RDADD_32BIT_LSB_FIRST(&tocidx, 0);
1054
1055 sect->topic_index = index++;
1056
1057 /* Write the type-1 record in #TOCIDX */
1058 sect->tocidx_offset_1 = tocidx.pos;
1059 RDADD_16BIT_LSB_FIRST(&tocidx, 0); /* unknown */
1060 /* chmspec thinks this 16-bit field is 'unknown', but in
1061 * my observations it appears to be the index of an entry
1062 * in the #TOCIDX type-3 region. But I still don't know
1063 * what those are really for. */
1064 RDADD_16BIT_LSB_FIRST(&tocidx, sect->topic_index);
1065 RDADD_32BIT_LSB_FIRST(&tocidx, tocidx_1_flags);
1066 RDADD_32BIT_LSB_FIRST(&tocidx, sect->topic_index);
1067 RDADD_32BIT_LSB_FIRST(&tocidx, sect->parent ?
1068 sect->parent->tocidx_offset_1 : 0);
1069 RDADD_32BIT_LSB_FIRST(&tocidx, 0); /* 'next' ptr; fill in later */
1070 if (sect->firstchild) {
1071 RDADD_32BIT_LSB_FIRST(&tocidx, 0); /* child; fill in later */
1072 RDADD_32BIT_LSB_FIRST(&tocidx, 0); /* unknown */
1073 }
1074 assert(tocidx.pos == sect->tocidx_offset_1 + tocidx_size_1);
1075
1076 /* Figure out our offset in #URLSTR, by ensuring we're not
1077 * going to overrun a page boundary (as usual). For this
1078 * we need our record length, which is two 32-bit fields
1079 * plus a NUL-terminated copy of the target file name / URL. */
1080 urlstr_size = 8 + strlen(sect->url) + 1;
1081 assert(urlstr_size < 0x1000); /* must _fit_ in a page! */
1082 while ((urlstr.pos ^ (urlstr.pos + urlstr_size - 1)) >> 12)
1083 rdaddc(&urlstr, 0);
1084
1085 /*
1086 * Save everything we know so far about the #URLTBL record
1087 * we'll need to write.
1088 */
1089 urltbl_entry = snew(struct chm_urltbl_entry);
1090 urltbl_entry->hash = chm_url_hash(sect->url);
1091 urltbl_entry->topic_index = sect->topic_index;
1092 urltbl_entry->urlstr_pos = urlstr.pos;
1093 add234(urltbl_pre, urltbl_entry);
1094
1095 /* Write the #TOPICS entry */
1096 RDADD_32BIT_LSB_FIRST(&topics, sect->tocidx_offset_1);
1097 s = chm_intern_string(chm, sect->title);
1098 RDADD_32BIT_LSB_FIRST(&topics, s);
1099 urltbl_entry->topics_offset_to_update = topics.pos;
1100 RDADD_32BIT_LSB_FIRST(&topics, 0); /* fill in later */
1101 RDADD_16BIT_LSB_FIRST(&topics, 6); /* flag as 'in contents' */
1102 RDADD_16BIT_LSB_FIRST(&topics, 0); /* unknown */
1103
1104 /*
1105 * Write the #URLSTR entry.
1106 */
1107 RDADD_32BIT_LSB_FIRST(&urlstr, 0); /* URL string (null) */
1108 RDADD_32BIT_LSB_FIRST(&urlstr, 0); /* FrameName location (null) */
1109 rdaddsc(&urlstr, sect->url); /* 'Local' */
1110 rdaddc(&urlstr, '\0');
1111 }
1112
1113 /*
1114 * Add entries in #URLTBL, #URLSTR and #TOPICS for the
1115 * contents and index files. They don't form part of the tree
1116 * in #TOCIDX, though.
1117 */
1118 if (chm->contents_filename) {
1119 urltbl_entry = snew(struct chm_urltbl_entry);
1120 urltbl_entry->hash = chm_url_hash(chm->contents_filename);
1121 urltbl_entry->topic_index = index;
1122 urltbl_entry->urlstr_pos = urlstr.pos;
1123 add234(urltbl_pre, urltbl_entry);
1124
1125 /* #TOPICS entry */
1126 RDADD_32BIT_LSB_FIRST(&topics, 0); /* no #TOCIDX entry */
1127 RDADD_32BIT_LSB_FIRST(&topics, 0xFFFFFFFFU); /* no title either */
1128 urltbl_entry->topics_offset_to_update = topics.pos;
1129 RDADD_32BIT_LSB_FIRST(&topics, 0); /* fill in later */
1130 RDADD_16BIT_LSB_FIRST(&topics, 2); /* flag as 'not in contents' */
1131 RDADD_16BIT_LSB_FIRST(&topics, 0); /* unknown */
1132
1133 /* #URLSTR entry */
1134 RDADD_32BIT_LSB_FIRST(&urlstr, 0); /* URL string (null) */
1135 RDADD_32BIT_LSB_FIRST(&urlstr, 0); /* FrameName location (null) */
1136 rdaddsc(&urlstr, chm->contents_filename); /* 'Local' */
1137 rdaddc(&urlstr, '\0');
1138
1139 /* And add the entry in #SYSTEM that cites the hash of the
1140 * #URLTBL entry. */
1141 rec = sys_start(&sysfile, 11);
1142 RDADD_32BIT_LSB_FIRST(&sysfile, urltbl_entry->hash);
1143 sys_end(&sysfile, rec);
1144
1145 index++;
1146 }
1147 if (chm->index_filename) {
1148 urltbl_entry = snew(struct chm_urltbl_entry);
1149 urltbl_entry->hash = chm_url_hash(chm->index_filename);
1150 urltbl_entry->topic_index = index;
1151 urltbl_entry->urlstr_pos = urlstr.pos;
1152 add234(urltbl_pre, urltbl_entry);
1153
1154 /* #TOPICS entry */
1155 RDADD_32BIT_LSB_FIRST(&topics, 0); /* no #TOCIDX entry */
1156 RDADD_32BIT_LSB_FIRST(&topics, 0xFFFFFFFFU); /* no title either */
1157 urltbl_entry->topics_offset_to_update = topics.pos;
1158 RDADD_32BIT_LSB_FIRST(&topics, 0); /* fill in later */
1159 RDADD_16BIT_LSB_FIRST(&topics, 2); /* flag as 'not in contents' */
1160 RDADD_16BIT_LSB_FIRST(&topics, 0); /* unknown */
1161
1162 /* #URLSTR entry */
1163 RDADD_32BIT_LSB_FIRST(&urlstr, 0); /* URL string (null) */
1164 RDADD_32BIT_LSB_FIRST(&urlstr, 0); /* FrameName location (null) */
1165 rdaddsc(&urlstr, chm->index_filename); /* 'Local' */
1166 rdaddc(&urlstr, '\0');
1167
1168 /* And add the entry in #SYSTEM that cites the hash of the
1169 * #URLTBL entry. */
1170 rec = sys_start(&sysfile, 7);
1171 RDADD_32BIT_LSB_FIRST(&sysfile, urltbl_entry->hash);
1172 sys_end(&sysfile, rec);
1173
1174 index++;
1175 }
1176
1177 /*
1178 * Now we've got all our #URLTBL entries, so we can write out
1179 * #URLTBL itself.
1180 */
1181 while ((urltbl_entry = delpos234(urltbl_pre, 0)) != NULL) {
1182 /* Pad #URLTBL to the beginning of this section's entry.
1183 * Entries are all 12 bytes long, but again there's some
1184 * padding to ensure that they don't cross a page
1185 * boundary. */
1186 while ((urltbl.pos ^ (urltbl.pos + 12 - 1)) >> 12)
1187 RDADD_32BIT_LSB_FIRST(&urltbl, 0);
1188
1189 /* Fill in the link from #TOPICS to this entry's offset */
1190 PUT_32BIT_LSB_FIRST(topics.text +
1191 urltbl_entry->topics_offset_to_update,
1192 urltbl.pos);
1193
1194 /* Write the entry itself. */
1195 RDADD_32BIT_LSB_FIRST(&urltbl, urltbl_entry->hash);
1196 RDADD_32BIT_LSB_FIRST(&urltbl, urltbl_entry->topic_index);
1197 RDADD_32BIT_LSB_FIRST(&urltbl, urltbl_entry->urlstr_pos);
1198
1199 sfree(urltbl_entry);
1200 }
1201 freetree234(urltbl_pre);
1202
1203 /*
1204 * Small follow-up pass filling in forward-pointing offset
1205 * fields in the #TOCIDX type-1 records which the previous
1206 * pass didn't know yet.
1207 */
1208 for (sect = chm->allsecthead; sect; sect = sect->next) {
1209 if (sect->nextsibling)
1210 PUT_32BIT_LSB_FIRST(tocidx.text + sect->tocidx_offset_1 + 0x10,
1211 sect->nextsibling->tocidx_offset_1);
1212 if (sect->firstchild)
1213 PUT_32BIT_LSB_FIRST(tocidx.text + sect->tocidx_offset_1 + 0x14,
1214 sect->firstchild->tocidx_offset_1);
1215 }
1216
1217 /* #TOCIDX header field pointing at start of type-2 records */
1218 PUT_32BIT_LSB_FIRST(tocidx.text + 0xC, tocidx.pos);
1219
1220 /*
1221 * Write the #TOCIDX type-2 records, which are just 4 bytes
1222 * long and just contain another copy of each topic's index,
1223 * but we need to have them there so that the type-3 records
1224 * can refer to them by offset.
1225 */
1226 for (sect = chm->allsecthead; sect; sect = sect->next) {
1227 sect->tocidx_offset_2 = tocidx.pos;
1228 RDADD_32BIT_LSB_FIRST(&tocidx, sect->topic_index);
1229 }
1230
1231 /* Align the current #TOCIDX offset to 16 bytes */
1232 while (tocidx.pos & 0xF)
1233 rdaddc(&tocidx, 0);
1234
1235 /* #TOCIDX header field pointing at start of type-3 records */
1236 PUT_32BIT_LSB_FIRST(tocidx.text + 0x4, tocidx.pos);
1237
1238 /*
1239 * Write the #TOCIDX type-3 records.
1240 *
1241 * In help files I've examined, there are fewer of these than
1242 * you might expect; apparently not all sections rate one for
1243 * some reason. For the moment I'm just writing out one for
1244 * every section.
1245 */
1246 n_tocidx_3 = 0;
1247 for (sect = chm->allsecthead; sect; sect = sect->next) {
1248 RDADD_32BIT_LSB_FIRST(&tocidx, sect->tocidx_offset_1);
1249 RDADD_32BIT_LSB_FIRST(&tocidx, sect->topic_index + 666); /* ?! */
1250 RDADD_32BIT_LSB_FIRST(&tocidx, sect->tocidx_offset_2);
1251 RDADD_32BIT_LSB_FIRST(&tocidx, sect->topic_index);
1252 n_tocidx_3++;
1253 }
1254
1255 /* #TOCIDX header field giving number of type-3 records */
1256 PUT_32BIT_LSB_FIRST(tocidx.text + 0x8, n_tocidx_3);
1257
1258 chm_add_file_internal(chm, "/#TOCIDX", tocidx.text, tocidx.pos,
1259 &chm->content1, 1);
1260 chm_add_file_internal(chm, "/#TOPICS", topics.text, topics.pos,
1261 &chm->content1, 1);
1262 chm_add_file_internal(chm, "/#URLTBL", urltbl.text, urltbl.pos,
1263 &chm->content1, 1);
1264 chm_add_file_internal(chm, "/#URLSTR", urlstr.text, urlstr.pos,
1265 &chm->content1, 1);
1266
1267 /*
1268 * Write #IDXHDR (and its mirror in #SYSTEM), which we
1269 * couldn't do until we knew how many topic nodes there were.
1270 */
1271 {
1272 int idxhdr_start;
1273
1274 rec = sys_start(&sysfile, 13);
1275 idxhdr_start = sysfile.pos;
1276
1277 rdaddsc(&sysfile, "T#SM"); /* #IDXHDR magic */
1278 RDADD_32BIT_LSB_FIRST(&sysfile, 0x12345678); /* checksum? FIXME */
1279 RDADD_32BIT_LSB_FIRST(&sysfile, 1); /* unknown */
1280 RDADD_32BIT_LSB_FIRST(&sysfile, index); /* number of topic nodes */
1281 RDADD_32BIT_LSB_FIRST(&sysfile, 0); /* unknown */
1282 RDADD_32BIT_LSB_FIRST(&sysfile, 0xFFFFFFFFU); /* no image list */
1283 RDADD_32BIT_LSB_FIRST(&sysfile, 0); /* unknown */
1284 RDADD_32BIT_LSB_FIRST(&sysfile, 0); /* top-level node is
1285 * not a folder */
1286 RDADD_32BIT_LSB_FIRST(&sysfile, 0xFFFFFFFFU); /* no bg colour */
1287 RDADD_32BIT_LSB_FIRST(&sysfile, 0xFFFFFFFFU); /* no fg colour */
1288 RDADD_32BIT_LSB_FIRST(&sysfile, 0xFFFFFFFFU); /* no font spec */
1289 RDADD_32BIT_LSB_FIRST(&sysfile, 0xFFFFFFFFU); /* no window style */
1290 RDADD_32BIT_LSB_FIRST(&sysfile, 0xFFFFFFFFU); /* no ex win style */
1291 RDADD_32BIT_LSB_FIRST(&sysfile, 0xFFFFFFFFU); /* unknown */
1292 RDADD_32BIT_LSB_FIRST(&sysfile, 0xFFFFFFFFU); /* no frame name */
1293 RDADD_32BIT_LSB_FIRST(&sysfile, 0xFFFFFFFFU); /* no window name */
1294 RDADD_32BIT_LSB_FIRST(&sysfile, 0); /* no information types */
1295 RDADD_32BIT_LSB_FIRST(&sysfile, 1); /* unknown */
1296 RDADD_32BIT_LSB_FIRST(&sysfile, 0); /* no merge files */
1297 RDADD_32BIT_LSB_FIRST(&sysfile, 0); /* unknown */
1298 while (sysfile.pos - idxhdr_start < 4096)
1299 rdaddc(&sysfile, 0);
1300
1301 chm_add_file_internal(chm, "/#IDXHDR", sysfile.text + idxhdr_start,
1302 sysfile.pos - idxhdr_start,
1303 &chm->content1, 1);
1304 sys_end(&sysfile, rec);
1305 }
1306
1307 sfree(tocidx.text);
1308 sfree(topics.text);
1309 sfree(urltbl.text);
1310 sfree(urlstr.text);
1311 }
1312
1313 /* Missing from #SYSTEM: */
1314 /* 10 (4-byte timestamp) */
1315 /* 6 (logical file name) */
1316
1317 chm_add_file_internal(chm, "/#SYSTEM", sysfile.text, sysfile.pos,
1318 &chm->content0, 0);
1319 sfree(sysfile.text);
1320
1321 chm_add_file_internal(chm, "/#STRINGS", chm->stringsfile.text,
1322 chm->stringsfile.pos, &chm->content1, 1);
1323
1324 /*
1325 * ::DataSpace/NameList, giving the names of the two content sections.
1326 */
1327 {
1328 rdstringc dsnl = {0, 0, NULL};
1329 const char *p;
1330 int stringstart;
1331
1332 RDADD_16BIT_LSB_FIRST(&dsnl, 0); /* total file size; fill in later */
1333 RDADD_16BIT_LSB_FIRST(&dsnl, 2); /* number of names */
1334
1335 RDADD_16BIT_LSB_FIRST(&dsnl, 0); /* string length; fill in later */
1336 stringstart = dsnl.pos;
1337 for (p = "Uncompressed"; *p; p++)
1338 RDADD_16BIT_LSB_FIRST(&dsnl, *p);
1339 PUT_16BIT_LSB_FIRST(dsnl.text + stringstart - 2,
1340 (dsnl.pos - stringstart) / 2);
1341 RDADD_16BIT_LSB_FIRST(&dsnl, 0); /* NUL terminator */
1342
1343 RDADD_16BIT_LSB_FIRST(&dsnl, 0); /* string length; fill in later */
1344 stringstart = dsnl.pos;
1345 for (p = "MSCompressed"; *p; p++)
1346 RDADD_16BIT_LSB_FIRST(&dsnl, *p);
1347 PUT_16BIT_LSB_FIRST(dsnl.text + stringstart - 2,
1348 (dsnl.pos - stringstart) / 2);
1349 RDADD_16BIT_LSB_FIRST(&dsnl, 0); /* NUL terminator */
1350
1351 PUT_16BIT_LSB_FIRST(dsnl.text, dsnl.pos / 2);
1352
1353 chm_add_file_internal(chm, "::DataSpace/NameList", dsnl.text, dsnl.pos,
1354 &chm->content0, 0);
1355
1356 sfree(dsnl.text);
1357 }
1358
1359 /*
1360 * Actually compress the compressed-data section, load the
1361 * compressed version of it into the containing uncompressed
1362 * section, and write the auxiliary files describing it.
1363 */
1364 {
1365 rdstringc rs = {0, 0, NULL};
1366 const char *p;
1367 int orig_decomp_size = chm->content1.pos;
1368 size_t i;
1369
1370 while (chm->content1.pos & 0x7FFF)
1371 rdaddc(&chm->content1, 0); /* pad to a realign-interval boundary */
1372 ef = lzx(chm->content1.text, chm->content1.pos, 0x8000, 0x10000);
1373 chm_add_file_internal(
1374 chm, "::DataSpace/Storage/MSCompressed/Content",
1375 (char *)ef->data, ef->data_len, &chm->content0, 0);
1376
1377 for (p = "{7FC28940-9D31-11D0-9B27-00A0C91E9C7C}"; *p; p++)
1378 RDADD_16BIT_LSB_FIRST(&rs, *p);
1379 rs.pos = 0x26; /* this file is always written truncated :-) */
1380 chm_add_file_internal(
1381 chm, "::DataSpace/Storage/MSCompressed/Transform/List",
1382 rs.text, rs.pos, &chm->content0, 0);
1383 rs.pos = 0;
1384
1385 RDADD_32BIT_LSB_FIRST(&rs, orig_decomp_size);
1386 RDADD_32BIT_LSB_FIRST(&rs, 0); /* high word of 64-bit size */
1387 chm_add_file_internal(
1388 chm, "::DataSpace/Storage/MSCompressed/SpanInfo",
1389 rs.text, rs.pos, &chm->content0, 0);
1390 rs.pos = 0;
1391
1392 RDADD_32BIT_LSB_FIRST(&rs, 6); /* file size */
1393 rdaddsc(&rs, "LZXC"); /* compression type identifier */
1394 RDADD_32BIT_LSB_FIRST(&rs, 2); /* version */
1395 RDADD_32BIT_LSB_FIRST(&rs, 2); /* reset interval in units of 2^15 */
1396 RDADD_32BIT_LSB_FIRST(&rs, 2); /* window size in units of 2^15 */
1397 RDADD_32BIT_LSB_FIRST(&rs, 1); /* reset interval multiplier */
1398 RDADD_32BIT_LSB_FIRST(&rs, 0); /* unknown */
1399 chm_add_file_internal(
1400 chm, "::DataSpace/Storage/MSCompressed/ControlData",
1401 rs.text, rs.pos, &chm->content0, 0);
1402 rs.pos = 0;
1403
1404 RDADD_32BIT_LSB_FIRST(&rs, 2); /* unknown (version number?) */
1405 RDADD_32BIT_LSB_FIRST(&rs, ef->n_resets); /* reset table length */
1406 RDADD_32BIT_LSB_FIRST(&rs, 8); /* reset table entry size */
1407 RDADD_32BIT_LSB_FIRST(&rs, 0x28); /* reset table offset */
1408 RDADD_32BIT_LSB_FIRST(&rs, orig_decomp_size); /* uncompressed len */
1409 RDADD_32BIT_LSB_FIRST(&rs, 0); /* MSW */
1410 RDADD_32BIT_LSB_FIRST(&rs, ef->data_len); /* compressed len */
1411 RDADD_32BIT_LSB_FIRST(&rs, 0); /* MSW */
1412 RDADD_32BIT_LSB_FIRST(&rs, 0x8000); /* realign interval */
1413 RDADD_32BIT_LSB_FIRST(&rs, 0); /* MSW */
1414 for (i = 0; i < ef->n_resets; i++) {
1415 RDADD_32BIT_LSB_FIRST(&rs, ef->reset_byte_offsets[i]);
1416 RDADD_32BIT_LSB_FIRST(&rs, 0); /* MSW */
1417 }
1418 chm_add_file_internal(
1419 chm, "::DataSpace/Storage/MSCompressed/Transform/"
1420 "{7FC28940-9D31-11D0-9B27-00A0C91E9C7C}/InstanceData/ResetTable",
1421 rs.text, rs.pos, &chm->content0, 0);
1422 rs.pos = 0;
1423 }
1424
1425 sfree(ef->data);
1426 sfree(ef->reset_byte_offsets);
1427 sfree(ef);
1428
1429 directory(&dir, chm->files);
1430 itsf(&chm->outfile, &dir, &chm->content0);
1431 sfree(dir.text);
1432
1433 assert(outlen);
1434 *outlen = chm->outfile.pos;
1435 return chm->outfile.text;
1436 }
1437