1 /*
2 * Info backend for Halibut
3 *
4 * The Info file format isn't well-specified, and what specification
5 * there is is scattered all over the place. Sources include:
6 * (info), from GNU Texinfo.
7 * (texinfo), also from GNU Texinfo.
8 * (Emacs)Misc Help, and (emacs)Info Lookup, from GNU Emacs.
9 * info.el, from GNU Emacs.
10 *
11 * Possible future work:
12 *
13 * - configurable choice of how to allocate node names?
14 * + possibly a template-like approach, choosing node names to
15 * be the full section title or perhaps the internal keyword?
16 * + neither of those seems quite right. Perhaps instead a
17 * Windows Help-like mechanism, where a magic config
18 * directive allows user choice of name for every node.
19 * + Only trouble with that is, now what happens to the section
20 * numbers? Do they become completely vestigial and just sit
21 * in the title text of each node? Or do we keep them in the
22 * menus somehow? I think people might occasionally want to
23 * go to a section by number, if only because all the _other_
24 * formats of the same document will reference the numbers
25 * all the time. So our menu lines could look like one of
26 * these:
27 * * Nodename: Section 1.2. Title of section.
28 * * Section 1.2: Nodename. Title of section.
29 *
30 * - might be helpful to diagnose duplicate node names!
31 *
32 * - Indices generated by makeinfo use a menu rather than a bunch of
33 * cross-references, which reduces visual clutter rather. For
34 * singly-referenced items, it looks like:
35 * * toner cartridge, replacing: Toner.
36 * It does a horrid job on multiply-referenced entries, though,
37 * perhaps because the name before the colon is meant to be unique.
38 * Info's 'i' command requires the use of a menu -- it fails to
39 * find any index entries at all with Halibut's current index format.
40 *
41 * - The string "*note" is matched case-insensitively, so we could
42 * make things slightly less ugly by using the lower-case version
43 * when the user asks for \k. Unfortunately, standalone Info seems
44 * to match node names case-sensitively, so we can't downcase that.
45 *
46 * - The character encoding used in an Info file can be configured using
47 * an Emacs local variables block at the end, like this:
48 * Local Variables:
49 * coding: iso-8859-1
50 * End:
51 */
52
53 #include <stdio.h>
54 #include <stdlib.h>
55 #include <assert.h>
56 #include "halibut.h"
57
/*
 * Heading decoration for one heading level.
 *
 * `underline' is the wide string that gets repeated beneath a heading
 * until it is at least as wide as the heading text; an empty string
 * means the heading is not underlined. While the configuration is
 * being built it may point at several NUL-separated alternatives,
 * from which the first one representable in the output character set
 * is selected.
 */
typedef struct {
    wchar_t *underline;
} alignstruct;
61
62 typedef struct {
63 char *filename;
64 int maxfilesize;
65 int charset;
66 int listindentbefore, listindentafter;
67 int indent_code, width, index_width;
68 alignstruct atitle, achapter, *asect;
69 int nasect;
70 wchar_t *bullet, *listsuffix;
71 wchar_t *startemph, *endemph;
72 wchar_t *startstrong, *endstrong;
73 wchar_t *lquote, *rquote;
74 wchar_t *sectsuffix;
75 wchar_t *rule;
76 wchar_t *index_text;
77 } infoconfig;
78
/*
 * A growing buffer of output text together with the character-set
 * conversion context used to fill it.
 *
 * EMPTY_INFO_DATA is a positional initialiser, so it must be kept in
 * step with the member order below.
 */
typedef struct {
    rdstringc output;          /* accumulated output (text and pos) */
    int charset;               /* character set the output is in */
    charset_state state;       /* conversion state, starts at CHARSET_INIT_STATE */
    int wcmode;                /* starts FALSE; NOTE(review): meaning is set by the
                                * info_rdadd* helpers, not visible here */
} info_data;
#define EMPTY_INFO_DATA { { 0, 0, NULL }, 0, CHARSET_INIT_STATE, FALSE }
/* Assignable copy of the empty state, used to reset a node's text. */
static const info_data empty_info_data = EMPTY_INFO_DATA;
87
88 typedef struct node_tag node;
89 struct node_tag {
90 node *listnext;
91 node *up, *prev, *next, *lastchild;
92 int pos, started_menu, filenum;
93 char *name;
94 info_data text;
95 };
96
97 typedef struct {
98 char *text;
99 int length;
100 int nnodes, nodesize;
101 node **nodes;
102 } info_idx;
103
104 static int info_rdadd(info_data *, wchar_t);
105 static int info_rdadds(info_data *, wchar_t const *);
106 static int info_rdaddc(info_data *, char);
107 static int info_rdaddsc(info_data *, char const *);
108
109 static void info_heading(info_data *, word *, word *, alignstruct, int,
110 infoconfig *);
111 static void info_rule(info_data *, int, int, infoconfig *);
112 static void info_para(info_data *, word *, wchar_t *, word *, keywordlist *,
113 int, int, int, infoconfig *);
114 static void info_codepara(info_data *, word *, int, int);
115 static void info_versionid(info_data *, word *, infoconfig *);
116 static void info_menu_item(info_data *, node *, paragraph *, infoconfig *);
117 static word *info_transform_wordlist(word *, keywordlist *);
118 static int info_check_index(word *, node *, indexdata *);
119
120 static int info_rdaddwc(info_data *, word *, word *, int, infoconfig *);
121
122 static node *info_node_new(char *name, int charset);
123 static char *info_node_name_for_para(paragraph *p, infoconfig *);
124 static char *info_node_name_for_text(wchar_t *text, infoconfig *);
125
info_configure(paragraph * source)126 static infoconfig info_configure(paragraph *source) {
127 infoconfig ret;
128 paragraph *p;
129 int n;
130
131 /*
132 * Defaults.
133 */
134 ret.filename = dupstr("output.info");
135 ret.maxfilesize = 64 << 10;
136 ret.charset = CS_ASCII;
137 ret.width = 70;
138 ret.listindentbefore = 1;
139 ret.listindentafter = 3;
140 ret.indent_code = 2;
141 ret.index_width = 40;
142 ret.listsuffix = L".";
143 ret.bullet = L"\x2022\0-\0\0";
144 ret.rule = L"\x2500\0-\0\0";
145 ret.startemph = L"_\0_\0\0";
146 ret.endemph = uadv(ret.startemph);
147 ret.startstrong = L"*\0*\0\0";
148 ret.endstrong = uadv(ret.startstrong);
149 ret.lquote = L"\x2018\0\x2019\0`\0'\0\0";
150 ret.rquote = uadv(ret.lquote);
151 ret.sectsuffix = L": ";
152 /*
153 * Default underline characters are chosen to match those recognised by
154 * Info-fontify-node.
155 */
156 ret.atitle.underline = L"*\0\0";
157 ret.achapter.underline = L"=\0\0";
158 ret.nasect = 2;
159 ret.asect = snewn(ret.nasect, alignstruct);
160 ret.asect[0].underline = L"-\0\0";
161 ret.asect[1].underline = L".\0\0";
162 ret.index_text = L"Index";
163
164 /*
165 * Two-pass configuration so that we can pick up global config
166 * (e.g. `quotes') before having it overridden by specific
167 * config (`info-quotes'), irrespective of the order in which
168 * they occur.
169 */
170 for (p = source; p; p = p->next) {
171 if (p->type == para_Config) {
172 if (!ustricmp(p->keyword, L"quotes")) {
173 if (*uadv(p->keyword) && *uadv(uadv(p->keyword))) {
174 ret.lquote = uadv(p->keyword);
175 ret.rquote = uadv(ret.lquote);
176 }
177 } else if (!ustricmp(p->keyword, L"index")) {
178 ret.index_text = uadv(p->keyword);
179 }
180 }
181 }
182
183 for (p = source; p; p = p->next) {
184 if (p->type == para_Config) {
185 if (!ustricmp(p->keyword, L"info-filename")) {
186 sfree(ret.filename);
187 ret.filename = dupstr(adv(p->origkeyword));
188 } else if (!ustricmp(p->keyword, L"info-charset")) {
189 ret.charset = charset_from_ustr(&p->fpos, uadv(p->keyword));
190 } else if (!ustricmp(p->keyword, L"info-max-file-size")) {
191 ret.maxfilesize = utoi(uadv(p->keyword));
192 } else if (!ustricmp(p->keyword, L"info-width")) {
193 ret.width = utoi(uadv(p->keyword));
194 } else if (!ustricmp(p->keyword, L"info-indent-code")) {
195 ret.indent_code = utoi(uadv(p->keyword));
196 } else if (!ustricmp(p->keyword, L"info-index-width")) {
197 ret.index_width = utoi(uadv(p->keyword));
198 } else if (!ustricmp(p->keyword, L"info-list-indent")) {
199 ret.listindentbefore = utoi(uadv(p->keyword));
200 } else if (!ustricmp(p->keyword, L"info-listitem-indent")) {
201 ret.listindentafter = utoi(uadv(p->keyword));
202 } else if (!ustricmp(p->keyword, L"info-section-suffix")) {
203 ret.sectsuffix = uadv(p->keyword);
204 } else if (!ustricmp(p->keyword, L"info-underline")) {
205 ret.atitle.underline = ret.achapter.underline =
206 uadv(p->keyword);
207 for (n = 0; n < ret.nasect; n++)
208 ret.asect[n].underline = ret.atitle.underline;
209 } else if (!ustricmp(p->keyword, L"info-chapter-underline")) {
210 ret.achapter.underline = uadv(p->keyword);
211 } else if (!ustricmp(p->keyword, L"info-section-underline")) {
212 wchar_t *q = uadv(p->keyword);
213 int n = 0;
214 if (uisdigit(*q)) {
215 n = utoi(q);
216 q = uadv(q);
217 }
218 if (n >= ret.nasect) {
219 int i;
220 ret.asect = sresize(ret.asect, n+1, alignstruct);
221 for (i = ret.nasect; i <= n; i++)
222 ret.asect[i] = ret.asect[ret.nasect-1];
223 ret.nasect = n+1;
224 }
225 ret.asect[n].underline = q;
226 } else if (!ustricmp(p->keyword, L"text-title-underline")) {
227 ret.atitle.underline = uadv(p->keyword);
228 } else if (!ustricmp(p->keyword, L"info-bullet")) {
229 ret.bullet = uadv(p->keyword);
230 } else if (!ustricmp(p->keyword, L"info-rule")) {
231 ret.rule = uadv(p->keyword);
232 } else if (!ustricmp(p->keyword, L"info-list-suffix")) {
233 ret.listsuffix = uadv(p->keyword);
234 } else if (!ustricmp(p->keyword, L"info-emphasis")) {
235 if (*uadv(p->keyword) && *uadv(uadv(p->keyword))) {
236 ret.startemph = uadv(p->keyword);
237 ret.endemph = uadv(ret.startemph);
238 }
239 } else if (!ustricmp(p->keyword, L"info-strong")) {
240 if (*uadv(p->keyword) && *uadv(uadv(p->keyword))) {
241 ret.startstrong = uadv(p->keyword);
242 ret.endstrong = uadv(ret.startstrong);
243 }
244 } else if (!ustricmp(p->keyword, L"info-quotes")) {
245 if (*uadv(p->keyword) && *uadv(uadv(p->keyword))) {
246 ret.lquote = uadv(p->keyword);
247 ret.rquote = uadv(ret.lquote);
248 }
249 }
250 }
251 }
252
253 /*
254 * Now process fallbacks on quote characters, underlines, the
255 * rule character, the emphasis characters, and bullets.
256 */
257 while (*uadv(ret.rquote) && *uadv(uadv(ret.rquote)) &&
258 (!cvt_ok(ret.charset, ret.lquote) ||
259 !cvt_ok(ret.charset, ret.rquote))) {
260 ret.lquote = uadv(ret.rquote);
261 ret.rquote = uadv(ret.lquote);
262 }
263
264 while (*uadv(ret.endemph) && *uadv(uadv(ret.endemph)) &&
265 (!cvt_ok(ret.charset, ret.startemph) ||
266 !cvt_ok(ret.charset, ret.endemph))) {
267 ret.startemph = uadv(ret.endemph);
268 ret.endemph = uadv(ret.startemph);
269 }
270
271 while (*uadv(ret.endstrong) && *uadv(uadv(ret.endstrong)) &&
272 (!cvt_ok(ret.charset, ret.startstrong) ||
273 !cvt_ok(ret.charset, ret.endstrong))) {
274 ret.startstrong = uadv(ret.endstrong);
275 ret.endstrong = uadv(ret.startstrong);
276 }
277
278 while (*ret.atitle.underline && *uadv(ret.atitle.underline) &&
279 !cvt_ok(ret.charset, ret.atitle.underline))
280 ret.atitle.underline = uadv(ret.atitle.underline);
281
282 while (*ret.achapter.underline && *uadv(ret.achapter.underline) &&
283 !cvt_ok(ret.charset, ret.achapter.underline))
284 ret.achapter.underline = uadv(ret.achapter.underline);
285
286 for (n = 0; n < ret.nasect; n++) {
287 while (*ret.asect[n].underline && *uadv(ret.asect[n].underline) &&
288 !cvt_ok(ret.charset, ret.asect[n].underline))
289 ret.asect[n].underline = uadv(ret.asect[n].underline);
290 }
291
292 while (*ret.bullet && *uadv(ret.bullet) &&
293 !cvt_ok(ret.charset, ret.bullet))
294 ret.bullet = uadv(ret.bullet);
295
296 while (*ret.rule && *uadv(ret.rule) &&
297 !cvt_ok(ret.charset, ret.rule))
298 ret.rule = uadv(ret.rule);
299
300 return ret;
301 }
302
info_config_filename(char * filename)303 paragraph *info_config_filename(char *filename)
304 {
305 return cmdline_cfg_simple("info-filename", filename, NULL);
306 }
307
/*
 * Entry point of the Info backend: render a document as an Info
 * file, plus numbered sub-files when splitting is enabled.
 *
 *  - sourceform: head of the document's paragraph list. The
 *    private_data field of every paragraph is overwritten (with the
 *    paragraph's node for sectioning paragraphs, NULL otherwise).
 *  - keywords: keyword list used to resolve cross-references and the
 *    optional node argument of `info-dir-entry'.
 *  - idx: the document index; every entry's backend_data is set to a
 *    newly allocated info_idx.
 *  - unused: ignored.
 *
 * The work is done in stages: create one node per section, render
 * the index terms, build the introductory text, render the title and
 * body into the nodes, optionally build an index node, prepend the
 * node header lines, compute tag-table offsets and sub-file
 * assignment, and finally write the files. If an output file cannot
 * be opened, an error is reported and the function returns early.
 */
void info_backend(paragraph *sourceform, keywordlist *keywords,
                  indexdata *idx, void *unused) {
    paragraph *p;
    infoconfig conf;
    word *prefix, *body, *wp;
    word spaceword;
    wchar_t *prefixextra;
    int nesting, nestindent;
    int indentb, indenta;
    int filepos;
    int has_index = FALSE;
    info_data intro_text = EMPTY_INFO_DATA;
    node *topnode, *currnode;
    word bullet;
    FILE *fp;

    IGNORE(unused);

    conf = info_configure(sourceform);

    /*
     * Go through and create a node for each section.
     */
    topnode = info_node_new("Top", conf.charset);
    currnode = topnode;
    for (p = sourceform; p; p = p->next) switch (p->type) {
        /*
         * Chapter titles.
         */
      case para_Chapter:
      case para_Appendix:
      case para_UnnumberedChapter:
      case para_Heading:
      case para_Subsect:
        {
            node *newnode, *upnode;
            char *nodename;

            nodename = info_node_name_for_para(p, &conf);
            newnode = info_node_new(nodename, conf.charset);
            sfree(nodename);

            p->private_data = newnode;

            /* A section without a parent hangs directly off Top. */
            if (p->parent)
                upnode = (node *)p->parent->private_data;
            else
                upnode = topnode;
            assert(upnode);
            newnode->up = upnode;

            currnode->next = newnode;
            newnode->prev = currnode;

            currnode->listnext = newnode;
            currnode = newnode;
        }
        break;
      default:
        p->private_data = NULL;
        break;
    }

    /*
     * Set up the display form of each index entry.
     */
    {
        int i;
        indexentry *entry;

        for (i = 0; (entry = index234(idx->entries, i)) != NULL; i++) {
            info_idx *ii = snew(info_idx);
            info_data id = EMPTY_INFO_DATA;

            id.charset = conf.charset;

            ii->nnodes = ii->nodesize = 0;
            ii->nodes = NULL;

            ii->length = info_rdaddwc(&id, entry->text, NULL, FALSE, &conf);

            /* The info_idx takes over the rendered text buffer. */
            ii->text = id.output.text;

            entry->backend_data = ii;
        }
    }

    /*
     * An Info file begins with a piece of introductory text which
     * is apparently never shown anywhere. This seems to me to be a
     * good place to put the copyright notice and the version IDs.
     * Also, Info directory entries are expected to go here.
     */
    intro_text.charset = conf.charset;

    info_rdaddsc(&intro_text,
                 "This Info file generated by Halibut, ");
    info_rdaddsc(&intro_text, version);
    info_rdaddsc(&intro_text, "\n\n");

    for (p = sourceform; p; p = p->next)
        if (p->type == para_Config &&
            !ustricmp(p->keyword, L"info-dir-entry")) {
            wchar_t *section, *shortname, *longname, *kw;
            char *s;

            /*
             * Arguments are: directory section, short name, long
             * name, and an optional keyword naming the target node.
             */
            section = uadv(p->keyword);
            shortname = *section ? uadv(section) : L"";
            longname = *shortname ? uadv(shortname) : L"";
            kw = *longname ? uadv(longname) : L"";

            if (!*longname) {
                err_cfginsufarg(&p->fpos, p->origkeyword, 3);
                continue;
            }

            info_rdaddsc(&intro_text, "INFO-DIR-SECTION ");
            info_rdadds(&intro_text, section);
            info_rdaddsc(&intro_text, "\nSTART-INFO-DIR-ENTRY\n* ");
            info_rdadds(&intro_text, shortname);
            info_rdaddsc(&intro_text, ": (");
            /* The file is referred to without its ".info" suffix. */
            s = dupstr(conf.filename);
            if (strlen(s) > 5 && !strcmp(s+strlen(s)-5, ".info"))
                s[strlen(s)-5] = '\0';
            info_rdaddsc(&intro_text, s);
            sfree(s);
            info_rdaddsc(&intro_text, ")");
            if (*kw) {
                keyword *kwl = kw_lookup(keywords, kw);
                if (kwl && kwl->para->private_data) {
                    node *n = (node *)kwl->para->private_data;
                    info_rdaddsc(&intro_text, n->name);
                }
            }
            info_rdaddsc(&intro_text, ". ");
            info_rdadds(&intro_text, longname);
            info_rdaddsc(&intro_text, "\nEND-INFO-DIR-ENTRY\n\n");
        }

    for (p = sourceform; p; p = p->next)
        if (p->type == para_Copyright)
            info_para(&intro_text, NULL, NULL, p->words, keywords,
                      0, 0, conf.width, &conf);

    for (p = sourceform; p; p = p->next)
        if (p->type == para_VersionID)
            info_versionid(&intro_text, p->words, &conf);

    if (intro_text.output.text[intro_text.output.pos-1] != '\n')
        info_rdaddc(&intro_text, '\n');

    /* Do the title */
    for (p = sourceform; p; p = p->next)
        if (p->type == para_Title)
            info_heading(&topnode->text, NULL, p->words,
                         conf.atitle, conf.width, &conf);

    nestindent = conf.listindentbefore + conf.listindentafter;
    nesting = 0;

    currnode = topnode;

    /* Do the main document */
    for (p = sourceform; p; p = p->next) switch (p->type) {

      case para_QuotePush:
        nesting += 2;
        break;
      case para_QuotePop:
        nesting -= 2;
        assert(nesting >= 0);
        break;

      case para_LcontPush:
        nesting += nestindent;
        break;
      case para_LcontPop:
        nesting -= nestindent;
        assert(nesting >= 0);
        break;

        /*
         * Things we ignore because we've already processed them or
         * aren't going to touch them in this pass.
         */
      case para_IM:
      case para_BR:
      case para_Biblio:               /* only touch BiblioCited */
      case para_VersionID:
      case para_NoCite:
      case para_Title:
        break;

        /*
         * Chapter titles.
         */
      case para_Chapter:
      case para_Appendix:
      case para_UnnumberedChapter:
      case para_Heading:
      case para_Subsect:
        currnode = p->private_data;
        assert(currnode);
        assert(currnode->up);

        /* The parent gets a menu header before its first item. */
        if (!currnode->up->started_menu) {
            info_rdaddsc(&currnode->up->text, "* Menu:\n\n");
            currnode->up->started_menu = TRUE;
        }
        info_menu_item(&currnode->up->text, currnode, p, &conf);

        has_index |= info_check_index(p->words, currnode, idx);
        if (p->type == para_Chapter || p->type == para_Appendix ||
            p->type == para_UnnumberedChapter)
            info_heading(&currnode->text, p->kwtext, p->words,
                         conf.achapter, conf.width, &conf);
        else
            /* Levels deeper than configured reuse the deepest style. */
            info_heading(&currnode->text, p->kwtext, p->words,
                         conf.asect[p->aux>=conf.nasect?conf.nasect-1:p->aux],
                         conf.width, &conf);
        nesting = 0;
        break;

      case para_Rule:
        info_rule(&currnode->text, nesting, conf.width - nesting, &conf);
        break;

      case para_Normal:
      case para_Copyright:
      case para_DescribedThing:
      case para_Description:
      case para_BiblioCited:
      case para_Bullet:
      case para_NumberedList:
        has_index |= info_check_index(p->words, currnode, idx);
        if (p->type == para_Bullet) {
            /* Build a one-word list holding the bullet text. */
            bullet.next = NULL;
            bullet.alt = NULL;
            bullet.type = word_Normal;
            bullet.text = conf.bullet;
            prefix = &bullet;
            prefixextra = NULL;
            indentb = conf.listindentbefore;
            indenta = conf.listindentafter;
        } else if (p->type == para_NumberedList) {
            prefix = p->kwtext;
            prefixextra = conf.listsuffix;
            indentb = conf.listindentbefore;
            indenta = conf.listindentafter;
        } else if (p->type == para_Description) {
            prefix = NULL;
            prefixextra = NULL;
            indentb = conf.listindentbefore;
            indenta = conf.listindentafter;
        } else {
            prefix = NULL;
            prefixextra = NULL;
            indentb = indenta = 0;
        }
        if (p->type == para_BiblioCited) {
            /*
             * Temporarily chain a copy of the citation text, a
             * space, and the paragraph's own words into one list.
             */
            body = dup_word_list(p->kwtext);
            for (wp = body; wp->next; wp = wp->next);
            wp->next = &spaceword;
            spaceword.next = p->words;
            spaceword.alt = NULL;
            spaceword.type = word_WhiteSpace;
            spaceword.text = NULL;
        } else {
            wp = NULL;
            body = p->words;
        }
        info_para(&currnode->text, prefix, prefixextra, body, keywords,
                  nesting + indentb, indenta,
                  conf.width - nesting - indentb - indenta, &conf);
        if (wp) {
            /* Unchain again so that only the copy is freed. */
            wp->next = NULL;
            free_word_list(body);
        }
        break;

      case para_Code:
        info_codepara(&currnode->text, p->words,
                      nesting + conf.indent_code,
                      conf.width - nesting - 2 * conf.indent_code);
        break;
    }

    /*
     * Create an index node if required.
     */
    if (has_index) {
        node *newnode;
        int i, j, k;
        int ulwid;
        indexentry *entry;
        char *nodename;

        nodename = info_node_name_for_text(conf.index_text, &conf);
        newnode = info_node_new(nodename, conf.charset);
        sfree(nodename);

        newnode->up = topnode;

        currnode->next = newnode;
        newnode->prev = currnode;
        currnode->listnext = newnode;

        k = info_rdadds(&newnode->text, conf.index_text);
        info_rdaddsc(&newnode->text, "\n");
        /*
         * Underline the index title. An empty or zero-width underline
         * string would never reduce k, so skip the loop in that case
         * rather than spinning forever.
         */
        ulwid = ustrwid(conf.achapter.underline, conf.charset);
        if (ulwid > 0) {
            while (k > 0) {
                info_rdadds(&newnode->text, conf.achapter.underline);
                k -= ulwid;
            }
        }
        info_rdaddsc(&newnode->text, "\n\n");

        /*
         * The index is listed in the Top node's menu. If no section
         * has started that menu yet, start it here, as is done for
         * section nodes above.
         */
        if (!topnode->started_menu) {
            info_rdaddsc(&topnode->text, "* Menu:\n\n");
            topnode->started_menu = TRUE;
        }
        info_menu_item(&topnode->text, newnode, NULL, &conf);

        for (i = 0; (entry = index234(idx->entries, i)) != NULL; i++) {
            info_idx *ii = (info_idx *)entry->backend_data;

            for (j = 0; j < ii->nnodes; j++) {
                /*
                 * When we have multiple references for a single
                 * index term, we only display the actual term on
                 * the first line, to make it clear that the terms
                 * really are the same.
                 */
                if (j == 0)
                    info_rdaddsc(&newnode->text, ii->text);
                for (k = (j ? 0 : ii->length); k < conf.index_width-2; k++)
                    info_rdaddc(&newnode->text, ' ');
                info_rdaddsc(&newnode->text, " *Note ");
                info_rdaddsc(&newnode->text, ii->nodes[j]->name);
                info_rdaddsc(&newnode->text, "::\n");
            }
        }
    }

    /*
     * Finalise the text of each node, by adding the ^_ delimiter
     * and the node line at the top.
     */
    for (currnode = topnode; currnode; currnode = currnode->listnext) {
        char *origtext = currnode->text.output.text;
        currnode->text = empty_info_data;
        currnode->text.charset = conf.charset;
        info_rdaddsc(&currnode->text, "\037\nFile: ");
        info_rdaddsc(&currnode->text, conf.filename);
        info_rdaddsc(&currnode->text, ", Node: ");
        info_rdaddsc(&currnode->text, currnode->name);
        if (currnode->prev) {
            info_rdaddsc(&currnode->text, ", Prev: ");
            info_rdaddsc(&currnode->text, currnode->prev->name);
        }
        info_rdaddsc(&currnode->text, ", Up: ");
        info_rdaddsc(&currnode->text, (currnode->up ?
                                       currnode->up->name : "(dir)"));
        if (currnode->next) {
            info_rdaddsc(&currnode->text, ", Next: ");
            info_rdaddsc(&currnode->text, currnode->next->name);
        }
        info_rdaddsc(&currnode->text, "\n\n");
        info_rdaddsc(&currnode->text, origtext);
        /*
         * Just make _absolutely_ sure we end with a newline.
         */
        if (currnode->text.output.text[currnode->text.output.pos-1] != '\n')
            info_rdaddc(&currnode->text, '\n');

        sfree(origtext);
    }

    /*
     * Compute the offsets for the tag table.
     */
    filepos = intro_text.output.pos;
    for (currnode = topnode; currnode; currnode = currnode->listnext) {
        currnode->pos = filepos;
        filepos += currnode->text.output.pos;
    }

    /*
     * Split into sub-files. Every sub-file starts with a copy of the
     * introductory text, and always receives at least one node even
     * if that node alone exceeds the size limit.
     */
    if (conf.maxfilesize > 0) {
        int currfilesize = intro_text.output.pos, currfilenum = 1;
        for (currnode = topnode; currnode; currnode = currnode->listnext) {
            if (currfilesize > intro_text.output.pos &&
                currfilesize + currnode->text.output.pos > conf.maxfilesize) {
                currfilenum++;
                currfilesize = intro_text.output.pos;
            }
            currnode->filenum = currfilenum;
            currfilesize += currnode->text.output.pos;
        }
    }

    /*
     * Write the primary output file.
     */
    fp = fopen(conf.filename, "w");
    if (!fp) {
        err_cantopenw(conf.filename);
        return;
    }
    fputs(intro_text.output.text, fp);
    if (conf.maxfilesize <= 0) {
        /*
         * No splitting: all the node text goes into this file. A
         * negative limit is treated like zero; otherwise no node
         * text would be written to any file at all.
         */
        for (currnode = topnode; currnode; currnode = currnode->listnext)
            fputs(currnode->text.output.text, fp);
    } else {
        /* One indirect-table line per sub-file, at its first node. */
        int filenum = 0;
        fprintf(fp, "\037\nIndirect:\n");
        for (currnode = topnode; currnode; currnode = currnode->listnext)
            if (filenum != currnode->filenum) {
                filenum = currnode->filenum;
                fprintf(fp, "%s-%d: %d\n", conf.filename, filenum,
                        currnode->pos);
            }
    }
    fprintf(fp, "\037\nTag Table:\n");
    if (conf.maxfilesize > 0)
        fprintf(fp, "(Indirect)\n");
    for (currnode = topnode; currnode; currnode = currnode->listnext)
        fprintf(fp, "Node: %s\177%d\n", currnode->name, currnode->pos);
    fprintf(fp, "\037\nEnd Tag Table\n");
    fclose(fp);

    /*
     * Write the subfiles.
     */
    if (conf.maxfilesize > 0) {
        int filenum = 0;
        fp = NULL;

        for (currnode = topnode; currnode; currnode = currnode->listnext) {
            if (filenum != currnode->filenum) {
                char *fname;

                filenum = currnode->filenum;

                if (fp)
                    fclose(fp);
                /* 40 spare bytes comfortably hold "-" plus the number. */
                fname = snewn(strlen(conf.filename) + 40, char);
                sprintf(fname, "%s-%d", conf.filename, filenum);
                fp = fopen(fname, "w");
                if (!fp) {
                    err_cantopenw(fname);
                    sfree(fname);      /* don't leak the name on failure */
                    return;
                }
                sfree(fname);
                fputs(intro_text.output.text, fp);
            }
            fputs(currnode->text.output.text, fp);
        }

        if (fp)
            fclose(fp);
    }
}
766
/*
 * Record that node `n' refers to every index entry reachable from the
 * index references in word list `w'.
 *
 * For each word_IndexRef word, the tag is looked up in `idx' and `n'
 * is appended to the node list of each entry the tag refers to,
 * unless `n' is already the most recently recorded node for that
 * entry. A tag that cannot be found is skipped; the rest of the word
 * list is still examined.
 *
 * Returns 1 if at least one node reference was recorded, else 0.
 */
static int info_check_index(word *w, node *n, indexdata *idx)
{
    int ret = 0;

    for (; w; w = w->next) {
        indextag *tag;
        int i;

        if (w->type != word_IndexRef)
            continue;

        tag = index_findtag(idx, w->text);
        if (!tag)
            continue;                  /* skip this one reference only */

        for (i = 0; i < tag->nrefs; i++) {
            indexentry *entry = tag->refs[i];
            info_idx *ii = (info_idx *)entry->backend_data;

            if (ii->nnodes > 0 && ii->nodes[ii->nnodes-1] == n) {
                /*
                 * If the same index term is indexed twice
                 * within the same section, we only want to
                 * mention it once in the index. So do nothing
                 * here.
                 */
                continue;
            }

            /* Grow the node array in steps of 32 entries. */
            if (ii->nnodes >= ii->nodesize) {
                ii->nodesize += 32;
                ii->nodes = sresize(ii->nodes, ii->nodesize, node *);
            }

            ii->nodes[ii->nnodes++] = n;

            ret = 1;
        }
    }

    return ret;
}
808
/*
 * Return a duplicate of `words' prepared for Info output.
 *
 * Every word in the copy has its private_data cleared. For each
 * cross-reference word whose keyword is found in `keywords' and whose
 * target is neither a numbered list item nor a bibliography entry,
 * the words up to and including the matching word_XrefEnd are
 * removed from the copy and freed, and the cross-reference word's
 * private_data is pointed at the target paragraph's node so that the
 * output routines can print the node name.
 *
 * The caller owns the returned list.
 */
static word *info_transform_wordlist(word *words, keywordlist *keywords)
{
    word *copy = dup_word_list(words);
    word *cur;

    for (cur = copy; cur; cur = cur->next) {
        keyword *target;
        word *doomed, *scan, *after;

        cur->private_data = NULL;

        if (cur->type != word_UpperXref && cur->type != word_LowerXref)
            continue;

        target = kw_lookup(keywords, cur->text);
        if (!target)
            continue;

        /*
         * In Info, we do nothing special for xrefs to numbered list
         * items or bibliography entries.
         */
        if (target->para->type == para_NumberedList ||
            target->para->type == para_BiblioCited)
            continue;

        /*
         * An xref to a different section has its text completely
         * replaced: cut the list after the XrefEnd (or at its end if
         * there is none) and discard the detached part.
         */
        doomed = cur->next;
        after = NULL;
        for (scan = doomed; scan; scan = scan->next) {
            if (scan->type == word_XrefEnd) {
                after = scan->next;
                scan->next = NULL;
                break;
            }
        }
        free_word_list(doomed);

        /*
         * Rejoin the list and leave a pointer to the target node in
         * the xref word for info_rdaddwc and friends to read.
         */
        cur->next = after;
        cur->private_data = target->para->private_data;
        assert(cur->private_data);
    }

    return copy;
}
865
/*
 * Append the rendering of a word list to `id'.
 *
 * Words are taken from `words' up to, but not including, `end' (or to
 * the end of the list if `end' is NULL). Emphasis, strong and code
 * runs are bracketed with the configured markers; a word that cannot
 * be converted to the output character set is replaced by its `alt'
 * list when it has one. Cross-references are written as
 * "*Note <node>::" only when `xrefs' is true and the word carries a
 * node in private_data.
 *
 * Returns the sum of the values returned by the info_rdadd* calls
 * made.
 */
static int info_rdaddwc(info_data *id, word *words, word *end, int xrefs,
                        infoconfig *cfg) {
    int total = 0;
    word *w;

    for (w = words; w && w != end; w = w->next) {
        int style, base, opens, closes;

        switch (w->type) {
          case word_UpperXref:
          case word_LowerXref:
            if (xrefs && w->private_data) {
                /*
                 * This bit is structural and so must be done in char
                 * rather than wchar_t.
                 */
                node *target = (node *)w->private_data;
                total += info_rdaddsc(id, "*Note ");
                total += info_rdaddsc(id, target->name);
                total += info_rdaddsc(id, "::");
            }
            continue;

          case word_Normal:
          case word_Emph:
          case word_Strong:
          case word_Code:
          case word_WeakCode:
          case word_WhiteSpace:
          case word_EmphSpace:
          case word_StrongSpace:
          case word_CodeSpace:
          case word_WkCodeSpace:
          case word_Quote:
          case word_EmphQuote:
          case word_StrongQuote:
          case word_CodeQuote:
          case word_WkCodeQuote:
            break;                     /* rendered below */

          default:
            /* Hyperlinks, XrefEnd, IndexRef etc. produce no output. */
            continue;
        }

        assert(w->type != word_CodeQuote &&
               w->type != word_WkCodeQuote);

        style = towordstyle(w->type);
        base = removeattr(w->type);
        opens = (attraux(w->aux) == attr_First ||
                 attraux(w->aux) == attr_Only);
        closes = (attraux(w->aux) == attr_Last ||
                  attraux(w->aux) == attr_Only);

        /* Opening marker, on the first word of a styled run. */
        if (opens) {
            if (style == word_Emph)
                total += info_rdadds(id, cfg->startemph);
            else if (style == word_Strong)
                total += info_rdadds(id, cfg->startstrong);
            else if (style == word_Code)
                total += info_rdadds(id, cfg->lquote);
        }

        /* The word itself. */
        if (base == word_Normal) {
            if (cvt_ok(id->charset, w->text) || !w->alt)
                total += info_rdadds(id, w->text);
            else
                total += info_rdaddwc(id, w->alt, NULL, FALSE, cfg);
        } else if (base == word_WhiteSpace) {
            total += info_rdadd(id, L' ');
        } else if (base == word_Quote) {
            if (quoteaux(w->aux) == quote_Open)
                total += info_rdadds(id, cfg->lquote);
            else
                total += info_rdadds(id, cfg->rquote);
        }

        /* Closing marker, on the last word of a styled run. */
        if (closes) {
            if (style == word_Emph)
                total += info_rdadds(id, cfg->endemph);
            else if (style == word_Strong)
                total += info_rdadds(id, cfg->endstrong);
            else if (style == word_Code)
                total += info_rdadds(id, cfg->rquote);
        }
    }

    return total;
}
947
948 static int info_width_internal(word *words, int xrefs, infoconfig *cfg);
949
info_width_internal_list(word * words,int xrefs,infoconfig * cfg)950 static int info_width_internal_list(word *words, int xrefs, infoconfig *cfg) {
951 int w = 0;
952 while (words) {
953 w += info_width_internal(words, xrefs, cfg);
954 words = words->next;
955 }
956 return w;
957 }
958
/*
 * Display width of a single word, as info_rdaddwc() would render it.
 *
 *  - Hyperlink, XrefEnd and IndexRef words have no width.
 *  - A cross-reference counts as "*Note " + node name + "::" when
 *    `xrefs' is true and the word carries a node, and as nothing
 *    otherwise.
 *  - Styled words include the width of the opening marker on the
 *    first word of a run and of the closing marker on the last word.
 *    The closing marker is measured with endemph / endstrong / rquote,
 *    the strings that info_rdaddwc() actually writes there.
 *  - Text is measured directly, or via its `alt' list when it cannot
 *    be converted to the output character set; whitespace counts one
 *    column; quote words count the configured quote mark.
 */
static int info_width_internal(word *words, int xrefs, infoconfig *cfg) {
    int wid;
    int attr;

    switch (words->type) {
      case word_HyperLink:
      case word_HyperEnd:
      case word_XrefEnd:
      case word_IndexRef:
        return 0;

      case word_UpperXref:
      case word_LowerXref:
        if (xrefs && words->private_data) {
            /* "*Note " plus "::" comes to 8 characters */
            return 8 + strwid(((node *)words->private_data)->name,
                              cfg->charset);
        } else
            return 0;

      default:
        break;
    }

    assert(words->type < word_internal_endattrs);

    wid = 0;
    attr = towordstyle(words->type);

    if (attr == word_Emph || attr == word_Strong || attr == word_Code) {
        /* Opening marker on the first (or only) word of the run. */
        if (attraux(words->aux) == attr_Only ||
            attraux(words->aux) == attr_First)
            wid += ustrwid(attr == word_Emph ? cfg->startemph :
                           attr == word_Strong ? cfg->startstrong :
                           cfg->lquote, cfg->charset);
        /* Closing marker on the last (or only) word of the run. */
        if (attraux(words->aux) == attr_Only ||
            attraux(words->aux) == attr_Last)
            wid += ustrwid(attr == word_Emph ? cfg->endemph :
                           attr == word_Strong ? cfg->endstrong :
                           cfg->rquote, cfg->charset);
    }

    switch (words->type) {
      case word_Normal:
      case word_Emph:
      case word_Strong:
      case word_Code:
      case word_WeakCode:
        if (cvt_ok(cfg->charset, words->text) || !words->alt)
            wid += ustrwid(words->text, cfg->charset);
        else
            wid += info_width_internal_list(words->alt, xrefs, cfg);
        return wid;

      case word_WhiteSpace:
      case word_EmphSpace:
      case word_StrongSpace:
      case word_CodeSpace:
      case word_WkCodeSpace:
      case word_Quote:
      case word_EmphQuote:
      case word_StrongQuote:
      case word_CodeQuote:
      case word_WkCodeQuote:
        assert(words->type != word_CodeQuote &&
               words->type != word_WkCodeQuote);
        if (removeattr(words->type) == word_Quote) {
            if (quoteaux(words->aux) == quote_Open)
                wid += ustrwid(cfg->lquote, cfg->charset);
            else
                wid += ustrwid(cfg->rquote, cfg->charset);
        } else
            wid++;                     /* space */
        break;

      default:
        break;
    }
    return wid;
}
1034
/*
 * Width callback for wrap_para(): measures one word with
 * cross-references counted as zero width. `ctx' is the infoconfig.
 */
static int info_width_noxrefs(void *ctx, word *words)
{
    infoconfig *cfg = (infoconfig *)ctx;

    return info_width_internal(words, FALSE, cfg);
}
/*
 * Width callback that measures one word with cross-references
 * counted at their rendered "*Note ...::" width. `ctx' is the
 * infoconfig.
 */
static int info_width_xrefs(void *ctx, word *words)
{
    infoconfig *cfg = (infoconfig *)ctx;

    return info_width_internal(words, TRUE, cfg);
}
1043
/*
 * Write a heading to `text'.
 *
 * If `tprefix' is given (for instance a section number) it is written
 * first, followed by the configured section suffix. The heading words
 * are then wrapped to `width', with the first line shortened by the
 * space the prefix took. When the underline string in `align' is
 * non-empty, every output line is followed by a line of underlining
 * at least as wide as that line. A blank line ends the heading.
 */
static void info_heading(info_data *text, word *tprefix,
                         word *words, alignstruct align,
                         int width, infoconfig *cfg) {
    wrappedline *lines, *line;
    int used = 0;

    if (tprefix) {
        used += info_rdaddwc(text, tprefix, NULL, FALSE, cfg);
        used += info_rdadds(text, cfg->sectsuffix);
    }

    lines = wrap_para(words, width - used, width,
                      info_width_noxrefs, cfg, 0);

    for (line = lines; line; line = line->next) {
        int remaining;

        /* Only the first line carries the prefix width. */
        remaining = used + info_rdaddwc(text, line->begin, line->end,
                                        FALSE, cfg);
        info_rdadd(text, L'\n');

        if (*align.underline) {
            for (; remaining > 0;
                 remaining -= ustrwid(align.underline, cfg->charset))
                info_rdadds(text, align.underline);
            info_rdadd(text, L'\n');
        }

        used = 0;
    }

    wrap_free(lines);
    info_rdadd(text, L'\n');
}
1077
/*
 * Write a horizontal rule into `text': `indent' spaces, then
 * enough copies of cfg->rule to cover at least `width' columns,
 * then two newlines.
 *
 * A non-positive `indent' produces no leading spaces. If cfg->rule
 * has no display width (for instance, if it is the empty string),
 * no rule characters are written; repeating it could never cover
 * `width' columns and the loop would not terminate.
 */
static void info_rule(info_data *text, int indent, int width, infoconfig *cfg)
{
    int rule_width = ustrwid(cfg->rule, cfg->charset);
    int i;

    for (i = 0; i < indent; i++)
        info_rdadd(text, L' ');

    if (rule_width > 0) {
        for (; width > 0; width -= rule_width)
            info_rdadds(text, cfg->rule);
    }

    info_rdadd(text, L'\n');
    info_rdadd(text, L'\n');
}
1088
/*
 * Write a wrapped paragraph into `text'.
 *
 * `input' is first passed through info_transform_wordlist() along
 * with `keywords'; the transformed list is what gets wrapped, and
 * it is freed before returning.
 *
 * Without a prefix, every line is indented by indent+extraindent.
 *
 * With a prefix, `indent' spaces are written, then the prefix and
 * (if non-NULL) `prefixextra'. If those occupy less than
 * `extraindent' columns, the first line is padded so its text
 * lines up with the rest. If they occupy more, the first line's
 * width is reduced by the excess; should that leave a negative
 * width, the prefix is left on a line of its own and the text
 * begins on the next line at the normal indentation.
 *
 * The paragraph is followed by a blank line.
 */
static void info_para(info_data *text, word *prefix, wchar_t *prefixextra,
                      word *input, keywordlist *keywords, int indent,
                      int extraindent, int width, infoconfig *cfg)
{
    wrappedline *lines, *ln;
    word *xformed;
    int bodyindent = indent + extraindent;
    int firstlinewidth = width;
    int pad;                           /* spaces owed before next line */
    int col;

    xformed = info_transform_wordlist(input, keywords);

    if (!prefix) {
        pad = bodyindent;
    } else {
        int used;                      /* columns taken by the prefix */

        for (col = 0; col < indent; col++)
            info_rdadd(text, L' ');
        used = info_rdaddwc(text, prefix, NULL, FALSE, cfg);
        if (prefixextra)
            used += info_rdadds(text, prefixextra);

        pad = extraindent - used;
        if (pad < 0) {
            /* Prefix overran its column: shorten the first line. */
            firstlinewidth += pad;
            if (firstlinewidth < 0) {
                /* No room at all; begin the text on a fresh line. */
                info_rdadd(text, L'\n');
                firstlinewidth = width;
                pad = bodyindent;
            } else {
                pad = 0;
            }
        }
    }

    lines = wrap_para(xformed, firstlinewidth, width, info_width_xrefs,
                      cfg, 0);
    for (ln = lines; ln != NULL; ln = ln->next) {
        for (col = 0; col < pad; col++)
            info_rdadd(text, L' ');
        info_rdaddwc(text, ln->begin, ln->end, TRUE, cfg);
        info_rdadd(text, L'\n');
        /* Every line after the first gets the full indentation. */
        pad = bodyindent;
    }
    wrap_free(lines);
    info_rdadd(text, L'\n');

    free_word_list(xformed);
}
1134
/*
 * Write a code paragraph into `text'. Each word of type
 * word_WeakCode in the list becomes one output line: `indent'
 * spaces, the word's text, and a newline. Words of any other type
 * are ignored. A blank line follows the paragraph.
 *
 * Lines wider than `width' are detected but not yet reported.
 */
static void info_codepara(info_data *text, word *words,
                          int indent, int width)
{
    word *w;
    int col;

    for (w = words; w != NULL; w = w->next) {
        if (w->type != word_WeakCode)
            continue;

        for (col = indent; col > 0; col--)
            info_rdadd(text, L' ');
        if (info_rdadds(text, w->text) > width) {
            /* FIXME: warn */
        }
        info_rdadd(text, L'\n');
    }

    info_rdadd(text, L'\n');
}
1150
/*
 * Write a version-ID line into `text': the rendering of `words'
 * enclosed in square brackets and terminated by a newline.
 */
static void info_versionid(info_data *text, word *words, infoconfig *cfg)
{
    info_rdadd(text, L'[');
    info_rdaddwc(text, words, NULL, FALSE, cfg);
    info_rdadds(text, L"]\n");
}
1156
info_node_new(char * name,int charset)1157 static node *info_node_new(char *name, int charset)
1158 {
1159 node *n;
1160
1161 n = snew(node);
1162 n->text = empty_info_data;
1163 n->text.charset = charset;
1164 n->up = n->next = n->prev = n->lastchild = n->listnext = NULL;
1165 n->name = dupstr(name);
1166 n->started_menu = FALSE;
1167
1168 return n;
1169 }
1170
/*
 * Turn the text accumulated in `id' into a usable node name and
 * return it.
 *
 * Commas, colons and parentheses are not allowed in a node name,
 * so each one found is reported through err_infonodechar() (with
 * `fpos' as the position) and dropped. The string is compacted in
 * place; the returned pointer is id->output.text itself.
 *
 * id->output.text is dereferenced unconditionally, so it must not
 * be NULL.
 */
static char *info_node_name_core(info_data *id, filepos *fpos)
{
    char *src, *dst;

    dst = id->output.text;
    for (src = dst; *src != '\0'; src++) {
        switch (*src) {
          case ':':
          case ',':
          case '(':
          case ')':
            err_infonodechar(fpos, *src);
            break;
          default:
            *dst++ = *src;
            break;
        }
    }
    *dst = '\0';

    return id->output.text;
}
1192
/*
 * Build a node name for paragraph `par'. The name is rendered
 * from par->kwtext when that is set and from par->words
 * otherwise, then cleaned up by info_node_name_core(), which
 * reports any problems against the paragraph's file position.
 */
static char *info_node_name_for_para(paragraph *par, infoconfig *cfg)
{
    info_data id = EMPTY_INFO_DATA;
    word *title;

    if (par->kwtext)
        title = par->kwtext;
    else
        title = par->words;

    id.charset = cfg->charset;
    info_rdaddwc(&id, title, NULL, FALSE, cfg);
    /* A NULL string adds nothing but leaves wide-character mode. */
    info_rdaddsc(&id, NULL);

    return info_node_name_core(&id, &par->fpos);
}
1204
/*
 * Build a node name from the wide string `text'. The string is
 * converted into cfg->charset and then cleaned up by
 * info_node_name_core(), which is given a NULL file position.
 */
static char *info_node_name_for_text(wchar_t *text, infoconfig *cfg)
{
    info_data id = EMPTY_INFO_DATA;
    filepos *nowhere = NULL;

    id.charset = cfg->charset;
    info_rdadds(&id, text);
    /* A NULL string adds nothing but leaves wide-character mode. */
    info_rdaddsc(&id, NULL);

    return info_node_name_core(&id, nowhere);
}
1215
/*
 * Append one menu line for node `n' to `text', in the form
 *
 *   * Node name:: Title words
 *
 * The title part (a space and the rendering of p->words) is only
 * written when `p' is non-NULL.
 *
 * FIXME: Depending on how we're doing node names in this info
 * file, we might want to do
 *
 *   * Node name:: Chapter title
 *
 * _or_
 *
 *   * Chapter number: Node name.
 *
 * Most of the output here goes through the char-based calls
 * rather than the wchar_t ones, because a menu item is a
 * structural component.
 */
static void info_menu_item(info_data *text, node *n, paragraph *p,
                           infoconfig *cfg)
{
    info_rdaddsc(text, "* ");
    info_rdaddsc(text, n->name);
    info_rdaddsc(text, "::");

    if (p != NULL) {
        info_rdaddc(text, ' ');
        info_rdaddwc(text, p->words, NULL, FALSE, cfg);
    }

    info_rdaddc(text, '\n');
}
1241
1242 /*
1243 * These functions implement my wrapper on the rdadd* calls which
1244 * allows me to switch arbitrarily between literal octet-string
1245 * text and charset-translated Unicode. (Because no matter what
1246 * character set I write the actual text in, I expect info readers
1247 * to treat node names and file names literally and to expect
1248 * keywords like `*Note' in their canonical form, so I have to take
1249 * steps to ensure that those structural elements of the file
1250 * aren't messed with.)
1251 */
/*
 * Append the wide string `wcs' to `d', converted into d->charset.
 *
 * If `d' is not already in wide-character mode, the conversion
 * state is reset to charset_init_state and the mode is switched
 * on; this happens even when `wcs' is NULL.
 *
 * Returns the display width of `wcs' as reported by ustrwid(), or
 * 0 if `wcs' is NULL.
 */
static int info_rdadds(info_data *d, wchar_t const *wcs)
{
    char buf[256];
    int len, width, ret;

    if (!d->wcmode) {
        d->state = charset_init_state;
        d->wcmode = TRUE;
    }

    if (!wcs)
        return 0;

    width = ustrwid(wcs, d->charset);

    len = ustrlen(wcs);
    while (len > 0) {
        int prevlen = len;

        /*
         * Offer the converter one byte less than the buffer holds,
         * so that the terminator written at buf[ret] below always
         * lands inside buf, even when the converter uses all the
         * space it was offered.
         */
        ret = charset_from_unicode(&wcs, &len, buf, lenof(buf) - 1,
                                   d->charset, &d->state, NULL);

        /* Each pass must consume input, or we would spin forever. */
        assert(len < prevlen);

        if (ret > 0) {
            buf[ret] = '\0';
            rdaddsc(&d->output, buf);
        }
    }

    return width;
}
1284
/*
 * Append the literal byte string `cs' to `d' with no charset
 * translation.
 *
 * If `d' is in wide-character mode, charset_from_unicode() is
 * first called with no input so that any bytes it needs to emit to
 * finish the pending conversion are written out, and the mode is
 * switched off; this happens even when `cs' is NULL.
 *
 * Returns the display width of `cs' as reported by strwid(), or 0
 * if `cs' is NULL.
 */
static int info_rdaddsc(info_data *d, char const *cs)
{
    if (d->wcmode) {
        char buf[256];
        int ret;

        /*
         * Offer one byte less than the buffer holds, leaving room
         * for the terminator written at buf[ret] below.
         */
        ret = charset_from_unicode(NULL, 0, buf, lenof(buf) - 1,
                                   d->charset, &d->state, NULL);
        if (ret > 0) {
            buf[ret] = '\0';
            rdaddsc(&d->output, buf);
        }

        d->wcmode = FALSE;
    }

    if (!cs)
        return 0;

    rdaddsc(&d->output, cs);
    return strwid(cs, d->charset);
}
1307
/*
 * Append the single wide character `wc' to `d' by wrapping it in
 * a one-character string and handing that to info_rdadds().
 * Returns whatever info_rdadds() returns.
 */
static int info_rdadd(info_data *d, wchar_t wc)
{
    wchar_t single[2];

    single[1] = L'\0';
    single[0] = wc;

    return info_rdadds(d, single);
}
1315
/*
 * Append the single literal byte `c' to `d' by wrapping it in a
 * one-character string and handing that to info_rdaddsc().
 * Returns whatever info_rdaddsc() returns.
 */
static int info_rdaddc(info_data *d, char c)
{
    char single[2];

    single[1] = '\0';
    single[0] = c;

    return info_rdaddsc(d, single);
}
1323