1 /*
2 * Separated HTML export functions for HTMLDOC, a HTML document processing
3 * program.
4 *
5 * Copyright 2011-2021 by Michael R Sweet.
6 * Copyright 1997-2010 by Easy Software Products. All rights reserved.
7 *
8 * This program is free software. Distribution and use rights are outlined in
9 * the file "COPYING".
10 */
11
12 /*
13 * Include necessary headers.
14 */
15
16 #include "htmldoc.h"
17 #include "markdown.h"
18 #include <ctype.h>
19
20
21 //
22 // Named link structure...
23 //
24
25 typedef struct
26 {
27 uchar *filename; /* File for link */
28 uchar name[124]; /* Reference name */
29 } link_t;
30
31
32 //
33 // Local globals...
34 //
35
36 // Heading strings used for filenames...
37 static size_t num_headings = 0, // Number of headings
38 alloc_headings = 0; // Allocated headings
39 static uchar **headings; // Heading strings
40
41 // Links in document - used to add the correct filename to the link
42 static size_t num_links = 0, // Number of links
43 alloc_links = 0; // Allocated links
44 static link_t *links; // Links
45
46
47 //
48 // Local functions...
49 //
50
51 extern "C" {
52 typedef int (*compare_func_t)(const void *, const void *);
53 }
54
55 static void write_header(FILE **out, uchar *filename, uchar *title,
56 uchar *author, uchar *copyright, uchar *docnumber,
57 int heading);
58 static void write_footer(FILE **out, int heading);
59 static void write_title(FILE *out, uchar *title, uchar *author,
60 uchar *copyright, uchar *docnumber);
61 static int write_all(FILE *out, tree_t *t, int col);
62 static int write_doc(FILE **out, tree_t *t, int col, int *heading,
63 uchar *title, uchar *author, uchar *copyright,
64 uchar *docnumber);
65 static int write_node(FILE *out, tree_t *t, int col);
66 static int write_nodeclose(FILE *out, tree_t *t, int col);
67 static int write_toc(FILE *out, tree_t *t, int col);
68 static uchar *get_title(tree_t *doc);
69
70 static void add_heading(tree_t *t);
71 static void add_link(uchar *name);
72 static link_t *find_link(uchar *name);
73 static int compare_links(link_t *n1, link_t *n2);
74 static void scan_links(tree_t *t);
75 static void update_links(tree_t *t, int *heading);
76
77
78 //
79 // 'htmlsep_export()' - Export to separated HTML files...
80 //
81
82 int // O - 0 = success, -1 = failure
htmlsep_export(tree_t * document,tree_t * toc)83 htmlsep_export(tree_t *document, // I - Document to export
84 tree_t *toc) // I - Table of contents for document
85 {
86 size_t i; // Looping var
87 int heading; // Current heading number
88 uchar *title, // Title text
89 *author, // Author name
90 *copyright, // Copyright text
91 *docnumber; // Document number
92 FILE *out; // Output file
93
94
95 // We only support writing to a directory...
96 if (!OutputFiles)
97 {
98 progress_error(HD_ERROR_INTERNAL_ERROR, "Unable to generate separated HTML to a single file!");
99 return (-1);
100 }
101
102 // Copy logo and title images...
103 if (LogoImage[0])
104 image_copy(LogoImage, file_find(LogoImage, Path), OutputPath);
105
106 for (int hfi = 0; hfi < MAX_HF_IMAGES; hfi ++)
107 if (HFImage[hfi][0])
108 image_copy(HFImage[hfi], file_find(HFImage[hfi], Path), OutputPath);
109
110 if (TitleImage[0] && TitlePage &&
111 #ifdef WIN32
112 (stricmp(file_extension(TitleImage), "bmp") == 0 ||
113 stricmp(file_extension(TitleImage), "gif") == 0 ||
114 stricmp(file_extension(TitleImage), "jpg") == 0 ||
115 stricmp(file_extension(TitleImage), "png") == 0))
116 #else
117 (strcmp(file_extension(TitleImage), "bmp") == 0 ||
118 strcmp(file_extension(TitleImage), "gif") == 0 ||
119 strcmp(file_extension(TitleImage), "jpg") == 0 ||
120 strcmp(file_extension(TitleImage), "png") == 0))
121 #endif // WIN32
122 image_copy(TitleImage, file_find(TitleImage, Path), OutputPath);
123
124 // Get document strings...
125 title = get_title(document);
126 author = htmlGetMeta(document, (uchar *)"author");
127 copyright = htmlGetMeta(document, (uchar *)"copyright");
128 docnumber = htmlGetMeta(document, (uchar *)"docnumber");
129 if (!docnumber)
130 docnumber = htmlGetMeta(document, (uchar *)"version");
131
132 // Scan for all links in the document, and then update them...
133 num_links = 0;
134 alloc_links = 0;
135 links = NULL;
136
137 scan_links(document);
138
139 // printf("num_headings = %d\n", num_headings);
140 // for (i = 0; i < num_headings; i ++)
141 // printf("headings[%d] = \"%s\"\n", i, headings[i]);
142
143 heading = -1;
144 update_links(document, &heading);
145 update_links(toc, NULL);
146
147 // Generate title pages and a table of contents...
148 out = NULL;
149 if (TitlePage)
150 {
151 write_header(&out, (uchar *)"index.html", title, author, copyright,
152 docnumber, -1);
153 if (out != NULL)
154 write_title(out, title, author, copyright, docnumber);
155
156 write_footer(&out, -1);
157
158 write_header(&out, (uchar *)"toc.html", title, author, copyright,
159 docnumber, -1);
160 }
161 else
162 write_header(&out, (uchar *)"index.html", title, author, copyright,
163 docnumber, -1);
164
165 if (out != NULL)
166 write_toc(out, toc, 0);
167
168 write_footer(&out, -1);
169
170 // Then write each output file...
171 heading = -1;
172 write_doc(&out, document, 0, &heading, title, author, copyright, docnumber);
173
174 if (out != NULL)
175 write_footer(&out, heading);
176
177 // Free memory...
178 if (title != NULL)
179 free(title);
180
181 if (alloc_links)
182 {
183 free(links);
184
185 num_links = 0;
186 alloc_links = 0;
187 links = NULL;
188 }
189
190 if (alloc_headings)
191 {
192 for (i = 0; i < num_headings; i ++)
193 free(headings[i]);
194
195 free(headings);
196
197 num_headings = 0;
198 alloc_headings = 0;
199 headings = NULL;
200 }
201
202 return (out == NULL);
203 }
204
205
206 /*
207 * 'write_header()' - Output the standard "header" for a HTML file.
208 */
209
210 static void
write_header(FILE ** out,uchar * filename,uchar * title,uchar * author,uchar * copyright,uchar * docnumber,int heading)211 write_header(FILE **out, /* IO - Output file */
212 uchar *filename, /* I - Output filename */
213 uchar *title, /* I - Title for document */
214 uchar *author, /* I - Author for document */
215 uchar *copyright, /* I - Copyright for document */
216 uchar *docnumber, /* I - ID number for document */
217 int heading) /* I - Current heading */
218 {
219 char realname[1024]; /* Real filename */
220 const char *basename; /* Filename without directory */
221 static const char *families[] =/* Typeface names */
222 {
223 "monospace",
224 "serif",
225 "sans-serif",
226 "monospace",
227 "serif",
228 "sans-serif",
229 "symbol",
230 "dingbats"
231 };
232
233
234 basename = file_basename((char *)filename);
235
236 snprintf(realname, sizeof(realname), "%s/%s", OutputPath, basename);
237
238 *out = fopen(realname, "wb");
239
240 if (*out == NULL)
241 {
242 progress_error(HD_ERROR_WRITE_ERROR,
243 "Unable to create output file \"%s\" - %s.\n",
244 realname, strerror(errno));
245 return;
246 }
247
248 fputs("<!DOCTYPE html>\n", *out);
249 fputs("<HTML>\n", *out);
250 fputs("<HEAD>\n", *out);
251 if (title != NULL)
252 fprintf(*out, "<TITLE>%s</TITLE>\n", title);
253 if (author != NULL)
254 fprintf(*out, "<META NAME=\"author\" CONTENT=\"%s\">\n", author);
255 if (copyright != NULL)
256 fprintf(*out, "<META NAME=\"copyright\" CONTENT=\"%s\">\n", copyright);
257 if (docnumber != NULL)
258 fprintf(*out, "<META NAME=\"docnumber\" CONTENT=\"%s\">\n", docnumber);
259 fprintf(*out, "<META HTTP-EQUIV=\"Content-Type\" CONTENT=\"text/html; CHARSET=%s\">\n",
260 _htmlCharSet);
261
262 fputs("<LINK REL=\"Start\" HREF=\"index.html\">\n", *out);
263
264 if (TitlePage)
265 fputs("<LINK REL=\"Contents\" HREF=\"toc.html\">\n", *out);
266 else
267 fputs("<LINK REL=\"Contents\" HREF=\"index.html\">\n", *out);
268
269 if (heading >= 0)
270 {
271 if (heading > 0)
272 fprintf(*out, "<LINK REL=\"Prev\" HREF=\"%s.html\">\n", headings[heading - 1]);
273
274 if ((size_t)heading < (num_headings - 1))
275 fprintf(*out, "<LINK REL=\"Next\" HREF=\"%s.html\">\n", headings[heading + 1]);
276 }
277
278 fputs("<STYLE TYPE=\"text/css\"><!--\n", *out);
279 fprintf(*out, "BODY { font-family: %s; }\n", families[_htmlBodyFont]);
280 fprintf(*out, "H1 { font-family: %s; }\n", families[_htmlHeadingFont]);
281 fprintf(*out, "H2 { font-family: %s; }\n", families[_htmlHeadingFont]);
282 fprintf(*out, "H3 { font-family: %s; }\n", families[_htmlHeadingFont]);
283 fprintf(*out, "H4 { font-family: %s; }\n", families[_htmlHeadingFont]);
284 fprintf(*out, "H5 { font-family: %s; }\n", families[_htmlHeadingFont]);
285 fprintf(*out, "H6 { font-family: %s; }\n", families[_htmlHeadingFont]);
286 fputs("SUB { font-size: smaller; }\n", *out);
287 fputs("SUP { font-size: smaller; }\n", *out);
288 fputs("PRE { font-family: monospace; margin-left: 36pt; }\n", *out);
289
290 if (!LinkStyle)
291 fputs("A { text-decoration: none; }\n", *out);
292
293 fputs("--></STYLE>\n", *out);
294 fputs("</HEAD>\n", *out);
295
296 if (BodyImage[0])
297 fprintf(*out, "<BODY BACKGROUND=\"%s\"", file_basename(BodyImage));
298 else if (BodyColor[0])
299 fprintf(*out, "<BODY BGCOLOR=\"%s\"", BodyColor);
300 else
301 fputs("<BODY", *out);
302
303 if (_htmlTextColor[0])
304 fprintf(*out, " TEXT=\"%s\"", _htmlTextColor);
305
306 if (LinkColor[0])
307 fprintf(*out, " LINK=\"%s\" VLINK=\"%s\" ALINK=\"%s\"", LinkColor,
308 LinkColor, LinkColor);
309
310 fputs(">\n", *out);
311
312 if (heading >= 0)
313 {
314 if (LogoImage[0])
315 fprintf(*out, "<IMG SRC=\"%s\">\n", file_basename(LogoImage));
316
317 for (int hfi = 0; hfi < MAX_HF_IMAGES; ++hfi)
318 if (HFImage[hfi][0])
319 fprintf(*out, "<IMG SRC=\"%s\">\n", file_basename(HFImage[hfi]));
320
321 if (TitlePage)
322 fputs("<A HREF=\"toc.html\">Contents</A>\n", *out);
323 else
324 fputs("<A HREF=\"index.html\">Contents</A>\n", *out);
325
326 if (heading > 0)
327 fprintf(*out, "<A HREF=\"%s.html\">Previous</A>\n", headings[heading - 1]);
328
329 if ((size_t)heading < (num_headings - 1))
330 fprintf(*out, "<A HREF=\"%s.html\">Next</A>\n", headings[heading + 1]);
331
332 fputs("<HR NOSHADE>\n", *out);
333 }
334 }
335
336
337 /*
338 * 'write_footer()' - Output the standard "footer" for a HTML file.
339 */
340
341 static void
write_footer(FILE ** out,int heading)342 write_footer(FILE **out, /* IO - Output file pointer */
343 int heading) /* I - Current heading */
344 {
345 if (*out == NULL)
346 return;
347
348 fputs("<HR NOSHADE>\n", *out);
349
350 if (heading >= 0)
351 {
352 if (LogoImage[0])
353 fprintf(*out, "<IMG SRC=\"%s\">\n", file_basename(LogoImage));
354
355 for (int hfi = 0; hfi < MAX_HF_IMAGES; ++hfi)
356 if (HFImage[hfi][0])
357 fprintf(*out, "<IMG SRC=\"%s\">\n", file_basename(HFImage[hfi]));
358
359 if (TitlePage)
360 fputs("<A HREF=\"toc.html\">Contents</A>\n", *out);
361 else
362 fputs("<A HREF=\"index.html\">Contents</A>\n", *out);
363
364 if (heading > 0)
365 fprintf(*out, "<A HREF=\"%s.html\">Previous</A>\n", headings[heading - 1]);
366
367 if ((size_t)heading < (num_headings - 1))
368 fprintf(*out, "<A HREF=\"%s.html\">Next</A>\n", headings[heading + 1]);
369 }
370
371 fputs("</BODY>\n", *out);
372 fputs("</HTML>\n", *out);
373
374 progress_error(HD_ERROR_NONE, "BYTES: %ld", ftell(*out));
375
376 fclose(*out);
377 *out = NULL;
378 }
379
380
381 /*
382 * 'write_title()' - Write a title page...
383 */
384
385 static void
write_title(FILE * out,uchar * title,uchar * author,uchar * copyright,uchar * docnumber)386 write_title(FILE *out, /* I - Output file */
387 uchar *title, /* I - Title for document */
388 uchar *author, /* I - Author for document */
389 uchar *copyright, /* I - Copyright for document */
390 uchar *docnumber) /* I - ID number for document */
391 {
392 FILE *fp; /* Title file */
393 const char *title_ext, /* Extension of title file */
394 *title_file; /* Location of title file */
395 tree_t *t; /* Title file document tree */
396
397
398 if (out == NULL)
399 return;
400
401 title_ext = file_extension(TitleImage);
402
403 #ifdef WIN32
404 if (TitleImage[0] &&
405 stricmp(title_ext, "bmp") != 0 &&
406 stricmp(title_ext, "gif") != 0 &&
407 stricmp(title_ext, "jpg") != 0 &&
408 stricmp(title_ext, "png") != 0)
409 #else
410 if (TitleImage[0] &&
411 strcmp(title_ext, "bmp") != 0 &&
412 strcmp(title_ext, "gif") != 0 &&
413 strcmp(title_ext, "jpg") != 0 &&
414 strcmp(title_ext, "png") != 0)
415 #endif // WIN32
416 {
417 // Find the title page file...
418 if ((title_file = file_find(Path, TitleImage)) == NULL)
419 {
420 progress_error(HD_ERROR_FILE_NOT_FOUND,
421 "Unable to find title file \"%s\"!", TitleImage);
422 return;
423 }
424
425 // Write a title page from HTML source...
426 if ((fp = fopen(title_file, "rb")) == NULL)
427 {
428 progress_error(HD_ERROR_FILE_NOT_FOUND,
429 "Unable to open title file \"%s\" - %s!",
430 TitleImage, strerror(errno));
431 return;
432 }
433
434 #ifdef _WIN32
435 if (!stricmp(title_ext, "md"))
436 #else
437 if (!strcmp(title_ext, "md"))
438 #endif // _WIN32
439 t = mdReadFile(NULL, fp, file_directory(TitleImage));
440 else
441 t = htmlReadFile(NULL, fp, file_directory(TitleImage));
442
443 htmlFixLinks(t, t, (uchar *)file_directory(TitleImage));
444 fclose(fp);
445
446 write_all(out, t, 0);
447 htmlDeleteTree(t);
448 }
449 else
450 {
451 // Write a "standard" title page with image...
452 fputs("<CENTER>", out);
453
454 if (TitleImage[0])
455 {
456 image_t *img = image_load(TitleImage, !OutputColor);
457
458 fprintf(out, "<IMG SRC=\"%s\" WIDTH=\"%d\" HEIGHT=\"%d\" "
459 "ALT=\"%s\"><BR>\n",
460 file_basename((char *)TitleImage), img->width, img->height,
461 title ? (char *)title : "");
462 }
463
464 if (title != NULL)
465 fprintf(out, "<H1>%s</H1><BR>\n", title);
466 else
467 fputs("\n", out);
468
469 if (docnumber != NULL)
470 fprintf(out, "%s<BR>\n", docnumber);
471
472 if (author != NULL)
473 fprintf(out, "%s<BR>\n", author);
474
475 if (copyright != NULL)
476 fprintf(out, "%s<BR>\n", copyright);
477
478 fputs("<A HREF=\"toc.html\">Table of Contents</A>", out);
479 fputs("</CENTER>\n", out);
480 }
481 }
482
483
484 /*
485 * 'write_all()' - Write all markup text for the given tree.
486 */
487
488 static int /* O - Current column */
write_all(FILE * out,tree_t * t,int col)489 write_all(FILE *out, /* I - Output file */
490 tree_t *t, /* I - Document tree */
491 int col) /* I - Current column */
492 {
493 if (out == NULL)
494 return (0);
495
496 while (t != NULL)
497 {
498 col = write_node(out, t, col);
499
500 if (t->markup != MARKUP_HEAD && t->markup != MARKUP_TITLE)
501 col = write_all(out, t->child, col);
502
503 col = write_nodeclose(out, t, col);
504
505 t = t->next;
506 }
507
508 return (col);
509 }
510
511
512 /*
513 * 'write_doc()' - Write the entire document.
514 */
515
516 static int // O - Current column
write_doc(FILE ** out,tree_t * t,int col,int * heading,uchar * title,uchar * author,uchar * copyright,uchar * docnumber)517 write_doc(FILE **out, // I - Output file
518 tree_t *t, // I - Document tree
519 int col, // I - Current column
520 int *heading, // IO - Current heading
521 uchar *title, // I - Title
522 uchar *author, // I - Author
523 uchar *copyright, // I - Copyright
524 uchar *docnumber) // I - Document number
525 {
526 uchar filename[1024]; // Filename
527
528
529 while (t != NULL)
530 {
531 if (t->markup >= MARKUP_H1 && t->markup < (MARKUP_H1 + TocLevels) &&
532 htmlGetVariable(t, (uchar *)"_HD_OMIT_TOC") == NULL)
533 {
534 if (*heading >= 0)
535 write_footer(out, *heading);
536
537 (*heading) ++;
538
539 if (*heading >= 0)
540 {
541 snprintf((char *)filename, sizeof(filename), "%s.html",
542 headings[*heading]);
543 write_header(out, filename, title, author, copyright, docnumber,
544 *heading);
545 }
546 }
547
548 col = write_node(*out, t, col);
549
550 if (t->markup != MARKUP_HEAD && t->markup != MARKUP_TITLE)
551 col = write_doc(out, t->child, col, heading,
552 title, author, copyright, docnumber);
553
554 col = write_nodeclose(*out, t, col);
555
556 t = t->next;
557 }
558
559 return (col);
560 }
561
562
563 /*
564 * 'write_node()' - Write a single tree node.
565 */
566
567 static int /* O - Current column */
write_node(FILE * out,tree_t * t,int col)568 write_node(FILE *out, /* I - Output file */
569 tree_t *t, /* I - Document tree node */
570 int col) /* I - Current column */
571 {
572 int i; /* Looping var */
573 uchar *ptr, /* Pointer to output string */
574 *entity, /* Entity string */
575 *src, /* Source image */
576 *realsrc, /* Real source image */
577 newsrc[1024]; /* New source image filename */
578
579
580 if (out == NULL)
581 return (0);
582
583 switch (t->markup)
584 {
585 case MARKUP_NONE :
586 if (t->data == NULL)
587 break;
588
589 if (t->preformatted)
590 {
591 for (ptr = t->data; *ptr; ptr ++)
592 fputs((char *)iso8859(*ptr), out);
593
594 if (t->data[0] && t->data[strlen((char *)t->data) - 1] == '\n')
595 col = 0;
596 else
597 col += strlen((char *)t->data);
598 }
599 else
600 {
601 if ((col + (int)strlen((char *)t->data)) > 72 && col > 0)
602 {
603 putc('\n', out);
604 col = 0;
605 }
606
607 for (ptr = t->data; *ptr; ptr ++)
608 fputs((char *)iso8859(*ptr), out);
609
610 col += strlen((char *)t->data);
611
612 if (col > 72)
613 {
614 putc('\n', out);
615 col = 0;
616 }
617 }
618 break;
619
620 case MARKUP_COMMENT :
621 case MARKUP_UNKNOWN :
622 fputs("\n<!--", out);
623 for (ptr = t->data; *ptr; ptr ++)
624 fputs((char *)iso8859(*ptr), out);
625 fputs("-->\n", out);
626 col = 0;
627 break;
628
629 case MARKUP_AREA :
630 case MARKUP_BODY :
631 case MARKUP_DOCTYPE :
632 case MARKUP_ERROR :
633 case MARKUP_FILE :
634 case MARKUP_HEAD :
635 case MARKUP_HTML :
636 case MARKUP_MAP :
637 case MARKUP_META :
638 case MARKUP_TITLE :
639 break;
640
641 case MARKUP_BR :
642 case MARKUP_CENTER :
643 case MARKUP_DD :
644 case MARKUP_DL :
645 case MARKUP_DT :
646 case MARKUP_H1 :
647 case MARKUP_H2 :
648 case MARKUP_H3 :
649 case MARKUP_H4 :
650 case MARKUP_H5 :
651 case MARKUP_H6 :
652 case MARKUP_H7 :
653 case MARKUP_H8 :
654 case MARKUP_H9 :
655 case MARKUP_H10 :
656 case MARKUP_H11 :
657 case MARKUP_H12 :
658 case MARKUP_H13 :
659 case MARKUP_H14 :
660 case MARKUP_H15 :
661 case MARKUP_HR :
662 case MARKUP_LI :
663 case MARKUP_OL :
664 case MARKUP_P :
665 case MARKUP_PRE :
666 case MARKUP_TABLE :
667 case MARKUP_TR :
668 case MARKUP_UL :
669 if (col > 0)
670 {
671 putc('\n', out);
672 col = 0;
673 }
674
675 default :
676 if (t->markup == MARKUP_IMG &&
677 (src = htmlGetVariable(t, (uchar *)"SRC")) != NULL &&
678 (realsrc = htmlGetVariable(t, (uchar *)"REALSRC")) != NULL)
679 {
680 /*
681 * Update local images...
682 */
683
684 if (file_method((char *)src) == NULL &&
685 src[0] != '/' && src[0] != '\\' &&
686 (!isalpha(src[0]) || src[1] != ':'))
687 {
688 image_copy((char *)src, (char *)realsrc, OutputPath);
689 strlcpy((char *)newsrc, file_basename((char *)src), sizeof(newsrc));
690 htmlSetVariable(t, (uchar *)"SRC", newsrc);
691 }
692 }
693
694 if (t->markup != MARKUP_EMBED)
695 {
696 col += fprintf(out, "<%s", _htmlMarkups[t->markup]);
697 for (i = 0; i < t->nvars; i ++)
698 {
699 if (strcasecmp((char *)t->vars[i].name, "BREAK") == 0 &&
700 t->markup == MARKUP_HR)
701 continue;
702
703 if (strcasecmp((char *)t->vars[i].name, "REALSRC") == 0 &&
704 t->markup == MARKUP_IMG)
705 continue;
706
707 if (strncasecmp((char *)t->vars[i].name, "_HD_", 4) == 0)
708 continue;
709
710 if (col > 72 && !t->preformatted)
711 {
712 putc('\n', out);
713 col = 0;
714 }
715
716 if (col > 0)
717 {
718 putc(' ', out);
719 col ++;
720 }
721
722 if (t->vars[i].value == NULL)
723 col += fprintf(out, "%s", t->vars[i].name);
724 else
725 {
726 col += fprintf(out, "%s=\"", t->vars[i].name);
727 for (ptr = t->vars[i].value; *ptr; ptr ++)
728 {
729 entity = iso8859(*ptr);
730 fputs((char *)entity, out);
731 col += strlen((char *)entity);
732 }
733
734 putc('\"', out);
735 col ++;
736 }
737 }
738
739 putc('>', out);
740 col ++;
741
742 if (col > 72 && !t->preformatted)
743 {
744 putc('\n', out);
745 col = 0;
746 }
747 }
748 break;
749 }
750
751 return (col);
752 }
753
754
755 /*
756 * 'write_nodeclose()' - Close a single tree node.
757 */
758
759 static int /* O - Current column */
write_nodeclose(FILE * out,tree_t * t,int col)760 write_nodeclose(FILE *out, /* I - Output file */
761 tree_t *t, /* I - Document tree node */
762 int col) /* I - Current column */
763 {
764 if (out == NULL)
765 return (0);
766
767 if (t->markup != MARKUP_HEAD && t->markup != MARKUP_TITLE)
768 {
769 if (col > 72 && !t->preformatted)
770 {
771 putc('\n', out);
772 col = 0;
773 }
774
775 switch (t->markup)
776 {
777 case MARKUP_BODY :
778 case MARKUP_ERROR :
779 case MARKUP_FILE :
780 case MARKUP_HEAD :
781 case MARKUP_HTML :
782 case MARKUP_NONE :
783 case MARKUP_TITLE :
784
785 case MARKUP_APPLET :
786 case MARKUP_AREA :
787 case MARKUP_BR :
788 case MARKUP_COMMENT :
789 case MARKUP_DOCTYPE :
790 case MARKUP_EMBED :
791 case MARKUP_HR :
792 case MARKUP_IMG :
793 case MARKUP_INPUT :
794 case MARKUP_ISINDEX :
795 case MARKUP_LINK :
796 case MARKUP_META :
797 case MARKUP_NOBR :
798 case MARKUP_SPACER :
799 case MARKUP_WBR :
800 case MARKUP_UNKNOWN :
801 break;
802
803 case MARKUP_CENTER :
804 case MARKUP_DD :
805 case MARKUP_DL :
806 case MARKUP_DT :
807 case MARKUP_H1 :
808 case MARKUP_H2 :
809 case MARKUP_H3 :
810 case MARKUP_H4 :
811 case MARKUP_H5 :
812 case MARKUP_H6 :
813 case MARKUP_H7 :
814 case MARKUP_H8 :
815 case MARKUP_H9 :
816 case MARKUP_H10 :
817 case MARKUP_H11 :
818 case MARKUP_H12 :
819 case MARKUP_H13 :
820 case MARKUP_H14 :
821 case MARKUP_H15 :
822 case MARKUP_LI :
823 case MARKUP_OL :
824 case MARKUP_P :
825 case MARKUP_PRE :
826 case MARKUP_TABLE :
827 case MARKUP_TR :
828 case MARKUP_UL :
829 fprintf(out, "</%s>\n", _htmlMarkups[t->markup]);
830 col = 0;
831 break;
832
833 default :
834 col += fprintf(out, "</%s>", _htmlMarkups[t->markup]);
835 break;
836 }
837 }
838
839 return (col);
840 }
841
842
843 /*
844 * 'write_toc()' - Write all markup text for the given table-of-contents.
845 */
846
847 static int /* O - Current column */
write_toc(FILE * out,tree_t * t,int col)848 write_toc(FILE *out, /* I - Output file */
849 tree_t *t, /* I - Document tree */
850 int col) /* I - Current column */
851 {
852 if (out == NULL)
853 return (0);
854
855 while (t != NULL)
856 {
857 if (htmlGetVariable(t, (uchar *)"_HD_OMIT_TOC") == NULL)
858 {
859 col = write_node(out, t, col);
860
861 if (t->markup != MARKUP_HEAD && t->markup != MARKUP_TITLE)
862 col = write_toc(out, t->child, col);
863
864 col = write_nodeclose(out, t, col);
865 }
866
867 t = t->next;
868 }
869
870 return (col);
871 }
872
873
874 /*
875 * 'get_title()' - Get the title string for the given document...
876 */
877
878 static uchar * /* O - Title string */
get_title(tree_t * doc)879 get_title(tree_t *doc) /* I - Document tree */
880 {
881 uchar *temp; /* Temporary pointer to title */
882
883
884 while (doc != NULL)
885 {
886 if (doc->markup == MARKUP_TITLE)
887 return (htmlGetText(doc->child));
888 else if (doc->child != NULL)
889 if ((temp = get_title(doc->child)) != NULL)
890 return (temp);
891
892 doc = doc->next;
893 }
894
895 return (NULL);
896 }
897
898
899 //
900 // 'add_heading()' - Add a heading to the list of headings...
901 //
902
903 static void
add_heading(tree_t * t)904 add_heading(tree_t *t) // I - Heading node
905 {
906 size_t i, // Looping var
907 count; // Count of headings with this name
908 uchar *heading, // Heading text for this node
909 *ptr, // Pointer into text
910 *ptr2, // Second pointer into text
911 s[1024], // New text if we have a conflict
912 **temp; // New heading array pointer
913
914
915 // Start by getting the heading text...
916 heading = htmlGetText(t->child);
917 if (!heading || !*heading)
918 {
919 free(heading);
920 return; // Nothing to do!
921 }
922
923 // Sanitize the text...
924 for (ptr = heading; *ptr;)
925 if (!isalnum(*ptr))
926 {
927 // Remove anything but letters and numbers from the filename
928 for (ptr2 = ptr; *ptr2; ptr2 ++)
929 *ptr2 = ptr2[1];
930
931 *ptr2 = '\0';
932 }
933 else
934 ptr ++;
935
936 // Now loop through the existing headings and check for dups...
937 for (ptr = heading, i = 0, count = 0; i < num_headings; i ++)
938 if (strcmp((char *)headings[i], (char *)ptr) == 0)
939 {
940 // Create a new instance of the heading...
941 count ++;
942 snprintf((char *)s, sizeof(s), "%s%d", heading, (int)count);
943 ptr = s;
944 }
945
946 // Now add the heading...
947 if (num_headings >= alloc_headings)
948 {
949 // Allocate more headings...
950 alloc_headings += ALLOC_HEADINGS;
951
952 if (num_headings == 0)
953 temp = (uchar **)malloc(sizeof(uchar *) * alloc_headings);
954 else
955 temp = (uchar **)realloc(headings, sizeof(uchar *) * alloc_headings);
956
957 if (temp == NULL)
958 {
959 progress_error(HD_ERROR_OUT_OF_MEMORY,
960 "Unable to allocate memory for %d headings - %s",
961 (int)alloc_headings, strerror(errno));
962 alloc_headings -= ALLOC_HEADINGS;
963 return;
964 }
965
966 headings = temp;
967 }
968
969 if (ptr == heading)
970 {
971 // Reuse the already-allocated string...
972 headings[num_headings] = ptr;
973 }
974 else
975 {
976 // Make a copy of the string "s" and free the old heading string...
977 headings[num_headings] = (uchar *)strdup((char *)s);
978 free(heading);
979 }
980
981 num_headings ++;
982 }
983
984
985 /*
986 * 'add_link()' - Add a named link...
987 */
988
989 static void
add_link(uchar * name)990 add_link(uchar *name) /* I - Name of link */
991 {
992 uchar *filename; /* File for link */
993 link_t *temp; /* New name */
994
995
996 if (num_headings)
997 filename = headings[num_headings - 1];
998 else
999 filename = (uchar *)"noheading";
1000
1001 if ((temp = find_link(name)) != NULL)
1002 temp->filename = filename;
1003 else
1004 {
1005 // See if we need to allocate memory for links...
1006 if (num_links >= alloc_links)
1007 {
1008 // Allocate more links...
1009 alloc_links += ALLOC_LINKS;
1010
1011 if (num_links == 0)
1012 temp = (link_t *)malloc(sizeof(link_t) * alloc_links);
1013 else
1014 temp = (link_t *)realloc(links, sizeof(link_t) * alloc_links);
1015
1016 if (temp == NULL)
1017 {
1018 progress_error(HD_ERROR_OUT_OF_MEMORY,
1019 "Unable to allocate memory for %d links - %s",
1020 (int)alloc_links, strerror(errno));
1021 alloc_links -= ALLOC_LINKS;
1022 return;
1023 }
1024
1025 links = temp;
1026 }
1027
1028 // Add a new link...
1029 temp = links + num_links;
1030 num_links ++;
1031
1032 strlcpy((char *)temp->name, (char *)name, sizeof(temp->name));
1033 temp->filename = filename;
1034
1035 if (num_links > 1)
1036 qsort(links, num_links, sizeof(link_t), (compare_func_t)compare_links);
1037 }
1038 }
1039
1040
1041 /*
1042 * 'find_link()' - Find a named link...
1043 */
1044
1045 static link_t *
find_link(uchar * name)1046 find_link(uchar *name) /* I - Name to find */
1047 {
1048 uchar *target; /* Pointer to target name portion */
1049 link_t key, /* Search key */
1050 *match; /* Matching name entry */
1051
1052
1053 if (name == NULL || num_links == 0)
1054 return (NULL);
1055
1056 if ((target = (uchar *)file_target((char *)name)) == NULL)
1057 return (NULL);
1058
1059 strlcpy((char *)key.name, (char *)target, sizeof(key.name));
1060 key.name[sizeof(key.name) - 1] = '\0';
1061 match = (link_t *)bsearch(&key, links, num_links, sizeof(link_t),
1062 (compare_func_t)compare_links);
1063
1064 return (match);
1065 }
1066
1067
1068 /*
1069 * 'compare_links()' - Compare two named links.
1070 */
1071
1072 static int /* O - 0 = equal, -1 or 1 = not equal */
compare_links(link_t * n1,link_t * n2)1073 compare_links(link_t *n1, /* I - First name */
1074 link_t *n2) /* I - Second name */
1075 {
1076 return (strcasecmp((char *)n1->name, (char *)n2->name));
1077 }
1078
1079
1080 /*
1081 * 'scan_links()' - Scan a document for link targets, and keep track of
1082 * the files they are in...
1083 */
1084
1085 static void
scan_links(tree_t * t)1086 scan_links(tree_t *t) /* I - Document tree */
1087 {
1088 uchar *name; /* Name of link */
1089
1090
1091 while (t != NULL)
1092 {
1093 if (t->markup >= MARKUP_H1 && t->markup < (MARKUP_H1 + TocLevels) &&
1094 htmlGetVariable(t, (uchar *)"_HD_OMIT_TOC") == NULL)
1095 add_heading(t);
1096
1097 if (t->markup == MARKUP_A &&
1098 (name = htmlGetVariable(t, (uchar *)"NAME")) != NULL)
1099 add_link(name);
1100
1101 if (t->child != NULL)
1102 scan_links(t->child);
1103
1104 t = t->next;
1105 }
1106 }
1107
1108
1109 /*
1110 * 'update_links()' - Update links as needed.
1111 */
1112
1113 static void
update_links(tree_t * t,int * heading)1114 update_links(tree_t *t, /* I - Document tree */
1115 int *heading) /* I - Current heading */
1116 {
1117 link_t *link; /* Link */
1118 uchar *href; /* Reference name */
1119 uchar newhref[1024]; /* New reference name */
1120 uchar *filename; /* Current filename */
1121
1122
1123 // Scan the document, rewriting HREF's as needed...
1124 while (t != NULL)
1125 {
1126 if (t->markup >= MARKUP_H1 && t->markup < (MARKUP_H1 + TocLevels) &&
1127 htmlGetVariable(t, (uchar *)"_HD_OMIT_TOC") == NULL && heading)
1128 (*heading) ++;
1129
1130 // Figure out the current filename based upon the current heading number...
1131 if (!heading || *heading < 0 || (size_t)*heading >= num_headings)
1132 filename = (uchar *)"noheading";
1133 else
1134 filename = headings[*heading];
1135
1136 if (t->markup == MARKUP_A &&
1137 (href = htmlGetVariable(t, (uchar *)"HREF")) != NULL)
1138 {
1139 // Update this link as needed...
1140 if (href[0] == '#' && (link = find_link(href)) != NULL)
1141 {
1142 // The filename in the link structure is a copy of the heading
1143 // pointer...
1144 if (filename != link->filename)
1145 {
1146 // Rewrite using the new name...
1147 snprintf((char *)newhref, sizeof(newhref), "%s.html%s",
1148 link->filename, href);
1149 htmlSetVariable(t, (uchar *)"HREF", newhref);
1150 }
1151 }
1152 }
1153
1154 // Descend the tree as needed...
1155 if (t->child != NULL)
1156 update_links(t->child, heading);
1157
1158 // Move to the next node at this level...
1159 t = t->next;
1160 }
1161 }
1162