1 /*
2  * Separated HTML export functions for HTMLDOC, a HTML document processing
3  * program.
4  *
5  * Copyright 2011-2021 by Michael R Sweet.
6  * Copyright 1997-2010 by Easy Software Products.  All rights reserved.
7  *
8  * This program is free software.  Distribution and use rights are outlined in
9  * the file "COPYING".
10  */
11 
12 /*
13  * Include necessary headers.
14  */
15 
16 #include "htmldoc.h"
17 #include "markdown.h"
18 #include <ctype.h>
19 
20 
21 //
22 // Named link structure...
23 //
24 
typedef struct
{
  uchar		*filename;	/* Base name of the output file holding the anchor; borrowed pointer (a heading string or a literal), never freed through this struct */
  uchar		name[124];	/* Anchor name, truncated to fit; also the sort/search key (compared case-insensitively) */
} link_t;
30 
31 
32 //
33 // Local globals...
34 //
35 
// Heading strings used for filenames.  Each entry is a heap-allocated,
// sanitized heading text; "<entry>.html" is the name of the file that
// holds that heading's content.
static size_t	num_headings = 0,	// Number of headings in use
		alloc_headings = 0;	// Number of heading slots allocated
static uchar	**headings;		// Heading strings (owned; freed by htmlsep_export)

// Links in document - used to add the correct filename to the link.
// The array is kept sorted by name so that it can be searched with bsearch().
static size_t	num_links = 0,		// Number of links in use
		alloc_links = 0;	// Number of link slots allocated
static link_t	*links;			// Links (owned; freed by htmlsep_export)
45 
46 
47 //
48 // Local functions...
49 //
50 
// Comparison callback type with C linkage, used when passing
// compare_links() to qsort() and bsearch().
extern "C" {
typedef int	(*compare_func_t)(const void *, const void *);
}
54 
// Output helpers: write_header() opens a file and stores the stream in *out,
// write_footer() finishes and closes it (resetting *out to NULL).
static void	write_header(FILE **out, uchar *filename, uchar *title,
		             uchar *author, uchar *copyright, uchar *docnumber,
			     int heading);
static void	write_footer(FILE **out, int heading);
static void	write_title(FILE *out, uchar *title, uchar *author,
		            uchar *copyright, uchar *docnumber);
// Tree writers: each returns the updated output column.
static int	write_all(FILE *out, tree_t *t, int col);
static int	write_doc(FILE **out, tree_t *t, int col, int *heading,
		          uchar *title, uchar *author, uchar *copyright,
			  uchar *docnumber);
static int	write_node(FILE *out, tree_t *t, int col);
static int	write_nodeclose(FILE *out, tree_t *t, int col);
static int	write_toc(FILE *out, tree_t *t, int col);
static uchar	*get_title(tree_t *doc);

// Heading and link bookkeeping used to split the document into files and
// to rewrite "#target" references across those files.
static void	add_heading(tree_t *t);
static void	add_link(uchar *name);
static link_t	*find_link(uchar *name);
static int	compare_links(link_t *n1, link_t *n2);
static void	scan_links(tree_t *t);
static void	update_links(tree_t *t, int *heading);
76 
77 
78 //
79 // 'htmlsep_export()' - Export to separated HTML files...
80 //
81 
82 int					// O - 0 = success, -1 = failure
htmlsep_export(tree_t * document,tree_t * toc)83 htmlsep_export(tree_t *document,	// I - Document to export
84                tree_t *toc)		// I - Table of contents for document
85 {
86   size_t	i;			// Looping var
87   int		heading;		// Current heading number
88   uchar		*title,			// Title text
89 		*author,		// Author name
90 		*copyright,		// Copyright text
91 		*docnumber;		// Document number
92   FILE		*out;			// Output file
93 
94 
95   // We only support writing to a directory...
96   if (!OutputFiles)
97   {
98     progress_error(HD_ERROR_INTERNAL_ERROR, "Unable to generate separated HTML to a single file!");
99     return (-1);
100   }
101 
102   // Copy logo and title images...
103   if (LogoImage[0])
104     image_copy(LogoImage, file_find(LogoImage, Path), OutputPath);
105 
106   for (int hfi = 0; hfi < MAX_HF_IMAGES; hfi ++)
107     if (HFImage[hfi][0])
108       image_copy(HFImage[hfi], file_find(HFImage[hfi], Path), OutputPath);
109 
110   if (TitleImage[0] && TitlePage &&
111 #ifdef WIN32
112       (stricmp(file_extension(TitleImage), "bmp") == 0 ||
113        stricmp(file_extension(TitleImage), "gif") == 0 ||
114        stricmp(file_extension(TitleImage), "jpg") == 0 ||
115        stricmp(file_extension(TitleImage), "png") == 0))
116 #else
117       (strcmp(file_extension(TitleImage), "bmp") == 0 ||
118        strcmp(file_extension(TitleImage), "gif") == 0 ||
119        strcmp(file_extension(TitleImage), "jpg") == 0 ||
120        strcmp(file_extension(TitleImage), "png") == 0))
121 #endif // WIN32
122     image_copy(TitleImage, file_find(TitleImage, Path), OutputPath);
123 
124   // Get document strings...
125   title     = get_title(document);
126   author    = htmlGetMeta(document, (uchar *)"author");
127   copyright = htmlGetMeta(document, (uchar *)"copyright");
128   docnumber = htmlGetMeta(document, (uchar *)"docnumber");
129   if (!docnumber)
130     docnumber = htmlGetMeta(document, (uchar *)"version");
131 
132   // Scan for all links in the document, and then update them...
133   num_links   = 0;
134   alloc_links = 0;
135   links       = NULL;
136 
137   scan_links(document);
138 
139 //  printf("num_headings = %d\n", num_headings);
140 //  for (i = 0; i < num_headings; i ++)
141 //    printf("headings[%d] = \"%s\"\n", i, headings[i]);
142 
143   heading = -1;
144   update_links(document, &heading);
145   update_links(toc, NULL);
146 
147   // Generate title pages and a table of contents...
148   out = NULL;
149   if (TitlePage)
150   {
151     write_header(&out, (uchar *)"index.html", title, author, copyright,
152                  docnumber, -1);
153     if (out != NULL)
154       write_title(out, title, author, copyright, docnumber);
155 
156     write_footer(&out, -1);
157 
158     write_header(&out, (uchar *)"toc.html", title, author, copyright,
159                  docnumber, -1);
160   }
161   else
162     write_header(&out, (uchar *)"index.html", title, author, copyright,
163                  docnumber, -1);
164 
165   if (out != NULL)
166     write_toc(out, toc, 0);
167 
168   write_footer(&out, -1);
169 
170   // Then write each output file...
171   heading = -1;
172   write_doc(&out, document, 0, &heading, title, author, copyright, docnumber);
173 
174   if (out != NULL)
175     write_footer(&out, heading);
176 
177   // Free memory...
178   if (title != NULL)
179     free(title);
180 
181   if (alloc_links)
182   {
183     free(links);
184 
185     num_links   = 0;
186     alloc_links = 0;
187     links       = NULL;
188   }
189 
190   if (alloc_headings)
191   {
192     for (i = 0; i < num_headings; i ++)
193       free(headings[i]);
194 
195     free(headings);
196 
197     num_headings   = 0;
198     alloc_headings = 0;
199     headings       = NULL;
200   }
201 
202   return (out == NULL);
203 }
204 
205 
206 /*
207  * 'write_header()' - Output the standard "header" for a HTML file.
208  */
209 
210 static void
write_header(FILE ** out,uchar * filename,uchar * title,uchar * author,uchar * copyright,uchar * docnumber,int heading)211 write_header(FILE   **out,	/* IO - Output file */
212              uchar  *filename,	/* I - Output filename */
213 	     uchar  *title,	/* I - Title for document */
214              uchar  *author,	/* I - Author for document */
215              uchar  *copyright,	/* I - Copyright for document */
216              uchar  *docnumber,	/* I - ID number for document */
217 	     int    heading)	/* I - Current heading */
218 {
219   char		realname[1024];	/* Real filename */
220   const char	*basename;	/* Filename without directory */
221   static const char *families[] =/* Typeface names */
222 		{
223 		  "monospace",
224 		  "serif",
225 		  "sans-serif",
226 		  "monospace",
227 		  "serif",
228 		  "sans-serif",
229 		  "symbol",
230 		  "dingbats"
231 		};
232 
233 
234   basename = file_basename((char *)filename);
235 
236   snprintf(realname, sizeof(realname), "%s/%s", OutputPath, basename);
237 
238   *out = fopen(realname, "wb");
239 
240   if (*out == NULL)
241   {
242     progress_error(HD_ERROR_WRITE_ERROR,
243                    "Unable to create output file \"%s\" - %s.\n",
244                    realname, strerror(errno));
245     return;
246   }
247 
248   fputs("<!DOCTYPE html>\n", *out);
249   fputs("<HTML>\n", *out);
250   fputs("<HEAD>\n", *out);
251   if (title != NULL)
252     fprintf(*out, "<TITLE>%s</TITLE>\n", title);
253   if (author != NULL)
254     fprintf(*out, "<META NAME=\"author\" CONTENT=\"%s\">\n", author);
255   if (copyright != NULL)
256     fprintf(*out, "<META NAME=\"copyright\" CONTENT=\"%s\">\n", copyright);
257   if (docnumber != NULL)
258     fprintf(*out, "<META NAME=\"docnumber\" CONTENT=\"%s\">\n", docnumber);
259   fprintf(*out, "<META HTTP-EQUIV=\"Content-Type\" CONTENT=\"text/html; CHARSET=%s\">\n",
260           _htmlCharSet);
261 
262   fputs("<LINK REL=\"Start\" HREF=\"index.html\">\n", *out);
263 
264   if (TitlePage)
265     fputs("<LINK REL=\"Contents\" HREF=\"toc.html\">\n", *out);
266   else
267     fputs("<LINK REL=\"Contents\" HREF=\"index.html\">\n", *out);
268 
269   if (heading >= 0)
270   {
271     if (heading > 0)
272       fprintf(*out, "<LINK REL=\"Prev\" HREF=\"%s.html\">\n", headings[heading - 1]);
273 
274     if ((size_t)heading < (num_headings - 1))
275       fprintf(*out, "<LINK REL=\"Next\" HREF=\"%s.html\">\n", headings[heading + 1]);
276   }
277 
278   fputs("<STYLE TYPE=\"text/css\"><!--\n", *out);
279   fprintf(*out, "BODY { font-family: %s; }\n", families[_htmlBodyFont]);
280   fprintf(*out, "H1 { font-family: %s; }\n", families[_htmlHeadingFont]);
281   fprintf(*out, "H2 { font-family: %s; }\n", families[_htmlHeadingFont]);
282   fprintf(*out, "H3 { font-family: %s; }\n", families[_htmlHeadingFont]);
283   fprintf(*out, "H4 { font-family: %s; }\n", families[_htmlHeadingFont]);
284   fprintf(*out, "H5 { font-family: %s; }\n", families[_htmlHeadingFont]);
285   fprintf(*out, "H6 { font-family: %s; }\n", families[_htmlHeadingFont]);
286   fputs("SUB { font-size: smaller; }\n", *out);
287   fputs("SUP { font-size: smaller; }\n", *out);
288   fputs("PRE { font-family: monospace; margin-left: 36pt; }\n", *out);
289 
290   if (!LinkStyle)
291     fputs("A { text-decoration: none; }\n", *out);
292 
293   fputs("--></STYLE>\n", *out);
294   fputs("</HEAD>\n", *out);
295 
296   if (BodyImage[0])
297     fprintf(*out, "<BODY BACKGROUND=\"%s\"", file_basename(BodyImage));
298   else if (BodyColor[0])
299     fprintf(*out, "<BODY BGCOLOR=\"%s\"", BodyColor);
300   else
301     fputs("<BODY", *out);
302 
303   if (_htmlTextColor[0])
304     fprintf(*out, " TEXT=\"%s\"", _htmlTextColor);
305 
306   if (LinkColor[0])
307     fprintf(*out, " LINK=\"%s\" VLINK=\"%s\" ALINK=\"%s\"", LinkColor,
308             LinkColor, LinkColor);
309 
310   fputs(">\n", *out);
311 
312   if (heading >= 0)
313   {
314     if (LogoImage[0])
315       fprintf(*out, "<IMG SRC=\"%s\">\n", file_basename(LogoImage));
316 
317     for (int hfi = 0; hfi < MAX_HF_IMAGES; ++hfi)
318       if (HFImage[hfi][0])
319         fprintf(*out, "<IMG SRC=\"%s\">\n", file_basename(HFImage[hfi]));
320 
321     if (TitlePage)
322       fputs("<A HREF=\"toc.html\">Contents</A>\n", *out);
323     else
324       fputs("<A HREF=\"index.html\">Contents</A>\n", *out);
325 
326     if (heading > 0)
327       fprintf(*out, "<A HREF=\"%s.html\">Previous</A>\n", headings[heading - 1]);
328 
329     if ((size_t)heading < (num_headings - 1))
330       fprintf(*out, "<A HREF=\"%s.html\">Next</A>\n", headings[heading + 1]);
331 
332     fputs("<HR NOSHADE>\n", *out);
333   }
334 }
335 
336 
337 /*
338  * 'write_footer()' - Output the standard "footer" for a HTML file.
339  */
340 
341 static void
write_footer(FILE ** out,int heading)342 write_footer(FILE **out,	/* IO - Output file pointer */
343 	     int  heading)	/* I  - Current heading */
344 {
345   if (*out == NULL)
346     return;
347 
348   fputs("<HR NOSHADE>\n", *out);
349 
350   if (heading >= 0)
351   {
352     if (LogoImage[0])
353       fprintf(*out, "<IMG SRC=\"%s\">\n", file_basename(LogoImage));
354 
355     for (int hfi = 0; hfi < MAX_HF_IMAGES; ++hfi)
356       if (HFImage[hfi][0])
357         fprintf(*out, "<IMG SRC=\"%s\">\n", file_basename(HFImage[hfi]));
358 
359     if (TitlePage)
360       fputs("<A HREF=\"toc.html\">Contents</A>\n", *out);
361     else
362       fputs("<A HREF=\"index.html\">Contents</A>\n", *out);
363 
364     if (heading > 0)
365       fprintf(*out, "<A HREF=\"%s.html\">Previous</A>\n", headings[heading - 1]);
366 
367     if ((size_t)heading < (num_headings - 1))
368       fprintf(*out, "<A HREF=\"%s.html\">Next</A>\n", headings[heading + 1]);
369   }
370 
371   fputs("</BODY>\n", *out);
372   fputs("</HTML>\n", *out);
373 
374   progress_error(HD_ERROR_NONE, "BYTES: %ld", ftell(*out));
375 
376   fclose(*out);
377   *out = NULL;
378 }
379 
380 
381 /*
382  * 'write_title()' - Write a title page...
383  */
384 
385 static void
write_title(FILE * out,uchar * title,uchar * author,uchar * copyright,uchar * docnumber)386 write_title(FILE  *out,		/* I - Output file */
387             uchar *title,	/* I - Title for document */
388             uchar *author,	/* I - Author for document */
389             uchar *copyright,	/* I - Copyright for document */
390             uchar *docnumber)	/* I - ID number for document */
391 {
392   FILE		*fp;		/* Title file */
393   const char	*title_ext,	/* Extension of title file */
394 		*title_file;	/* Location of title file */
395   tree_t	*t;		/* Title file document tree */
396 
397 
398   if (out == NULL)
399     return;
400 
401   title_ext = file_extension(TitleImage);
402 
403 #ifdef WIN32
404   if (TitleImage[0] &&
405       stricmp(title_ext, "bmp") != 0 &&
406       stricmp(title_ext, "gif") != 0 &&
407       stricmp(title_ext, "jpg") != 0 &&
408       stricmp(title_ext, "png") != 0)
409 #else
410   if (TitleImage[0] &&
411       strcmp(title_ext, "bmp") != 0 &&
412       strcmp(title_ext, "gif") != 0 &&
413       strcmp(title_ext, "jpg") != 0 &&
414       strcmp(title_ext, "png") != 0)
415 #endif // WIN32
416   {
417     // Find the title page file...
418     if ((title_file = file_find(Path, TitleImage)) == NULL)
419     {
420       progress_error(HD_ERROR_FILE_NOT_FOUND,
421                      "Unable to find title file \"%s\"!", TitleImage);
422       return;
423     }
424 
425     // Write a title page from HTML source...
426     if ((fp = fopen(title_file, "rb")) == NULL)
427     {
428       progress_error(HD_ERROR_FILE_NOT_FOUND,
429                      "Unable to open title file \"%s\" - %s!",
430                      TitleImage, strerror(errno));
431       return;
432     }
433 
434 #ifdef _WIN32
435     if (!stricmp(title_ext, "md"))
436 #else
437     if (!strcmp(title_ext, "md"))
438 #endif // _WIN32
439       t = mdReadFile(NULL, fp, file_directory(TitleImage));
440     else
441       t = htmlReadFile(NULL, fp, file_directory(TitleImage));
442 
443     htmlFixLinks(t, t, (uchar *)file_directory(TitleImage));
444     fclose(fp);
445 
446     write_all(out, t, 0);
447     htmlDeleteTree(t);
448   }
449   else
450   {
451     // Write a "standard" title page with image...
452     fputs("<CENTER>", out);
453 
454     if (TitleImage[0])
455     {
456       image_t *img = image_load(TitleImage, !OutputColor);
457 
458       fprintf(out, "<IMG SRC=\"%s\" WIDTH=\"%d\" HEIGHT=\"%d\" "
459 	           "ALT=\"%s\"><BR>\n",
460               file_basename((char *)TitleImage), img->width, img->height,
461 	      title ? (char *)title : "");
462     }
463 
464     if (title != NULL)
465       fprintf(out, "<H1>%s</H1><BR>\n", title);
466     else
467       fputs("\n", out);
468 
469     if (docnumber != NULL)
470       fprintf(out, "%s<BR>\n", docnumber);
471 
472     if (author != NULL)
473       fprintf(out, "%s<BR>\n", author);
474 
475     if (copyright != NULL)
476       fprintf(out, "%s<BR>\n", copyright);
477 
478     fputs("<A HREF=\"toc.html\">Table of Contents</A>", out);
479     fputs("</CENTER>\n", out);
480   }
481 }
482 
483 
484 /*
485  * 'write_all()' - Write all markup text for the given tree.
486  */
487 
488 static int			/* O - Current column */
write_all(FILE * out,tree_t * t,int col)489 write_all(FILE   *out,		/* I - Output file */
490           tree_t *t,		/* I - Document tree */
491           int    col)		/* I - Current column */
492 {
493   if (out == NULL)
494     return (0);
495 
496   while (t != NULL)
497   {
498     col = write_node(out, t, col);
499 
500     if (t->markup != MARKUP_HEAD && t->markup != MARKUP_TITLE)
501       col = write_all(out, t->child, col);
502 
503     col = write_nodeclose(out, t, col);
504 
505     t = t->next;
506   }
507 
508   return (col);
509 }
510 
511 
512 /*
513  * 'write_doc()' - Write the entire document.
514  */
515 
516 static int				// O - Current column
write_doc(FILE ** out,tree_t * t,int col,int * heading,uchar * title,uchar * author,uchar * copyright,uchar * docnumber)517 write_doc(FILE   **out,			// I - Output file
518           tree_t *t,			// I - Document tree
519           int    col,			// I - Current column
520           int    *heading,		// IO - Current heading
521 	  uchar  *title,		// I  - Title
522           uchar  *author,		// I  - Author
523 	  uchar  *copyright,		// I  - Copyright
524 	  uchar  *docnumber)		// I  - Document number
525 {
526   uchar	filename[1024];			// Filename
527 
528 
529   while (t != NULL)
530   {
531     if (t->markup >= MARKUP_H1 && t->markup < (MARKUP_H1 + TocLevels) &&
532         htmlGetVariable(t, (uchar *)"_HD_OMIT_TOC") == NULL)
533     {
534       if (*heading >= 0)
535         write_footer(out, *heading);
536 
537       (*heading) ++;
538 
539       if (*heading >= 0)
540       {
541 	snprintf((char *)filename, sizeof(filename), "%s.html",
542 	         headings[*heading]);
543 	write_header(out, filename, title, author, copyright, docnumber,
544                      *heading);
545       }
546     }
547 
548     col = write_node(*out, t, col);
549 
550     if (t->markup != MARKUP_HEAD && t->markup != MARKUP_TITLE)
551       col = write_doc(out, t->child, col, heading,
552                       title, author, copyright, docnumber);
553 
554     col = write_nodeclose(*out, t, col);
555 
556     t = t->next;
557   }
558 
559   return (col);
560 }
561 
562 
563 /*
564  * 'write_node()' - Write a single tree node.
565  */
566 
567 static int			/* O - Current column */
write_node(FILE * out,tree_t * t,int col)568 write_node(FILE   *out,		/* I - Output file */
569            tree_t *t,		/* I - Document tree node */
570            int    col)		/* I - Current column */
571 {
572   int		i;		/* Looping var */
573   uchar		*ptr,		/* Pointer to output string */
574 		*entity,	/* Entity string */
575 		*src,		/* Source image */
576 		*realsrc,	/* Real source image */
577 		newsrc[1024];	/* New source image filename */
578 
579 
580   if (out == NULL)
581     return (0);
582 
583   switch (t->markup)
584   {
585     case MARKUP_NONE :
586         if (t->data == NULL)
587 	  break;
588 
589 	if (t->preformatted)
590 	{
591           for (ptr = t->data; *ptr; ptr ++)
592             fputs((char *)iso8859(*ptr), out);
593 
594 	  if (t->data[0] && t->data[strlen((char *)t->data) - 1] == '\n')
595             col = 0;
596 	  else
597             col += strlen((char *)t->data);
598 	}
599 	else
600 	{
601 	  if ((col + (int)strlen((char *)t->data)) > 72 && col > 0)
602 	  {
603             putc('\n', out);
604             col = 0;
605 	  }
606 
607           for (ptr = t->data; *ptr; ptr ++)
608             fputs((char *)iso8859(*ptr), out);
609 
610 	  col += strlen((char *)t->data);
611 
612 	  if (col > 72)
613 	  {
614             putc('\n', out);
615             col = 0;
616 	  }
617 	}
618 	break;
619 
620     case MARKUP_COMMENT :
621     case MARKUP_UNKNOWN :
622         fputs("\n<!--", out);
623 	for (ptr = t->data; *ptr; ptr ++)
624 	  fputs((char *)iso8859(*ptr), out);
625 	fputs("-->\n", out);
626 	col = 0;
627 	break;
628 
629     case MARKUP_AREA :
630     case MARKUP_BODY :
631     case MARKUP_DOCTYPE :
632     case MARKUP_ERROR :
633     case MARKUP_FILE :
634     case MARKUP_HEAD :
635     case MARKUP_HTML :
636     case MARKUP_MAP :
637     case MARKUP_META :
638     case MARKUP_TITLE :
639         break;
640 
641     case MARKUP_BR :
642     case MARKUP_CENTER :
643     case MARKUP_DD :
644     case MARKUP_DL :
645     case MARKUP_DT :
646     case MARKUP_H1 :
647     case MARKUP_H2 :
648     case MARKUP_H3 :
649     case MARKUP_H4 :
650     case MARKUP_H5 :
651     case MARKUP_H6 :
652     case MARKUP_H7 :
653     case MARKUP_H8 :
654     case MARKUP_H9 :
655     case MARKUP_H10 :
656     case MARKUP_H11 :
657     case MARKUP_H12 :
658     case MARKUP_H13 :
659     case MARKUP_H14 :
660     case MARKUP_H15 :
661     case MARKUP_HR :
662     case MARKUP_LI :
663     case MARKUP_OL :
664     case MARKUP_P :
665     case MARKUP_PRE :
666     case MARKUP_TABLE :
667     case MARKUP_TR :
668     case MARKUP_UL :
669         if (col > 0)
670         {
671           putc('\n', out);
672           col = 0;
673         }
674 
675     default :
676 	if (t->markup == MARKUP_IMG &&
677             (src = htmlGetVariable(t, (uchar *)"SRC")) != NULL &&
678             (realsrc = htmlGetVariable(t, (uchar *)"REALSRC")) != NULL)
679 	{
680 	 /*
681           * Update local images...
682           */
683 
684           if (file_method((char *)src) == NULL &&
685               src[0] != '/' && src[0] != '\\' &&
686 	      (!isalpha(src[0]) || src[1] != ':'))
687           {
688             image_copy((char *)src, (char *)realsrc, OutputPath);
689             strlcpy((char *)newsrc, file_basename((char *)src), sizeof(newsrc));
690             htmlSetVariable(t, (uchar *)"SRC", newsrc);
691           }
692 	}
693 
694         if (t->markup != MARKUP_EMBED)
695 	{
696 	  col += fprintf(out, "<%s", _htmlMarkups[t->markup]);
697 	  for (i = 0; i < t->nvars; i ++)
698 	  {
699 	    if (strcasecmp((char *)t->vars[i].name, "BREAK") == 0 &&
700 	        t->markup == MARKUP_HR)
701 	      continue;
702 
703 	    if (strcasecmp((char *)t->vars[i].name, "REALSRC") == 0 &&
704 	        t->markup == MARKUP_IMG)
705 	      continue;
706 
707             if (strncasecmp((char *)t->vars[i].name, "_HD_", 4) == 0)
708 	      continue;
709 
710 	    if (col > 72 && !t->preformatted)
711 	    {
712               putc('\n', out);
713               col = 0;
714 	    }
715 
716             if (col > 0)
717             {
718               putc(' ', out);
719               col ++;
720             }
721 
722 	    if (t->vars[i].value == NULL)
723               col += fprintf(out, "%s", t->vars[i].name);
724 	    else
725 	    {
726 	      col += fprintf(out, "%s=\"", t->vars[i].name);
727 	      for (ptr = t->vars[i].value; *ptr; ptr ++)
728 	      {
729 		entity = iso8859(*ptr);
730 		fputs((char *)entity, out);
731 		col += strlen((char *)entity);
732 	      }
733 
734 	      putc('\"', out);
735 	      col ++;
736 	    }
737 	  }
738 
739 	  putc('>', out);
740 	  col ++;
741 
742 	  if (col > 72 && !t->preformatted)
743 	  {
744 	    putc('\n', out);
745 	    col = 0;
746 	  }
747 	}
748 	break;
749   }
750 
751   return (col);
752 }
753 
754 
755 /*
756  * 'write_nodeclose()' - Close a single tree node.
757  */
758 
759 static int			/* O - Current column */
write_nodeclose(FILE * out,tree_t * t,int col)760 write_nodeclose(FILE   *out,	/* I - Output file */
761                 tree_t *t,	/* I - Document tree node */
762                 int    col)	/* I - Current column */
763 {
764   if (out == NULL)
765     return (0);
766 
767   if (t->markup != MARKUP_HEAD && t->markup != MARKUP_TITLE)
768   {
769     if (col > 72 && !t->preformatted)
770     {
771       putc('\n', out);
772       col = 0;
773     }
774 
775     switch (t->markup)
776     {
777       case MARKUP_BODY :
778       case MARKUP_ERROR :
779       case MARKUP_FILE :
780       case MARKUP_HEAD :
781       case MARKUP_HTML :
782       case MARKUP_NONE :
783       case MARKUP_TITLE :
784 
785       case MARKUP_APPLET :
786       case MARKUP_AREA :
787       case MARKUP_BR :
788       case MARKUP_COMMENT :
789       case MARKUP_DOCTYPE :
790       case MARKUP_EMBED :
791       case MARKUP_HR :
792       case MARKUP_IMG :
793       case MARKUP_INPUT :
794       case MARKUP_ISINDEX :
795       case MARKUP_LINK :
796       case MARKUP_META :
797       case MARKUP_NOBR :
798       case MARKUP_SPACER :
799       case MARKUP_WBR :
800       case MARKUP_UNKNOWN :
801           break;
802 
803       case MARKUP_CENTER :
804       case MARKUP_DD :
805       case MARKUP_DL :
806       case MARKUP_DT :
807       case MARKUP_H1 :
808       case MARKUP_H2 :
809       case MARKUP_H3 :
810       case MARKUP_H4 :
811       case MARKUP_H5 :
812       case MARKUP_H6 :
813       case MARKUP_H7 :
814       case MARKUP_H8 :
815       case MARKUP_H9 :
816       case MARKUP_H10 :
817       case MARKUP_H11 :
818       case MARKUP_H12 :
819       case MARKUP_H13 :
820       case MARKUP_H14 :
821       case MARKUP_H15 :
822       case MARKUP_LI :
823       case MARKUP_OL :
824       case MARKUP_P :
825       case MARKUP_PRE :
826       case MARKUP_TABLE :
827       case MARKUP_TR :
828       case MARKUP_UL :
829           fprintf(out, "</%s>\n", _htmlMarkups[t->markup]);
830           col = 0;
831           break;
832 
833       default :
834           col += fprintf(out, "</%s>", _htmlMarkups[t->markup]);
835 	  break;
836     }
837   }
838 
839   return (col);
840 }
841 
842 
843 /*
844  * 'write_toc()' - Write all markup text for the given table-of-contents.
845  */
846 
847 static int			/* O - Current column */
write_toc(FILE * out,tree_t * t,int col)848 write_toc(FILE   *out,		/* I - Output file */
849           tree_t *t,		/* I - Document tree */
850           int    col)		/* I - Current column */
851 {
852   if (out == NULL)
853     return (0);
854 
855   while (t != NULL)
856   {
857     if (htmlGetVariable(t, (uchar *)"_HD_OMIT_TOC") == NULL)
858     {
859       col = write_node(out, t, col);
860 
861       if (t->markup != MARKUP_HEAD && t->markup != MARKUP_TITLE)
862 	col = write_toc(out, t->child, col);
863 
864       col = write_nodeclose(out, t, col);
865     }
866 
867     t = t->next;
868   }
869 
870   return (col);
871 }
872 
873 
874 /*
875  * 'get_title()' - Get the title string for the given document...
876  */
877 
878 static uchar *		/* O - Title string */
get_title(tree_t * doc)879 get_title(tree_t *doc)	/* I - Document tree */
880 {
881   uchar	*temp;		/* Temporary pointer to title */
882 
883 
884   while (doc != NULL)
885   {
886     if (doc->markup == MARKUP_TITLE)
887       return (htmlGetText(doc->child));
888     else if (doc->child != NULL)
889       if ((temp = get_title(doc->child)) != NULL)
890         return (temp);
891 
892     doc = doc->next;
893   }
894 
895   return (NULL);
896 }
897 
898 
899 //
900 // 'add_heading()' - Add a heading to the list of headings...
901 //
902 
903 static void
add_heading(tree_t * t)904 add_heading(tree_t *t)			// I - Heading node
905 {
906   size_t	i,			// Looping var
907 		count;			// Count of headings with this name
908   uchar		*heading,		// Heading text for this node
909 		*ptr,			// Pointer into text
910 		*ptr2,			// Second pointer into text
911 		s[1024],		// New text if we have a conflict
912 		**temp;			// New heading array pointer
913 
914 
915   // Start by getting the heading text...
916   heading = htmlGetText(t->child);
917   if (!heading || !*heading)
918   {
919     free(heading);
920     return;				// Nothing to do!
921   }
922 
923   // Sanitize the text...
924   for (ptr = heading; *ptr;)
925     if (!isalnum(*ptr))
926     {
927       // Remove anything but letters and numbers from the filename
928       for (ptr2 = ptr; *ptr2; ptr2 ++)
929         *ptr2 = ptr2[1];
930 
931       *ptr2 = '\0';
932     }
933     else
934       ptr ++;
935 
936   // Now loop through the existing headings and check for dups...
937   for (ptr = heading, i = 0, count = 0; i < num_headings; i ++)
938     if (strcmp((char *)headings[i], (char *)ptr) == 0)
939     {
940       // Create a new instance of the heading...
941       count ++;
942       snprintf((char *)s, sizeof(s), "%s%d", heading, (int)count);
943       ptr = s;
944     }
945 
946   // Now add the heading...
947   if (num_headings >= alloc_headings)
948   {
949     // Allocate more headings...
950     alloc_headings += ALLOC_HEADINGS;
951 
952     if (num_headings == 0)
953       temp = (uchar **)malloc(sizeof(uchar *) * alloc_headings);
954     else
955       temp = (uchar **)realloc(headings, sizeof(uchar *) * alloc_headings);
956 
957     if (temp == NULL)
958     {
959       progress_error(HD_ERROR_OUT_OF_MEMORY,
960 	             "Unable to allocate memory for %d headings - %s",
961 	             (int)alloc_headings, strerror(errno));
962       alloc_headings -= ALLOC_HEADINGS;
963       return;
964     }
965 
966     headings = temp;
967   }
968 
969   if (ptr == heading)
970   {
971     // Reuse the already-allocated string...
972     headings[num_headings] = ptr;
973   }
974   else
975   {
976     // Make a copy of the string "s" and free the old heading string...
977     headings[num_headings] = (uchar *)strdup((char *)s);
978     free(heading);
979   }
980 
981   num_headings ++;
982 }
983 
984 
985 /*
986  * 'add_link()' - Add a named link...
987  */
988 
989 static void
add_link(uchar * name)990 add_link(uchar *name)		/* I - Name of link */
991 {
992   uchar		*filename;	/* File for link */
993   link_t	*temp;		/* New name */
994 
995 
996   if (num_headings)
997     filename = headings[num_headings - 1];
998   else
999     filename = (uchar *)"noheading";
1000 
1001   if ((temp = find_link(name)) != NULL)
1002     temp->filename = filename;
1003   else
1004   {
1005     // See if we need to allocate memory for links...
1006     if (num_links >= alloc_links)
1007     {
1008       // Allocate more links...
1009       alloc_links += ALLOC_LINKS;
1010 
1011       if (num_links == 0)
1012         temp = (link_t *)malloc(sizeof(link_t) * alloc_links);
1013       else
1014         temp = (link_t *)realloc(links, sizeof(link_t) * alloc_links);
1015 
1016       if (temp == NULL)
1017       {
1018 	progress_error(HD_ERROR_OUT_OF_MEMORY,
1019 	               "Unable to allocate memory for %d links - %s",
1020 	               (int)alloc_links, strerror(errno));
1021         alloc_links -= ALLOC_LINKS;
1022 	return;
1023       }
1024 
1025       links = temp;
1026     }
1027 
1028     // Add a new link...
1029     temp = links + num_links;
1030     num_links ++;
1031 
1032     strlcpy((char *)temp->name, (char *)name, sizeof(temp->name));
1033     temp->filename = filename;
1034 
1035     if (num_links > 1)
1036       qsort(links, num_links, sizeof(link_t), (compare_func_t)compare_links);
1037   }
1038 }
1039 
1040 
1041 /*
1042  * 'find_link()' - Find a named link...
1043  */
1044 
1045 static link_t *
find_link(uchar * name)1046 find_link(uchar *name)		/* I - Name to find */
1047 {
1048   uchar		*target;	/* Pointer to target name portion */
1049   link_t	key,		/* Search key */
1050 		*match;		/* Matching name entry */
1051 
1052 
1053   if (name == NULL || num_links == 0)
1054     return (NULL);
1055 
1056   if ((target = (uchar *)file_target((char *)name)) == NULL)
1057     return (NULL);
1058 
1059   strlcpy((char *)key.name, (char *)target, sizeof(key.name));
1060   key.name[sizeof(key.name) - 1] = '\0';
1061   match = (link_t *)bsearch(&key, links, num_links, sizeof(link_t),
1062                             (compare_func_t)compare_links);
1063 
1064   return (match);
1065 }
1066 
1067 
1068 /*
1069  * 'compare_links()' - Compare two named links.
1070  */
1071 
1072 static int			/* O - 0 = equal, -1 or 1 = not equal */
compare_links(link_t * n1,link_t * n2)1073 compare_links(link_t *n1,	/* I - First name */
1074               link_t *n2)	/* I - Second name */
1075 {
1076   return (strcasecmp((char *)n1->name, (char *)n2->name));
1077 }
1078 
1079 
1080 /*
1081  * 'scan_links()' - Scan a document for link targets, and keep track of
1082  *                  the files they are in...
1083  */
1084 
1085 static void
scan_links(tree_t * t)1086 scan_links(tree_t *t)		/* I - Document tree */
1087 {
1088   uchar	*name;			/* Name of link */
1089 
1090 
1091   while (t != NULL)
1092   {
1093     if (t->markup >= MARKUP_H1 && t->markup < (MARKUP_H1 + TocLevels) &&
1094         htmlGetVariable(t, (uchar *)"_HD_OMIT_TOC") == NULL)
1095       add_heading(t);
1096 
1097     if (t->markup == MARKUP_A &&
1098         (name = htmlGetVariable(t, (uchar *)"NAME")) != NULL)
1099       add_link(name);
1100 
1101     if (t->child != NULL)
1102       scan_links(t->child);
1103 
1104     t = t->next;
1105   }
1106 }
1107 
1108 
1109 /*
1110  * 'update_links()' - Update links as needed.
1111  */
1112 
1113 static void
update_links(tree_t * t,int * heading)1114 update_links(tree_t *t,		/* I - Document tree */
1115              int    *heading)	/* I - Current heading */
1116 {
1117   link_t	*link;		/* Link */
1118   uchar		*href;		/* Reference name */
1119   uchar		newhref[1024];	/* New reference name */
1120   uchar		*filename;	/* Current filename */
1121 
1122 
1123   // Scan the document, rewriting HREF's as needed...
1124   while (t != NULL)
1125   {
1126     if (t->markup >= MARKUP_H1 && t->markup < (MARKUP_H1 + TocLevels) &&
1127         htmlGetVariable(t, (uchar *)"_HD_OMIT_TOC") == NULL && heading)
1128       (*heading) ++;
1129 
1130     // Figure out the current filename based upon the current heading number...
1131     if (!heading || *heading < 0 || (size_t)*heading >= num_headings)
1132       filename = (uchar *)"noheading";
1133     else
1134       filename = headings[*heading];
1135 
1136     if (t->markup == MARKUP_A &&
1137         (href = htmlGetVariable(t, (uchar *)"HREF")) != NULL)
1138     {
1139       // Update this link as needed...
1140       if (href[0] == '#' && (link = find_link(href)) != NULL)
1141       {
1142         // The filename in the link structure is a copy of the heading
1143 	// pointer...
1144         if (filename != link->filename)
1145 	{
1146 	  // Rewrite using the new name...
1147 	  snprintf((char *)newhref, sizeof(newhref), "%s.html%s",
1148 	           link->filename, href);
1149 	  htmlSetVariable(t, (uchar *)"HREF", newhref);
1150 	}
1151       }
1152     }
1153 
1154     // Descend the tree as needed...
1155     if (t->child != NULL)
1156       update_links(t->child, heading);
1157 
1158     // Move to the next node at this level...
1159     t = t->next;
1160   }
1161 }
1162