1 /*  File: helpsubs.c
2  *  Author: Fred Wobus (fw@sanger.ac.uk)
3  *  Copyright (C) J Thierry-Mieg and R Durbin, 1998
4  *-------------------------------------------------------------------
5  * This file is part of the ACEDB genome database package, written by
6  * 	Richard Durbin (MRC LMB, UK) rd@sanger.ac.uk, and
7  *	Jean Thierry-Mieg (CRBM du CNRS, France) mieg@kaa.cnrs-mop.fr
8  *
9  * SCCS: %W% %G%
10  * Description: controls the help system, provides HTML parsing
11  * Exported functions:
12  * HISTORY:
13  * Last edited: Dec  4 14:30 1998 (fw)
14  * * Oct 12 12:27 1998 (fw): checkSubject now case-insensitive
15  * * Oct  8 17:23 1998 (fw): removed warning, in case that
16     an open-list tag (e.g. <UL> was directly followed by a close-list
17     tag (e.g. </UL>). The warning tried to enforce that
18     every type of list only has a certain type of items.
19  * * Oct  8 11:36 1998 (fw): helpSubjectGetFilename takes over logic
20            from readHelpfile to locate the file containing the
21 	   help for a particular subject
22  * Created: Tue Aug 18 16:11:07 1998 (fw)
23  *-------------------------------------------------------------------
24  */
25 
26 #include "help_.h"
27 
28 /************************************************************/
/* Forward declarations of file-local helpers, so that they can be
   called before their definitions. */
static char     *makeHtmlIndex (STORE_HANDLE handle);
static char     *makeHtmlImagePage (char *link, STORE_HANDLE handle);
static HtmlNode *parseHtmlText (char *text, STORE_HANDLE handle);
static BOOL      parseSection  (char **cp, HtmlNode **resultnode,
				STORE_HANDLE handle);
34 /************************************************************/
35 
36 
/************ directory where help files are stored *********/
/* An empty string means "not initialised yet": helpGetDir() then
   calls helpSetDir(0) to fill it in. */
static char helpDir[MAXPATHLEN] = "" ;
39 
40 
41 /************************************************************/
42 /* function to register the helpOnRoutine
43    This can be called at any stage (before the first helpOn,
44    or later on, it will affect the system next time helpOn
45    is called. */
46 /************************************************************/
47 static QueryRoutine helpOnRoutine = 0;
helpOnRegister(QueryRoutine func)48 UTIL_FUNC_DEF QueryRoutine helpOnRegister (QueryRoutine func)
49 /* call with func = 0x0 just to check whether
50    anything has been registered yet */
51 {
52   QueryRoutine old = helpOnRoutine ;
53 
54   if (func)
55     helpOnRoutine = func ;
56 
57   return old ;
58 }
59 
60 
61 
62 /************************************************************/
63 /* Sets the helpDir;  */
64 /************************************************************/
helpSetDir(char * dirname)65 UTIL_FUNC_DEF char *helpSetDir (char *dirname)
66 {
67   if (dirname)
68     {
69       strcpy (helpDir, dirname);
70 
71       if (filName (dirname,0,"rd"))
72 	return (char*)&helpDir[0];
73       else
74 	return (char*)0;
75     }
76   else
77     {
78       strcpy (helpDir, filGetFullPath ("whelp"));
79 
80       if (filName (helpDir, 0, "rd"))
81 	return (char*)&helpDir[0];
82     }
83 
84   return (char*)0;
85 } /* helpGetDir */
86 
87 
88 
89 /************************************************************/
90 /* return the current helpDirectory or
91    initialise if not previously set */
helpGetDir(void)92 UTIL_FUNC_DEF char *helpGetDir (void)
93 {
94   if (!*helpDir)
95     return (helpSetDir(0)) ;
96 
97   return (char*)&helpDir[0];
98 } /* helpGetDir */
99 
100 
101 
102 /************************************************************/
103 /* pop up help on the given subject, depending on the registered
104    display function, that will be textual, in the built-in
105    simple HTML browser or even launch an external browser
106    to display the help document */
107 /************************************************************/
helpOn(char * subject)108 UTIL_FUNC_DEF BOOL helpOn (char *subject)
109 {
110   char *helpFilename;
111 
112   if (!helpGetDir() || !filName(helpGetDir(), "", "rd"))
113     {
114       messout ("Sorry, No help available ! "
115 	       "Could not open the HTML help directory "
116 	       "%s\n"
117 	       "(%s)",
118 	       helpGetDir(),
119 	       messSysErrorText());
120 
121       return FALSE;
122     }
123 
124   helpFilename = helpSubjectGetFilename(subject);
125   /* may be NULL if file could not be found,
126      the registered helpOnRoutine has to cope
127      with this case and may decide to display an
128      index instead */
129 
130   if (helpOnRoutine)
131     return ((*helpOnRoutine)(helpFilename));
132 
133 
134   return (helpPrint (helpFilename)); /* textual help as default */
135 } /* helpOn */
136 /************************************************************/
137 
138 
helpSubjectGetFilename(char * subject)139 UTIL_FUNC_DEF char *helpSubjectGetFilename (char *subject)
140 /* this function attempts to find the file name corresponding
141    to a particular help-subject.
142    It will attempt to find a matching file according to
143    the current settings of helpDir and HELP_FILE_EXTENSION.
144 
145    the subject '?' will just return ? again. This is a special
146    code within the help system to tell the help display
147    function that the user required some kind of help.
148    Usually the helpOnRegister'd function would display a
149    dynamically created index of the help-directory.
150 
151    this function can be even cleverer by doing keyword searches
152    on <TITLE> and <H1> strings in files that might be relevant
153    of no obvious match is found.
154 */
155 {
156   static char filename_array[MAXPATHLEN] = "";
157   char *filename = &filename_array[0];
158   char *subject_copy;
159   Array dirList;
160 
161   if (subject == NULL)
162     return NULL;
163 
164   if (strlen(subject) == 0)
165     return NULL;
166 
167   if (strcmp(subject, "?") == 0)
168     {
169       /* return ? to signal that the calling
170 	 function needs to display a dynamically
171 	 created index or show some kind of help.
172        */
173       /* if the construct
174 	 page = htmlPageCreate(helpGetFilename(subject_requested));
175 	 is used, the resulting page will therefor be a marked up
176 	 directory listing of helpsubjects
177        */
178       strcpy (filename, "?");
179       return filename;
180     }
181 
182   subject_copy = strnew (subject, 0);
183 
184   strcpy (filename, "");	/* intialise, if this is
185 				   non-empty at the end of the loop,
186 				   we found a matching helpfile */
187   while (TRUE)
188     {
189       /* simple attempt to locate file - path/helpDir/subject.html */
190       sprintf(filename, "%s%s%s.%s",
191 	      filGetFullPath(helpGetDir()),
192 	      SUBDIR_DELIMITER_STR,
193 	      subject_copy, HELP_FILE_EXTENSION);
194 
195       if (filName(filename, 0, "r"))
196 	break;
197 
198       /* advanced attempt, try to find a matching file from
199 	 the list of available ones by scanning the directory
200 	 contents of the helpdirectory */
201       if ((dirList = filDirectoryCreate
202 	   (helpGetDir(), HELP_FILE_EXTENSION, "r")) )
203 	{
204 	  int i;
205 	  int matches;
206 	  char *s;
207 
208 	  /* first look for an exact case-insensitive match */
209 	  strcpy (filename, "");
210 	  for (i = 0 ; i < arrayMax(dirList) ; i++)
211 	    {
212 	      s = arr(dirList,i,char*);
213 
214 	      if (strcasecmp (s, subject_copy) == 0)
215 		{
216 		  sprintf(filename, "%s%s%s.%s",
217 			  filGetFullPath(helpGetDir()),
218 			  SUBDIR_DELIMITER_STR,
219 			  s, HELP_FILE_EXTENSION);
220 		  if (filName(filename, 0, "r"))
221 		    break;	/* exit for-loop */
222 
223 		  strcpy (filename, "");
224 		}
225 	    }
226 
227 	  if (strlen(filename) > 0)
228 	    break;		/* exit while(true) loop */
229 
230 	  /* count the number of filenames starting with the
231 	     given subject string */
232 	  matches = 0;
233 	  for (i = 0 ; i < arrayMax(dirList) ; i++)
234 	    {
235 	      s = arr(dirList,i,char*);
236 
237 	      if (strncasecmp (s, subject_copy,
238 			       strlen(subject_copy)) == 0)
239 		{
240 		  sprintf(filename, "%s%s%s.%s",
241 			  filGetFullPath(helpGetDir()),
242 			  SUBDIR_DELIMITER_STR,
243 			  s, HELP_FILE_EXTENSION);
244 		  ++matches;
245 		}
246 	    }
247 
248 	  if (matches == 0)
249 	    {
250 	      strcpy (filename, ""); /* not found */
251 	    }
252 	  else if (matches == 1)
253 	    {
254 	      /* the one exact match (already in filename string)
255 		 is the complete filename */
256 	      if (filName(filename, 0, "r"))
257 		break;		/* exit while(true) loop */
258 	    }
259 	  else if (matches > 1)
260 	    {
261 	      /* construct a filename that we know won't work.
262 		 But it may be used by the help display
263 		 function to give a meaningful message
264 		 to say that this subject is ambiguos.
265 		 The returned filename is then considered
266 		 a template, similar to 'ls subject*'
267 		 so the help-display function may give a list
268 		 of possible matching subjects. */
269 
270 	      sprintf(filename, "%s%s%s",
271 		      filGetFullPath(helpGetDir()),
272 		      SUBDIR_DELIMITER_STR, subject_copy);
273 	      break;
274 	    }
275 
276 	  filDirectoryDestroy (dirList);
277 
278 	} /* endif dirList */
279 
280       /* file didn't exist, whichever way we tried so far,
281 	 so we try to chop off the last bit of the subject name.
282 	 In case trySubject was "Tree_Clone_Inside", we now
283 	 go through the look again with "Tree_Clone" and re-try. */
284 
285       if (strchr (subject_copy, '_'))
286 	{
287 	  int j;
288 
289 	  j = strlen (subject_copy);
290 	  while (subject_copy[j--] != '_') ; /* find the last _ char */
291 	  subject_copy[j + 1] = '\0';
292 	}
293       else
294 	{
295 	  /* If we run out of trailing components, then we exit
296 	   * anyway.
297 	   */
298 	  strcpy (filename, "");
299 	  break;		/* exit while(true)loop */
300 	}
301     } /* end-while(true) */
302 
303   messfree (subject_copy);
304 
305 
306   if (strcmp(filename, "") != 0)
307     return filename;		/* success */
308 
309   if ((strcasecmp(subject, "index") == 0) ||
310       (strcasecmp(subject, "home") == 0) ||
311       (strcasecmp(subject, "toc") == 0))
312     {
313       /* we asked for some kind of index-page but couldn't find it,
314 	 so we can always try to return the question mark '?'
315 	 which will ask the calling function to display a
316 	 dynamically created index of help-subjects. */
317 
318       strcpy (filename, "?");
319       return filename;
320     }
321 
322 
323   return NULL;			/* failure - no file found */
324 } /* helpSubjectGetFilename */
325 
326 
327 /************************************************************/
328 /* helpPackage utility to find out the filename of a given
329    link reference. Absolute filenames are returned unchanged,
330    but relative filenames are expanded to be the full path
331    of the helpfile. Can be used for html/gif files referred to
332    by the HREF of anchor tags or the SRC or IMG tags */
333 
334 /* NOTE: the pointer returned is a static copy, which is
335    re-used everytime it is called. If the calling function
336    wants to mess about with the returned string, a copy
337    has to be made.
338    NULL is returned if the resulting file can't be opened.
339    the calling function can inspect the result of
340    messSysErrorText(), the report the resaon for failure */
341 /************************************************************/
helpLinkGetFilename(char * link)342 UTIL_FUNC_DEF char *helpLinkGetFilename (char *link)
343 {
344   static char link_path_array[MAXPATHLEN] = "";
345   char *link_path = &link_path_array[0];
346 
347   if (link[0] == SUBDIR_DELIMITER) /* absolute path (UNIX) */
348     {
349       strcpy (link_path, link);
350     }
351   else				/* relative path */
352     {
353       strcpy (link_path, helpGetDir());
354       strcat (link_path, SUBDIR_DELIMITER_STR);
355       strcat (link_path, link);
356     }
357 
358   if (filName(link_path, "", "r"))
359     return link_path;
360 
361   return NULL;
362 } /* helpLinkGetFilename */
363 
364 
365 /************************************************************/
366 /******************                   ***********************/
367 /************** private helpPackage functions ***************/
368 /******************                   ***********************/
369 /************************************************************/
370 
371 
htmlPageCreate(char * helpFilename)372 HtmlPage *htmlPageCreate (char *helpFilename)
373 /* complemeted by htmlPageDestroy */
374 {
375   FILE *fil;
376   HtmlPage *page = 0;
377 
378   if (!helpFilename)		/* we could get a NULL filename */
379     return 0;                   /* here, which might come from
380 				   helpSubjectGetFilename() that couldn't
381 				   find a file matching the subject */
382 
383   /* create a page with a marked up directory listing */
384   if (strcmp(helpFilename, "?") == 0)
385     {
386       page = messalloc (sizeof(HtmlPage));
387       page->handle = handleCreate();
388       page->htmlText = makeHtmlIndex(page->handle);
389       if (!(page->root = parseHtmlText(page->htmlText, page->handle)))
390 	htmlPageDestroy(page);
391 
392       return page;
393     }
394 
395   if (!(filName(helpFilename, "", "r")))
396     return 0;			/* prevent error caused
397 				   by unsucsessful filopen */
398 
399 
400   /* create a page inlining the image */
401   if (strcasecmp (helpFilename + (strlen(helpFilename)-4), ".gif") == 0)
402     {
403       page = messalloc (sizeof(HtmlPage));
404       page->handle = handleCreate();
405       page->htmlText = makeHtmlImagePage(helpFilename, page->handle);
406       if (!(page->root = parseHtmlText(page->htmlText, page->handle)))
407 	htmlPageDestroy(page);
408 
409       return page;
410     }
411 
412 
413   /* assume HTML page */
414   if ((fil = filopen(helpFilename, "", "r")))
415     {
416       page = htmlPageCreateFromFile (fil);
417       filclose (fil);
418     }
419 
420   return page;
421 } /* htmlPageCreate */
422 /************************************************************/
423 
htmlPageCreateFromFile(FILE * fil)424 HtmlPage *htmlPageCreateFromFile (FILE *fil)
425 {
426   HtmlPage *page;
427   int fileSize;
428 
429   if (!fil)
430     return (HtmlPage*)0;
431 
432   /* determine filesize */
433   rewind (fil);
434   fseek (fil, 0, SEEK_END);
435   fileSize = ftell (fil);
436   rewind (fil);
437 
438   if (fileSize == 0)
439     return (HtmlPage*)0;
440 
441   /* if we have a positive fileSize, we are pretty much
442      guaranteed, that we'll get some HTML text and a parsetree */
443 
444   page = messalloc (sizeof(HtmlPage));
445 
446   page->handle = handleCreate();
447 
448   /* grab the contents of the file */
449   page->htmlText = halloc ((fileSize + 1) * sizeof(char), page->handle);
450   fread (page->htmlText, sizeof (char), fileSize, fil);
451   page->htmlText[fileSize] = '\0'; /* add string terminator */
452 
453   /* get parsetree */
454   page->root = parseHtmlText(page->htmlText, page->handle);
455 
456   return page;
457 } /* htmlPageCreateFromFile */
458 /************************************************************/
459 
void htmlPageDestroy (HtmlPage *page)
/* Release a page obtained from htmlPageCreate() or
   htmlPageCreateFromFile().  A null page is ignored. */
{
  if (page)
    {
      /* everything allocated on the page's handle goes first ... */
      handleDestroy (page->handle);

      /* ... then the page structure itself */
      messfree (page);
    }
} /* htmlPageDestroy */
472 /************************************************************/
473 
void stripSpaces (char *cp)
/* utility to get rid of multiple spaces from a string, in place.
   Every white-space character becomes a blank, every run of
   blanks is collapsed into a single one, and a trailing blank is
   removed.  A single leading blank is kept.
   A NULL or empty string is left alone.

   we use it on node->text, where the text isn't within <PRE> tags */
{
  char *src ;			/* next char to examine */
  char *dst ;			/* where the next kept char goes */
  int lastWasSpace = 0 ;

  if (!cp)
    return ;

  /* single pass; dst never overtakes src, so nothing that is
     still to be read gets overwritten */
  for (src = dst = cp ; *src ; ++src)
    {
      /* the <ctype.h> functions need an unsigned char value */
      if (isspace ((unsigned char)*src))
	{
	  if (!lastWasSpace)
	    *dst++ = ' ' ;
	  lastWasSpace = 1 ;
	}
      else
	{
	  *dst++ = *src ;
	  lastWasSpace = 0 ;
	}
    }

  if (dst > cp && dst[-1] == ' ')
    --dst ;			/* drop the trailing blank */

  *dst = '\0' ;

  return ;
} /* stripSpaces */
496 
497 
498 
499 /************************************************************/
500 /******************                   ***********************/
501 /****************** static functions  ***********************/
502 /******************                   ***********************/
503 /************************************************************/
504 
505 
506 
507 
508 /************************************************************/
509 /* as the helpviewer supports inlined images, it is easy
510    to display image, even when they're not inlined as in
511    <A HREF=image.gif>click here for image</A>.
512    We just return a container page, that inlines the image */
513 /************************************************************/
makeHtmlImagePage(char * link,STORE_HANDLE handle)514 static char *makeHtmlImagePage (char *link, STORE_HANDLE handle)
515 {
516   char *text;
517   int len;
518 
519   len = 0;
520   len = 7+6+strlen(filGetFilename(link))+8+10+strlen(link)+2;
521 
522   text = halloc((len+1)*sizeof(char), handle);
523 
524   sprintf (text,
525 	   "<TITLE>Image %s</TITLE>"
526 	   "<IMG SRC=\"%s\">", filGetFilename(link), link);
527 
528   text[len] = 0;
529 
530   return text;
531 } /* makeHtmlImagePage */
532 
533 
534 
535 /************************************************************/
536 /* reads the directory of helpDir and constructs an HTML-page
537    containing a <UL>-list of all HTML-files in helpDir */
538 /************************************************************/
makeHtmlIndex(STORE_HANDLE handle)539 static char *makeHtmlIndex (STORE_HANDLE handle)
540 {
541   char *cp, *text, *s ;
542   int i, len ;
543   Array dirList;
544 
545   if(!(dirList = filDirectoryCreate
546        (helpGetDir(), HELP_FILE_EXTENSION, "r")) )
547     {
548       messout ("Can't open help directory %s\n"
549 	       "(%s)",
550 	       helpDir, messSysErrorText()) ;
551 
552       return 0 ;
553     }
554 
555   len = 0 ;
556 
557   /* determine the length of the text to be returned */
558   len += 39+15+5+6 ;			/* for header */
559   for (i = 0 ; i < arrayMax(dirList) ; i++)
560     {
561       s = arr(dirList,i,char*) ;
562       len += strlen(s)*2 + strlen(HELP_FILE_EXTENSION) + 19;
563       /* this is the length of each line as written
564 	 to the string by sprintf(cp,"<LI>...") below */
565     }
566 
567   text = (char*)halloc ((len+1) * sizeof(char), handle) ;
568   cp = text ;
569 
570   sprintf (cp,
571 	   "<TITLE>Index of Help Directory</TITLE>\n"
572 	   "<H1>Index</H1>\n"
573 	   "<UL>\n") ;
574   cp += 39+15+5 ;
575 
576   for (i = 0 ; i < arrayMax(dirList) ; i++)
577     {
578       s = arr(dirList, i, char*) ;
579       sprintf (cp, "<LI><A HREF=%s.%s>%s</A>\n",
580 	       s, HELP_FILE_EXTENSION, s) ;
581       cp += strlen(s)*2 + strlen(HELP_FILE_EXTENSION) + 19;
582     }
583   sprintf (cp, "</UL>\n") ;
584   text[len] = 0 ;
585 
586   filDirectoryDestroy (dirList) ;
587 
588   return text ;
589 } /* makeHtmlIndex */
590 /************************************************************/
591 
592 
593 
594 
/*************************************************************
 *****************  HTML Parsing package *********************
 *** Currently a very crude parser: it will fall over any  ***
 *** bad HTML, whether Mosaic, Netscape or MSIE can deal   ***
 *** with it or not.                                       ***
 ************************************************************/
600 
601 
parseHtmlText(char * text,STORE_HANDLE handle)602 static HtmlNode *parseHtmlText(char *text, STORE_HANDLE handle)
603 /* return root node of html parse-tree,
604    generated from the HTML source text */
605 {
606   char *cp = text;
607   HtmlNode *node;
608 
609   if (!text) return 0;
610 
611   /* start recursion */
612   parseSection (&cp, &node, handle) ;
613 
614   return node;			/* return root-node */
615 } /* parseHtmlText */
616 /************************************************************/
617 
static void skipSpaces (char **cp)
/* advance *cp past any white-space; stops at the first other
   character or at the end of the string */
{
  /* the <ctype.h> functions need an unsigned char value, a plain
     char may be negative for 8-bit characters */
  while (**cp && isspace((unsigned char)**cp)) { ++(*cp) ; }
} /* skipSpaces */
622 
623 /************************************************************/
624 
static void replaceEscapeCodes (char *cp)
/* Replace the supported HTML escape codes in cp by the character
   they stand for, in place.  Codes are recognised regardless of
   case and only when complete, including the ';'.  A character
   produced by a replacement is not examined again, so "&amp;lt;"
   becomes "&lt;".  A NULL string is ignored.

   quotation mark                       &#34;  --> "    &quot;   --> "
   ampersand                            &#38;  --> &    &amp;    --> &
   less-than sign                       &#60;  --> <    &lt;     --> <
   greater-than sign                    &#62;  --> >    &gt;     --> >
   non-breaking space                                   &nbsp;   --> blank
*/
{
  static const struct
  {
    const char *code ;
    int len ;
    char ch ;
  } escapes[] =
    {
      { "&#34;",  5, '"' },
      { "&#38;",  5, '&' },
      { "&#60;",  5, '<' },
      { "&#62;",  5, '>' },
      { "&quot;", 6, '"' },
      { "&amp;",  5, '&' },
      { "&lt;",   4, '<' },
      { "&gt;",   4, '>' },
      { "&nbsp;", 6, ' ' }
    } ;
  const int numEscapes = (int)(sizeof (escapes) / sizeof (escapes[0])) ;
  char *src ;			/* next char to examine */
  char *dst ;			/* where the next output char goes */
  int i ;

  if (!cp)
    return ;

  /* The text only ever shrinks, so dst never overtakes src and a
     single pass over the string is enough. */
  src = dst = cp ;

  while (*src)
    {
      i = numEscapes ;		/* means "no code found" */

      if (*src == '&')
	for (i = 0 ; i < numEscapes ; ++i)
	  if (strncasecmp (src, escapes[i].code, escapes[i].len) == 0)
	    break ;

      if (i < numEscapes)
	{
	  *dst++ = escapes[i].ch ;
	  src += escapes[i].len ;
	}
      else
	*dst++ = *src++ ;
    }

  *dst = '\0' ;

  return ;
} /* replaceEscapeCodes */
691 /************************************************************/
692 
makeNode(HtmlNodeType type,STORE_HANDLE handle)693 static HtmlNode *makeNode (HtmlNodeType type, STORE_HANDLE handle)
694 /* allocate a node and initialise the type */
695 {
696   HtmlNode *newnode ;
697 
698   newnode = (HtmlNode*)halloc (sizeof(HtmlNode), handle) ;
699   newnode->type = type ;
700 
701   return (newnode) ;
702 } /* makeNode */
703 /************************************************************/
704 
parseHtml(char ** cp,HtmlNode ** resultnode,STORE_HANDLE handle)705 static BOOL parseHtml (char **cp, HtmlNode **resultnode, STORE_HANDLE handle)
706 {
707   HtmlNode *node, *leftnode ;
708 
709   *cp += 6 ;			/* skip <HTML> */
710 
711   skipSpaces (cp) ;
712 
713   node = makeNode (HTML_DOC, handle) ;
714 
715   if (!(parseSection (cp, &leftnode, handle)))
716     {
717       printf ("Warning : text inside <HTML> not valid !!\n") ;
718     }
719 
720   skipSpaces (cp) ;
721 
722   if (strncasecmp (*cp, "</HTML>", 7) == 0)
723     {
724       *cp += 7 ;
725     }
726   else
727     {
728       printf ("Warning : <HTML> tag not closed by </HTML> !!\n") ;
729     }
730 
731   node->left = leftnode ;
732   node->right = 0 ;
733 
734   *resultnode = node ;
735 
736   return TRUE ;
737 } /* parseHtml */
738 /************************************************************/
739 
parseHead(char ** cp,HtmlNode ** resultnode,STORE_HANDLE handle)740 static BOOL parseHead (char **cp, HtmlNode **resultnode, STORE_HANDLE handle)
741 {
742   HtmlNode *node, *leftnode ;
743 
744   *cp += 6 ;			/* skip <HEAD> */
745 
746   skipSpaces (cp) ;
747 
748   node = makeNode (HTML_HEAD, handle) ;
749 
750   if (!(parseSection (cp, &leftnode, handle)))
751     {
752       printf ("Warning : HTML inside <head> not valid !!\n") ;
753     }
754 
755   skipSpaces (cp) ;
756 
757   if (strncasecmp (*cp, "</HEAD>", 7) == 0)
758     {
759       *cp += 7 ;
760     }
761   else
762     {
763       printf ("Warning : <HEAD> tag not closed by </HEAD> !!\n") ;
764     }
765 
766   node->left = leftnode ;
767   node->right = 0 ;
768 
769   *resultnode = node ;
770 
771   return TRUE ;
772 } /* parseHead */
773 /************************************************************/
774 
parseBody(char ** cp,HtmlNode ** resultnode,STORE_HANDLE handle)775 static BOOL parseBody (char **cp, HtmlNode **resultnode, STORE_HANDLE handle)
776 {
777   HtmlNode *node, *leftnode ;
778 
779   *cp += 6 ;			/* skip <BODY> */
780 
781   skipSpaces (cp) ;
782 
783   node = makeNode (HTML_BODY, handle) ;
784 
785   if (!(parseSection (cp, &leftnode, handle)))
786     {
787       printf ("Warning : HTML inside <BODY> not valid !!\n") ;
788     }
789 
790   skipSpaces (cp) ;
791 
792   if (strncasecmp (*cp, "</BODY>", 7) == 0)
793     {
794       *cp += 7 ;
795     }
796   else
797     {
798       printf ("Warning : <BODY> tag not closed by </BODY> !!\n") ;
799     }
800 
801   node->left = leftnode ;
802   node->right = 0 ;
803 
804   *resultnode = node ;
805 
806   return TRUE ;
807 } /* parseBody */
808 /************************************************************/
809 
parseComment(char ** cp,HtmlNode ** resultnode,STORE_HANDLE handle)810 static BOOL parseComment (char **cp, HtmlNode **resultnode, STORE_HANDLE handle)
811 {
812   HtmlNode *node ;
813   int len ;
814   char *start ;
815 
816   *cp += 4 ;			/* skip <!-- */
817 
818   start = *cp ;
819   while (**cp && **cp != '>') { ++(*cp) ; }
820 
821   if (!**cp)
822     {
823       *resultnode = 0 ;
824       return FALSE ;
825     }
826 
827   node = makeNode (HTML_COMMENT, handle) ;
828 
829   len = *cp-start ;
830 
831   ++(*cp) ;			/* skip '>' */
832 
833   node->text = (char*)halloc ((len+1) * sizeof(char), handle) ;
834 
835   strncpy (node->text, start, len) ;
836   node->text[len] = 0 ;
837 
838   *resultnode = node ;
839 
840   return TRUE ;
841 } /* parseComment */
842 /************************************************************/
843 
parseTitle(char ** cp,HtmlNode ** resultnode,STORE_HANDLE handle)844 static BOOL parseTitle (char **cp, HtmlNode **resultnode, STORE_HANDLE handle)
845 {
846   HtmlNode *node ;
847   int len, numspaces=0 ;
848   char *start ;
849 
850   *cp += 7 ;			/* skip <TITLE> */
851 
852   skipSpaces (cp) ;
853 
854   start = *cp ;
855 
856   while (**cp)
857     {
858       if (strncasecmp (*cp, "</title>", 8) == 0)
859 	break ;
860       if (isspace((int)**cp))
861 	++numspaces ;
862       else
863 	numspaces = 0 ;
864       ++(*cp) ;
865     }
866 
867   node = makeNode (HTML_TITLE, handle) ;
868 
869   len = (*cp-start) - numspaces ;
870 
871   if (**cp)
872     *cp += 8 ;
873 
874   node->text = (char*)halloc ((len+1) * sizeof(char), handle);
875 
876   strncpy (node->text, start, len) ;
877   node->text[len] = 0 ;
878 
879   *resultnode = node ;
880 
881   return TRUE ;
882 } /* parseTitle */
883 /************************************************************/
884 
parseHeader(char ** cp,HtmlNode ** resultnode,STORE_HANDLE handle)885 static BOOL parseHeader (char **cp, HtmlNode **resultnode, STORE_HANDLE handle)
886 {
887   HtmlNode *node, *leftnode ;
888   int level ;
889 
890   level = (*cp)[2]-'0' ;
891 
892   *cp += 4 ;			/* skip <H?> */
893 
894   skipSpaces (cp) ;
895 
896   node = makeNode (HTML_HEADER, handle) ;
897   node->hlevel = level ;
898 
899   if (!(parseSection (cp, &leftnode, handle)))
900     {
901       printf ("Warning : heading%d text not valid !!\n", level) ;
902     }
903 
904   skipSpaces (cp) ;
905 
906   if ((strncasecmp (*cp, "</H", 3) == 0) &&
907       (*cp)[3]-'0' == level && (*cp)[4] == '>')
908     {
909       *cp += 5 ;
910     }
911   else
912     {
913       printf ("Warning : <H%d> tag not closed by </H%d> !!\n", level, level) ;
914     }
915 
916   node->left = leftnode ;
917   node->right = 0 ;
918 
919   *resultnode = node ;
920 
921   return TRUE ;
922 } /* parseHeader */
923 /************************************************************/
924 
parseCode(char ** cp,HtmlNode ** resultnode,STORE_HANDLE handle)925 static BOOL parseCode (char **cp, HtmlNode **resultnode, STORE_HANDLE handle)
926 {
927   HtmlNode *node, *leftnode ;
928 
929   *cp += 6 ;			/* skip <CODE> */
930 
931   skipSpaces (cp) ;
932 
933   node = makeNode (HTML_CODE_STYLE, handle) ;
934 
935   if (!(parseSection (cp, &leftnode, handle)))
936     {
937       printf ("Warning : <code> text not valid !!\n") ;
938     }
939 
940   skipSpaces (cp) ;
941 
942   if (strncasecmp (*cp, "</CODE>", 7) == 0)
943     {
944       *cp += 7 ;
945     }
946   else
947     {
948       printf ("Warning : <CODE> tag not closed by </CODE> !!\n") ;
949     }
950 
951   node->left = leftnode ;
952   node->right = 0 ;
953 
954   *resultnode = node ;
955 
956   return TRUE ;
957 } /* parseCode */
958 /************************************************************/
959 
parseBold(char ** cp,HtmlNode ** resultnode,STORE_HANDLE handle)960 static BOOL parseBold (char **cp, HtmlNode **resultnode, STORE_HANDLE handle)
961 {
962   HtmlNode *node, *leftnode ;
963 
964   *cp += 3 ;			/* skip <B> */
965 
966   skipSpaces (cp) ;
967 
968   node = makeNode (HTML_BOLD_STYLE, handle) ;
969 
970   if (!(parseSection (cp, &leftnode, handle)))
971     {
972       printf ("Warning : HTML inside <B> not valid !!\n") ;
973     }
974 
975   skipSpaces (cp) ;
976 
977   if (strncasecmp (*cp, "</B>", 3) == 0)
978     {
979       *cp += 4 ;
980     }
981   else
982     {
983       printf ("Warning : <B> tag not closed by </B> !!\n") ;
984     }
985 
986   node->left = leftnode ;
987   node->right = 0 ;
988 
989   *resultnode = node ;
990 
991   return TRUE ;
992 } /* parseBold */
993 /************************************************************/
994 
parseStrong(char ** cp,HtmlNode ** resultnode,STORE_HANDLE handle)995 static BOOL parseStrong (char **cp, HtmlNode **resultnode, STORE_HANDLE handle)
996 {
997   HtmlNode *node, *leftnode ;
998 
999   *cp += 8 ;			/* skip <STRONG> */
1000 
1001   skipSpaces (cp) ;
1002 
1003   node = makeNode (HTML_STRONG_STYLE, handle) ;
1004 
1005   if (!(parseSection (cp, &leftnode, handle)))
1006     {
1007       printf ("Warning : strong text not valid !!\n") ;
1008     }
1009 
1010   skipSpaces (cp) ;
1011 
1012   if (strncasecmp (*cp, "</STRONG>", 9) == 0)
1013     {
1014       *cp += 9 ;
1015     }
1016   else
1017     {
1018       printf ("Warning : <STRONG> tag not closed by </STRONG> !!\n") ;
1019     }
1020 
1021   node->left = leftnode ;
1022   node->right = 0 ;
1023 
1024   *resultnode = node ;
1025 
1026   return TRUE ;
1027 } /* parseStrong */
1028 /************************************************************/
1029 
parseItalic(char ** cp,HtmlNode ** resultnode,STORE_HANDLE handle)1030 static BOOL parseItalic (char **cp, HtmlNode **resultnode, STORE_HANDLE handle)
1031 {
1032   HtmlNode *node, *leftnode ;
1033 
1034   *cp += 3 ;			/* skip <I> */
1035 
1036   skipSpaces (cp) ;
1037 
1038   node = makeNode (HTML_ITALIC_STYLE, handle) ;
1039 
1040   if (!(parseSection (cp, &leftnode, handle)))
1041     {
1042       printf ("Warning : bold text not valid !!\n") ;
1043     }
1044 
1045   skipSpaces (cp) ;
1046 
1047   if (strncasecmp (*cp, "</I>", 3) == 0)
1048     {
1049       *cp += 4 ;
1050     }
1051   else
1052     {
1053       printf ("Warning : <I> tag not closed by </I> !!\n") ;
1054     }
1055 
1056   node->left = leftnode ;
1057   node->right = 0 ;
1058 
1059   *resultnode = node ;
1060 
1061   return TRUE ;
1062 } /* parseItalic */
1063 /************************************************************/
1064 
parseText(char ** cp,HtmlNode ** resultnode,STORE_HANDLE handle)1065 static BOOL parseText (char **cp, HtmlNode **resultnode, STORE_HANDLE handle)
1066 {
1067   HtmlNode *node ;
1068   int len ;
1069   char *start ;
1070 
1071   start = *cp ;
1072 
1073   while (**cp)
1074     {
1075       /* read until beginning of new TAG */
1076       if (strncasecmp (*cp, "<", 1) == 0)
1077 	break ;
1078       ++(*cp) ;
1079     }
1080 
1081   if (*cp == start)
1082     {
1083       /* an unknown tag had been reached, the text read until that
1084 	 will be of length zero, because parseSection() couldn't
1085 	 recognise it, and passed the text here, where it reads
1086 	 until it finds a '<', which it'll find imediately,
1087 	 so the length will be zero */
1088 
1089       while (**cp)
1090 	{
1091 	  /* read until beginning of new TAG */
1092 	  if (strncasecmp (*cp, ">", 1) == 0)
1093 	    break ;
1094 	  ++(*cp) ;
1095 	}
1096       ++(*cp) ;
1097 
1098       node = makeNode (HTML_UNKNOWN, handle) ;
1099 
1100       /* copy unknown tag into node->text */
1101       len = (*cp-start) ;
1102       node->text = (char*)halloc ((len+1) * sizeof(char), handle);
1103       strncpy (node->text, start, len);
1104       node->text[len] = 0 ;
1105 
1106       *resultnode = node ;
1107       return TRUE ;
1108     }
1109 
1110   node = makeNode (HTML_TEXT, handle) ;
1111 
1112   len = (*cp-start) ;
1113 
1114   node->text = (char*)halloc ((len+1) * sizeof(char), handle);
1115 
1116   strncpy (node->text, start, len) ;
1117   node->text[len] = 0 ;
1118 
1119   replaceEscapeCodes (node->text) ;
1120 
1121   *resultnode = node ;
1122 
1123   return TRUE ;
1124 } /* parseText */
1125 /************************************************************/
1126 
parseHref(char ** cp,HtmlNode ** resultnode,STORE_HANDLE handle)1127 static BOOL parseHref (char **cp, HtmlNode **resultnode, STORE_HANDLE handle)
1128 {
1129   HtmlNode *node, *leftnode ;
1130   int hlen = -1;		/* init for compiler happiness */
1131   int numspaces ;
1132   char *hstart = NULL;		/* init for compiler happiness */
1133   BOOL HAVE_HREF, IS_NAME_REF ;
1134 
1135   *cp += 2 ;			/* skip '<A' */
1136 
1137   skipSpaces (cp) ;
1138 
1139   IS_NAME_REF = FALSE ;
1140   if (strncasecmp (*cp, "HREF=", 5) == 0)
1141     {
1142       HAVE_HREF = TRUE ;
1143       *cp += 5 ;		/* skip 'HREF=' */
1144     }
1145   else if (strncasecmp (*cp, "NAME=", 5) == 0)
1146     {
1147 
1148       HAVE_HREF = TRUE ;
1149       IS_NAME_REF = TRUE ;
1150       *cp += 5 ;		/* skip 'NAME=' */
1151     }
1152   else
1153     {
1154       printf ("Warning : anchor tag <A without argument !!\n");
1155       HAVE_HREF = FALSE ;
1156     }
1157 
1158   if (HAVE_HREF)
1159     hstart = *cp ;
1160 
1161   /* parse the href destination or if no arg given
1162      just forward to next '>'*/
1163   numspaces = 0 ;
1164   while (**cp)
1165     {
1166       if (strncasecmp (*cp, ">", 1) == 0)
1167 	break ;
1168       if (isspace((int)**cp))
1169 	++numspaces ;
1170       else
1171 	numspaces = 0 ;
1172       ++(*cp) ;
1173     }
1174   if (HAVE_HREF)
1175     hlen = (*cp-hstart) - numspaces ;
1176 
1177   if (**cp)
1178     *cp += 1 ;			/* skip '>' */
1179 
1180   node = makeNode (HTML_HREF, handle) ;
1181 
1182   if (HAVE_HREF)
1183     {
1184       if ((hstart[0] == '"') && (hstart[hlen-1] == '"'))
1185 	{
1186 	  ++hstart ;
1187 	  hlen -= 2 ;
1188 	}
1189       node->isNameRef = IS_NAME_REF ;
1190 
1191       node->link = (char*)halloc ((hlen+1) * sizeof(char), handle);
1192 
1193       strncpy (node->link, hstart, hlen) ;
1194       node->link[hlen] = 0 ;
1195     }
1196   else
1197     node->link = 0 ;		/* no link then */
1198 
1199   if (!(parseSection (cp, &leftnode, handle)))
1200     {
1201       printf ("Warning : referenced text not valid !!\n") ;
1202     }
1203 
1204   skipSpaces (cp) ;
1205   if (strncasecmp (*cp, "</a>", 4) == 0)
1206     {
1207       *cp += 4 ;
1208     }
1209   else
1210     {
1211       printf ("Warning : anchor tag not closed by </A> !!\n") ;
1212     }
1213 
1214   node->left = leftnode ;
1215   node->right = 0 ;
1216 
1217   *resultnode = node ;
1218 
1219   return TRUE ;
1220 } /* parseHref */
1221 
1222 /************************************************************/
1223 
parseImage(char ** cp,HtmlNode ** resultnode,STORE_HANDLE handle)1224 static BOOL parseImage (char **cp, HtmlNode **resultnode, STORE_HANDLE handle)
1225 {
1226   HtmlNode *node ;
1227   int len, srclen, numspaces ;
1228   char *start, *s ;
1229   BOOL HAVE_SRC=FALSE ;
1230   *cp += 4 ;			/* skip '<IMG' */
1231 
1232   skipSpaces (cp) ;
1233 
1234   start = *cp ;
1235 
1236   /* read in the arguments list until next '>'*/
1237   numspaces = 0 ;
1238   while (**cp)
1239     {
1240       if (strncasecmp (*cp, ">", 1) == 0)
1241 	break ;
1242       if (isspace((int)**cp))
1243 	++numspaces ;
1244       else
1245 	numspaces = 0 ;
1246       ++(*cp) ;
1247     }
1248 
1249   /* the length of everything between the
1250      end of <IMG and the end of the args or the next > */
1251   len = (*cp-start) - numspaces ;
1252 
1253   if (**cp)
1254     *cp += 1 ;			/* skip '>' */
1255 
1256   /* now find the SRC= argument */
1257 
1258   s = start ;
1259   while (*s)
1260     {
1261       if (strncasecmp (s, "src=", 4) == 0)
1262 	{
1263 	  HAVE_SRC = TRUE ;
1264 	  break ;
1265 	}
1266       ++s ;
1267     }
1268   if (HAVE_SRC)
1269     {
1270       s += 4 ;			/* skip 'src=' */
1271       len -= 4;
1272 
1273       start = s ;
1274       srclen = 0 ;
1275 
1276       if (s[0] == '"')	/* if src in quotes then link ends with quote */
1277 	{
1278 	  s++ ; start++ ;
1279 	  while (*s && ++srclen < len && *s != '"')
1280 	    { ++(s) ; }
1281 	  --srclen;		/* discard the quote */
1282 	}
1283       else
1284 	{
1285 	  while (*s && ++srclen < len && !isspace((int)*s))
1286 	    { ++(s) ; }
1287 	}
1288 
1289       node = makeNode (HTML_GIFIMAGE, handle) ;
1290 
1291       /* save the file name of the image */
1292       node->link = (char*)halloc((srclen+1) * sizeof(char), handle);
1293 
1294       strncpy (node->link, start, srclen) ;
1295       node->link[srclen] = 0 ;
1296     }
1297   else
1298     {
1299       node = makeNode (HTML_UNKNOWN, handle) ;
1300     }
1301 
1302   *resultnode = node ;
1303 
1304   return TRUE ;
1305 } /* parseImage */
1306 /************************************************************/
1307 
parseListItem(HtmlListType style,char ** cp,HtmlNode ** resultnode,STORE_HANDLE handle)1308 static BOOL parseListItem (HtmlListType   style,
1309 			   char		**cp,
1310 			   HtmlNode     **resultnode,
1311 			   STORE_HANDLE   handle)
1312 {
1313   HtmlNode *node, *leftnode, *rightnode ;
1314   int lstyle = style ;
1315 
1316   skipSpaces (cp) ;
1317 
1318   /* check, whether the next tag is a valid listitem tag */
1319 
1320   /* with <DL> list <LI> and <DD> items are allowed */
1321   if (lstyle == HTML_LIST_NOINDENT &&
1322       !(strncasecmp (*cp, "<dd>", 4) == 0 ||
1323 	strncasecmp (*cp, "<li>", 4) == 0 ||
1324 	strncasecmp (*cp, "<dt>", 4) == 0))
1325     {
1326       *resultnode = 0 ;
1327       return FALSE ;
1328     }
1329   /* only <LI> items in <UL> or <OL> lists */
1330   else if ((lstyle == HTML_LIST_BULLET || lstyle == HTML_LIST_NUMBER) &&
1331 	   !(strncasecmp (*cp, "<li>", 4) == 0))
1332     {
1333       *resultnode = 0 ;
1334       return FALSE ;
1335     }
1336 
1337   if (lstyle == HTML_LIST_NOINDENT)
1338     {
1339       /* in <DL> list a <DD> item becomes indented but no bullet */
1340       if (strncasecmp (*cp, "<dd>", 4) == 0)
1341 	lstyle = HTML_LIST_NOBULLET ;
1342       else if (strncasecmp (*cp, "<dt>", 4) == 0)
1343 	lstyle = HTML_LIST_NOINDENT_NOBULLET ;
1344     }
1345   *cp += 4 ;
1346   /* now cp stands right after an <LI> and parses the following
1347      as a normal section */
1348 
1349   parseSection (cp, &leftnode, handle) ;
1350 
1351   node = makeNode (HTML_LISTITEM, handle) ;
1352 
1353   node->left = leftnode ;
1354   node->lstyle = lstyle ;
1355 
1356   if (parseListItem (style, cp, &rightnode, handle))
1357     {
1358       node->right = rightnode ;
1359     }
1360   else
1361     {
1362       node->right = 0 ;		/* no further list items */
1363     }
1364 
1365   *resultnode = node ;
1366 
1367   return TRUE ;
1368 } /* parseListItem */
1369 /************************************************************/
1370 
parseList(int style,char ** cp,HtmlNode ** resultnode,STORE_HANDLE handle)1371 static BOOL parseList (int style, char **cp, HtmlNode **resultnode, STORE_HANDLE handle)
1372 {
1373   HtmlNode *node, *leftnode ;
1374 
1375   *cp += 4 ;			/* skip <UL> */
1376 
1377 #ifdef ALLOW_SECONDLEVEL_LIST_LIST_DOESN_T_YET_WORK
1378   if (strncasecmp (*cp, "<ul>", 4) == 0 ||
1379       strncasecmp (*cp, "<ol>", 4) == 0 ||
1380       strncasecmp (*cp, "<dl>", 4) == 0)
1381     {
1382       /* create list item for this list-in-list */
1383       node = makeNode (HTML_LISTITEM, handle) ;
1384 
1385       node->left = leftnode ;
1386       node->lstyle = lstyle ;
1387 
1388     }
1389 #endif
1390 
1391   parseListItem (style, cp, &leftnode, handle);
1392 
1393   skipSpaces (cp) ;
1394 
1395   if ((style == HTML_LIST_BULLET && strncasecmp (*cp, "</ul>", 5) == 0) ||
1396       (style == HTML_LIST_NOINDENT && strncasecmp (*cp, "</dl>", 5) == 0) ||
1397       (style == HTML_LIST_NUMBER && strncasecmp (*cp, "</ol>", 5) == 0))
1398     {
1399       *cp += 5 ;		/* skip </ul> */
1400     }
1401   else
1402     {
1403       if (style == HTML_LIST_BULLET)
1404 	printf ("Warning : found <UL> without closing </UL> tag !!\n") ;
1405       else if (style == HTML_LIST_NOINDENT)
1406 	printf ("Warning : found <DL> without closing </DL> tag !!\n") ;
1407       else if (style == HTML_LIST_NUMBER)
1408 	printf ("Warning : found <OL> without closing </OL> tag !!\n") ;
1409     }
1410 
1411   node = makeNode (HTML_LIST, handle) ;
1412 
1413   node->left = leftnode ;
1414   node->lstyle = style ;
1415 
1416   *resultnode = node ;
1417 
1418   return TRUE ;
1419 } /* parseList */
1420 /************************************************************/
1421 
parseSection(char ** cp,HtmlNode ** resultnode,STORE_HANDLE handle)1422 static BOOL parseSection (char **cp, HtmlNode **resultnode, STORE_HANDLE handle)
1423 {
1424   HtmlNode *node, *leftnode, *rightnode ;
1425   static BOOL MODE_PREFORMAT=FALSE, MODE_BLOCKQUOTE=FALSE ;
1426 
1427   if (!MODE_PREFORMAT)
1428     skipSpaces (cp) ;
1429 
1430   if (!**cp)			/* EOF */
1431     {
1432       if (MODE_PREFORMAT)
1433 	printf ("Warning : found <PRE> tag "
1434 		"without closing </PRE> tag !!\n") ;
1435       if (MODE_BLOCKQUOTE)
1436 	printf ("Warning : found <BLOCKQUOTE> tag "
1437 		"without closing </BLOCKQUOTE> tag !!\n") ;
1438 
1439       *resultnode = 0 ;
1440       return TRUE ;
1441     }
1442 
1443   if (strncasecmp (*cp, "<!--", 4) == 0)
1444     {
1445       if (!parseComment (cp, &leftnode, handle))
1446 	{
1447 	  *resultnode = 0 ;
1448 	  return FALSE ;
1449 	}
1450     }
1451   else if (strncasecmp (*cp, "<html>", 6) == 0)
1452     {
1453       if (!(parseHtml (cp, &leftnode, handle)))
1454 	{
1455 	  *resultnode = 0 ;
1456 	  return FALSE ;
1457 	}
1458     }
1459   else if (strncasecmp (*cp, "</html>", 7) == 0)
1460     {
1461       *resultnode = 0 ;
1462       return TRUE ;
1463     }
1464   else if (strncasecmp (*cp, "<head>", 6) == 0)
1465     {
1466       if (!(parseHead (cp, &leftnode, handle)))
1467 	{
1468 	  *resultnode = 0 ;
1469 	  return FALSE ;
1470 	}
1471     }
1472   else if (strncasecmp (*cp, "</head>", 7) == 0)
1473     {
1474       *resultnode = 0 ;
1475       return TRUE ;
1476     }
1477   else if (strncasecmp (*cp, "<body>", 6) == 0)
1478     {
1479       if (!(parseBody (cp, &leftnode, handle)))
1480 	{
1481 	  *resultnode = 0 ;
1482 	  return FALSE ;
1483 	}
1484     }
1485   else if (strncasecmp (*cp, "</body>", 7) == 0)
1486     {
1487       *resultnode = 0 ;
1488       return TRUE ;
1489     }
1490   else if (strncasecmp (*cp, "<title>", 7) == 0)
1491     {
1492       if (!parseTitle (cp, &leftnode, handle))
1493 	{
1494 	  *resultnode = 0 ;
1495 	  return FALSE ;
1496 	}
1497     }
1498   else if ((strncasecmp (*cp, "<H", 2) == 0) &&
1499 	   (*cp)[2]-'0' >= 1 && (*cp)[2]-'0' <= 7 && (*cp)[3] == '>')
1500     {
1501       if (!parseHeader (cp, &leftnode, handle))
1502 	{
1503 	  *resultnode = 0 ;
1504 	  return FALSE ;
1505 	}
1506     }
1507   else if ((strncasecmp (*cp, "</H", 3) == 0) &&
1508 	   (*cp)[3]-'0' >= 1 && (*cp)[3]-'0' <= 7 && (*cp)[4] == '>')
1509     {
1510       *resultnode = 0 ;
1511       return TRUE ;
1512     }
1513   else if (strncasecmp (*cp, "<a", 2) == 0 &&
1514 	   (isspace((int)(*cp)[2]) || (*cp)[2] == '\n'))
1515     {
1516       if (!parseHref (cp, &leftnode, handle))
1517 	{
1518 	  *resultnode = 0 ;
1519 	  return FALSE ;
1520 	}
1521     }
1522   else if (strncasecmp (*cp, "</a>", 4) == 0)
1523     {
1524       *resultnode = 0 ;
1525       return TRUE ;
1526     }
1527   else if (strncasecmp (*cp, "<img", 4) == 0)
1528     {
1529       if (!parseImage (cp, &leftnode, handle))
1530 	{
1531 	  *resultnode = 0 ;
1532 	  return FALSE ;
1533 	}
1534     }
1535   else if (strncasecmp (*cp, "<ul>", 4) == 0)
1536     {
1537       if (!parseList (HTML_LIST_BULLET, cp, &leftnode, handle))
1538 	{
1539 	  *resultnode = 0 ;
1540 	  return FALSE ;
1541 	}
1542     }
1543   else if (strncasecmp (*cp, "<ol>", 4) == 0)
1544     {
1545       if (!parseList (HTML_LIST_NUMBER, cp, &leftnode, handle))
1546 	{
1547 	  *resultnode = 0 ;
1548 	  return FALSE ;
1549 	}
1550     }
1551   else if (strncasecmp (*cp, "<dl>", 4) == 0)
1552     {
1553       if (!parseList (HTML_LIST_NOINDENT, cp, &leftnode, handle))
1554 	{
1555 	  *resultnode = 0 ;
1556 	  return FALSE ;
1557 	}
1558     }
1559   else if (strncasecmp (*cp, "<li>", 4) == 0)
1560     {
1561       /* LI isn't a section, so we've hit the end of a section */
1562       *resultnode = 0 ;
1563       return TRUE ;
1564     }
1565   else if (strncasecmp (*cp, "<dd>", 4) == 0)
1566     {
1567       /* DD isn't a section, so we've hit the end of a section */
1568       *resultnode = 0 ;
1569       return TRUE ;
1570     }
1571   else if (strncasecmp (*cp, "<dt>", 4) == 0)
1572     {
1573       /* DT isn't a section, so we've hit the end of a section */
1574       *resultnode = 0 ;
1575       return TRUE ;
1576     }
1577   else if (strncasecmp (*cp, "</ul>", 5) == 0)
1578     {
1579       *resultnode = 0 ;
1580       return TRUE ;
1581     }
1582   else if (strncasecmp (*cp, "</ol>", 5) == 0)
1583     {
1584       *resultnode = 0 ;
1585       return TRUE ;
1586     }
1587   else if (strncasecmp (*cp, "</dl>", 5) == 0)
1588     {
1589       *resultnode = 0 ;
1590       return TRUE ;
1591     }
1592   else if (strncasecmp (*cp, "<hr>", 4) == 0)
1593     {
1594       leftnode = makeNode (HTML_RULER, handle) ;
1595       *cp += 4 ;
1596       skipSpaces (cp) ;
1597     }
1598   else if (strncasecmp (*cp, "<p>", 3) == 0)
1599     {
1600       leftnode = makeNode (HTML_PARAGRAPH, handle) ;
1601       *cp += 3 ;
1602       skipSpaces (cp) ;
1603     }
1604   else if (strncasecmp (*cp, "</p>", 4) == 0)
1605     {
1606       leftnode = makeNode (HTML_PARAGRAPH, handle) ;
1607       *cp += 4 ;
1608       skipSpaces (cp) ;
1609     }
1610   else if (strncasecmp (*cp, "<br>", 4) == 0)
1611     {
1612       leftnode = makeNode (HTML_LINEBREAK, handle) ;
1613       *cp += 4 ;
1614       skipSpaces (cp) ;
1615     }
1616   else if (strncasecmp (*cp, "<pre>", 5) == 0)
1617     {
1618       if (MODE_PREFORMAT)
1619 	printf ("Warning : nesting of <PRE> tags without effect !!\n") ;
1620       MODE_PREFORMAT = TRUE ;
1621 
1622       leftnode = makeNode (HTML_STARTPREFORMAT, handle) ;
1623       *cp += 5 ;
1624       skipSpaces (cp) ;
1625     }
1626   else if (strncasecmp (*cp, "</pre>", 6) == 0)
1627     {
1628       if (!MODE_PREFORMAT)
1629 	printf ("Warning : found </PRE> without preceeding <PRE>\n") ;
1630       MODE_PREFORMAT = FALSE ;
1631 
1632       leftnode = makeNode (HTML_ENDPREFORMAT, handle) ;
1633       *cp += 6 ;
1634       skipSpaces (cp) ;
1635     }
1636   else if (strncasecmp (*cp, "<blockquote>", 12) == 0)
1637     {
1638       if (!MODE_BLOCKQUOTE)
1639 	{
1640 	  leftnode = makeNode (HTML_STARTBLOCKQUOTE, handle) ;
1641 	  MODE_BLOCKQUOTE = TRUE ;
1642 	}
1643       else
1644 	printf ("Warning : nesting of <BLOCKQUOTE> tags "
1645 		"without effect !!\n") ;
1646 
1647       *cp += 12 ;
1648       skipSpaces (cp) ;
1649     }
1650   else if (strncasecmp (*cp, "</blockquote>", 13) == 0)
1651     {
1652       if (MODE_BLOCKQUOTE)
1653 	{
1654 	  leftnode = makeNode (HTML_ENDBLOCKQUOTE, handle) ;
1655 	  MODE_BLOCKQUOTE = FALSE ;
1656 	}
1657       else
1658 	printf ("Warning : found </BLOCKQUOTE> "
1659 		"without preceeding <BLOCKQUOTE>\n") ;
1660 
1661       *cp += 13 ;
1662       skipSpaces (cp) ;
1663     }
1664   else if (strncasecmp (*cp, "<code>", 6) == 0)
1665     {
1666       if (!(parseCode (cp, &leftnode, handle)))
1667 	{
1668 	  *resultnode = 0 ;
1669 	  return FALSE ;
1670 	}
1671     }
1672   else if (strncasecmp (*cp, "</code>", 7) == 0)
1673     {
1674       *resultnode = 0 ;
1675       return TRUE ;
1676     }
1677   else if (strncasecmp (*cp, "<b>", 3) == 0)
1678     {
1679       if (!(parseBold (cp, &leftnode, handle)))
1680 	{
1681 	  *resultnode = 0 ;
1682 	  return FALSE ;
1683 	}
1684     }
1685   else if (strncasecmp (*cp, "</b>", 4) == 0)
1686     {
1687       *resultnode = 0 ;
1688       return TRUE ;
1689     }
1690   else if (strncasecmp (*cp, "<strong>", 8) == 0)
1691     {
1692       if (!(parseStrong (cp, &leftnode, handle)))
1693 	{
1694 	  *resultnode = 0 ;
1695 	  return FALSE ;
1696 	}
1697     }
1698   else if (strncasecmp (*cp, "</strong>", 9) == 0)
1699     {
1700       *resultnode = 0 ;
1701       return TRUE ;
1702     }
1703   else if (strncasecmp (*cp, "<i>", 3) == 0)
1704     {
1705       if (!(parseItalic (cp, &leftnode, handle)))
1706 	{
1707 	  *resultnode = 0 ;
1708 	  return FALSE ;
1709 	}
1710     }
1711   else if (strncasecmp (*cp, "</i>", 4) == 0)
1712     {
1713       *resultnode = 0 ;
1714       return TRUE ;
1715     }
1716   else
1717     {
1718       if (!parseText (cp, &leftnode, handle))
1719 	{
1720 	  *resultnode = 0 ;
1721 	  return FALSE ;
1722 	}
1723     }
1724 
1725   node = makeNode (HTML_SECTION, handle) ;
1726   node->left = leftnode ;
1727   if (leftnode->type == 0)
1728     {
1729       printf ("section on section \n") ;
1730     }
1731   if (parseSection (cp, &rightnode, handle))
1732     {
1733       node->right = rightnode ;
1734       *resultnode = node ;
1735       return TRUE ;
1736     }
1737   else
1738     {
1739       node->right = 0 ;
1740       *resultnode = node ;
1741       return FALSE ;
1742     }
1743 
1744 } /* parseSection */
1745 /************************************************************/
1746 
1747 
1748