1 /* -*- Mode: C; tab-width: 8; indent-tabs-mode: nil -*- */
2 /*
3  * Copyright (C) 2005 Davyd Madeley <davyd@madeley.id.au>
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License as
7  * published by the Free Software Foundation; either version 2 of the
8  * License, or (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public
16  * License along with this program; if not, see <http://www.gnu.org/licenses/>.
17  *
18  * Author: Davyd Madeley  <davyd@madeley.id.au>
19  */
20 
21 #ifdef HAVE_CONFIG_H
22 #include <config.h>
23 #endif
24 
25 #include <glib.h>
26 #include <gtk/gtk.h>
27 #include <string.h>
28 
29 #include "yelp-info-parser.h"
30 #include "yelp-magic-decompressor.h"
31 #include "yelp-debug.h"
32 
33 
34 static GtkTreeIter *  find_real_top                      (GtkTreeModel *model,
35 							  GtkTreeIter *it);
36 static GtkTreeIter *  find_real_sibling                  (GtkTreeModel *model,
37 							  GtkTreeIter *it,
38 							  GtkTreeIter *comp);
39 static xmlNodePtr     yelp_info_parse_menu               (GtkTreeStore *tree,
40 							  xmlNodePtr *node,
41 							  gchar *page_content,
42 							  gboolean notes);
43 static gboolean       get_menuoptions                    (gchar *line,
44 							  gchar **title,
45 							  gchar **ref,
46 							  gchar **desc,
47 							  gchar **xref);
48 static gboolean       resolve_frag_id                    (GtkTreeModel *model,
49 							  GtkTreePath *path,
50 							  GtkTreeIter *iter,
51 							  gpointer data);
52 static void	      info_process_text_notes            (xmlNodePtr *node,
53 							  gchar *content,
54 							  GtkTreeStore
55 							  *tree);
56 
57 /*
58   Used to output the correct <heading level="?" /> tag.
59  */
60 static const gchar* level_headings[] = { NULL, "1", "2", "3" };
61 
62 static GHashTable *
info_image_get_attributes(gchar const * string)63 info_image_get_attributes (gchar const* string)
64 {
65   GMatchInfo *match_info;
66   GRegex *regex;
67   GHashTable *h;
68 
69   h = 0;
70   regex = g_regex_new ("([^\\s][^\\s=]+)=(?:([^\\s \"]+)|(?:\"((?:[^\\\"]|\\\\[\\\\\"])*)\"))", 0, 0, NULL);
71   g_regex_match (regex, string, 0, &match_info);
72   while (g_match_info_matches (match_info))
73     {
74       gchar *key;
75       gchar *value;
76 
77       if (!h)
78 	h = g_hash_table_new (g_str_hash, g_str_equal);
79       key = g_match_info_fetch (match_info, 1);
80       value = g_match_info_fetch (match_info, 2);
81       if (!*value)
82 	value = g_match_info_fetch (match_info, 3);
83       g_hash_table_insert (h, key, value);
84       g_match_info_next (match_info, NULL);
85     }
86   g_match_info_free (match_info);
87   g_regex_unref (regex);
88 
89   return h;
90 }
91 
/*
  info elements look like \0\b[<TAGNAME>\0\b] and take attribute=value
  pairs, i.e. for image: \0\b[image src="foo.png" \0\b]
*/
#define INFO_TAG_0 "\0"
#define INFO_TAG_1 "\b"
/* Second half of a delimiter: the backspace plus the bracket. */
#define INFO_TAG_OPEN_2 INFO_TAG_1 "["
#define INFO_TAG_CLOSE_2 INFO_TAG_1 "]"
/* Same, for use inside a regex: the bracket is wrapped in a character
   class so that it is matched literally. */
#define INFO_TAG_OPEN_2_RE INFO_TAG_1 "[[]"
#define INFO_TAG_CLOSE_2_RE INFO_TAG_1 "[]]"
/* Full delimiters. The *_2 pieces already start with INFO_TAG_1, so it
   must not be repeated here (doing so would yield \0\b\b[ rather than
   the \0\b[ described above). */
#define INFO_TAG_OPEN INFO_TAG_0 INFO_TAG_OPEN_2
#define INFO_TAG_CLOSE INFO_TAG_0 INFO_TAG_CLOSE_2
#define INFO_TAG_OPEN_RE INFO_TAG_0 INFO_TAG_OPEN_2_RE
#define INFO_TAG_CLOSE_RE INFO_TAG_0 INFO_TAG_CLOSE_2_RE
/* C/glib cannot really handle \0 in strings, convert to '@' */
#define INFO_C_TAG_0 "@"
#define INFO_C_TAG_OPEN INFO_C_TAG_0 INFO_TAG_OPEN_2
#define INFO_C_TAG_CLOSE INFO_C_TAG_0 INFO_TAG_CLOSE_2
#define INFO_C_TAG_OPEN_RE INFO_C_TAG_0 INFO_TAG_OPEN_2_RE
#define INFO_C_TAG_CLOSE_RE INFO_C_TAG_0 INFO_TAG_CLOSE_2_RE
#define INFO_C_IMAGE_TAG_OPEN INFO_C_TAG_OPEN "image"
#define INFO_C_IMAGE_TAG_OPEN_RE INFO_C_TAG_OPEN_RE "image"
114 
115 static xmlNodePtr
info_insert_image(xmlNodePtr parent,GMatchInfo * match_info)116 info_insert_image (xmlNodePtr parent, GMatchInfo *match_info)
117 {
118   gchar *title;
119   gchar *text;
120   gchar *alt;
121   xmlNodePtr img;
122   GHashTable *h = info_image_get_attributes (g_match_info_fetch (match_info, 1));
123   gchar *source;
124   if (h)
125     source = (gchar*)g_hash_table_lookup (h, "src");
126 
127   if (!h || !source || !*source)
128     return xmlNewTextChild (parent, NULL, BAD_CAST "para",
129                             BAD_CAST "[broken image]");
130 
131   title = (gchar*)g_hash_table_lookup (h, "title");
132   text = (gchar*)g_hash_table_lookup (h, "text");
133   alt = (gchar*)g_hash_table_lookup (h, "alt");
134   g_hash_table_destroy (h);
135   img = xmlNewChild (parent, NULL, BAD_CAST "img", NULL);
136   xmlNewProp (img, BAD_CAST "src", BAD_CAST source);
137   xmlNewProp (img, BAD_CAST "title", BAD_CAST (title ? title : ""));
138   xmlNewProp (img, BAD_CAST "text", BAD_CAST (text ? text : ""));
139   xmlNewProp (img, BAD_CAST "alt", BAD_CAST (alt ? alt : ""));
140   g_free (source);
141   g_free (title);
142   g_free (alt);
143   return parent;
144 }
145 
146 /*
147   If every element of `str' is `ch' then return TRUE, else FALSE.
148  */
149 static gboolean
string_all_char_p(const gchar * str,gchar ch)150 string_all_char_p (const gchar* str, gchar ch)
151 {
152   for (; *str; str++) {
153     if (*str != ch) return FALSE;
154   }
155   return TRUE;
156 }
157 
158 /*
159   If `line' is a line of '*', '=' or '-', return 1,2,3 respectively
160   for the heading level. If it's anything else, return 0.
161  */
162 static int
header_underline_level(const gchar * line)163 header_underline_level (const gchar* line)
164 {
165   if (*line != '*' && *line != '=' && *line != '-')
166     return 0;
167 
168   if (string_all_char_p (line, '*')) return 1;
169   if (string_all_char_p (line, '=')) return 2;
170   if (string_all_char_p (line, '-')) return 3;
171 
172   return 0;
173 }
174 
175 /*
176   Use g_strjoinv to join up the strings from `strings', but they might
177   not actually be a null-terminated array. `end' should be strings+n,
178   where I want the first n strings (strings+0, ..., strings+(n-1)). It
179   shouldn't point outside of the array allocated, but it can point at
180   the null string at the end.
181  */
182 static gchar*
join_strings_subset(const gchar * separator,gchar ** strings,gchar ** end)183 join_strings_subset (const gchar *separator,
184                      gchar** strings, gchar** end)
185 {
186   gchar *ptr;
187   gchar *glob;
188 
189   g_assert(end > strings);
190 
191   ptr = *end;
192   *end = NULL;
193 
194   glob = g_strjoinv (separator, strings);
195   *end = ptr;
196   return glob;
197 }
198 
199 /*
200   Create a text node, child of `parent', with the lines strictly
201   between `first' and `last'.
202 */
203 static void
lines_subset_text_child(xmlNodePtr parent,xmlNsPtr ns,gchar ** first,gchar ** last)204 lines_subset_text_child (xmlNodePtr parent, xmlNsPtr ns,
205                          gchar** first, gchar** last)
206 {
207   /* TODO? Currently we're copying the split strings again, which is
208      less efficient than somehow storing lengths and using a sort of
209      window on `content'. But that's much more difficult, so unless
210      there's a problem, let's go with the stupid approach. */
211   gchar *glob;
212 
213   if (last > first) {
214     glob = join_strings_subset ("\n", first, last);
215     xmlAddChild (parent, xmlNewText (BAD_CAST glob));
216     g_free (glob);
217   }
218 }
219 
220 /*
221   Convert body text CONTENT to xml nodes. This function is responsible
222   for spotting headings etc and splitting them out correctly.
223 
224   paragraph is as described in info_body_text, but cannot be null.
225 
226   If `inline_p' is true, end with a <para1> tag. Otherwise, end with a
227   <para> tag.
228 
229   TODO: IWBN add a regex match for *Note: here and call the *Note ==>
230   <a href> logic of info_process_text_notes from here.
231  */
232 static void
info_body_parse_text(xmlNodePtr parent,xmlNodePtr * paragraph,xmlNsPtr ns,gboolean inline_p,const gchar * content)233 info_body_parse_text (xmlNodePtr parent, xmlNodePtr *paragraph,
234                       xmlNsPtr ns,
235                       gboolean inline_p, const gchar *content)
236 {
237   /* The easiest things to spot are headings: they look like a line of
238    * '*','=' or '-', corresponding to heading levels 1,2 or 3. To spot
239    * them, we split content into single lines and work with them. */
240   gchar **lines = g_strsplit (content, "\n", 0);
241   gchar **first = lines, **last = lines;
242   int header_level;
243   xmlNodePtr header_node;
244 
245   /* Deal with the possibility that `content' is empty */
246   if (*lines == NULL) {
247     if (!inline_p) {
248       xmlNewTextChild (parent, NULL, BAD_CAST "para", BAD_CAST "");
249     }
250     return;
251   }
252 
253   /* Use a pair of pointers, first and last, which point to two lines,
254    * the chunk of the body we're displaying (inclusive) */
255   for (; *last; last++) {
256 
257     /* Check for a blank line */
258     if (**last == '\0') {
259       if (last != first) {
260         if (!*paragraph) {
261           *paragraph = xmlNewChild (parent, ns, BAD_CAST "para", NULL);
262         }
263         lines_subset_text_child (*paragraph, ns, first, last);
264       }
265       /* On the next iteration, last==first both pointing at the next
266          line. */
267       first = last+1;
268       *paragraph = NULL;
269 
270       continue;
271     }
272 
273     /* Check for a header */
274     header_level = header_underline_level (*last);
275     if (header_level) {
276       /* Write out any lines beforehand */
277       lines_subset_text_child (parent, ns, first, last-1);
278       /* Now write out the actual header line */
279       header_node = xmlNewTextChild (parent, ns, BAD_CAST "header",
280                                      BAD_CAST *(last-1));
281       xmlNewProp (header_node, BAD_CAST "level",
282                   BAD_CAST level_headings[header_level]);
283 
284       first = last+1;
285       last = first-1;
286     }
287   }
288 
289   /* Write out any lines left */
290   if (!*paragraph) {
291     *paragraph = xmlNewChild (parent, ns, BAD_CAST "para", NULL);
292   }
293   lines_subset_text_child (*paragraph, ns, first, last);
294 
295   g_strfreev (lines);
296 }
297 
298 /*
299   info_body_text is responsible for taking a hunk of the info page's
300   body and turning it into paragraph tags. It searches out images and
301   marks them up properly if necessary.
302 
303   parent should be the node in which we're currently storing text and
304   paragraph a pointer to a <para> tag or NULL. At blank lines, we
305   finish with the current para tag and switch to a new one.
306 
307   It uses info_body_parse_text to mark up the actual bits of text.
308  */
309 static void
info_body_text(xmlNodePtr parent,xmlNodePtr * paragraph,xmlNsPtr ns,gboolean inline_p,gchar const * content)310 info_body_text (xmlNodePtr parent, xmlNodePtr *paragraph, xmlNsPtr ns,
311                 gboolean inline_p, gchar const *content)
312 {
313   xmlNodePtr thepara = NULL;
314   gint content_len;
315   gint pos;
316   GRegex *regex;
317   GMatchInfo *match_info;
318   gchar *after;
319   if (paragraph == NULL) paragraph = &thepara;
320 
321   if (!strstr (content, INFO_C_IMAGE_TAG_OPEN)) {
322     info_body_parse_text (parent, paragraph, ns, inline_p, content);
323     return;
324   }
325 
326   content_len = strlen (content);
327   pos = 0;
328   regex = g_regex_new ("(" INFO_C_IMAGE_TAG_OPEN_RE "((?:[^" INFO_TAG_1 "]|[^" INFO_C_TAG_0 "]+" INFO_TAG_1 ")*)" INFO_C_TAG_CLOSE_RE ")", 0, 0, NULL);
329 
330   g_regex_match (regex, content, 0, &match_info);
331   while (g_match_info_matches (match_info))
332     {
333       gint image_start;
334       gint image_end;
335       gboolean image_found = g_match_info_fetch_pos (match_info, 0,
336 						     &image_start, &image_end);
337       gchar *before = g_strndup (&content[pos], image_start - pos);
338       pos = image_end + 1;
339       info_body_parse_text (parent, paragraph, NULL, TRUE, before);
340       g_free (before);
341 
342       /* End the paragraph that was before */
343       *paragraph = NULL;
344 
345       if (image_found)
346 	info_insert_image (parent, match_info);
347       g_match_info_next (match_info, NULL);
348     }
349   after = g_strndup (&content[pos], content_len - pos);
350   info_body_parse_text (parent, paragraph, NULL, TRUE, after);
351   g_free (after);
352 }
353 
354 /* Part 1: Parse File Into Tree Store */
355 
/* Kinds of page that page_type() can report. */
enum
{
  PAGE_TAG_TABLE,  /* page starts with "Tag Table:" */
  PAGE_NODE,       /* page starts with "File:" or "Node:" */
  PAGE_INDIRECT,   /* page starts with "Indirect:" */
  PAGE_OTHER       /* none of the above */
};
363 
364 static int
page_type(char * page)365 page_type (char *page)
366 {
367   if (g_ascii_strncasecmp (page, "Tag Table:\n", 11) == 0)
368     return PAGE_TAG_TABLE;
369   else if (g_ascii_strncasecmp (page, "Indirect:\n", 10) == 0)
370     return PAGE_INDIRECT;
371   else if (g_ascii_strncasecmp (page, "File:", 5) == 0 ||
372 	   g_ascii_strncasecmp (page, "Node:", 5) == 0)
373     return PAGE_NODE;
374 
375   else
376     return PAGE_OTHER;
377 }
378 
379 static char
open_info_file(const gchar * file)380 *open_info_file (const gchar *file)
381 {
382     GFile *gfile;
383     GConverter *converter;
384     GFileInputStream *file_stream;
385     GInputStream *stream;
386     gchar buf[1024];
387     gssize bytes;
388     GString *string;
389     gchar *str;
390     gsize i;
391 
392     gfile = g_file_new_for_path (file);
393     file_stream = g_file_read (gfile, NULL, NULL);
394     converter = (GConverter *) yelp_magic_decompressor_new ();
395     stream = g_converter_input_stream_new ((GInputStream *) file_stream, converter);
396     string = g_string_new (NULL);
397 
398     while ((bytes = g_input_stream_read (stream, buf, 1024, NULL, NULL)) > 0)
399         g_string_append_len (string, buf, bytes);
400 
401     g_object_unref (stream);
402 
403     str = string->str;
404 
405     /* C/glib * cannot really handle \0 in strings, convert. */
406     for (i = 0; i < (string->len - 1); i++)
407         if (str[i] == INFO_TAG_OPEN[0] && str[i+1] == INFO_TAG_OPEN[1])
408             str[i] = INFO_C_TAG_OPEN[0];
409 
410     g_string_free (string, FALSE);
411 
412     return str;
413 }
414 
415 static gchar *
find_info_part(gchar * part_name,const gchar * base)416 find_info_part (gchar *part_name, const gchar *base)
417 {
418   /* New and improved.  We now assume that all parts are
419    * in the same subdirectory as the base file.  Makes
420    * life much simpler and is (afaict) always true
421    */
422   gchar *path;
423   gchar *tmp;
424   gchar *bzfname, *gzfname, *lzfd, *fname;
425   gchar *uri = NULL;
426   tmp = g_strrstr (base, "/");
427   path = g_strndup (base, tmp-base);
428 
429   bzfname = g_strconcat (path, "/", part_name, ".bz2", NULL);
430   gzfname = g_strconcat (path, "/", part_name, ".gz", NULL);
431   lzfd = g_strconcat (path, "/", part_name, ".lzma", NULL);
432   fname = g_strconcat (path, "/", part_name, NULL);
433 
434   if (g_file_test (bzfname, G_FILE_TEST_EXISTS))
435     uri = g_strdup (bzfname);
436   else if (g_file_test (gzfname, G_FILE_TEST_EXISTS))
437     uri = g_strdup (gzfname);
438   else if (g_file_test (lzfd, G_FILE_TEST_EXISTS))
439     uri = g_strdup (lzfd);
440   else if (g_file_test (fname, G_FILE_TEST_EXISTS))
441     uri = g_strdup (fname);
442 
443   g_free (bzfname);
444   g_free (gzfname);
445   g_free (lzfd);
446   g_free (fname);
447   g_free (path);
448   return uri;
449 
450 }
451 
452 static char
process_indirect_map(char * page,const gchar * file)453 *process_indirect_map (char *page, const gchar *file)
454 {
455 	char **lines;
456 	char **ptr;
457 	char *composite = NULL;
458         size_t composite_len = 0;
459 
460 	lines = g_strsplit (page, "\n", 0);
461 
462         /*
463           Go backwards down the list so that we allocate composite
464           big enough the first time around.
465         */
466 	for (ptr = lines + 1; *ptr != NULL; ptr++);
467 	for (ptr--; ptr != lines; ptr--)
468 	{
469 		char **items;
470 		char *filename;
471 		char *str;
472 		char **pages;
473 		gsize offset;
474 		gsize plength;
475 
476 		debug_print (DB_DEBUG, "Line: %s\n", *ptr);
477 		items = g_strsplit (*ptr, ": ", 2);
478 
479 		if (items[0])
480 		{
481 		  filename = find_info_part (items[0], file);
482 		  str = open_info_file (filename);
483 		  if (!str) {
484 			g_strfreev (items);
485 		  	continue;
486 		  }
487 			pages = g_strsplit (str, "", 2);
488 			g_free (str);
489 			if (!pages[1]) {
490 				g_strfreev (items);
491 				g_strfreev (pages);
492 		  		continue;
493 			}
494 
495 			offset = (gsize) atoi (items[1]);
496 			plength = strlen(pages[1]);
497 
498 			debug_print (DB_DEBUG, "Need to make string %s+%i bytes = %i\n",
499 				    items[1], plength,
500 				    offset + plength);
501 
502 			if (!composite) /* not yet created, malloc it */
503 			{
504 				composite_len = offset + plength;
505 				composite = g_malloc (sizeof (char) *
506 						      (composite_len + 1));
507 				memset (composite, '-', composite_len);
508 				composite[composite_len] = '\0';
509 			}
510 
511                         /* Because we're going down the list
512                          * backwards, plength should always be short
513                          * enough to fit in the memory allocated. But
514                          * in case something's broken/malicious, we
515                          * should check anyway.
516                          */
517                         if (offset > composite_len)
518                           continue;
519                         if (plength + offset + 1 > composite_len)
520                           plength = composite_len - offset - 1;
521 
522 			composite[offset] = '';
523 			memcpy (composite + offset + 1, pages[1], plength);
524 
525 			g_free (filename);
526 			g_strfreev (pages);
527 		}
528 
529 		g_strfreev (items);
530 	}
531 
532 	g_strfreev (lines);
533 
534 	return composite;
535 }
536 
537 /*
538   Open up the relevant info file and read it all into memory. If there
539   is an indirect table thingy, we resolve that as we go.
540 
541   Returns a NULL-terminated list of pointers to pages on success and
542   NULL otherwise.
543  */
544 static gchar**
expanded_info_file(const gchar * file)545 expanded_info_file (const gchar *file)
546 {
547   gchar *slurp = open_info_file (file);
548   gchar **page_list;
549   gchar **page;
550 
551   if (!slurp) return NULL;
552 
553   /* TODO: There's a lot of copying of bits of memory here. With a bit
554    * more effort we could avoid it. Either we should fix this or
555    * measure the time taken and decide it's irrelevant...
556    *
557    * Note: \x1f\n is ^_\n
558    */
559   page_list = g_strsplit (slurp, "\x1f\n", 0);
560 
561   g_free (slurp);
562 
563   for (page = page_list; *page != NULL; page++) {
564     if (page_type (*page) == PAGE_INDIRECT) {
565 
566       slurp = process_indirect_map (*page, file);
567       g_strfreev (page_list);
568 
569       if (!slurp)
570         return NULL;
571 
572       page_list = g_strsplit (slurp, "\x1f\n", 0);
573       g_free (slurp);
574       break;
575     }
576   }
577 
578   return page_list;
579 }
580 
/*
  Extract the value that follows `key' in `source'. For example, with
  the key "Node: " the value of "Node: blah," is "blah".

  The value starts right after the first occurrence of `key' and runs
  up to (not including) the first character that appears in `end', or
  to the end of the string when there is none.

  If a character from `cancel' shows up before the value ends, there
  is no value and NULL is returned. `cancel' may be NULL to switch
  this test off.

  Returns a newly allocated copy of the value, or NULL when `key' is
  not found or the value was cancelled.
 */
static char*
get_value_after_ext (const char *source, const char *key,
                     const char *end, const char *cancel)
{
  const char *value;
  size_t value_len;

  value = strstr (source, key);
  if (value == NULL)
    return NULL;

  /* Step over the key itself. */
  value += strlen (key);

  value_len = strcspn (value, end);

  /* A cancel character inside the value voids it. */
  if (cancel != NULL && strcspn (value, cancel) < value_len)
    return NULL;

  return g_strndup (value, value_len);
}
613 
/*
  get_value_after_ext with the usual settings: the value ends at a
  comma and is abandoned when a newline or a \x7f comes first.
 */
static char*
get_value_after (const char* source, const char *key)
{
  static const char value_end[] = ",";
  static const char value_cancel[] = "\n\x7f";

  return get_value_after_ext (source, key, value_end, value_cancel);
}
619 
620 static int
node2page(GHashTable * nodes2pages,char * node)621 node2page (GHashTable *nodes2pages, char *node)
622 {
623   gpointer p;
624 
625   if (g_hash_table_lookup_extended (nodes2pages, node,
626                                     NULL, &p))
627     return GPOINTER_TO_INT(p);
628 
629   /* This shouldn't happen: we should only ever have to look up pages
630    * that exist. */
631   g_return_val_if_reached (0);
632 }
633 
634 static GtkTreeIter
node2iter(GHashTable * nodes2iters,char * node)635 *node2iter (GHashTable *nodes2iters, char *node)
636 {
637 	GtkTreeIter *iter;
638 
639 	iter = g_hash_table_lookup (nodes2iters, node);
640 	d (if (!iter) debug_print (DB_WARN, "Could not retrieve iter for node !%s!\n", node));
641 	return iter;
642 }
643 
644 GtkTreeIter
find_real_top(GtkTreeModel * model,GtkTreeIter * it)645 *find_real_top (GtkTreeModel *model, GtkTreeIter *it)
646 {
647   GtkTreeIter *r = NULL;
648   GtkTreeIter *tmp = NULL;
649 
650   if (!it)
651     return NULL;
652 
653   r = gtk_tree_iter_copy (it);
654   tmp = g_malloc0 (sizeof (GtkTreeIter));
655   while (gtk_tree_model_iter_parent (model, tmp, r)) {
656     gtk_tree_iter_free (r);
657     r = gtk_tree_iter_copy (tmp);
658   }
659   g_free (tmp);
660 
661   return r;
662 }
663 
/*
  Starting at `it', climb the tree until reaching the node whose
  parent is `comp', i.e. the ancestor of `it' (possibly `it' itself)
  that is a direct child of `comp'. Nodes are compared through their
  path strings.

  Returns a newly allocated copy of that node, to be released with
  gtk_tree_iter_free, or NULL when `it' or `comp' is NULL or when
  `comp' is not among the ancestors of `it'. The caller's `it' is
  never returned itself.
 */
GtkTreeIter *
find_real_sibling (GtkTreeModel *model,
                   GtkTreeIter *it, GtkTreeIter *comp)
{
  GtkTreeIter child;
  GtkTreeIter parent;
  gboolean found = FALSE;
  gchar *reftitle;

  if (!it || !comp)
    return NULL;

  reftitle = gtk_tree_model_get_string_from_iter (model, comp);
  if (!reftitle)
    return NULL;

  child = *it;
  while (gtk_tree_model_iter_parent (model, &parent, &child)) {
    gchar *title = gtk_tree_model_get_string_from_iter (model, &parent);

    found = (title != NULL && g_str_equal (title, reftitle));
    g_free (title);

    if (found)
      break;

    /* Not there yet: go up one level. */
    child = parent;
  }

  g_free (reftitle);

  return found ? gtk_tree_iter_copy (&child) : NULL;
}
708 
709 static void
process_page(GtkTreeStore * tree,GHashTable * nodes2pages,GHashTable * nodes2iters,int * processed_table,char ** page_list,char * page_text)710 process_page (GtkTreeStore *tree,
711               GHashTable *nodes2pages, GHashTable *nodes2iters,
712               int *processed_table, char **page_list, char *page_text)
713 {
714 	GtkTreeIter *iter;
715 
716 	char **parts;
717 	char *node;
718 	char *up;
719 	char *prev;
720 	char *next;
721 	gchar *tmp;
722 
723 	int page;
724 
725 	/* split out the header line and the text */
726 	parts = g_strsplit (page_text, "\n", 3);
727 
728 	node = get_value_after (parts[0], "Node: ");
729 	up = get_value_after (parts[0], "Up: ");
730 	prev = get_value_after (parts[0], "Prev: ");
731 	next = get_value_after (parts[0], "Next: ");
732 
733 	if (next && g_str_equal (next, "Top")) {
734 	  g_free (next);
735 	  next = NULL;
736 	}
737 	if (node && g_str_equal (node, "Top") && prev != NULL) {
738 	  g_free (prev);
739 	  prev = NULL;
740 	}
741 
742 	/* check to see if this page has been processed already */
743 	page = node2page (nodes2pages, node);
744 	if (processed_table[page]) {
745 		return;
746 	}
747 	processed_table[page] = 1;
748 
749 	debug_print (DB_DEBUG, "-- Processing Page %s\n\tParent: %s\n", node, up);
750 
751 	iter = g_slice_alloc0 (sizeof (GtkTreeIter));
752 	/* check to see if we need to process our parent and siblings */
753 	if (up && g_ascii_strncasecmp (up, "(dir)", 5) && strcmp (up, "Top"))
754 	{
755 		page = node2page (nodes2pages, up);
756 		if (!processed_table[page])
757 		{
758 		  debug_print (DB_DEBUG, "%% Processing Node %s\n", up);
759                   process_page (tree, nodes2pages,
760 				nodes2iters, processed_table, page_list,
761 				page_list[page]);
762 		}
763 	}
764 	if (prev && g_ascii_strncasecmp (prev, "(dir)", 5))
765 	  {
766 	    if (node && strncmp (node, "Top", 3)) {
767 	      /* Special case the Top node to always appear first */
768 	    } else {
769 	      page = node2page (nodes2pages, prev);
770 	      if (!processed_table[page])
771 		{
772 		  debug_print (DB_DEBUG, "%% Processing Node %s\n", prev);
773 		  process_page (tree, nodes2pages,
774 				nodes2iters, processed_table, page_list,
775 				page_list[page]);
776 		}
777 	    }
778 	  }
779 
780 	/* by this point our parent and older sibling should be processed */
781 	if (!up || !g_ascii_strcasecmp (up, "(dir)"))
782 	{
783 	  debug_print (DB_DEBUG, "\t> no parent\n");
784 		if (!prev || !g_ascii_strcasecmp (prev, "(dir)"))
785 		{
786 		  debug_print (DB_DEBUG, "\t> no previous\n");
787 			gtk_tree_store_append (tree, iter, NULL);
788 		}
789 		else if (prev) {
790 		  GtkTreeIter *real;
791 		  real = find_real_top (GTK_TREE_MODEL (tree),
792 					node2iter (nodes2iters, prev));
793 		  if (real) {
794 		    gtk_tree_store_insert_after (tree, iter, NULL,
795 						 real);
796 		    gtk_tree_iter_free (real);
797 		  }
798 		  else
799 		    gtk_tree_store_append (tree, iter, NULL);
800 		}
801 	}
802 	else if (!prev || !g_ascii_strcasecmp (prev, "(dir)") || !strcmp (prev, up))
803 	{
804 	  debug_print (DB_DEBUG, "\t> no previous\n");
805 		gtk_tree_store_append (tree, iter,
806 			node2iter (nodes2iters, up));
807 	}
808 	else if (up && prev)
809 	{
810 	  GtkTreeIter *upit = node2iter (nodes2iters, up);
811 	  GtkTreeIter *previt = node2iter (nodes2iters, prev);
812 	  GtkTreeIter *nit = NULL;
813 	  debug_print (DB_DEBUG, "+++ Parent: %s Previous: %s\n", up, prev);
814 
815 	  d (if (upit) debug_print (DB_DEBUG, "++++ Have parent node!\n"));
816 	  d (if (previt) debug_print (DB_DEBUG, "++++ Have previous node!\n"));
817 	  nit = find_real_sibling (GTK_TREE_MODEL (tree), previt, upit);
818 	  if (nit) {
819 	    gtk_tree_store_insert_after (tree, iter,
820 					 upit,
821 					 nit);
822 	    gtk_tree_iter_free (nit);
823 	  }
824 	  else
825 	    gtk_tree_store_append (tree, iter, upit);
826 	}
827 	else
828 	{
829 	  debug_print (DB_DEBUG, "# node %s was not put in tree\n", node);
830 	  return;
831 	}
832 
833 	d (if (iter) debug_print (DB_DEBUG, "Have a valid iter, storing for %s\n", node));
834 
835 	g_hash_table_insert (nodes2iters, g_strdup (node), iter);
836 	debug_print (DB_DEBUG, "size: %i\n", g_hash_table_size (nodes2iters));
837 
838 	/*tmp = g_strdup_printf ("%i",
839 	  node2page (nodes2pages, node));*/
840 	tmp = g_strdup (node);
841 	tmp = g_strdelimit (tmp, " ", '_');
842 	gtk_tree_store_set (tree, iter,
843 			    INFO_PARSER_COLUMN_PAGE_NO, tmp,
844 			    INFO_PARSER_COLUMN_PAGE_NAME, node,
845 			    INFO_PARSER_COLUMN_PAGE_CONTENT, parts[2],
846 			    -1);
847 
848 	g_free (tmp);
849 	g_free (node);
850 	g_free (up);
851 	g_free (prev);
852 	g_free (next);
853 	g_strfreev (parts);
854 }
855 
856 struct TagTableFix {
857   GHashTable *nodes2pages; /* Build this... */
858   GHashTable *pages2nodes; /* ... using this. */
859 };
860 
861 static void
use_offset2page(gpointer o,gpointer p,gpointer ud)862 use_offset2page (gpointer o, gpointer p, gpointer ud)
863 {
864   struct TagTableFix* ttf = (struct TagTableFix*)ud;
865 
866   const gchar* node = g_hash_table_lookup (ttf->pages2nodes, p);
867   if (node) {
868     g_hash_table_insert (ttf->nodes2pages, g_strdup (node), p);
869   }
870 }
871 
872 /*
873   We had a nodes2offsets hash table, but sometimes these things
874   lie. How terribly rude. Anyway, use offsets2pages and pages2nodes
875   (and injectivity!) to construct the nodes2pages hash table.
876 */
877 static GHashTable *
make_nodes2pages(GHashTable * offsets2pages,GHashTable * pages2nodes)878 make_nodes2pages (GHashTable* offsets2pages,
879                   GHashTable* pages2nodes)
880 {
881   struct TagTableFix ttf;
882 
883   ttf.nodes2pages =
884     g_hash_table_new_full (g_str_hash, g_str_equal, g_free, NULL);
885   ttf.pages2nodes = pages2nodes;
886 
887   g_hash_table_foreach (offsets2pages, use_offset2page, &ttf);
888 
889   return ttf.nodes2pages;
890 }
891 
892 /**
893  * Parse file into a GtkTreeStore containing useful information that we can
894  * later convert into a nice XML document or something else.
895  */
896 GtkTreeStore
yelp_info_parser_parse_file(char * file)897 *yelp_info_parser_parse_file (char *file)
898 {
899 	gchar **page_list;
900 	char **ptr;
901 	int pages;
902 	int offset;
903 	GHashTable *offsets2pages = NULL;
904 	GHashTable *pages2nodes = NULL;
905         GHashTable *nodes2pages = NULL;
906 	GHashTable *nodes2iters = NULL;
907 	int *processed_table;
908 	GtkTreeStore *tree;
909 	int pt;
910 
911 	page_list = expanded_info_file (file);
912 	if (!page_list)
913           return NULL;
914 
915 	pages = 0;
916 	offset = 0;
917 
918 	offsets2pages = g_hash_table_new_full (g_str_hash, g_str_equal, g_free,
919 					       NULL);
920 	pages2nodes = g_hash_table_new_full (g_direct_hash, g_direct_equal, NULL,
921 					     g_free);
922 
923 	for (ptr = page_list; *ptr != NULL; ptr++)
924 	{
925 	  gchar *name = NULL;
926 
927           g_hash_table_insert (offsets2pages,
928                                g_strdup_printf ("%i", offset),
929                                GINT_TO_POINTER (pages));
930 
931           name = get_value_after (*ptr, "Node: ");
932           if (name)
933             g_hash_table_insert (pages2nodes,
934                                  GINT_TO_POINTER (pages), name);
935 
936           offset += strlen (*ptr);
937           if (pages) offset += 2;
938           pages++;
939 
940           pt = page_type (*ptr);
941           if (pt == PAGE_INDIRECT) {
942             g_warning ("Found an indirect page in a file "
943                        "we thought we'd expanded.");
944           }
945 	}
946 
947         /* Now consolidate (and correct) the two hash tables */
948         nodes2pages = make_nodes2pages (offsets2pages, pages2nodes);
949 
950 	g_hash_table_destroy (offsets2pages);
951         g_hash_table_destroy (pages2nodes);
952 
953 	processed_table = g_malloc0 (pages * sizeof (int));
954 	tree = gtk_tree_store_new (INFO_PARSER_N_COLUMNS, G_TYPE_STRING, G_TYPE_STRING,
955 			G_TYPE_STRING);
956 	nodes2iters = g_hash_table_new_full (g_str_hash, g_str_equal, g_free,
957 					     (GDestroyNotify) gtk_tree_iter_free);
958 
959 	for (ptr = page_list; *ptr != NULL; ptr++)
960 	{
961 	  if (page_type (*ptr) != PAGE_NODE) continue;
962 	  process_page (tree, nodes2pages, nodes2iters,
963 			processed_table, page_list, *ptr);
964 	}
965 
966 	g_strfreev (page_list);
967 
968 	g_hash_table_destroy (nodes2iters);
969 	g_hash_table_destroy (nodes2pages);
970 
971 	g_free (processed_table);
972 
973 	return tree;
974 }
975 
976 /* End Part 1 */
977 /* Part 2: Parse Tree into XML */
978 static void
parse_tree_level(GtkTreeStore * tree,xmlNodePtr * node,GtkTreeIter iter)979 parse_tree_level (GtkTreeStore *tree, xmlNodePtr *node, GtkTreeIter iter)
980 {
981     GtkTreeIter children, parent;
982 	xmlNodePtr newnode;
983 
984 	char *page_no = NULL;
985 	char *page_name = NULL;
986 	char *page_content = NULL;
987 	gboolean notes = FALSE;
988 
989 	debug_print (DB_DEBUG, "Decended\n");
990 	do
991 	{
992 		gtk_tree_model_get (GTK_TREE_MODEL (tree), &iter,
993 				INFO_PARSER_COLUMN_PAGE_NO, &page_no,
994 				INFO_PARSER_COLUMN_PAGE_NAME, &page_name,
995 				INFO_PARSER_COLUMN_PAGE_CONTENT, &page_content,
996 				-1);
997 		debug_print (DB_DEBUG, "Got Section: %s\n", page_name);
998 		if (strstr (page_content, "*Note") ||
999 		    strstr (page_content, "*note")) {
1000 		  notes = TRUE;
1001 		}
1002 		if (strstr (page_content, "* Menu:")) {
1003 		  newnode = yelp_info_parse_menu (tree, node, page_content, notes);
1004 		} else {
1005 		  newnode = xmlNewTextChild (*node, NULL,
1006 					     BAD_CAST "Section",
1007 					     NULL);
1008 		  if (!notes)
1009 		    info_body_text (newnode, NULL, NULL, FALSE, page_content);
1010 
1011 		  else {
1012 		    /* Handle notes here */
1013 		    info_process_text_notes (&newnode, page_content, tree);
1014 		  }
1015 		}
1016 		/* if we free the page content, now it's in the XML, we can
1017 		 * save some memory */
1018 		g_free (page_content);
1019 		page_content = NULL;
1020 
1021                 if (gtk_tree_model_iter_parent (GTK_TREE_MODEL (tree), &parent, &iter)) {
1022                     gchar *parent_id;
1023                     gtk_tree_model_get (GTK_TREE_MODEL (tree), &parent,
1024                                         INFO_PARSER_COLUMN_PAGE_NO, &parent_id,
1025                                         -1);
1026                     xmlNewProp (newnode, BAD_CAST "up", BAD_CAST parent_id);
1027                     g_free (parent_id);
1028                 }
1029 
1030 		xmlNewProp (newnode, BAD_CAST "id",
1031 			    BAD_CAST page_no);
1032 		xmlNewProp (newnode, BAD_CAST "name",
1033 			    BAD_CAST page_name);
1034 		if (gtk_tree_model_iter_children (GTK_TREE_MODEL (tree),
1035 				&children,
1036 				&iter))
1037 		  parse_tree_level (tree, &newnode, children);
1038 		g_free (page_no);
1039 		g_free (page_name);
1040 	}
1041 	while (gtk_tree_model_iter_next (GTK_TREE_MODEL (tree), &iter));
1042 	debug_print (DB_DEBUG, "Ascending\n");
1043 }
1044 
1045 xmlDocPtr
yelp_info_parser_parse_tree(GtkTreeStore * tree)1046 yelp_info_parser_parse_tree (GtkTreeStore *tree)
1047 {
1048 	xmlDocPtr doc;
1049 	xmlNodePtr node;
1050 	GtkTreeIter iter;
1051 
1052 	/*
1053 	xmlChar *xmlbuf;
1054 	int bufsiz;
1055 	*/
1056 
1057 	doc = xmlNewDoc (BAD_CAST "1.0");
1058 	node = xmlNewNode (NULL, BAD_CAST "Info");
1059 	xmlDocSetRootElement (doc, node);
1060 
1061 	/* functions I will want:
1062 	gtk_tree_model_get_iter_first;
1063 	gtk_tree_model_iter_next;
1064 	gtk_tree_model_iter_children;
1065 	*/
1066 
1067 	if (gtk_tree_model_get_iter_first (GTK_TREE_MODEL (tree), &iter))
1068 		parse_tree_level (tree, &node, iter);
1069 	d (else debug_print (DB_DEBUG, "Empty tree?\n"));
1070 
1071 	/*
1072 	xmlDocDumpFormatMemory (doc, &xmlbuf, &bufsiz, 1);
1073 	g_print ("XML follows:\n%s\n", xmlbuf);
1074 	*/
1075 
1076 	return doc;
1077 }
1078 
1079 gboolean
resolve_frag_id(GtkTreeModel * model,GtkTreePath * path,GtkTreeIter * iter,gpointer data)1080 resolve_frag_id (GtkTreeModel *model, GtkTreePath *path, GtkTreeIter *iter,
1081 		 gpointer data)
1082 {
1083   gchar *page_no = NULL;
1084   gchar *page_name = NULL;
1085   gchar **xref = data;
1086 
1087   gtk_tree_model_get (GTK_TREE_MODEL (model), iter,
1088 		      INFO_PARSER_COLUMN_PAGE_NO, &page_no,
1089 		      INFO_PARSER_COLUMN_PAGE_NAME, &page_name,
1090 		      -1);
1091   if (g_str_equal (page_name, *xref)) {
1092     g_free (*xref);
1093     *xref = g_strdup (page_name);
1094     *xref = g_strdelimit (*xref, " ", '_');
1095 
1096     g_free (page_name);
1097     g_free (page_no);
1098     return TRUE;
1099   }
1100   g_free (page_name);
1101   g_free (page_no);
1102 
1103   return FALSE;
1104 }
1105 
1106 gboolean
get_menuoptions(gchar * line,gchar ** title,gchar ** ref,gchar ** desc,gchar ** xref)1107 get_menuoptions (gchar *line, gchar **title, gchar **ref, gchar **desc,
1108 		 gchar **xref)
1109 {
1110   /* Since info is actually braindead and allows .s in
1111    * its references, we gotta carefully extract things
1112    * as .s can be in either the title or desc
1113    */
1114   gchar *tmp = line;
1115   gchar *tfind = NULL;
1116 
1117   if (!g_str_has_prefix (line, "* "))
1118     return FALSE;
1119 
1120   tfind = strchr (tmp, ':');
1121 
1122   if (!tfind) /* No : on the line, bail out */
1123     return FALSE;
1124 
1125   (*title) = g_strndup (tmp, tfind-tmp);
1126 
1127   if (tfind[1] == ':') { /* This happens if the title and ref are the same
1128 			 * Most menus are of this type
1129 			 */
1130 
1131     (*ref) = NULL; /* There is no second part.  The rest is description */
1132 
1133     tmp++;
1134     (*xref) = g_strndup (tmp, tfind-tmp);
1135     g_strstrip (*xref);
1136 
1137     tfind+=2;
1138     (*desc) = g_strdup (tfind);
1139   } else { /* The other type of menu option */
1140     gchar *td = NULL;
1141 
1142     tfind++;
1143     td = strchr (tfind, '.');
1144     if (!td)
1145       return FALSE;
1146     (*ref) = g_strndup (tfind, td-tfind);
1147     (*xref) = g_strdup (*ref);
1148     g_strstrip (*xref);
1149 
1150     td++;
1151     (*desc) = g_strdup (td);
1152   }
1153   return TRUE;
1154 }
1155 
1156 /* Find the first non whitespace character in str or return pointer to the
1157  * '\0' if there isn't one. */
1158 static gchar*
first_non_space(gchar * str)1159 first_non_space (gchar* str)
1160 {
1161   /* As long as str is null terminated, this is ok! */
1162   while (g_ascii_isspace (*str)) str++;
1163   return str;
1164 }
1165 
1166 static xmlNodePtr
yelp_info_parse_menu(GtkTreeStore * tree,xmlNodePtr * node,gchar * page_content,gboolean notes)1167 yelp_info_parse_menu (GtkTreeStore *tree, xmlNodePtr *node,
1168 		      gchar *page_content, gboolean notes)
1169 {
1170   gchar **split;
1171   gchar **menuitems;
1172   gchar *tmp = NULL;
1173   xmlNodePtr newnode, menu_node = NULL, mholder = NULL;
1174   int i=0;
1175 
1176   split = g_strsplit (page_content, "* Menu:", 2);
1177 
1178   newnode = xmlNewChild (*node, NULL,
1179 			 BAD_CAST "Section", NULL);
1180 
1181 
1182   if (!notes)
1183     info_body_text (newnode, NULL, NULL, FALSE, split[0]);
1184   else {
1185     info_process_text_notes (&newnode, split[0], tree);
1186   }
1187 
1188   menuitems = g_strsplit (split[1], "\n", -1);
1189   g_strfreev (split);
1190 
1191   /* The output xml should look something like the following:
1192 
1193      <menu>
1194        <menuholder>
1195          <a href="xref:Help-Inv">Help-Inv</a>
1196          <para1>Invisible text in Emacs Info.</para1>
1197        </menuholder>
1198        <menuholder>
1199          <a href="xref:Help-M">Help-M</a>
1200          <para1>Menus.</para1>
1201        </menuholder>
1202        ...
1203      </menu>
1204 
1205      (from the top page of info:info). Note the absence of *'s and
1206      ::'s on the links.
1207 
1208      If there's a line with no "* Blah::", it looks like a child of
1209      the previous menu item so (for i > 0) deal with that correctly by
1210      not "closing" the <menuholder> tag until we find the next
1211      start.
1212   */
1213 
1214   if (menuitems[0] != NULL) {
1215     /* If there are any menu items, make the <menu> node */
1216     menu_node = xmlNewChild (newnode, NULL, BAD_CAST "menu", NULL);
1217   }
1218 
1219   while (menuitems[i] != NULL) {
1220     gboolean menu = FALSE;
1221     gchar *title = NULL;
1222     gchar *ref = NULL;
1223     gchar *desc = NULL;
1224     gchar *xref = NULL;
1225     gchar *link_text = NULL;
1226     xmlNodePtr ref1;
1227 
1228     menu = get_menuoptions (menuitems[i], &title, &ref, &desc, &xref);
1229 
1230     if (menu && (*title == '\0' || *(title + 1) == '\0')) {
1231       g_warning ("Info title unexpectedly short for menu item (%s)",
1232                  menuitems[i]);
1233       menu = FALSE;
1234     }
1235 
1236     if (menu) {
1237       mholder = xmlNewChild (menu_node, NULL, BAD_CAST "menuholder", NULL);
1238       gtk_tree_model_foreach (GTK_TREE_MODEL (tree), resolve_frag_id, &xref);
1239 
1240       if (ref == NULL) { /* A standard type menu */
1241         /* title+2 skips the "* ". We know we haven't jumped over the
1242            end of the string because strlen (title) >= 3 */
1243         link_text = g_strdup (title+2);
1244 
1245         ref1 = xmlNewTextChild (mholder, NULL, BAD_CAST "a",
1246                                 BAD_CAST link_text);
1247 
1248         tmp = g_strconcat ("xref:", xref, NULL);
1249 	xmlNewProp (ref1, BAD_CAST "href", BAD_CAST tmp);
1250         g_free (tmp);
1251       } else { /* Indexy type menu  - we gotta do a  little work to fix the
1252 		* spacing
1253 		*/
1254 	gchar *spacing = ref;
1255 	gint c=0;
1256 	gchar *sp = NULL;
1257 
1258 	while (*spacing ==' ') {
1259 	  c++;
1260 	  spacing++;
1261 	}
1262 	sp = g_strndup (ref, c);
1263 
1264         link_text = g_strdup (title);
1265 
1266 	ref1 = xmlNewTextChild (mholder, NULL, BAD_CAST "a",
1267                                 BAD_CAST link_text);
1268         tmp = g_strconcat ("xref:", xref, NULL);
1269 	xmlNewProp (ref1, BAD_CAST "href", BAD_CAST tmp);
1270         g_free (tmp);
1271 	xmlNewTextChild (mholder, NULL, BAD_CAST "spacing",
1272 			 BAD_CAST sp);
1273 	tmp = g_strconcat (g_strstrip(ref), ".", NULL);
1274 	ref1 = xmlNewTextChild (mholder, NULL, BAD_CAST "a",
1275 				BAD_CAST tmp);
1276 	g_free (tmp);
1277         tmp = g_strconcat ("xref:", xref, NULL);
1278 	xmlNewProp (ref1, BAD_CAST "href", BAD_CAST tmp);
1279 
1280         g_free (tmp);
1281 	g_free (sp);
1282       }
1283 
1284       tmp = g_strconcat ("\n", first_non_space (desc), NULL);
1285 
1286       /*
1287         Don't print the link text a second time, because that looks
1288         really stupid.
1289 
1290         We don't do a straight check for equality because lots of
1291         .info files have something like
1292 
1293           * Foo::    Foo.
1294 
1295         Obviously if the longer explanation has more afterwards, we
1296         don't want to omit it, which is why there's the strlen test.
1297       */
1298       if (strncmp (link_text, tmp + 1, strlen (link_text)) ||
1299           strlen (link_text) + 1 < strlen (tmp + 1)) {
1300         xmlNewTextChild (mholder, NULL,
1301                          BAD_CAST "para1", BAD_CAST tmp);
1302       }
1303 
1304       g_free (tmp);
1305       g_free (link_text);
1306     }
1307     else if (*(menuitems[i]) != '\0') {
1308       tmp = g_strconcat ("\n", first_non_space (menuitems[i]), NULL);
1309       xmlNewTextChild (mholder ? mholder : menu_node,
1310                        NULL, BAD_CAST "para1",
1311 		       BAD_CAST tmp);
1312       g_free (tmp);
1313     }
1314     i++;
1315     g_free (title);
1316     g_free (ref);
1317     g_free (desc);
1318     g_free (xref);
1319 
1320   }
1321   g_strfreev (menuitems);
1322 
1323   return newnode;
1324 }
1325 
1326 void
info_process_text_notes(xmlNodePtr * node,gchar * content,GtkTreeStore * tree)1327 info_process_text_notes (xmlNodePtr *node, gchar *content, GtkTreeStore *tree)
1328 {
1329   gchar **notes;
1330   gchar **current;
1331   xmlNodePtr ref1;
1332   xmlNodePtr paragraph = NULL;
1333   gboolean first = TRUE;
1334 
1335   /*
1336     Split using the regular expression
1337 
1338       \*[Nn]ote(?!_)
1339 
1340     which deals with either case and the last bit is a lookahead so
1341     that we don't split on things of the form *Note:_, which aren't
1342     real notes.
1343   */
1344   notes = g_regex_split_simple ("\\*[Nn]ote(?!_)", content, 0, 0);
1345 
1346   for (current = notes; *current != NULL; current++) {
1347     gchar *url, **urls;
1348     gchar *append;
1349     gchar *alt_append, *alt_append1;
1350     gchar *link_text;
1351     gchar *href = NULL;
1352     gchar *break_point = NULL;
1353     gboolean broken = FALSE;
1354     if (first) {
1355       /* The first node is special.  It doesn't have a note ref at the
1356        * start, so we can just add it and forget about it.
1357        */
1358       first = FALSE;
1359       info_body_text (*node, &paragraph, NULL, TRUE, (*current));
1360       continue;
1361     }
1362 
1363     /* If we got to here, we now gotta parse the note reference */
1364     append = strchr (*current, ':');
1365     if (!append) {
1366       info_body_text (*node, &paragraph, NULL, TRUE, *current);
1367       continue;
1368     }
1369     append++;
1370     alt_append = append;
1371     alt_append1 = alt_append;
1372     append = strchr (append, ':');
1373     alt_append = strchr (alt_append, '.');
1374     if (alt_append && g_str_has_prefix (alt_append, ".info")) {
1375       broken = TRUE;
1376       alt_append++;
1377       alt_append = strchr (alt_append, '.');
1378     }
1379     alt_append1 = strchr (alt_append1, ',');
1380     if (!append && !alt_append && !alt_append1) {
1381       info_body_text (*node, &paragraph, NULL, TRUE, *current);
1382       continue;
1383     }
1384     if (!append || alt_append || alt_append1) {
1385       if (!append) {
1386         if (alt_append) append = alt_append;
1387         else append = alt_append1;
1388       }
1389       if ((alt_append && alt_append < append))
1390         append = alt_append;
1391       if (alt_append1 && alt_append1 < append)
1392         append = alt_append1;
1393     }
1394     append++;
1395     url = g_strndup (*current, append - (*current));
1396 
1397     /* Save a copy of the unadulterated link text for later. */
1398     link_text = g_strconcat ("*Note", url, NULL);
1399 
1400     /* By now, we got 2 things.  First, is append which is the (hopefully)
1401      * non-link text.  Second, we got a url.
1402      * The url can be in several forms:
1403      * 1. linkend::
1404      * 2. linkend:(infofile)Linkend.
1405      * 3. Title: Linkend.
1406      * 4. Title: Linkend, (pretty sure this is just broken)
1407      * 5. Title: (infofile.info)Linkend.
1408      * All possibilities should have been picked up.
1409      * Here:
1410      * Clean up the split.  Should be left with a real url and
1411      * a list of fragments that should be linked
1412      * Also goes through and removes extra spaces, leaving only one
1413      * space in place of many
1414      */
1415     urls = g_strsplit (url, "\n", -1);
1416     break_point = strchr (url, '\n');
1417     while (break_point) {
1418       *break_point = ' ';
1419       break_point = strchr (++break_point, '\n');
1420     }
1421     break_point = strchr (url, ' ');
1422     while (break_point) {
1423       if (*(break_point+1) == ' ') {
1424         /* Massive space.  Fix. */
1425         gchar *next = break_point;
1426         gchar *url_copy;
1427         gchar *old = url;
1428         while (*next == ' ')
1429           next++;
1430         next--;
1431         url_copy = g_strndup (url, break_point-url);
1432         url = g_strconcat (url_copy, next, NULL);
1433         g_free (old);
1434         break_point = strchr (url, ' ');
1435         g_free (url_copy);
1436       } else {
1437         break_point++;
1438         break_point = strchr (break_point, ' ');
1439       }
1440     }
1441     if (url[strlen(url)-1] == '.') { /* The 2nd or 3rd sort of link */
1442       gchar *stop = NULL;
1443       gchar *lurl = NULL;
1444       gchar *zloc = NULL;
1445       stop = strchr (url, ':');
1446       lurl = strchr (stop, '(');
1447       if (!lurl) { /* 3rd type of link */
1448         gchar *link;
1449         gint length;
1450         stop++;
1451         link = g_strdup (stop);
1452         link = g_strstrip (link);
1453         length = strlen (link) - 1;
1454         link[length] = '\0';
1455         href = g_strconcat ("xref:", link, NULL);
1456         link[length] = 'a';
1457         g_free (link);
1458 
1459 
1460       } else { /* 2nd type of link.  Easy. Provided .info is neglected ;) */
1461         if (broken) {
1462           gchar *new_url;
1463           gchar *info;
1464           gchar *stripped;
1465 
1466           new_url = g_strdup (lurl);
1467           info = strstr (new_url, ".info)");
1468           stripped = g_strndup (new_url, info-new_url);
1469           info +=5;
1470           lurl = g_strconcat (stripped, info, NULL);
1471           g_free (stripped);
1472           g_free (new_url);
1473         }
1474         zloc = &(lurl[strlen(lurl)-1]);
1475         *zloc = '\0';
1476         href = g_strconcat ("info:", lurl, NULL);
1477         *zloc = 'a';
1478       }
1479     } else { /* First kind of link */
1480       gchar *tmp1;
1481       gchar *frag;
1482 
1483       tmp1 = strchr (url, ':');
1484       if (!tmp1)
1485         frag = g_strdup (url);
1486       else
1487         frag = g_strndup (url, tmp1 - url);
1488       g_strstrip (frag);
1489       gtk_tree_model_foreach (GTK_TREE_MODEL (tree), resolve_frag_id, &frag);
1490       href = g_strconcat ("xref:", frag, NULL);
1491       g_free (frag);
1492     }
1493 
1494     /* Check we've got a valid paragraph node */
1495     if (!paragraph) {
1496       paragraph = xmlNewChild (*node, NULL, BAD_CAST "para", NULL);
1497     }
1498 
1499     /*
1500       Now we're supposed to actually render the link. I have a list of
1501       bits of URL and actually this is really easy - I want to have
1502       the link *text* exactly the same as it appeared in the .info
1503       file, so don't use the list of strings urls, instead use the
1504       whole lot: url (complete with embedded newlines etc.)
1505     */
1506     ref1 = xmlNewTextChild (paragraph, NULL, BAD_CAST "a",
1507                             BAD_CAST link_text);
1508     g_free (link_text);
1509     xmlNewProp (ref1, BAD_CAST "href", BAD_CAST href);
1510 
1511     g_strfreev (urls);
1512 
1513     /* Finally, we can add the following text as required */
1514     info_body_text (*node, &paragraph, NULL, TRUE, append);
1515 
1516     g_free (url);
1517     g_free (href);
1518   }
1519   g_strfreev (notes);
1520 }
1521