1 /* -*- Mode: C; tab-width: 8; indent-tabs-mode: nil -*- */
2 /*
3 * Copyright (C) 2005 Davyd Madeley <davyd@madeley.id.au>
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation; either version 2 of the
8 * License, or (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public
16 * License along with this program; if not, see <http://www.gnu.org/licenses/>.
17 *
18 * Author: Davyd Madeley <davyd@madeley.id.au>
19 */
20
21 #ifdef HAVE_CONFIG_H
22 #include <config.h>
23 #endif
24
25 #include <glib.h>
26 #include <gtk/gtk.h>
27 #include <string.h>
28
29 #include "yelp-info-parser.h"
30 #include "yelp-magic-decompressor.h"
31 #include "yelp-debug.h"
32
33
/*
  Forward declarations for file-local (static) helpers.
*/
34 static GtkTreeIter * find_real_top (GtkTreeModel *model,
35 GtkTreeIter *it);
36 static GtkTreeIter * find_real_sibling (GtkTreeModel *model,
37 GtkTreeIter *it,
38 GtkTreeIter *comp);
39 static xmlNodePtr yelp_info_parse_menu (GtkTreeStore *tree,
40 xmlNodePtr *node,
41 gchar *page_content,
42 gboolean notes);
43 static gboolean get_menuoptions (gchar *line,
44 gchar **title,
45 gchar **ref,
46 gchar **desc,
47 gchar **xref);
48 static gboolean resolve_frag_id (GtkTreeModel *model,
49 GtkTreePath *path,
50 GtkTreeIter *iter,
51 gpointer data);
52 static void info_process_text_notes (xmlNodePtr *node,
53 gchar *content,
54 GtkTreeStore
55 *tree);
56
/*
  Indexed by the heading level computed by header_underline_level
  (1, 2 or 3); slot 0 is NULL because level 0 means "not a heading".
*/
57 /*
58 Used to output the correct <heading level="?" /> tag.
59 */
60 static const gchar* level_headings[] = { NULL, "1", "2", "3" };
61
62 static GHashTable *
info_image_get_attributes(gchar const * string)63 info_image_get_attributes (gchar const* string)
64 {
65 GMatchInfo *match_info;
66 GRegex *regex;
67 GHashTable *h;
68
69 h = 0;
70 regex = g_regex_new ("([^\\s][^\\s=]+)=(?:([^\\s \"]+)|(?:\"((?:[^\\\"]|\\\\[\\\\\"])*)\"))", 0, 0, NULL);
71 g_regex_match (regex, string, 0, &match_info);
72 while (g_match_info_matches (match_info))
73 {
74 gchar *key;
75 gchar *value;
76
77 if (!h)
78 h = g_hash_table_new (g_str_hash, g_str_equal);
79 key = g_match_info_fetch (match_info, 1);
80 value = g_match_info_fetch (match_info, 2);
81 if (!*value)
82 value = g_match_info_fetch (match_info, 3);
83 g_hash_table_insert (h, key, value);
84 g_match_info_next (match_info, NULL);
85 }
86 g_match_info_free (match_info);
87 g_regex_unref (regex);
88
89 return h;
90 }
91
/*
  String-literal building blocks for the info tag delimiters.  The
  *_RE variants wrap the bracket in a character class ("[[]", "[]]")
  so that it is taken literally inside a regular expression.  The
  INFO_C_* variants start with '@' instead of the NUL byte.
*/
92 /*
93 info elements look like \0\b[<TAGNAME>\0\b] and take attribute=value
94 pairs, i.e. for image: \0\b[image src="foo.png" \0\b]
95 */
96 #define INFO_TAG_0 "\0"
97 #define INFO_TAG_1 "\b"
98 #define INFO_TAG_OPEN_2 INFO_TAG_1 "["
99 #define INFO_TAG_CLOSE_2 INFO_TAG_1 "]"
100 #define INFO_TAG_OPEN_2_RE INFO_TAG_1 "[[]"
101 #define INFO_TAG_CLOSE_2_RE INFO_TAG_1 "[]]"
/* NOTE(review): INFO_TAG_OPEN_2 / INFO_TAG_CLOSE_2 already begin with
   INFO_TAG_1, so the four macros below expand to "\0" "\b" "\b" ...
   (two backspaces).  Only elements [0] and [1] of INFO_TAG_OPEN are
   read in open_info_file, so this may be harmless - confirm. */
102 #define INFO_TAG_OPEN INFO_TAG_0 INFO_TAG_1 INFO_TAG_OPEN_2
103 #define INFO_TAG_CLOSE INFO_TAG_0 INFO_TAG_1 INFO_TAG_CLOSE_2
104 #define INFO_TAG_OPEN_RE INFO_TAG_0 INFO_TAG_1 INFO_TAG_OPEN_2_RE
105 #define INFO_TAG_CLOSE_RE INFO_TAG_0 INFO_TAG_1 INFO_TAG_CLOSE_2_RE
106 /* C/glib * cannot really handle \0 in strings, convert to '@' */
107 #define INFO_C_TAG_0 "@"
108 #define INFO_C_TAG_OPEN INFO_C_TAG_0 INFO_TAG_OPEN_2
109 #define INFO_C_TAG_CLOSE INFO_C_TAG_0 INFO_TAG_CLOSE_2
110 #define INFO_C_TAG_OPEN_RE INFO_C_TAG_0 INFO_TAG_OPEN_2_RE
111 #define INFO_C_TAG_CLOSE_RE INFO_C_TAG_0 INFO_TAG_CLOSE_2_RE
112 #define INFO_C_IMAGE_TAG_OPEN INFO_C_TAG_OPEN "image"
113 #define INFO_C_IMAGE_TAG_OPEN_RE INFO_C_TAG_OPEN_RE "image"
114
115 static xmlNodePtr
info_insert_image(xmlNodePtr parent,GMatchInfo * match_info)116 info_insert_image (xmlNodePtr parent, GMatchInfo *match_info)
117 {
118 gchar *title;
119 gchar *text;
120 gchar *alt;
121 xmlNodePtr img;
122 GHashTable *h = info_image_get_attributes (g_match_info_fetch (match_info, 1));
123 gchar *source;
124 if (h)
125 source = (gchar*)g_hash_table_lookup (h, "src");
126
127 if (!h || !source || !*source)
128 return xmlNewTextChild (parent, NULL, BAD_CAST "para",
129 BAD_CAST "[broken image]");
130
131 title = (gchar*)g_hash_table_lookup (h, "title");
132 text = (gchar*)g_hash_table_lookup (h, "text");
133 alt = (gchar*)g_hash_table_lookup (h, "alt");
134 g_hash_table_destroy (h);
135 img = xmlNewChild (parent, NULL, BAD_CAST "img", NULL);
136 xmlNewProp (img, BAD_CAST "src", BAD_CAST source);
137 xmlNewProp (img, BAD_CAST "title", BAD_CAST (title ? title : ""));
138 xmlNewProp (img, BAD_CAST "text", BAD_CAST (text ? text : ""));
139 xmlNewProp (img, BAD_CAST "alt", BAD_CAST (alt ? alt : ""));
140 g_free (source);
141 g_free (title);
142 g_free (alt);
143 return parent;
144 }
145
146 /*
147 If every element of `str' is `ch' then return TRUE, else FALSE.
148 */
149 static gboolean
string_all_char_p(const gchar * str,gchar ch)150 string_all_char_p (const gchar* str, gchar ch)
151 {
152 for (; *str; str++) {
153 if (*str != ch) return FALSE;
154 }
155 return TRUE;
156 }
157
158 /*
159 If `line' is a line of '*', '=' or '-', return 1,2,3 respectively
160 for the heading level. If it's anything else, return 0.
161 */
162 static int
header_underline_level(const gchar * line)163 header_underline_level (const gchar* line)
164 {
165 if (*line != '*' && *line != '=' && *line != '-')
166 return 0;
167
168 if (string_all_char_p (line, '*')) return 1;
169 if (string_all_char_p (line, '=')) return 2;
170 if (string_all_char_p (line, '-')) return 3;
171
172 return 0;
173 }
174
175 /*
176 Use g_strjoinv to join up the strings from `strings', but they might
177 not actually be a null-terminated array. `end' should be strings+n,
178 where I want the first n strings (strings+0, ..., strings+(n-1)). It
179 shouldn't point outside of the array allocated, but it can point at
180 the null string at the end.
181 */
182 static gchar*
join_strings_subset(const gchar * separator,gchar ** strings,gchar ** end)183 join_strings_subset (const gchar *separator,
184 gchar** strings, gchar** end)
185 {
186 gchar *ptr;
187 gchar *glob;
188
189 g_assert(end > strings);
190
191 ptr = *end;
192 *end = NULL;
193
194 glob = g_strjoinv (separator, strings);
195 *end = ptr;
196 return glob;
197 }
198
199 /*
200 Create a text node, child of `parent', with the lines strictly
201 between `first' and `last'.
202 */
203 static void
lines_subset_text_child(xmlNodePtr parent,xmlNsPtr ns,gchar ** first,gchar ** last)204 lines_subset_text_child (xmlNodePtr parent, xmlNsPtr ns,
205 gchar** first, gchar** last)
206 {
207 /* TODO? Currently we're copying the split strings again, which is
208 less efficient than somehow storing lengths and using a sort of
209 window on `content'. But that's much more difficult, so unless
210 there's a problem, let's go with the stupid approach. */
211 gchar *glob;
212
213 if (last > first) {
214 glob = join_strings_subset ("\n", first, last);
215 xmlAddChild (parent, xmlNewText (BAD_CAST glob));
216 g_free (glob);
217 }
218 }
219
220 /*
221 Convert body text CONTENT to xml nodes. This function is responsible
222 for spotting headings etc and splitting them out correctly.
223
224 paragraph is as described in info_body_text, but cannot be null.
225
226 If `inline_p' is true, end with a <para1> tag. Otherwise, end with a
227 <para> tag.
228
229 TODO: IWBN add a regex match for *Note: here and call the *Note ==>
230 <a href> logic of info_process_text_notes from here.
231 */
232 static void
info_body_parse_text(xmlNodePtr parent,xmlNodePtr * paragraph,xmlNsPtr ns,gboolean inline_p,const gchar * content)233 info_body_parse_text (xmlNodePtr parent, xmlNodePtr *paragraph,
234 xmlNsPtr ns,
235 gboolean inline_p, const gchar *content)
236 {
237 /* The easiest things to spot are headings: they look like a line of
238 * '*','=' or '-', corresponding to heading levels 1,2 or 3. To spot
239 * them, we split content into single lines and work with them. */
240 gchar **lines = g_strsplit (content, "\n", 0);
241 gchar **first = lines, **last = lines;
242 int header_level;
243 xmlNodePtr header_node;
244
245 /* Deal with the possibility that `content' is empty */
246 if (*lines == NULL) {
247 if (!inline_p) {
248 xmlNewTextChild (parent, NULL, BAD_CAST "para", BAD_CAST "");
249 }
250 return;
251 }
252
253 /* Use a pair of pointers, first and last, which point to two lines,
254 * the chunk of the body we're displaying (inclusive) */
255 for (; *last; last++) {
256
257 /* Check for a blank line */
258 if (**last == '\0') {
259 if (last != first) {
260 if (!*paragraph) {
261 *paragraph = xmlNewChild (parent, ns, BAD_CAST "para", NULL);
262 }
263 lines_subset_text_child (*paragraph, ns, first, last);
264 }
265 /* On the next iteration, last==first both pointing at the next
266 line. */
267 first = last+1;
268 *paragraph = NULL;
269
270 continue;
271 }
272
273 /* Check for a header */
274 header_level = header_underline_level (*last);
275 if (header_level) {
276 /* Write out any lines beforehand */
277 lines_subset_text_child (parent, ns, first, last-1);
278 /* Now write out the actual header line */
279 header_node = xmlNewTextChild (parent, ns, BAD_CAST "header",
280 BAD_CAST *(last-1));
281 xmlNewProp (header_node, BAD_CAST "level",
282 BAD_CAST level_headings[header_level]);
283
284 first = last+1;
285 last = first-1;
286 }
287 }
288
289 /* Write out any lines left */
290 if (!*paragraph) {
291 *paragraph = xmlNewChild (parent, ns, BAD_CAST "para", NULL);
292 }
293 lines_subset_text_child (*paragraph, ns, first, last);
294
295 g_strfreev (lines);
296 }
297
298 /*
299 info_body_text is responsible for taking a hunk of the info page's
300 body and turning it into paragraph tags. It searches out images and
301 marks them up properly if necessary.
302
303 parent should be the node in which we're currently storing text and
304 paragraph a pointer to a <para> tag or NULL. At blank lines, we
305 finish with the current para tag and switch to a new one.
306
307 It uses info_body_parse_text to mark up the actual bits of text.
308 */
309 static void
info_body_text(xmlNodePtr parent,xmlNodePtr * paragraph,xmlNsPtr ns,gboolean inline_p,gchar const * content)310 info_body_text (xmlNodePtr parent, xmlNodePtr *paragraph, xmlNsPtr ns,
311 gboolean inline_p, gchar const *content)
312 {
313 xmlNodePtr thepara = NULL;
314 gint content_len;
315 gint pos;
316 GRegex *regex;
317 GMatchInfo *match_info;
318 gchar *after;
319 if (paragraph == NULL) paragraph = &thepara;
320
321 if (!strstr (content, INFO_C_IMAGE_TAG_OPEN)) {
322 info_body_parse_text (parent, paragraph, ns, inline_p, content);
323 return;
324 }
325
326 content_len = strlen (content);
327 pos = 0;
328 regex = g_regex_new ("(" INFO_C_IMAGE_TAG_OPEN_RE "((?:[^" INFO_TAG_1 "]|[^" INFO_C_TAG_0 "]+" INFO_TAG_1 ")*)" INFO_C_TAG_CLOSE_RE ")", 0, 0, NULL);
329
330 g_regex_match (regex, content, 0, &match_info);
331 while (g_match_info_matches (match_info))
332 {
333 gint image_start;
334 gint image_end;
335 gboolean image_found = g_match_info_fetch_pos (match_info, 0,
336 &image_start, &image_end);
337 gchar *before = g_strndup (&content[pos], image_start - pos);
338 pos = image_end + 1;
339 info_body_parse_text (parent, paragraph, NULL, TRUE, before);
340 g_free (before);
341
342 /* End the paragraph that was before */
343 *paragraph = NULL;
344
345 if (image_found)
346 info_insert_image (parent, match_info);
347 g_match_info_next (match_info, NULL);
348 }
349 after = g_strndup (&content[pos], content_len - pos);
350 info_body_parse_text (parent, paragraph, NULL, TRUE, after);
351 g_free (after);
352 }
353
354 /* Part 1: Parse File Into Tree Store */
355
356 enum
357 {
358 PAGE_TAG_TABLE,
359 PAGE_NODE,
360 PAGE_INDIRECT,
361 PAGE_OTHER
362 };
363
364 static int
page_type(char * page)365 page_type (char *page)
366 {
367 if (g_ascii_strncasecmp (page, "Tag Table:\n", 11) == 0)
368 return PAGE_TAG_TABLE;
369 else if (g_ascii_strncasecmp (page, "Indirect:\n", 10) == 0)
370 return PAGE_INDIRECT;
371 else if (g_ascii_strncasecmp (page, "File:", 5) == 0 ||
372 g_ascii_strncasecmp (page, "Node:", 5) == 0)
373 return PAGE_NODE;
374
375 else
376 return PAGE_OTHER;
377 }
378
379 static char
open_info_file(const gchar * file)380 *open_info_file (const gchar *file)
381 {
382 GFile *gfile;
383 GConverter *converter;
384 GFileInputStream *file_stream;
385 GInputStream *stream;
386 gchar buf[1024];
387 gssize bytes;
388 GString *string;
389 gchar *str;
390 gsize i;
391
392 gfile = g_file_new_for_path (file);
393 file_stream = g_file_read (gfile, NULL, NULL);
394 converter = (GConverter *) yelp_magic_decompressor_new ();
395 stream = g_converter_input_stream_new ((GInputStream *) file_stream, converter);
396 string = g_string_new (NULL);
397
398 while ((bytes = g_input_stream_read (stream, buf, 1024, NULL, NULL)) > 0)
399 g_string_append_len (string, buf, bytes);
400
401 g_object_unref (stream);
402
403 str = string->str;
404
405 /* C/glib * cannot really handle \0 in strings, convert. */
406 for (i = 0; i < (string->len - 1); i++)
407 if (str[i] == INFO_TAG_OPEN[0] && str[i+1] == INFO_TAG_OPEN[1])
408 str[i] = INFO_C_TAG_OPEN[0];
409
410 g_string_free (string, FALSE);
411
412 return str;
413 }
414
415 static gchar *
find_info_part(gchar * part_name,const gchar * base)416 find_info_part (gchar *part_name, const gchar *base)
417 {
418 /* New and improved. We now assume that all parts are
419 * in the same subdirectory as the base file. Makes
420 * life much simpler and is (afaict) always true
421 */
422 gchar *path;
423 gchar *tmp;
424 gchar *bzfname, *gzfname, *lzfd, *fname;
425 gchar *uri = NULL;
426 tmp = g_strrstr (base, "/");
427 path = g_strndup (base, tmp-base);
428
429 bzfname = g_strconcat (path, "/", part_name, ".bz2", NULL);
430 gzfname = g_strconcat (path, "/", part_name, ".gz", NULL);
431 lzfd = g_strconcat (path, "/", part_name, ".lzma", NULL);
432 fname = g_strconcat (path, "/", part_name, NULL);
433
434 if (g_file_test (bzfname, G_FILE_TEST_EXISTS))
435 uri = g_strdup (bzfname);
436 else if (g_file_test (gzfname, G_FILE_TEST_EXISTS))
437 uri = g_strdup (gzfname);
438 else if (g_file_test (lzfd, G_FILE_TEST_EXISTS))
439 uri = g_strdup (lzfd);
440 else if (g_file_test (fname, G_FILE_TEST_EXISTS))
441 uri = g_strdup (fname);
442
443 g_free (bzfname);
444 g_free (gzfname);
445 g_free (lzfd);
446 g_free (fname);
447 g_free (path);
448 return uri;
449
450 }
451
452 static char
process_indirect_map(char * page,const gchar * file)453 *process_indirect_map (char *page, const gchar *file)
454 {
455 char **lines;
456 char **ptr;
457 char *composite = NULL;
458 size_t composite_len = 0;
459
460 lines = g_strsplit (page, "\n", 0);
461
462 /*
463 Go backwards down the list so that we allocate composite
464 big enough the first time around.
465 */
466 for (ptr = lines + 1; *ptr != NULL; ptr++);
467 for (ptr--; ptr != lines; ptr--)
468 {
469 char **items;
470 char *filename;
471 char *str;
472 char **pages;
473 gsize offset;
474 gsize plength;
475
476 debug_print (DB_DEBUG, "Line: %s\n", *ptr);
477 items = g_strsplit (*ptr, ": ", 2);
478
479 if (items[0])
480 {
481 filename = find_info_part (items[0], file);
482 str = open_info_file (filename);
483 if (!str) {
484 g_strfreev (items);
485 continue;
486 }
487 pages = g_strsplit (str, "", 2);
488 g_free (str);
489 if (!pages[1]) {
490 g_strfreev (items);
491 g_strfreev (pages);
492 continue;
493 }
494
495 offset = (gsize) atoi (items[1]);
496 plength = strlen(pages[1]);
497
498 debug_print (DB_DEBUG, "Need to make string %s+%i bytes = %i\n",
499 items[1], plength,
500 offset + plength);
501
502 if (!composite) /* not yet created, malloc it */
503 {
504 composite_len = offset + plength;
505 composite = g_malloc (sizeof (char) *
506 (composite_len + 1));
507 memset (composite, '-', composite_len);
508 composite[composite_len] = '\0';
509 }
510
511 /* Because we're going down the list
512 * backwards, plength should always be short
513 * enough to fit in the memory allocated. But
514 * in case something's broken/malicious, we
515 * should check anyway.
516 */
517 if (offset > composite_len)
518 continue;
519 if (plength + offset + 1 > composite_len)
520 plength = composite_len - offset - 1;
521
522 composite[offset] = '';
523 memcpy (composite + offset + 1, pages[1], plength);
524
525 g_free (filename);
526 g_strfreev (pages);
527 }
528
529 g_strfreev (items);
530 }
531
532 g_strfreev (lines);
533
534 return composite;
535 }
536
537 /*
538 Open up the relevant info file and read it all into memory. If there
539 is an indirect table thingy, we resolve that as we go.
540
541 Returns a NULL-terminated list of pointers to pages on success and
542 NULL otherwise.
543 */
544 static gchar**
expanded_info_file(const gchar * file)545 expanded_info_file (const gchar *file)
546 {
547 gchar *slurp = open_info_file (file);
548 gchar **page_list;
549 gchar **page;
550
551 if (!slurp) return NULL;
552
553 /* TODO: There's a lot of copying of bits of memory here. With a bit
554 * more effort we could avoid it. Either we should fix this or
555 * measure the time taken and decide it's irrelevant...
556 *
557 * Note: \x1f\n is ^_\n
558 */
559 page_list = g_strsplit (slurp, "\x1f\n", 0);
560
561 g_free (slurp);
562
563 for (page = page_list; *page != NULL; page++) {
564 if (page_type (*page) == PAGE_INDIRECT) {
565
566 slurp = process_indirect_map (*page, file);
567 g_strfreev (page_list);
568
569 if (!slurp)
570 return NULL;
571
572 page_list = g_strsplit (slurp, "\x1f\n", 0);
573 g_free (slurp);
574 break;
575 }
576 }
577
578 return page_list;
579 }
580
/*
  Extract the value that follows `key' in `source'.  For example
  "blah" is extracted from "Node: blah," for the key "Node: ".

  The value runs up to, but does not include, the first character
  that appears in `end', or to the end of the string if there is
  none.  If a character from `cancel' shows up before that point the
  lookup is abandoned.  `cancel' may be NULL to switch that test off.

  Returns a newly allocated copy of the value, or NULL if `key' is not
  found or the lookup was cancelled.
*/
static char*
get_value_after_ext (const char *source, const char *key,
                     const char *end, const char *cancel)
{
    const char *value;
    size_t value_len;

    value = strstr (source, key);
    if (value == NULL)
        return NULL;

    /* The value begins right behind the key */
    value += strlen (key);
    value_len = strcspn (value, end);

    /* A cancel character strictly before the end voids the value */
    if (cancel != NULL && strcspn (value, cancel) < value_len)
        return NULL;

    return g_strndup (value, value_len);
}
613
/*
  get_value_after_ext with the delimiters used in node header lines:
  the value ends at a comma and is dropped if a newline or \x7f comes
  first.
*/
static char*
get_value_after (const char* source, const char *key)
{
    const char *value_end = ",";
    const char *value_cancel = "\n\x7f";

    return get_value_after_ext (source, key, value_end, value_cancel);
}
619
620 static int
node2page(GHashTable * nodes2pages,char * node)621 node2page (GHashTable *nodes2pages, char *node)
622 {
623 gpointer p;
624
625 if (g_hash_table_lookup_extended (nodes2pages, node,
626 NULL, &p))
627 return GPOINTER_TO_INT(p);
628
629 /* This shouldn't happen: we should only ever have to look up pages
630 * that exist. */
631 g_return_val_if_reached (0);
632 }
633
634 static GtkTreeIter
node2iter(GHashTable * nodes2iters,char * node)635 *node2iter (GHashTable *nodes2iters, char *node)
636 {
637 GtkTreeIter *iter;
638
639 iter = g_hash_table_lookup (nodes2iters, node);
640 d (if (!iter) debug_print (DB_WARN, "Could not retrieve iter for node !%s!\n", node));
641 return iter;
642 }
643
644 GtkTreeIter
find_real_top(GtkTreeModel * model,GtkTreeIter * it)645 *find_real_top (GtkTreeModel *model, GtkTreeIter *it)
646 {
647 GtkTreeIter *r = NULL;
648 GtkTreeIter *tmp = NULL;
649
650 if (!it)
651 return NULL;
652
653 r = gtk_tree_iter_copy (it);
654 tmp = g_malloc0 (sizeof (GtkTreeIter));
655 while (gtk_tree_model_iter_parent (model, tmp, r)) {
656 gtk_tree_iter_free (r);
657 r = gtk_tree_iter_copy (tmp);
658 }
659 g_free (tmp);
660
661 return r;
662 }
663
find_real_sibling(GtkTreeModel * model,GtkTreeIter * it,GtkTreeIter * comp)664 GtkTreeIter * find_real_sibling (GtkTreeModel *model,
665 GtkTreeIter *it, GtkTreeIter *comp)
666 {
667 GtkTreeIter *r;
668 GtkTreeIter *tmp = NULL;
669 gboolean result = FALSE;
670 gchar *title;
671 gchar *reftitle;
672
673 if (!it) {
674 return NULL;
675 }
676
677 r = gtk_tree_iter_copy (it);
678 tmp = gtk_tree_iter_copy (it);
679
680 reftitle = gtk_tree_model_get_string_from_iter (model, comp);
681
682 result = gtk_tree_model_iter_parent (model, r, it);
683 if (!result)
684 return it;
685
686 title = gtk_tree_model_get_string_from_iter (model, r);
687
688 while (!g_str_equal (title, reftitle) && result) {
689 gtk_tree_iter_free (tmp);
690 tmp = gtk_tree_iter_copy (r);
691 result = gtk_tree_model_iter_parent (model, r, tmp);
692 if (result)
693 title = gtk_tree_model_get_string_from_iter (model, r);
694 }
695
696 if (!g_str_equal (title, reftitle))
697 {
698 gtk_tree_iter_free (tmp);
699 tmp = NULL;
700 }
701
702 gtk_tree_iter_free (r);
703 g_free (title);
704 g_free (reftitle);
705 return tmp;
706
707 }
708
709 static void
process_page(GtkTreeStore * tree,GHashTable * nodes2pages,GHashTable * nodes2iters,int * processed_table,char ** page_list,char * page_text)710 process_page (GtkTreeStore *tree,
711 GHashTable *nodes2pages, GHashTable *nodes2iters,
712 int *processed_table, char **page_list, char *page_text)
713 {
714 GtkTreeIter *iter;
715
716 char **parts;
717 char *node;
718 char *up;
719 char *prev;
720 char *next;
721 gchar *tmp;
722
723 int page;
724
725 /* split out the header line and the text */
726 parts = g_strsplit (page_text, "\n", 3);
727
728 node = get_value_after (parts[0], "Node: ");
729 up = get_value_after (parts[0], "Up: ");
730 prev = get_value_after (parts[0], "Prev: ");
731 next = get_value_after (parts[0], "Next: ");
732
733 if (next && g_str_equal (next, "Top")) {
734 g_free (next);
735 next = NULL;
736 }
737 if (node && g_str_equal (node, "Top") && prev != NULL) {
738 g_free (prev);
739 prev = NULL;
740 }
741
742 /* check to see if this page has been processed already */
743 page = node2page (nodes2pages, node);
744 if (processed_table[page]) {
745 return;
746 }
747 processed_table[page] = 1;
748
749 debug_print (DB_DEBUG, "-- Processing Page %s\n\tParent: %s\n", node, up);
750
751 iter = g_slice_alloc0 (sizeof (GtkTreeIter));
752 /* check to see if we need to process our parent and siblings */
753 if (up && g_ascii_strncasecmp (up, "(dir)", 5) && strcmp (up, "Top"))
754 {
755 page = node2page (nodes2pages, up);
756 if (!processed_table[page])
757 {
758 debug_print (DB_DEBUG, "%% Processing Node %s\n", up);
759 process_page (tree, nodes2pages,
760 nodes2iters, processed_table, page_list,
761 page_list[page]);
762 }
763 }
764 if (prev && g_ascii_strncasecmp (prev, "(dir)", 5))
765 {
766 if (node && strncmp (node, "Top", 3)) {
767 /* Special case the Top node to always appear first */
768 } else {
769 page = node2page (nodes2pages, prev);
770 if (!processed_table[page])
771 {
772 debug_print (DB_DEBUG, "%% Processing Node %s\n", prev);
773 process_page (tree, nodes2pages,
774 nodes2iters, processed_table, page_list,
775 page_list[page]);
776 }
777 }
778 }
779
780 /* by this point our parent and older sibling should be processed */
781 if (!up || !g_ascii_strcasecmp (up, "(dir)"))
782 {
783 debug_print (DB_DEBUG, "\t> no parent\n");
784 if (!prev || !g_ascii_strcasecmp (prev, "(dir)"))
785 {
786 debug_print (DB_DEBUG, "\t> no previous\n");
787 gtk_tree_store_append (tree, iter, NULL);
788 }
789 else if (prev) {
790 GtkTreeIter *real;
791 real = find_real_top (GTK_TREE_MODEL (tree),
792 node2iter (nodes2iters, prev));
793 if (real) {
794 gtk_tree_store_insert_after (tree, iter, NULL,
795 real);
796 gtk_tree_iter_free (real);
797 }
798 else
799 gtk_tree_store_append (tree, iter, NULL);
800 }
801 }
802 else if (!prev || !g_ascii_strcasecmp (prev, "(dir)") || !strcmp (prev, up))
803 {
804 debug_print (DB_DEBUG, "\t> no previous\n");
805 gtk_tree_store_append (tree, iter,
806 node2iter (nodes2iters, up));
807 }
808 else if (up && prev)
809 {
810 GtkTreeIter *upit = node2iter (nodes2iters, up);
811 GtkTreeIter *previt = node2iter (nodes2iters, prev);
812 GtkTreeIter *nit = NULL;
813 debug_print (DB_DEBUG, "+++ Parent: %s Previous: %s\n", up, prev);
814
815 d (if (upit) debug_print (DB_DEBUG, "++++ Have parent node!\n"));
816 d (if (previt) debug_print (DB_DEBUG, "++++ Have previous node!\n"));
817 nit = find_real_sibling (GTK_TREE_MODEL (tree), previt, upit);
818 if (nit) {
819 gtk_tree_store_insert_after (tree, iter,
820 upit,
821 nit);
822 gtk_tree_iter_free (nit);
823 }
824 else
825 gtk_tree_store_append (tree, iter, upit);
826 }
827 else
828 {
829 debug_print (DB_DEBUG, "# node %s was not put in tree\n", node);
830 return;
831 }
832
833 d (if (iter) debug_print (DB_DEBUG, "Have a valid iter, storing for %s\n", node));
834
835 g_hash_table_insert (nodes2iters, g_strdup (node), iter);
836 debug_print (DB_DEBUG, "size: %i\n", g_hash_table_size (nodes2iters));
837
838 /*tmp = g_strdup_printf ("%i",
839 node2page (nodes2pages, node));*/
840 tmp = g_strdup (node);
841 tmp = g_strdelimit (tmp, " ", '_');
842 gtk_tree_store_set (tree, iter,
843 INFO_PARSER_COLUMN_PAGE_NO, tmp,
844 INFO_PARSER_COLUMN_PAGE_NAME, node,
845 INFO_PARSER_COLUMN_PAGE_CONTENT, parts[2],
846 -1);
847
848 g_free (tmp);
849 g_free (node);
850 g_free (up);
851 g_free (prev);
852 g_free (next);
853 g_strfreev (parts);
854 }
855
856 struct TagTableFix {
857 GHashTable *nodes2pages; /* Build this... */
858 GHashTable *pages2nodes; /* ... using this. */
859 };
860
861 static void
use_offset2page(gpointer o,gpointer p,gpointer ud)862 use_offset2page (gpointer o, gpointer p, gpointer ud)
863 {
864 struct TagTableFix* ttf = (struct TagTableFix*)ud;
865
866 const gchar* node = g_hash_table_lookup (ttf->pages2nodes, p);
867 if (node) {
868 g_hash_table_insert (ttf->nodes2pages, g_strdup (node), p);
869 }
870 }
871
872 /*
873 We had a nodes2offsets hash table, but sometimes these things
874 lie. How terribly rude. Anyway, use offsets2pages and pages2nodes
875 (and injectivity!) to construct the nodes2pages hash table.
876 */
877 static GHashTable *
make_nodes2pages(GHashTable * offsets2pages,GHashTable * pages2nodes)878 make_nodes2pages (GHashTable* offsets2pages,
879 GHashTable* pages2nodes)
880 {
881 struct TagTableFix ttf;
882
883 ttf.nodes2pages =
884 g_hash_table_new_full (g_str_hash, g_str_equal, g_free, NULL);
885 ttf.pages2nodes = pages2nodes;
886
887 g_hash_table_foreach (offsets2pages, use_offset2page, &ttf);
888
889 return ttf.nodes2pages;
890 }
891
892 /**
893 * Parse file into a GtkTreeStore containing useful information that we can
894 * later convert into a nice XML document or something else.
895 */
896 GtkTreeStore
yelp_info_parser_parse_file(char * file)897 *yelp_info_parser_parse_file (char *file)
898 {
899 gchar **page_list;
900 char **ptr;
901 int pages;
902 int offset;
903 GHashTable *offsets2pages = NULL;
904 GHashTable *pages2nodes = NULL;
905 GHashTable *nodes2pages = NULL;
906 GHashTable *nodes2iters = NULL;
907 int *processed_table;
908 GtkTreeStore *tree;
909 int pt;
910
911 page_list = expanded_info_file (file);
912 if (!page_list)
913 return NULL;
914
915 pages = 0;
916 offset = 0;
917
918 offsets2pages = g_hash_table_new_full (g_str_hash, g_str_equal, g_free,
919 NULL);
920 pages2nodes = g_hash_table_new_full (g_direct_hash, g_direct_equal, NULL,
921 g_free);
922
923 for (ptr = page_list; *ptr != NULL; ptr++)
924 {
925 gchar *name = NULL;
926
927 g_hash_table_insert (offsets2pages,
928 g_strdup_printf ("%i", offset),
929 GINT_TO_POINTER (pages));
930
931 name = get_value_after (*ptr, "Node: ");
932 if (name)
933 g_hash_table_insert (pages2nodes,
934 GINT_TO_POINTER (pages), name);
935
936 offset += strlen (*ptr);
937 if (pages) offset += 2;
938 pages++;
939
940 pt = page_type (*ptr);
941 if (pt == PAGE_INDIRECT) {
942 g_warning ("Found an indirect page in a file "
943 "we thought we'd expanded.");
944 }
945 }
946
947 /* Now consolidate (and correct) the two hash tables */
948 nodes2pages = make_nodes2pages (offsets2pages, pages2nodes);
949
950 g_hash_table_destroy (offsets2pages);
951 g_hash_table_destroy (pages2nodes);
952
953 processed_table = g_malloc0 (pages * sizeof (int));
954 tree = gtk_tree_store_new (INFO_PARSER_N_COLUMNS, G_TYPE_STRING, G_TYPE_STRING,
955 G_TYPE_STRING);
956 nodes2iters = g_hash_table_new_full (g_str_hash, g_str_equal, g_free,
957 (GDestroyNotify) gtk_tree_iter_free);
958
959 for (ptr = page_list; *ptr != NULL; ptr++)
960 {
961 if (page_type (*ptr) != PAGE_NODE) continue;
962 process_page (tree, nodes2pages, nodes2iters,
963 processed_table, page_list, *ptr);
964 }
965
966 g_strfreev (page_list);
967
968 g_hash_table_destroy (nodes2iters);
969 g_hash_table_destroy (nodes2pages);
970
971 g_free (processed_table);
972
973 return tree;
974 }
975
976 /* End Part 1 */
977 /* Part 2: Parse Tree into XML */
978 static void
parse_tree_level(GtkTreeStore * tree,xmlNodePtr * node,GtkTreeIter iter)979 parse_tree_level (GtkTreeStore *tree, xmlNodePtr *node, GtkTreeIter iter)
980 {
981 GtkTreeIter children, parent;
982 xmlNodePtr newnode;
983
984 char *page_no = NULL;
985 char *page_name = NULL;
986 char *page_content = NULL;
987 gboolean notes = FALSE;
988
989 debug_print (DB_DEBUG, "Decended\n");
990 do
991 {
992 gtk_tree_model_get (GTK_TREE_MODEL (tree), &iter,
993 INFO_PARSER_COLUMN_PAGE_NO, &page_no,
994 INFO_PARSER_COLUMN_PAGE_NAME, &page_name,
995 INFO_PARSER_COLUMN_PAGE_CONTENT, &page_content,
996 -1);
997 debug_print (DB_DEBUG, "Got Section: %s\n", page_name);
998 if (strstr (page_content, "*Note") ||
999 strstr (page_content, "*note")) {
1000 notes = TRUE;
1001 }
1002 if (strstr (page_content, "* Menu:")) {
1003 newnode = yelp_info_parse_menu (tree, node, page_content, notes);
1004 } else {
1005 newnode = xmlNewTextChild (*node, NULL,
1006 BAD_CAST "Section",
1007 NULL);
1008 if (!notes)
1009 info_body_text (newnode, NULL, NULL, FALSE, page_content);
1010
1011 else {
1012 /* Handle notes here */
1013 info_process_text_notes (&newnode, page_content, tree);
1014 }
1015 }
1016 /* if we free the page content, now it's in the XML, we can
1017 * save some memory */
1018 g_free (page_content);
1019 page_content = NULL;
1020
1021 if (gtk_tree_model_iter_parent (GTK_TREE_MODEL (tree), &parent, &iter)) {
1022 gchar *parent_id;
1023 gtk_tree_model_get (GTK_TREE_MODEL (tree), &parent,
1024 INFO_PARSER_COLUMN_PAGE_NO, &parent_id,
1025 -1);
1026 xmlNewProp (newnode, BAD_CAST "up", BAD_CAST parent_id);
1027 g_free (parent_id);
1028 }
1029
1030 xmlNewProp (newnode, BAD_CAST "id",
1031 BAD_CAST page_no);
1032 xmlNewProp (newnode, BAD_CAST "name",
1033 BAD_CAST page_name);
1034 if (gtk_tree_model_iter_children (GTK_TREE_MODEL (tree),
1035 &children,
1036 &iter))
1037 parse_tree_level (tree, &newnode, children);
1038 g_free (page_no);
1039 g_free (page_name);
1040 }
1041 while (gtk_tree_model_iter_next (GTK_TREE_MODEL (tree), &iter));
1042 debug_print (DB_DEBUG, "Ascending\n");
1043 }
1044
1045 xmlDocPtr
yelp_info_parser_parse_tree(GtkTreeStore * tree)1046 yelp_info_parser_parse_tree (GtkTreeStore *tree)
1047 {
1048 xmlDocPtr doc;
1049 xmlNodePtr node;
1050 GtkTreeIter iter;
1051
1052 /*
1053 xmlChar *xmlbuf;
1054 int bufsiz;
1055 */
1056
1057 doc = xmlNewDoc (BAD_CAST "1.0");
1058 node = xmlNewNode (NULL, BAD_CAST "Info");
1059 xmlDocSetRootElement (doc, node);
1060
1061 /* functions I will want:
1062 gtk_tree_model_get_iter_first;
1063 gtk_tree_model_iter_next;
1064 gtk_tree_model_iter_children;
1065 */
1066
1067 if (gtk_tree_model_get_iter_first (GTK_TREE_MODEL (tree), &iter))
1068 parse_tree_level (tree, &node, iter);
1069 d (else debug_print (DB_DEBUG, "Empty tree?\n"));
1070
1071 /*
1072 xmlDocDumpFormatMemory (doc, &xmlbuf, &bufsiz, 1);
1073 g_print ("XML follows:\n%s\n", xmlbuf);
1074 */
1075
1076 return doc;
1077 }
1078
1079 gboolean
resolve_frag_id(GtkTreeModel * model,GtkTreePath * path,GtkTreeIter * iter,gpointer data)1080 resolve_frag_id (GtkTreeModel *model, GtkTreePath *path, GtkTreeIter *iter,
1081 gpointer data)
1082 {
1083 gchar *page_no = NULL;
1084 gchar *page_name = NULL;
1085 gchar **xref = data;
1086
1087 gtk_tree_model_get (GTK_TREE_MODEL (model), iter,
1088 INFO_PARSER_COLUMN_PAGE_NO, &page_no,
1089 INFO_PARSER_COLUMN_PAGE_NAME, &page_name,
1090 -1);
1091 if (g_str_equal (page_name, *xref)) {
1092 g_free (*xref);
1093 *xref = g_strdup (page_name);
1094 *xref = g_strdelimit (*xref, " ", '_');
1095
1096 g_free (page_name);
1097 g_free (page_no);
1098 return TRUE;
1099 }
1100 g_free (page_name);
1101 g_free (page_no);
1102
1103 return FALSE;
1104 }
1105
1106 gboolean
get_menuoptions(gchar * line,gchar ** title,gchar ** ref,gchar ** desc,gchar ** xref)1107 get_menuoptions (gchar *line, gchar **title, gchar **ref, gchar **desc,
1108 gchar **xref)
1109 {
1110 /* Since info is actually braindead and allows .s in
1111 * its references, we gotta carefully extract things
1112 * as .s can be in either the title or desc
1113 */
1114 gchar *tmp = line;
1115 gchar *tfind = NULL;
1116
1117 if (!g_str_has_prefix (line, "* "))
1118 return FALSE;
1119
1120 tfind = strchr (tmp, ':');
1121
1122 if (!tfind) /* No : on the line, bail out */
1123 return FALSE;
1124
1125 (*title) = g_strndup (tmp, tfind-tmp);
1126
1127 if (tfind[1] == ':') { /* This happens if the title and ref are the same
1128 * Most menus are of this type
1129 */
1130
1131 (*ref) = NULL; /* There is no second part. The rest is description */
1132
1133 tmp++;
1134 (*xref) = g_strndup (tmp, tfind-tmp);
1135 g_strstrip (*xref);
1136
1137 tfind+=2;
1138 (*desc) = g_strdup (tfind);
1139 } else { /* The other type of menu option */
1140 gchar *td = NULL;
1141
1142 tfind++;
1143 td = strchr (tfind, '.');
1144 if (!td)
1145 return FALSE;
1146 (*ref) = g_strndup (tfind, td-tfind);
1147 (*xref) = g_strdup (*ref);
1148 g_strstrip (*xref);
1149
1150 td++;
1151 (*desc) = g_strdup (td);
1152 }
1153 return TRUE;
1154 }
1155
1156 /* Find the first non whitespace character in str or return pointer to the
1157 * '\0' if there isn't one. */
1158 static gchar*
first_non_space(gchar * str)1159 first_non_space (gchar* str)
1160 {
1161 /* As long as str is null terminated, this is ok! */
1162 while (g_ascii_isspace (*str)) str++;
1163 return str;
1164 }
1165
1166 static xmlNodePtr
yelp_info_parse_menu(GtkTreeStore * tree,xmlNodePtr * node,gchar * page_content,gboolean notes)1167 yelp_info_parse_menu (GtkTreeStore *tree, xmlNodePtr *node,
1168 gchar *page_content, gboolean notes)
1169 {
1170 gchar **split;
1171 gchar **menuitems;
1172 gchar *tmp = NULL;
1173 xmlNodePtr newnode, menu_node = NULL, mholder = NULL;
1174 int i=0;
1175
1176 split = g_strsplit (page_content, "* Menu:", 2);
1177
1178 newnode = xmlNewChild (*node, NULL,
1179 BAD_CAST "Section", NULL);
1180
1181
1182 if (!notes)
1183 info_body_text (newnode, NULL, NULL, FALSE, split[0]);
1184 else {
1185 info_process_text_notes (&newnode, split[0], tree);
1186 }
1187
1188 menuitems = g_strsplit (split[1], "\n", -1);
1189 g_strfreev (split);
1190
1191 /* The output xml should look something like the following:
1192
1193 <menu>
1194 <menuholder>
1195 <a href="xref:Help-Inv">Help-Inv</a>
1196 <para1>Invisible text in Emacs Info.</para1>
1197 </menuholder>
1198 <menuholder>
1199 <a href="xref:Help-M">Help-M</a>
1200 <para1>Menus.</para1>
1201 </menuholder>
1202 ...
1203 </menu>
1204
1205 (from the top page of info:info). Note the absence of *'s and
1206 ::'s on the links.
1207
1208 If there's a line with no "* Blah::", it looks like a child of
1209 the previous menu item so (for i > 0) deal with that correctly by
1210 not "closing" the <menuholder> tag until we find the next
1211 start.
1212 */
1213
1214 if (menuitems[0] != NULL) {
1215 /* If there are any menu items, make the <menu> node */
1216 menu_node = xmlNewChild (newnode, NULL, BAD_CAST "menu", NULL);
1217 }
1218
1219 while (menuitems[i] != NULL) {
1220 gboolean menu = FALSE;
1221 gchar *title = NULL;
1222 gchar *ref = NULL;
1223 gchar *desc = NULL;
1224 gchar *xref = NULL;
1225 gchar *link_text = NULL;
1226 xmlNodePtr ref1;
1227
1228 menu = get_menuoptions (menuitems[i], &title, &ref, &desc, &xref);
1229
1230 if (menu && (*title == '\0' || *(title + 1) == '\0')) {
1231 g_warning ("Info title unexpectedly short for menu item (%s)",
1232 menuitems[i]);
1233 menu = FALSE;
1234 }
1235
1236 if (menu) {
1237 mholder = xmlNewChild (menu_node, NULL, BAD_CAST "menuholder", NULL);
1238 gtk_tree_model_foreach (GTK_TREE_MODEL (tree), resolve_frag_id, &xref);
1239
1240 if (ref == NULL) { /* A standard type menu */
1241 /* title+2 skips the "* ". We know we haven't jumped over the
1242 end of the string because strlen (title) >= 3 */
1243 link_text = g_strdup (title+2);
1244
1245 ref1 = xmlNewTextChild (mholder, NULL, BAD_CAST "a",
1246 BAD_CAST link_text);
1247
1248 tmp = g_strconcat ("xref:", xref, NULL);
1249 xmlNewProp (ref1, BAD_CAST "href", BAD_CAST tmp);
1250 g_free (tmp);
1251 } else { /* Indexy type menu - we gotta do a little work to fix the
1252 * spacing
1253 */
1254 gchar *spacing = ref;
1255 gint c=0;
1256 gchar *sp = NULL;
1257
1258 while (*spacing ==' ') {
1259 c++;
1260 spacing++;
1261 }
1262 sp = g_strndup (ref, c);
1263
1264 link_text = g_strdup (title);
1265
1266 ref1 = xmlNewTextChild (mholder, NULL, BAD_CAST "a",
1267 BAD_CAST link_text);
1268 tmp = g_strconcat ("xref:", xref, NULL);
1269 xmlNewProp (ref1, BAD_CAST "href", BAD_CAST tmp);
1270 g_free (tmp);
1271 xmlNewTextChild (mholder, NULL, BAD_CAST "spacing",
1272 BAD_CAST sp);
1273 tmp = g_strconcat (g_strstrip(ref), ".", NULL);
1274 ref1 = xmlNewTextChild (mholder, NULL, BAD_CAST "a",
1275 BAD_CAST tmp);
1276 g_free (tmp);
1277 tmp = g_strconcat ("xref:", xref, NULL);
1278 xmlNewProp (ref1, BAD_CAST "href", BAD_CAST tmp);
1279
1280 g_free (tmp);
1281 g_free (sp);
1282 }
1283
1284 tmp = g_strconcat ("\n", first_non_space (desc), NULL);
1285
1286 /*
1287 Don't print the link text a second time, because that looks
1288 really stupid.
1289
1290 We don't do a straight check for equality because lots of
1291 .info files have something like
1292
1293 * Foo:: Foo.
1294
1295 Obviously if the longer explanation has more afterwards, we
1296 don't want to omit it, which is why there's the strlen test.
1297 */
1298 if (strncmp (link_text, tmp + 1, strlen (link_text)) ||
1299 strlen (link_text) + 1 < strlen (tmp + 1)) {
1300 xmlNewTextChild (mholder, NULL,
1301 BAD_CAST "para1", BAD_CAST tmp);
1302 }
1303
1304 g_free (tmp);
1305 g_free (link_text);
1306 }
1307 else if (*(menuitems[i]) != '\0') {
1308 tmp = g_strconcat ("\n", first_non_space (menuitems[i]), NULL);
1309 xmlNewTextChild (mholder ? mholder : menu_node,
1310 NULL, BAD_CAST "para1",
1311 BAD_CAST tmp);
1312 g_free (tmp);
1313 }
1314 i++;
1315 g_free (title);
1316 g_free (ref);
1317 g_free (desc);
1318 g_free (xref);
1319
1320 }
1321 g_strfreev (menuitems);
1322
1323 return newnode;
1324 }
1325
1326 void
info_process_text_notes(xmlNodePtr * node,gchar * content,GtkTreeStore * tree)1327 info_process_text_notes (xmlNodePtr *node, gchar *content, GtkTreeStore *tree)
1328 {
1329 gchar **notes;
1330 gchar **current;
1331 xmlNodePtr ref1;
1332 xmlNodePtr paragraph = NULL;
1333 gboolean first = TRUE;
1334
1335 /*
1336 Split using the regular expression
1337
1338 \*[Nn]ote(?!_)
1339
1340 which deals with either case and the last bit is a lookahead so
1341 that we don't split on things of the form *Note:_, which aren't
1342 real notes.
1343 */
1344 notes = g_regex_split_simple ("\\*[Nn]ote(?!_)", content, 0, 0);
1345
1346 for (current = notes; *current != NULL; current++) {
1347 gchar *url, **urls;
1348 gchar *append;
1349 gchar *alt_append, *alt_append1;
1350 gchar *link_text;
1351 gchar *href = NULL;
1352 gchar *break_point = NULL;
1353 gboolean broken = FALSE;
1354 if (first) {
1355 /* The first node is special. It doesn't have a note ref at the
1356 * start, so we can just add it and forget about it.
1357 */
1358 first = FALSE;
1359 info_body_text (*node, ¶graph, NULL, TRUE, (*current));
1360 continue;
1361 }
1362
1363 /* If we got to here, we now gotta parse the note reference */
1364 append = strchr (*current, ':');
1365 if (!append) {
1366 info_body_text (*node, ¶graph, NULL, TRUE, *current);
1367 continue;
1368 }
1369 append++;
1370 alt_append = append;
1371 alt_append1 = alt_append;
1372 append = strchr (append, ':');
1373 alt_append = strchr (alt_append, '.');
1374 if (alt_append && g_str_has_prefix (alt_append, ".info")) {
1375 broken = TRUE;
1376 alt_append++;
1377 alt_append = strchr (alt_append, '.');
1378 }
1379 alt_append1 = strchr (alt_append1, ',');
1380 if (!append && !alt_append && !alt_append1) {
1381 info_body_text (*node, ¶graph, NULL, TRUE, *current);
1382 continue;
1383 }
1384 if (!append || alt_append || alt_append1) {
1385 if (!append) {
1386 if (alt_append) append = alt_append;
1387 else append = alt_append1;
1388 }
1389 if ((alt_append && alt_append < append))
1390 append = alt_append;
1391 if (alt_append1 && alt_append1 < append)
1392 append = alt_append1;
1393 }
1394 append++;
1395 url = g_strndup (*current, append - (*current));
1396
1397 /* Save a copy of the unadulterated link text for later. */
1398 link_text = g_strconcat ("*Note", url, NULL);
1399
1400 /* By now, we got 2 things. First, is append which is the (hopefully)
1401 * non-link text. Second, we got a url.
1402 * The url can be in several forms:
1403 * 1. linkend::
1404 * 2. linkend:(infofile)Linkend.
1405 * 3. Title: Linkend.
1406 * 4. Title: Linkend, (pretty sure this is just broken)
1407 * 5. Title: (infofile.info)Linkend.
1408 * All possibilities should have been picked up.
1409 * Here:
1410 * Clean up the split. Should be left with a real url and
1411 * a list of fragments that should be linked
1412 * Also goes through and removes extra spaces, leaving only one
1413 * space in place of many
1414 */
1415 urls = g_strsplit (url, "\n", -1);
1416 break_point = strchr (url, '\n');
1417 while (break_point) {
1418 *break_point = ' ';
1419 break_point = strchr (++break_point, '\n');
1420 }
1421 break_point = strchr (url, ' ');
1422 while (break_point) {
1423 if (*(break_point+1) == ' ') {
1424 /* Massive space. Fix. */
1425 gchar *next = break_point;
1426 gchar *url_copy;
1427 gchar *old = url;
1428 while (*next == ' ')
1429 next++;
1430 next--;
1431 url_copy = g_strndup (url, break_point-url);
1432 url = g_strconcat (url_copy, next, NULL);
1433 g_free (old);
1434 break_point = strchr (url, ' ');
1435 g_free (url_copy);
1436 } else {
1437 break_point++;
1438 break_point = strchr (break_point, ' ');
1439 }
1440 }
1441 if (url[strlen(url)-1] == '.') { /* The 2nd or 3rd sort of link */
1442 gchar *stop = NULL;
1443 gchar *lurl = NULL;
1444 gchar *zloc = NULL;
1445 stop = strchr (url, ':');
1446 lurl = strchr (stop, '(');
1447 if (!lurl) { /* 3rd type of link */
1448 gchar *link;
1449 gint length;
1450 stop++;
1451 link = g_strdup (stop);
1452 link = g_strstrip (link);
1453 length = strlen (link) - 1;
1454 link[length] = '\0';
1455 href = g_strconcat ("xref:", link, NULL);
1456 link[length] = 'a';
1457 g_free (link);
1458
1459
1460 } else { /* 2nd type of link. Easy. Provided .info is neglected ;) */
1461 if (broken) {
1462 gchar *new_url;
1463 gchar *info;
1464 gchar *stripped;
1465
1466 new_url = g_strdup (lurl);
1467 info = strstr (new_url, ".info)");
1468 stripped = g_strndup (new_url, info-new_url);
1469 info +=5;
1470 lurl = g_strconcat (stripped, info, NULL);
1471 g_free (stripped);
1472 g_free (new_url);
1473 }
1474 zloc = &(lurl[strlen(lurl)-1]);
1475 *zloc = '\0';
1476 href = g_strconcat ("info:", lurl, NULL);
1477 *zloc = 'a';
1478 }
1479 } else { /* First kind of link */
1480 gchar *tmp1;
1481 gchar *frag;
1482
1483 tmp1 = strchr (url, ':');
1484 if (!tmp1)
1485 frag = g_strdup (url);
1486 else
1487 frag = g_strndup (url, tmp1 - url);
1488 g_strstrip (frag);
1489 gtk_tree_model_foreach (GTK_TREE_MODEL (tree), resolve_frag_id, &frag);
1490 href = g_strconcat ("xref:", frag, NULL);
1491 g_free (frag);
1492 }
1493
1494 /* Check we've got a valid paragraph node */
1495 if (!paragraph) {
1496 paragraph = xmlNewChild (*node, NULL, BAD_CAST "para", NULL);
1497 }
1498
1499 /*
1500 Now we're supposed to actually render the link. I have a list of
1501 bits of URL and actually this is really easy - I want to have
1502 the link *text* exactly the same as it appeared in the .info
1503 file, so don't use the list of strings urls, instead use the
1504 whole lot: url (complete with embedded newlines etc.)
1505 */
1506 ref1 = xmlNewTextChild (paragraph, NULL, BAD_CAST "a",
1507 BAD_CAST link_text);
1508 g_free (link_text);
1509 xmlNewProp (ref1, BAD_CAST "href", BAD_CAST href);
1510
1511 g_strfreev (urls);
1512
1513 /* Finally, we can add the following text as required */
1514 info_body_text (*node, ¶graph, NULL, TRUE, append);
1515
1516 g_free (url);
1517 g_free (href);
1518 }
1519 g_strfreev (notes);
1520 }
1521