1 /* -*- Mode: C; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * Copyright (C) 2003-2020 Shaun McCance  <shaunm@gnome.org>
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License as
7  * published by the Free Software Foundation; either version 2 of the
8  * License, or (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public
16  * License along with this program; if not, see <http://www.gnu.org/licenses/>.
17  *
18  * Author: Shaun McCance  <shaunm@gnome.org>
19  */
20 
21 #ifdef HAVE_CONFIG_H
22 #include <config.h>
23 #endif
24 
25 #include <glib.h>
26 #include <glib/gi18n.h>
27 #include <gtk/gtk.h>
28 #include <libxml/parser.h>
29 #include <libxml/parserInternals.h>
30 #include <libxml/xinclude.h>
31 
32 #include "yelp-docbook-document.h"
33 #include "yelp-error.h"
34 #include "yelp-settings.h"
35 #include "yelp-storage.h"
36 #include "yelp-transform.h"
37 #include "yelp-debug.h"
38 
/* Path of the XSLT stylesheet handed to yelp_transform_new() when a
 * parsed document is transformed. */
#define STYLESHEET DATADIR"/yelp/xslt/db2html.xsl"
/* Catalog list assumed when XML_CATALOG_FILES is not set in the environment. */
#define DEFAULT_CATALOG "file:///etc/xml/catalog"
/* Catalog shipped with Yelp; class_init prepends it to XML_CATALOG_FILES
 * unless the variable already mentions it. */
#define YELP_CATALOG "file://"DATADIR"/yelp/dtd/catalog"
42 
/* Lifecycle of a DocBook document.  The value lives in the private
 * struct's `state` field and is read and written under its mutex by
 * docbook_request_page() and docbook_process(). */
typedef enum {
    DOCBOOK_STATE_BLANK,   /* Brand new, run transform as needed */
    DOCBOOK_STATE_PARSING, /* Parsing/transforming document, please wait */
    DOCBOOK_STATE_PARSED,  /* All done, if we ain't got it, it ain't here */
    DOCBOOK_STATE_STOP     /* Stop everything now, object to be disposed */
} DocbookState;
49 
/* Column indices for an (id, title) table.
 * NOTE(review): no use of these constants is visible in this part of the
 * file -- presumably they index a tree/list model; confirm before relying
 * on them. */
enum {
    DOCBOOK_COLUMN_ID,
    DOCBOOK_COLUMN_TITLE
};
54 
/* GObject lifecycle handlers, installed in class_init. */
static void           yelp_docbook_document_dispose         (GObject                  *object);
static void           yelp_docbook_document_finalize        (GObject                  *object);

/* YelpDocument virtual method implementations, installed in class_init. */
static void           docbook_index             (YelpDocument         *document);
static gboolean       docbook_request_page      (YelpDocument         *document,
                                                 const gchar          *page_id,
                                                 GCancellable         *cancellable,
                                                 YelpDocumentCallback  callback,
                                                 gpointer              user_data,
                                                 GDestroyNotify        notify);

/* Parsing worker, transform teardown and file-change driven reloading. */
static void           docbook_process           (YelpDocbookDocument  *docbook);
static void           docbook_disconnect        (YelpDocbookDocument  *docbook);
static gboolean       docbook_reload            (YelpDocbookDocument  *docbook);
static void           docbook_monitor_changed   (GFileMonitor         *monitor,
                                                 GFile                *file,
                                                 GFile                *other_file,
                                                 GFileMonitorEvent     event_type,
                                                 YelpDocbookDocument  *docbook);

/* Recursive walk over the parsed XML tree and its helper predicates. */
static void           docbook_walk              (YelpDocbookDocument  *docbook);
static gboolean       docbook_walk_chunkQ       (YelpDocbookDocument  *docbook,
                                                 xmlNodePtr            cur,
                                                 gint                  depth,
                                                 gint                  max_depth);
static gboolean       docbook_walk_divisionQ    (YelpDocbookDocument  *docbook,
                                                 xmlNodePtr            cur);
static gchar *        docbook_walk_get_title    (YelpDocbookDocument  *docbook,
                                                 xmlNodePtr            cur);
static gchar *        docbook_walk_get_keywords (YelpDocbookDocument  *docbook,
                                                 xmlNodePtr            cur);

/* Handlers for the YelpTransform signals connected in docbook_process(),
 * plus a callback taking the finalized transform as a plain pointer. */
static void           transform_chunk_ready     (YelpTransform        *transform,
                                                 gchar                *chunk_id,
                                                 YelpDocbookDocument  *docbook);
static void           transform_finished        (YelpTransform        *transform,
                                                 YelpDocbookDocument  *docbook);
static void           transform_error           (YelpTransform        *transform,
                                                 YelpDocbookDocument  *docbook);
static void           transform_finalized       (YelpDocbookDocument  *docbook,
                                                 gpointer              transform);
96 
/* Per-instance private data of YelpDocbookDocument. */
typedef struct _YelpDocbookDocumentPrivate  YelpDocbookDocumentPrivate;
struct _YelpDocbookDocumentPrivate {
    /* Current lifecycle state; accessed under `mutex`. */
    DocbookState   state;

    /* Initialized in instance init, cleared in finalize. */
    GMutex         mutex;
    /* Thread created with g_thread_new() to run docbook_process(). */
    GThread       *thread;

    /* NOTE(review): presumably the thread and busy flag of docbook_index(),
     * whose body is not visible here; `index_running` is consulted before
     * reloading. */
    GThread       *index;
    gboolean       index_running;

    /* TRUE from the moment a docbook_process() thread is spawned until
     * that function returns. */
    gboolean       process_running;
    /* Set just before yelp_transform_start(); cleared by docbook_disconnect(). */
    gboolean       transform_running;

    /* Active transform and the ids of the handlers connected to its
     * "chunk-ready", "finished" and "error" signals. */
    YelpTransform *transform;
    guint          chunk_ready;
    guint          finished;
    guint          error;

    /* Parsed document and the node currently visited by docbook_walk(). */
    xmlDocPtr     xmldoc;
    xmlNodePtr    xmlcur;
    /* Deepest nesting level at which a division still becomes its own page. */
    gint          max_depth;
    /* Nesting level of `xmlcur` during the walk; the root is 0. */
    gint          cur_depth;
    /* Id of the page that nodes visited by the walk are currently mapped to. */
    gchar        *cur_page_id;
    /* Id of the previously visited page, used for prev/next links. */
    gchar        *cur_prev_id;
    /* Id of the root page: the root element's id, or "//index". */
    gchar        *root_id;

    /* NULL-terminated array of monitors, one per search-path entry. */
    GFileMonitor **monitors;
    /* g_get_monotonic_time() value recorded by the last docbook_reload(). */
    gint64         reload_time;

    /* Node path -> auto-generated id; created on first use by docbook_walk(). */
    GHashTable   *autoids;
};
128 
/* Registers YelpDocbookDocument as a subclass of YELP_TYPE_DOCUMENT with
 * private instance data (yelp_docbook_document_get_instance_private). */
G_DEFINE_TYPE_WITH_PRIVATE (YelpDocbookDocument, yelp_docbook_document, YELP_TYPE_DOCUMENT)
130 
131 /******************************************************************************/
132 
133 static void
134 yelp_docbook_document_class_init (YelpDocbookDocumentClass *klass)
135 {
136     GObjectClass      *object_class   = G_OBJECT_CLASS (klass);
137     YelpDocumentClass *document_class = YELP_DOCUMENT_CLASS (klass);
138     const gchar *catalog = g_getenv ("XML_CATALOG_FILES");
139 
140     /* We ship a faux DocBook catalog. It just contains the common entity
141      * definitions. Documents can use the named entities they expect to
142      * be able to use, but we don't have to depend on docbook-dtds.
143      */
144     if (catalog == NULL)
145         catalog = DEFAULT_CATALOG;
146     if (!strstr(catalog, YELP_CATALOG)) {
147         gchar *newcat = g_strconcat (YELP_CATALOG, " ", catalog, NULL);
148         g_setenv ("XML_CATALOG_FILES", newcat, TRUE);
149         g_free (newcat);
150     }
151 
152     object_class->dispose = yelp_docbook_document_dispose;
153     object_class->finalize = yelp_docbook_document_finalize;
154 
155     document_class->index = docbook_index;
156     document_class->request_page = docbook_request_page;
157 }
158 
159 static void
yelp_docbook_document_init(YelpDocbookDocument * docbook)160 yelp_docbook_document_init (YelpDocbookDocument *docbook)
161 {
162     YelpDocbookDocumentPrivate *priv = yelp_docbook_document_get_instance_private (docbook);
163 
164     priv->state = DOCBOOK_STATE_BLANK;
165     priv->autoids = NULL;
166 
167     g_mutex_init (&priv->mutex);
168 }
169 
170 static void
yelp_docbook_document_dispose(GObject * object)171 yelp_docbook_document_dispose (GObject *object)
172 {
173     gint i;
174     YelpDocbookDocumentPrivate *priv =
175         yelp_docbook_document_get_instance_private (YELP_DOCBOOK_DOCUMENT (object));
176 
177     if (priv->monitors != NULL) {
178         for (i = 0; priv->monitors[i]; i++) {
179             g_object_unref (priv->monitors[i]);
180         }
181         g_free (priv->monitors);
182         priv->monitors = NULL;
183     }
184 
185     G_OBJECT_CLASS (yelp_docbook_document_parent_class)->dispose (object);
186 }
187 
188 static void
yelp_docbook_document_finalize(GObject * object)189 yelp_docbook_document_finalize (GObject *object)
190 {
191     YelpDocbookDocumentPrivate *priv =
192         yelp_docbook_document_get_instance_private (YELP_DOCBOOK_DOCUMENT (object));
193 
194     if (priv->xmldoc)
195         xmlFreeDoc (priv->xmldoc);
196 
197     g_free (priv->cur_page_id);
198     g_free (priv->cur_prev_id);
199     g_free (priv->root_id);
200 
201     g_hash_table_destroy (priv->autoids);
202 
203     g_mutex_clear (&priv->mutex);
204 
205     G_OBJECT_CLASS (yelp_docbook_document_parent_class)->finalize (object);
206 }
207 
208 /******************************************************************************/
209 
210 YelpDocument *
yelp_docbook_document_new(YelpUri * uri)211 yelp_docbook_document_new (YelpUri *uri)
212 {
213     YelpDocbookDocument *docbook;
214     YelpDocbookDocumentPrivate *priv;
215     gchar **path;
216     gint path_i;
217 
218     g_return_val_if_fail (uri != NULL, NULL);
219 
220     docbook = (YelpDocbookDocument *) g_object_new (YELP_TYPE_DOCBOOK_DOCUMENT,
221                                                     "document-uri", uri,
222                                                     NULL);
223     priv = yelp_docbook_document_get_instance_private (docbook);
224 
225     path = yelp_uri_get_search_path (uri);
226     priv->monitors = g_new0 (GFileMonitor*, g_strv_length (path) + 1);
227     for (path_i = 0; path[path_i]; path_i++) {
228         GFile *file;
229         file = g_file_new_for_path (path[path_i]);
230         priv->monitors[path_i] = g_file_monitor (file,
231                                                  G_FILE_MONITOR_SEND_MOVED,
232                                                  NULL, NULL);
233         g_signal_connect (priv->monitors[path_i], "changed",
234                           G_CALLBACK (docbook_monitor_changed),
235                           docbook);
236         g_object_unref (file);
237     }
238     g_strfreev (path);
239     return (YelpDocument *) docbook;
240 }
241 
242 /******************************************************************************/
243 
244 static gboolean
docbook_request_page(YelpDocument * document,const gchar * page_id,GCancellable * cancellable,YelpDocumentCallback callback,gpointer user_data,GDestroyNotify notify)245 docbook_request_page (YelpDocument         *document,
246                       const gchar          *page_id,
247                       GCancellable         *cancellable,
248                       YelpDocumentCallback  callback,
249                       gpointer              user_data,
250                       GDestroyNotify        notify)
251 {
252     YelpDocbookDocumentPrivate *priv =
253         yelp_docbook_document_get_instance_private (YELP_DOCBOOK_DOCUMENT (document));
254     gchar *docuri;
255     GError *error;
256     gboolean handled;
257 
258     debug_print (DB_FUNCTION, "entering\n");
259     debug_print (DB_ARG, "    page_id=\"%s\"\n", page_id);
260 
261     if (page_id == NULL)
262         page_id = "//index";
263 
264     handled =
265         YELP_DOCUMENT_CLASS (yelp_docbook_document_parent_class)->request_page (document,
266                                                                                 page_id,
267                                                                                 cancellable,
268                                                                                 callback,
269                                                                                 user_data,
270                                                                                 notify);
271     if (handled) {
272         return handled;
273     }
274 
275     g_mutex_lock (&priv->mutex);
276 
277     switch (priv->state) {
278     case DOCBOOK_STATE_BLANK:
279         priv->state = DOCBOOK_STATE_PARSING;
280         priv->process_running = TRUE;
281         g_object_ref (document);
282         priv->thread = g_thread_new ("docbook-page",
283                                      (GThreadFunc)(GCallback) docbook_process,
284                                      document);
285         break;
286     case DOCBOOK_STATE_PARSING:
287         break;
288     case DOCBOOK_STATE_PARSED:
289     case DOCBOOK_STATE_STOP:
290         docuri = yelp_uri_get_document_uri (yelp_document_get_uri (document));
291         error = g_error_new (YELP_ERROR, YELP_ERROR_NOT_FOUND,
292                              _("The page ‘%s’ was not found in the document ‘%s’."),
293                              page_id, docuri);
294         g_free (docuri);
295         yelp_document_signal (document, page_id,
296                               YELP_DOCUMENT_SIGNAL_ERROR,
297                               error);
298         g_error_free (error);
299         break;
300     default:
301         g_assert_not_reached ();
302         break;
303     }
304 
305     g_mutex_unlock (&priv->mutex);
306     return FALSE;
307 }
308 
309 /******************************************************************************/
310 
311 static void
docbook_process(YelpDocbookDocument * docbook)312 docbook_process (YelpDocbookDocument *docbook)
313 {
314     YelpDocbookDocumentPrivate *priv = yelp_docbook_document_get_instance_private (docbook);
315     YelpDocument *document = YELP_DOCUMENT (docbook);
316     GFile *file = NULL;
317     gchar *filepath = NULL;
318     xmlDocPtr xmldoc = NULL;
319     xmlNodePtr xmlcur = NULL;
320     xmlChar *id = NULL;
321     xmlParserCtxtPtr parserCtxt = NULL;
322     GError *error;
323     gint  params_i = 0;
324     gchar **params = NULL;
325 
326     debug_print (DB_FUNCTION, "entering\n");
327 
328     file = yelp_uri_get_file (yelp_document_get_uri (document));
329     if (file == NULL) {
330         error = g_error_new (YELP_ERROR, YELP_ERROR_NOT_FOUND,
331                              _("The file does not exist."));
332         yelp_document_error_pending (document, error);
333         g_error_free (error);
334         goto done;
335     }
336 
337     filepath = g_file_get_path (file);
338     g_object_unref (file);
339     if (!g_file_test (filepath, G_FILE_TEST_IS_REGULAR)) {
340         error = g_error_new (YELP_ERROR, YELP_ERROR_NOT_FOUND,
341                              _("The file ‘%s’ does not exist."),
342                              filepath);
343         yelp_document_error_pending (document, error);
344         g_error_free (error);
345         goto done;
346     }
347 
348     parserCtxt = xmlNewParserCtxt ();
349     xmldoc = xmlCtxtReadFile (parserCtxt,
350                               filepath, NULL,
351                               XML_PARSE_DTDLOAD | XML_PARSE_NOCDATA |
352                               XML_PARSE_NOENT   | XML_PARSE_NONET   );
353 
354     if (xmldoc)
355         xmlcur = xmlDocGetRootElement (xmldoc);
356 
357     if (xmldoc == NULL || xmlcur == NULL) {
358         error = g_error_new (YELP_ERROR, YELP_ERROR_PROCESSING,
359                              _("The file ‘%s’ could not be parsed because it is"
360                                " not a well-formed XML document."),
361                              filepath);
362         yelp_document_error_pending (document, error);
363         g_error_free (error);
364         goto done;
365     }
366 
367     if (xmlXIncludeProcessFlags (xmldoc,
368                                  XML_PARSE_DTDLOAD | XML_PARSE_NOCDATA |
369                                  XML_PARSE_NOENT   | XML_PARSE_NONET   )
370         < 0) {
371         error = g_error_new (YELP_ERROR, YELP_ERROR_PROCESSING,
372                              _("The file ‘%s’ could not be parsed because"
373                                " one or more of its included files is not"
374                                " a well-formed XML document."),
375                              filepath);
376         yelp_document_error_pending (document, error);
377         g_error_free (error);
378         goto done;
379     }
380 
381     g_mutex_lock (&priv->mutex);
382     if (!xmlStrcmp (xmlDocGetRootElement (xmldoc)->name, BAD_CAST "book"))
383         priv->max_depth = 2;
384     else
385         priv->max_depth = 1;
386 
387     priv->xmldoc = xmldoc;
388     priv->xmlcur = xmlcur;
389 
390     id = xmlGetProp (priv->xmlcur, BAD_CAST "id");
391     if (!id)
392         id = xmlGetNsProp (priv->xmlcur, BAD_CAST "id", XML_XML_NAMESPACE);
393 
394     if (id) {
395         priv->root_id = g_strdup ((const gchar *) id);
396         yelp_document_set_page_id (document, NULL, (gchar *) id);
397         yelp_document_set_page_id (document, "//index", (gchar *) id);
398     }
399     else {
400         priv->root_id = g_strdup ("//index");
401         yelp_document_set_page_id (document, NULL, "//index");
402         /* add the id attribute to the root element with value "index"
403          * so when we try to load the document later, it doesn't fail */
404         if (priv->xmlcur->ns)
405             xmlSetProp (priv->xmlcur, BAD_CAST "xml:id", BAD_CAST "//index");
406         else
407             xmlSetProp (priv->xmlcur, BAD_CAST "id", BAD_CAST "//index");
408     }
409     yelp_document_set_root_id (document, priv->root_id, priv->root_id);
410     g_mutex_unlock (&priv->mutex);
411 
412     g_mutex_lock (&priv->mutex);
413     if (priv->state == DOCBOOK_STATE_STOP) {
414         g_mutex_unlock (&priv->mutex);
415         goto done;
416     }
417     g_mutex_unlock (&priv->mutex);
418 
419     docbook_walk (docbook);
420 
421     g_mutex_lock (&priv->mutex);
422     if (priv->state == DOCBOOK_STATE_STOP) {
423         g_mutex_unlock (&priv->mutex);
424         goto done;
425     }
426 
427     priv->state = DOCBOOK_STATE_PARSED;
428 
429     priv->transform = yelp_transform_new (STYLESHEET);
430     priv->chunk_ready =
431         g_signal_connect (priv->transform, "chunk-ready",
432                           (GCallback) transform_chunk_ready,
433                           docbook);
434     priv->finished =
435         g_signal_connect (priv->transform, "finished",
436                           (GCallback) transform_finished,
437                           docbook);
438     priv->error =
439         g_signal_connect (priv->transform, "error",
440                           (GCallback) transform_error,
441                           docbook);
442 
443     params = yelp_settings_get_all_params (yelp_settings_get_default (), 2, &params_i);
444     params[params_i++] = g_strdup ("db.chunk.max_depth");
445     params[params_i++] = g_strdup_printf ("%i", priv->max_depth);
446     params[params_i] = NULL;
447 
448     priv->transform_running = TRUE;
449     yelp_transform_start (priv->transform,
450                           priv->xmldoc,
451                           NULL,
452 			  (const gchar * const *) params);
453     g_strfreev (params);
454     g_mutex_unlock (&priv->mutex);
455 
456  done:
457     g_free (filepath);
458     if (id)
459         xmlFree (id);
460     if (parserCtxt)
461         xmlFreeParserCtxt (parserCtxt);
462 
463     priv->process_running = FALSE;
464     g_object_unref (docbook);
465 }
466 
467 static void
docbook_disconnect(YelpDocbookDocument * docbook)468 docbook_disconnect (YelpDocbookDocument *docbook)
469 {
470     YelpDocbookDocumentPrivate *priv = yelp_docbook_document_get_instance_private (docbook);
471     if (priv->chunk_ready) {
472         g_signal_handler_disconnect (priv->transform, priv->chunk_ready);
473         priv->chunk_ready = 0;
474     }
475     if (priv->finished) {
476         g_signal_handler_disconnect (priv->transform, priv->finished);
477         priv->finished = 0;
478     }
479     if (priv->error) {
480         g_signal_handler_disconnect (priv->transform, priv->error);
481         priv->error = 0;
482     }
483     yelp_transform_cancel (priv->transform);
484     g_object_unref (priv->transform);
485     priv->transform = NULL;
486     priv->transform_running = FALSE;
487 }
488 
489 static gboolean
docbook_reload(YelpDocbookDocument * docbook)490 docbook_reload (YelpDocbookDocument *docbook)
491 {
492     YelpDocbookDocumentPrivate *priv = yelp_docbook_document_get_instance_private (docbook);
493 
494     if (priv->index_running || priv->process_running || priv->transform_running)
495         return TRUE;
496 
497     g_mutex_lock (&priv->mutex);
498 
499     priv->reload_time = g_get_monotonic_time();
500 
501     yelp_document_clear_contents (YELP_DOCUMENT (docbook));
502 
503     priv->state = DOCBOOK_STATE_PARSING;
504     priv->process_running = TRUE;
505     g_object_ref (docbook);
506     priv->thread = g_thread_new ("docbook-reload",
507                                  (GThreadFunc)(GCallback) docbook_process,
508                                  docbook);
509 
510     g_mutex_unlock (&priv->mutex);
511 
512     return FALSE;
513 }
514 
515 static void
docbook_monitor_changed(GFileMonitor * monitor,GFile * file,GFile * other_file,GFileMonitorEvent event_type,YelpDocbookDocument * docbook)516 docbook_monitor_changed   (GFileMonitor         *monitor,
517                            GFile                *file,
518                            GFile                *other_file,
519                            GFileMonitorEvent     event_type,
520                            YelpDocbookDocument  *docbook)
521 {
522     YelpDocbookDocumentPrivate *priv = yelp_docbook_document_get_instance_private (docbook);
523 
524     if (g_get_monotonic_time() - priv->reload_time < 1000)
525         return;
526 
527     if (priv->index_running || priv->process_running || priv->transform_running) {
528         g_timeout_add_seconds (1, (GSourceFunc) docbook_reload, docbook);
529         return;
530     }
531 
532     docbook_reload (docbook);
533 }
534 
535 /******************************************************************************/
536 
537 static void
docbook_walk(YelpDocbookDocument * docbook)538 docbook_walk (YelpDocbookDocument *docbook)
539 {
540     static       gint autoid = 0;
541     gchar        autoidstr[20];
542     xmlChar     *id = NULL;
543     xmlChar     *title = NULL;
544     xmlChar     *keywords = NULL;
545     xmlNodePtr   cur, old_cur;
546     gboolean chunkQ;
547     YelpDocbookDocumentPrivate *priv = yelp_docbook_document_get_instance_private (docbook);
548     YelpDocument *document = YELP_DOCUMENT (docbook);
549 
550     debug_print (DB_FUNCTION, "entering\n");
551     debug_print (DB_DEBUG, "  priv->xmlcur->name: %s\n", priv->xmlcur->name);
552 
553     /* Check for the db.chunk.max_depth PI and set max chunk depth */
554     if (priv->cur_depth == 0)
555         for (cur = priv->xmlcur; cur; cur = cur->prev)
556             if (cur->type == XML_PI_NODE)
557                 if (!xmlStrcmp (cur->name, (const xmlChar *) "db.chunk.max_depth")) {
558                     gint max = atoi ((gchar *) cur->content);
559                     if (max)
560                         priv->max_depth = max;
561                     break;
562                 }
563 
564     id = xmlGetProp (priv->xmlcur, BAD_CAST "id");
565     if (!id)
566         id = xmlGetNsProp (priv->xmlcur, BAD_CAST "id", XML_XML_NAMESPACE);
567 
568     if (docbook_walk_divisionQ (docbook, priv->xmlcur) && !id) {
569         /* If id attribute is not present, autogenerate a
570          * unique value, and insert it into the in-memory tree */
571         g_snprintf (autoidstr, 20, "//yelp-autoid-%d", ++autoid);
572         if (priv->xmlcur->ns) {
573             xmlSetProp (priv->xmlcur, BAD_CAST "xml:id", BAD_CAST autoidstr);
574             id = xmlGetNsProp (priv->xmlcur, BAD_CAST "id", XML_XML_NAMESPACE);
575         }
576         else {
577             xmlSetProp (priv->xmlcur, BAD_CAST "id", BAD_CAST autoidstr);
578             id = xmlGetProp (priv->xmlcur, BAD_CAST "id");
579         }
580 
581         if (!priv->autoids)
582             priv->autoids = g_hash_table_new_full (g_str_hash, g_str_equal, xmlFree, xmlFree);
583         g_hash_table_insert (priv->autoids, xmlGetNodePath(priv->xmlcur), xmlStrdup (id));
584     }
585 
586     if (docbook_walk_chunkQ (docbook, priv->xmlcur, priv->cur_depth, priv->max_depth)) {
587         title = BAD_CAST docbook_walk_get_title (docbook, priv->xmlcur);
588         keywords = BAD_CAST docbook_walk_get_keywords (docbook, priv->xmlcur);
589 
590         debug_print (DB_DEBUG, "  id: \"%s\"\n", id);
591         debug_print (DB_DEBUG, "  title: \"%s\"\n", title);
592 
593         yelp_document_set_page_title (document, (gchar *) id, (gchar *) title);
594         yelp_document_set_page_keywords (document, (gchar *) id, (gchar *) keywords);
595 
596         if (priv->cur_prev_id) {
597             yelp_document_set_prev_id (document, (gchar *) id, priv->cur_prev_id);
598             yelp_document_set_next_id (document, priv->cur_prev_id, (gchar *) id);
599             g_free (priv->cur_prev_id);
600         }
601         priv->cur_prev_id = g_strdup ((gchar *) id);
602 
603         if (priv->cur_page_id)
604             yelp_document_set_up_id (document, (gchar *) id, priv->cur_page_id);
605         priv->cur_page_id = g_strdup ((gchar *) id);
606     }
607 
608     old_cur = priv->xmlcur;
609     priv->cur_depth++;
610     if (id) {
611         yelp_document_set_root_id (document, (gchar *) id, priv->root_id);
612         yelp_document_set_page_id (document, (gchar *) id, priv->cur_page_id);
613     }
614 
615     chunkQ = docbook_walk_chunkQ (docbook, priv->xmlcur, priv->cur_depth, priv->max_depth);
616     if (chunkQ)
617         yelp_document_signal (YELP_DOCUMENT (docbook),
618                               priv->cur_page_id,
619                               YELP_DOCUMENT_SIGNAL_INFO,
620                               NULL);
621 
622     for (cur = priv->xmlcur->children; cur; cur = cur->next) {
623         if (cur->type == XML_ELEMENT_NODE) {
624             priv->xmlcur = cur;
625             docbook_walk (docbook);
626         }
627     }
628     priv->cur_depth--;
629     priv->xmlcur = old_cur;
630 
631     if (priv->cur_depth == 0) {
632         g_free (priv->cur_prev_id);
633         priv->cur_prev_id = NULL;
634 
635         g_free (priv->cur_page_id);
636         priv->cur_page_id = NULL;
637     }
638 
639     if (id != NULL)
640         xmlFree (id);
641     if (title != NULL)
642         xmlFree (title);
643     if (keywords != NULL)
644         xmlFree (keywords);
645 }
646 
647 static gboolean
docbook_walk_chunkQ(YelpDocbookDocument * docbook,xmlNodePtr cur,gint cur_depth,gint max_depth)648 docbook_walk_chunkQ (YelpDocbookDocument *docbook,
649                      xmlNodePtr           cur,
650                      gint                 cur_depth,
651                      gint                 max_depth)
652 {
653     if (cur_depth <= max_depth)
654         return docbook_walk_divisionQ (docbook, cur);
655     else
656         return FALSE;
657 }
658 
659 static gboolean
docbook_walk_divisionQ(YelpDocbookDocument * docbook,xmlNodePtr node)660 docbook_walk_divisionQ (YelpDocbookDocument *docbook, xmlNodePtr node)
661 {
662     return (!xmlStrcmp (node->name, (const xmlChar *) "appendix")     ||
663             !xmlStrcmp (node->name, (const xmlChar *) "article")      ||
664             !xmlStrcmp (node->name, (const xmlChar *) "book")         ||
665             !xmlStrcmp (node->name, (const xmlChar *) "bibliography") ||
666             !xmlStrcmp (node->name, (const xmlChar *) "bibliodiv")    ||
667             !xmlStrcmp (node->name, (const xmlChar *) "chapter")      ||
668             !xmlStrcmp (node->name, (const xmlChar *) "colophon")     ||
669             !xmlStrcmp (node->name, (const xmlChar *) "dedication")   ||
670             !xmlStrcmp (node->name, (const xmlChar *) "glossary")     ||
671             !xmlStrcmp (node->name, (const xmlChar *) "glossdiv")     ||
672             !xmlStrcmp (node->name, (const xmlChar *) "lot")          ||
673             !xmlStrcmp (node->name, (const xmlChar *) "index")        ||
674             !xmlStrcmp (node->name, (const xmlChar *) "part")         ||
675             !xmlStrcmp (node->name, (const xmlChar *) "preface")      ||
676             !xmlStrcmp (node->name, (const xmlChar *) "reference")    ||
677             !xmlStrcmp (node->name, (const xmlChar *) "refentry")     ||
678             !xmlStrcmp (node->name, (const xmlChar *) "sect1")        ||
679             !xmlStrcmp (node->name, (const xmlChar *) "sect2")        ||
680             !xmlStrcmp (node->name, (const xmlChar *) "sect3")        ||
681             !xmlStrcmp (node->name, (const xmlChar *) "sect4")        ||
682             !xmlStrcmp (node->name, (const xmlChar *) "sect5")        ||
683             !xmlStrcmp (node->name, (const xmlChar *) "section")      ||
684             !xmlStrcmp (node->name, (const xmlChar *) "set")          ||
685             !xmlStrcmp (node->name, (const xmlChar *) "setindex")     ||
686             !xmlStrcmp (node->name, (const xmlChar *) "simplesect")   ||
687             !xmlStrcmp (node->name, (const xmlChar *) "toc")          );
688 }
689 
690 static gchar *
docbook_walk_get_title(YelpDocbookDocument * docbook,xmlNodePtr cur)691 docbook_walk_get_title (YelpDocbookDocument *docbook,
692                         xmlNodePtr           cur)
693 {
694     gchar *infoname = NULL;
695     xmlNodePtr child = NULL;
696     xmlNodePtr title = NULL;
697     xmlNodePtr title_tmp = NULL;
698 
699     if (!xmlStrcmp (cur->name, BAD_CAST "refentry")) {
700         /* The title for a refentry element can come from the following:
701          *   refmeta/refentrytitle
702          *   refentryinfo/title[abbrev]
703          *   refnamediv/refname
704          * We take the first one we find.
705          */
706         for (child = cur->children; child; child = child->next) {
707             if (!xmlStrcmp (child->name, BAD_CAST "refmeta")) {
708                 for (title = child->children; title; title = title->next) {
709                     if (!xmlStrcmp (title->name, BAD_CAST "refentrytitle"))
710                         break;
711                 }
712                 if (title)
713                     goto done;
714             }
715             else if (!xmlStrcmp (child->name, BAD_CAST "refentryinfo")) {
716                 for (title = child->children; title; title = title->next) {
717                     if (!xmlStrcmp (title->name, BAD_CAST "titleabbrev"))
718                         break;
719                     else if (!xmlStrcmp (title->name, BAD_CAST "title"))
720                         title_tmp = title;
721                 }
722                 if (title)
723                     goto done;
724                 else if (title_tmp) {
725                     title = title_tmp;
726                     goto done;
727                 }
728             }
729             else if (!xmlStrcmp (child->name, BAD_CAST "refnamediv")) {
730                 for (title = child->children; title; title = title->next) {
731                     if (!xmlStrcmp (title->name, BAD_CAST "refname"))
732                         break;
733                     else if (!xmlStrcmp (title->name, BAD_CAST "refpurpose")) {
734                         title = NULL;
735                         break;
736                     }
737                 }
738                 if (title)
739                     goto done;
740             }
741             else if (!xmlStrncmp (child->name, BAD_CAST "refsect", 7))
742                 break;
743         }
744     }
745     else {
746         /* The title for other elements appears in the following:
747          *   title[abbrev]
748          *   *info/title[abbrev]
749          *   blockinfo/title[abbrev]
750          *   objectinfo/title[abbrev]
751          * We take them in that order.
752          */
753         xmlNodePtr infos[3] = {NULL, NULL, NULL};
754         int i;
755 
756         infoname = g_strdup_printf ("%sinfo", cur->name);
757 
758         for (child = cur->children; child; child = child->next) {
759             if (!xmlStrcmp (child->name, BAD_CAST "titleabbrev")) {
760                 title = child;
761                 goto done;
762             }
763             else if (!xmlStrcmp (child->name, BAD_CAST "title"))
764                 title_tmp = child;
765             else if (!xmlStrcmp (child->name, BAD_CAST "info"))
766                 infos[0] = child;
767             else if (!xmlStrcmp (child->name, BAD_CAST infoname))
768                 infos[0] = child;
769             else if (!xmlStrcmp (child->name, BAD_CAST "blockinfo"))
770                 infos[1] = child;
771             else if (!xmlStrcmp (child->name, BAD_CAST "objectinfo"))
772                 infos[2] = child;
773         }
774 
775         if (title_tmp) {
776             title = title_tmp;
777             goto done;
778         }
779 
780         for (i = 0; i < 3; i++) {
781             child = infos[i];
782             if (child) {
783                 for (title = child->children; title; title = title->next) {
784                     if (!xmlStrcmp (title->name, BAD_CAST "titleabbrev"))
785                         goto done;
786                     else if (!xmlStrcmp (title->name, BAD_CAST "title"))
787                         title_tmp = title;
788                 }
789                 if (title_tmp) {
790                     title = title_tmp;
791                     goto done;
792                 }
793             }
794         }
795     }
796 
797  done:
798     g_free (infoname);
799 
800     if (title) {
801         xmlChar *title_s = xmlNodeGetContent (title);
802         gchar *ret = g_strdup ((const gchar *) title_s);
803         xmlFree (title_s);
804         return ret;
805     }
806     else
807         return g_strdup (_("Unknown"));
808 }
809 
810 static gchar *
docbook_walk_get_keywords(YelpDocbookDocument * docbook,xmlNodePtr cur)811 docbook_walk_get_keywords (YelpDocbookDocument *docbook,
812                            xmlNodePtr           cur)
813 {
814     xmlNodePtr info, keywordset, keyword;
815     GString *ret = NULL;
816 
817     for (info = cur->children; info; info = info->next) {
818         if (g_str_has_suffix ((const gchar *) info->name, "info")) {
819             for (keywordset = info->children; keywordset; keywordset = keywordset->next) {
820                 if (!xmlStrcmp (keywordset->name, BAD_CAST "keywordset")) {
821                     for (keyword = keywordset->children; keyword; keyword = keyword->next) {
822                         if (!xmlStrcmp (keyword->name, BAD_CAST "keyword")) {
823                             xmlChar *content;
824                             if (ret)
825                                 g_string_append(ret, ", ");
826                             else
827                                 ret = g_string_new ("");
828                             /* FIXME: try this with just ->children->text */
829                             content = xmlNodeGetContent (keyword);
830                             g_string_append (ret, (gchar *) content);
831                             xmlFree (content);
832                         }
833                     }
834                 }
835             }
836             break;
837         }
838     }
839 
840     if (ret)
841         return g_string_free (ret, FALSE);
842     else
843         return NULL;
844 }
845 
846 /******************************************************************************/
847 
848 static void
transform_chunk_ready(YelpTransform * transform,gchar * chunk_id,YelpDocbookDocument * docbook)849 transform_chunk_ready (YelpTransform       *transform,
850                        gchar               *chunk_id,
851                        YelpDocbookDocument *docbook)
852 {
853     YelpDocbookDocumentPrivate *priv = yelp_docbook_document_get_instance_private (docbook);
854     gchar *content;
855 
856     debug_print (DB_FUNCTION, "entering\n");
857     g_assert (transform == priv->transform);
858 
859     if (priv->state == DOCBOOK_STATE_STOP) {
860         docbook_disconnect (docbook);
861         return;
862     }
863 
864     content = yelp_transform_take_chunk (transform, chunk_id);
865     yelp_document_give_contents (YELP_DOCUMENT (docbook),
866                                  chunk_id,
867                                  content,
868                                  "application/xhtml+xml");
869 
870     yelp_document_signal (YELP_DOCUMENT (docbook),
871                           chunk_id,
872                           YELP_DOCUMENT_SIGNAL_CONTENTS,
873                           NULL);
874 }
875 
876 static void
transform_finished(YelpTransform * transform,YelpDocbookDocument * docbook)877 transform_finished (YelpTransform       *transform,
878                     YelpDocbookDocument *docbook)
879 {
880     YelpDocbookDocumentPrivate *priv = yelp_docbook_document_get_instance_private (docbook);
881     YelpDocument *document = YELP_DOCUMENT (docbook);
882     gchar *docuri;
883     GError *error;
884 
885     debug_print (DB_FUNCTION, "entering\n");
886     g_assert (transform == priv->transform);
887 
888     if (priv->state == DOCBOOK_STATE_STOP) {
889         docbook_disconnect (docbook);
890         return;
891     }
892 
893     docbook_disconnect (docbook);
894 
895     /* We want to free priv->xmldoc, but we can't free it before transform
896        is finalized.   Otherwise, we could crash when YelpTransform frees
897        its libxslt resources.
898      */
899     g_object_weak_ref ((GObject *) transform,
900                        (GWeakNotify) transform_finalized,
901                        docbook);
902 
903     docuri = yelp_uri_get_document_uri (yelp_document_get_uri (document));
904     error = g_error_new (YELP_ERROR, YELP_ERROR_NOT_FOUND,
905                          _("The requested page was not found in the document ‘%s’."),
906                          docuri);
907     g_free (docuri);
908     yelp_document_error_pending ((YelpDocument *) docbook, error);
909     g_error_free (error);
910 }
911 
912 static void
transform_error(YelpTransform * transform,YelpDocbookDocument * docbook)913 transform_error (YelpTransform       *transform,
914                  YelpDocbookDocument *docbook)
915 {
916     YelpDocbookDocumentPrivate *priv = yelp_docbook_document_get_instance_private (docbook);
917     GError *error;
918 
919     debug_print (DB_FUNCTION, "entering\n");
920     g_assert (transform == priv->transform);
921 
922     if (priv->state == DOCBOOK_STATE_STOP) {
923         docbook_disconnect (docbook);
924         return;
925     }
926 
927     error = yelp_transform_get_error (transform);
928     yelp_document_error_pending ((YelpDocument *) docbook, error);
929     g_error_free (error);
930 
931     docbook_disconnect (docbook);
932 }
933 
934 static void
transform_finalized(YelpDocbookDocument * docbook,gpointer transform)935 transform_finalized (YelpDocbookDocument *docbook,
936                      gpointer             transform)
937 {
938     YelpDocbookDocumentPrivate *priv = yelp_docbook_document_get_instance_private (docbook);
939 
940     debug_print (DB_FUNCTION, "entering\n");
941 
942     if (priv->xmldoc)
943 	xmlFreeDoc (priv->xmldoc);
944     priv->xmldoc = NULL;
945 }
946 
947 /******************************************************************************/
948 
949 static gboolean
docbook_index_done(YelpDocbookDocument * docbook)950 docbook_index_done (YelpDocbookDocument *docbook)
951 {
952     g_object_set (docbook, "indexed", TRUE, NULL);
953     g_object_unref (docbook);
954     return FALSE;
955 }
956 
957 typedef struct {
958     YelpDocbookDocument *docbook;
959     xmlDocPtr doc;
960     xmlNodePtr cur;
961     gchar *doc_uri;
962     GString *str;
963     gint depth;
964     gint max_depth;
965     gboolean in_info;
966 } DocbookIndexData;
967 
968 static void
docbook_index_node(DocbookIndexData * index)969 docbook_index_node (DocbookIndexData *index)
970 {
971     xmlNodePtr oldcur, child;
972 
973     if ((g_str_equal (index->cur->parent->name, "menuchoice") ||
974          g_str_equal (index->cur->parent->name, "keycombo")) &&
975         index->cur->prev != NULL) {
976         g_string_append_c (index->str, ' ');
977     }
978     if (index->cur->type == XML_TEXT_NODE) {
979         g_string_append (index->str, (const gchar *) index->cur->content);
980         return;
981     }
982     if (index->cur->type != XML_ELEMENT_NODE) {
983         return;
984     }
985     if (g_str_equal (index->cur->name, "remark")) {
986         return;
987     }
988     if (g_str_has_suffix ((const gchar *) index->cur->name, "info")) {
989         return;
990     }
991     oldcur = index->cur;
992     for (child = index->cur->children; child; child = child->next) {
993         index->cur = child;
994         docbook_index_node (index);
995         index->cur = oldcur;
996     }
997 }
998 
999 static void
docbook_index_chunk(DocbookIndexData * index)1000 docbook_index_chunk (DocbookIndexData *index)
1001 {
1002     xmlChar *id;
1003     xmlNodePtr child;
1004     gchar *title = NULL;
1005     gchar *keywords;
1006     GSList *chunks = NULL;
1007     YelpDocbookDocumentPrivate *priv = yelp_docbook_document_get_instance_private (index->docbook);
1008 
1009     id = xmlGetProp (index->cur, BAD_CAST "id");
1010     if (!id)
1011         id = xmlGetNsProp (index->cur, BAD_CAST "id", XML_XML_NAMESPACE);
1012     if (!id) {
1013         xmlChar *path = xmlGetNodePath (index->cur);
1014         id = g_hash_table_lookup (priv->autoids, path);
1015         if (id)
1016             id = xmlStrdup (id);
1017         xmlFree (path);
1018     }
1019 
1020     if (id != NULL) {
1021         title = docbook_walk_get_title (index->docbook, index->cur);
1022         if (index->cur->parent->parent == NULL)
1023             yelp_storage_set_root_title (yelp_storage_get_default (),
1024                                          index->doc_uri, title);
1025         index->str = g_string_new ("");
1026         keywords = docbook_walk_get_keywords (index->docbook, index->cur);
1027         if (keywords) {
1028             g_string_append (index->str, keywords);
1029             g_free (keywords);
1030         }
1031     }
1032 
1033     for (child = index->cur->children; child; child = child->next) {
1034         if (docbook_walk_chunkQ (index->docbook, child, index->depth, index->max_depth)) {
1035             chunks = g_slist_append (chunks, child);
1036         }
1037         else if (id != NULL) {
1038             xmlNodePtr oldcur = index->cur;
1039             index->cur = child;
1040             docbook_index_node (index);
1041             index->cur = oldcur;
1042         }
1043     }
1044 
1045     if (id != NULL) {
1046         YelpDocument *document = YELP_DOCUMENT (index->docbook);
1047         YelpUri *uri;
1048         gchar *full_uri, *tmp, *body;
1049 
1050         body = g_string_free (index->str, FALSE);
1051         index->str = NULL;
1052 
1053         tmp = g_strconcat ("xref:", id, NULL);
1054         uri = yelp_uri_new_relative (yelp_document_get_uri (document), tmp);
1055         g_free (tmp);
1056         yelp_uri_resolve_sync (uri);
1057         full_uri = yelp_uri_get_canonical_uri (uri);
1058         g_object_unref (uri);
1059 
1060         yelp_storage_update (yelp_storage_get_default (),
1061                              index->doc_uri, full_uri,
1062                              title, "", "yelp-page-symbolic",
1063                              body);
1064         if (index->cur->parent->parent == NULL)
1065             yelp_storage_set_root_title (yelp_storage_get_default (),
1066                                          index->doc_uri, title);
1067         g_free (full_uri);
1068         g_free (body);
1069         g_free (title);
1070         xmlFree (id);
1071     }
1072 
1073     index->depth++;
1074     while (chunks != NULL) {
1075         xmlNodePtr oldcur = index->cur;
1076         index->cur = (xmlNodePtr) chunks->data;
1077         docbook_index_chunk(index);
1078         index->cur = oldcur;
1079         chunks = g_slist_delete_link (chunks, chunks);
1080     }
1081     index->depth--;
1082 }
1083 
1084 static void
docbook_index_threaded(YelpDocbookDocument * docbook)1085 docbook_index_threaded (YelpDocbookDocument *docbook)
1086 {
1087     DocbookIndexData *index = NULL;
1088     xmlParserCtxtPtr parserCtxt = NULL;
1089     GFile *file = NULL;
1090     gchar *filename = NULL;
1091     YelpUri *uri;
1092     YelpDocbookDocumentPrivate *priv = yelp_docbook_document_get_instance_private (docbook);
1093 
1094     uri = yelp_document_get_uri (YELP_DOCUMENT (docbook));
1095     file = yelp_uri_get_file (uri);
1096     if (file == NULL)
1097         goto done;
1098     filename = g_file_get_path (file);
1099 
1100     index = g_new0 (DocbookIndexData, 1);
1101     index->docbook = docbook;
1102     index->doc_uri = yelp_uri_get_document_uri (uri);
1103 
1104     parserCtxt = xmlNewParserCtxt ();
1105     index->doc = xmlCtxtReadFile (parserCtxt, filename, NULL,
1106                                   XML_PARSE_DTDLOAD | XML_PARSE_NOCDATA |
1107                                   XML_PARSE_NOENT   | XML_PARSE_NONET   );
1108     if (index->doc == NULL)
1109         goto done;
1110     if (xmlXIncludeProcessFlags (index->doc,
1111                                  XML_PARSE_DTDLOAD | XML_PARSE_NOCDATA |
1112                                  XML_PARSE_NOENT   | XML_PARSE_NONET   )
1113         < 0)
1114         goto done;
1115 
1116     index->cur = xmlDocGetRootElement (index->doc);
1117     index->depth = 0;
1118     if (!xmlStrcmp (index->cur->name, BAD_CAST "book"))
1119         index->max_depth = 2;
1120     else
1121         index->max_depth = 1;
1122     docbook_index_chunk (index);
1123 
1124  done:
1125     if (file != NULL)
1126         g_object_unref (file);
1127     if (filename != NULL)
1128         g_free (filename);
1129     if (index != NULL) {
1130         if (index->doc != NULL)
1131             xmlFreeDoc (index->doc);
1132         if (index->doc_uri != NULL)
1133             g_free (index->doc_uri);
1134         g_free (index);
1135     }
1136     if (parserCtxt != NULL)
1137         xmlFreeParserCtxt (parserCtxt);
1138 
1139     priv->index_running = FALSE;
1140     g_idle_add ((GSourceFunc) docbook_index_done, docbook);
1141 }
1142 
1143 static void
docbook_index(YelpDocument * document)1144 docbook_index (YelpDocument *document)
1145 {
1146     YelpDocbookDocumentPrivate *priv;
1147     gboolean done;
1148 
1149     g_object_get (document, "indexed", &done, NULL);
1150     if (done)
1151         return;
1152 
1153     priv = yelp_docbook_document_get_instance_private (YELP_DOCBOOK_DOCUMENT (document));
1154     g_object_ref (document);
1155     priv->index = g_thread_new ("docbook-index",
1156                                 (GThreadFunc)(GCallback) docbook_index_threaded,
1157                                 document);
1158     priv->index_running = TRUE;
1159 }
1160