1 /* -*- Mode: C; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 8 -*- */
2 /*
3  * Copyright (c) 2002-2003 Mikael Hallendal <micke@imendio.com>
4  * Copyright (c) 2002-2003 CodeFactory AB
5  * Copyright (C) 2005,2008 Imendio AB
6  *
7  * This program is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License as
9  * published by the Free Software Foundation; either version 2 of the
10  * License, or (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public
18  * License along with this program; if not, write to the
19  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20  * Boston, MA 02111-1307, USA.
21  */
22 
23 #include "config.h"
24 #include <string.h>
25 #include <errno.h>
26 #include <zlib.h>
27 #include <glib/gi18n-lib.h>
28 
29 #include "dh-error.h"
30 #include "dh-link.h"
31 #include "dh-parser.h"
32 
/* XML namespace a <book> element's "xmlns" attribute is checked against
 * (the comparison is ASCII case-insensitive). */
#define NAMESPACE      "http://www.devhelp.net/book"
/* Size, in bytes, of the buffer used for each read from a book file. */
#define BYTES_PER_READ 4096
35 
36 typedef struct {
37 	GMarkupParser       *m_parser;
38 	GMarkupParseContext *context;
39 
40 	const gchar         *path;
41 
42 	/* Top node of book */
43 	GNode               *book_node;
44 
45 	/* Current sub section node */
46 	GNode               *parent;
47 
48 	gboolean             parsing_chapters;
49 	gboolean             parsing_keywords;
50 
51  	GNode              **book_tree;
52 	GList              **keywords;
53 
54 	/* Version 2 uses <keyword> instead of <function>. */
55 	gint                 version;
56 } DhParser;
57 
58 static void
dh_parser_free(DhParser * parser)59 dh_parser_free (DhParser *parser)
60 {
61         // NOTE: priv->book_tree and priv->keywords do not need to be freed
62         // because they're only used to store the locations for the return
63         // params of dh_parser_read_file()
64 
65         g_markup_parse_context_free (parser->context);
66         g_free (parser->m_parser);
67         g_free (parser);
68 }
69 
70 static void
parser_start_node_book(DhParser * parser,GMarkupParseContext * context,const gchar * node_name,const gchar ** attribute_names,const gchar ** attribute_values,GError ** error)71 parser_start_node_book (DhParser             *parser,
72                         GMarkupParseContext  *context,
73                         const gchar          *node_name,
74                         const gchar         **attribute_names,
75                         const gchar         **attribute_values,
76                         GError              **error)
77 {
78         gint         i, j;
79         gint         line, col;
80         gchar       *title = NULL;
81         gchar *base = NULL;
82         const gchar *name = NULL;
83         const gchar *uri = NULL;
84 	DhLink      *link;
85 
86         if (g_ascii_strcasecmp (node_name, "book") != 0) {
87                 g_markup_parse_context_get_position (context, &line, &col);
88                 g_set_error (error,
89                              DH_ERROR,
90                              DH_ERROR_MALFORMED_BOOK,
91                              _("Expected '%s', got '%s' at line %d, column %d"),
92                              "book", node_name, line, col);
93                 return;
94         }
95 
96         for (i = 0; attribute_names[i]; ++i) {
97                 const gchar *xmlns;
98 
99                 if (g_ascii_strcasecmp (attribute_names[i], "xmlns") == 0) {
100                         xmlns = attribute_values[i];
101                         if (g_ascii_strcasecmp (xmlns, NAMESPACE) != 0) {
102                                 g_markup_parse_context_get_position (context,
103                                                                      &line,
104                                                                      &col);
105                                 g_set_error (error,
106                                              DH_ERROR,
107                                              DH_ERROR_MALFORMED_BOOK,
108                                              _("Invalid namespace '%s' at"
109                                                " line %d, column %d"),
110                                              xmlns, line, col);
111                                 return;
112                         }
113                 }
114                 else if (g_ascii_strcasecmp (attribute_names[i], "name") == 0) {
115                         name = attribute_values[i];
116                 }
117                 else if (g_ascii_strcasecmp (attribute_names[i], "title") == 0) {
118                         title = g_strdup(attribute_values[i]);
119                         for (j = 0; title[j]; j++) {
120                                 if (title[j] == '\n') title[j] = ' ';
121                         }
122                 }
123                 else if (g_ascii_strcasecmp (attribute_names[i], "base") == 0) {
124                         base = g_strdup (attribute_values[i]);
125 			}
126                 else if (g_ascii_strcasecmp (attribute_names[i], "link") == 0) {
127                         uri = attribute_values[i];
128                 }
129         }
130 
131         if (!title || !name || !uri) {
132                 g_markup_parse_context_get_position (context, &line, &col);
133                 g_set_error (error,
134                              DH_ERROR,
135                              DH_ERROR_MALFORMED_BOOK,
136                              _("\"title\", \"name\" and \"link\" elements are "
137                                "required at line %d, column %d"),
138                              line, col);
139                 g_free (title);
140                 return;
141         }
142 
143         if (!base) {
144                 base = g_path_get_dirname (parser->path);
145         }
146 
147         link = dh_link_new (DH_LINK_TYPE_BOOK,
148                             base,
149                             name,
150                             title,
151                             NULL,
152                             NULL,
153                             uri);
154         g_free (base);
155 
156         *parser->keywords = g_list_prepend (*parser->keywords, dh_link_ref (link));
157 
158         parser->book_node = g_node_new (dh_link_ref (link));
159         *parser->book_tree = parser->book_node;
160         parser->parent = parser->book_node;
161         g_free (title);
162         dh_link_unref (link);
163 }
164 
165 static void
parser_start_node_chapter(DhParser * parser,GMarkupParseContext * context,const gchar * node_name,const gchar ** attribute_names,const gchar ** attribute_values,GError ** error)166 parser_start_node_chapter (DhParser             *parser,
167                            GMarkupParseContext  *context,
168                            const gchar          *node_name,
169                            const gchar         **attribute_names,
170                            const gchar         **attribute_values,
171                            GError              **error)
172 {
173         gint         i;
174         gint         line, col;
175         const gchar *name = NULL;
176         const gchar *uri = NULL;
177 	DhLink      *link;
178         GNode       *node;
179 
180         if (g_ascii_strcasecmp (node_name, "sub") != 0) {
181                 g_markup_parse_context_get_position (context, &line, &col);
182                 g_set_error (error,
183                              DH_ERROR,
184                              DH_ERROR_MALFORMED_BOOK,
185                              _("Expected '%s', got '%s' at line %d, column %d"),
186                              "sub", node_name, line, col);
187                 return;
188         }
189 
190         for (i = 0; attribute_names[i]; ++i) {
191                 if (g_ascii_strcasecmp (attribute_names[i], "name") == 0) {
192                         name = attribute_values[i];
193                 }
194                 else if (g_ascii_strcasecmp (attribute_names[i], "link") == 0) {
195                         uri = attribute_values[i];
196                 }
197         }
198 
199         if (!name || !uri) {
200                 g_markup_parse_context_get_position (context, &line, &col);
201                 g_set_error (error,
202                              DH_ERROR,
203                              DH_ERROR_MALFORMED_BOOK,
204                              _("\"name\" and \"link\" elements are required "
205                                "inside <sub> on line %d, column %d"),
206                              line, col);
207                 return;
208         }
209 
210         link = dh_link_new (DH_LINK_TYPE_PAGE,
211                             NULL,
212                             NULL,
213                             name,
214                             parser->book_node->data,
215                             NULL,
216                             uri);
217 
218         *parser->keywords = g_list_prepend (*parser->keywords, link);
219 
220         node = g_node_new (link);
221         g_node_prepend (parser->parent, node);
222         parser->parent = node;
223 }
224 
225 static void
parser_start_node_keyword(DhParser * parser,GMarkupParseContext * context,const gchar * node_name,const gchar ** attribute_names,const gchar ** attribute_values,GError ** error)226 parser_start_node_keyword (DhParser             *parser,
227                            GMarkupParseContext  *context,
228                            const gchar          *node_name,
229                            const gchar         **attribute_names,
230                            const gchar         **attribute_values,
231                            GError              **error)
232 {
233         gint         i;
234         gint         line, col;
235         const gchar *name = NULL;
236         const gchar *uri = NULL;
237         const gchar *type = NULL;
238         const gchar *deprecated = NULL;
239         DhLinkType   link_type;
240 	DhLink      *link;
241         gchar       *tmp;
242 
243         if (parser->version == 2 &&
244             g_ascii_strcasecmp (node_name, "keyword") != 0) {
245                 g_markup_parse_context_get_position (context, &line, &col);
246                 g_set_error (error,
247                              DH_ERROR,
248                              DH_ERROR_MALFORMED_BOOK,
249                              _("Expected '%s', got '%s' at line %d, column %d"),
250                              "keyword", node_name, line, col);
251                 return;
252         }
253         else if (parser->version == 1 &&
254             g_ascii_strcasecmp (node_name, "function") != 0) {
255                 g_markup_parse_context_get_position (context, &line, &col);
256                 g_set_error (error,
257                              DH_ERROR,
258                              DH_ERROR_MALFORMED_BOOK,
259                              _("Expected '%s', got '%s' at line %d, column %d"),
260                              "function", node_name, line, col);
261                 return;
262         }
263 
264         for (i = 0; attribute_names[i]; ++i) {
265                 if (g_ascii_strcasecmp (attribute_names[i], "type") == 0) {
266                         type = attribute_values[i];
267                 }
268                 else if (g_ascii_strcasecmp (attribute_names[i], "name") == 0) {
269                         name = attribute_values[i];
270                 }
271                 else if (g_ascii_strcasecmp (attribute_names[i], "link") == 0) {
272                         uri = attribute_values[i];
273                 }
274                 else if (g_ascii_strcasecmp (attribute_names[i], "deprecated") == 0) {
275                         deprecated = attribute_values[i];
276                 }
277         }
278 
279         if (!name || !uri) {
280                 g_markup_parse_context_get_position (context, &line, &col);
281                 g_set_error (error,
282                              DH_ERROR,
283                              DH_ERROR_MALFORMED_BOOK,
284                              _("\"name\" and \"link\" elements are required "
285                                "inside '%s' on line %d, column %d"),
286                              parser->version == 2 ? "keyword" : "function",
287                              line, col);
288                 return;
289         }
290 
291         if (parser->version == 2 && !type) {
292                 /* Required */
293                 g_markup_parse_context_get_position (context, &line, &col);
294                 g_set_error (error,
295                              DH_ERROR,
296                              DH_ERROR_MALFORMED_BOOK,
297                              _("\"type\" element is required "
298                                "inside <keyword> on line %d, column %d"),
299                              line, col);
300                 return;
301         }
302 
303         if (parser->version == 2) {
304                 if (strcmp (type, "function") == 0) {
305                         link_type = DH_LINK_TYPE_FUNCTION;
306                 }
307                 else if (strcmp (type, "struct") == 0) {
308                         link_type = DH_LINK_TYPE_STRUCT;
309                 }
310                 else if (strcmp (type, "macro") == 0) {
311                         link_type = DH_LINK_TYPE_MACRO;
312                 }
313                 else if (strcmp (type, "enum") == 0) {
314                         link_type = DH_LINK_TYPE_ENUM;
315                 }
316                 else if (strcmp (type, "typedef") == 0) {
317                         link_type = DH_LINK_TYPE_TYPEDEF;
318                 } else {
319                         link_type = DH_LINK_TYPE_KEYWORD;
320                 }
321         } else {
322                 link_type = DH_LINK_TYPE_KEYWORD;
323         }
324 
325         /* Strip out trailing " () or "()". */
326         if (g_str_has_suffix (name, " ()")) {
327                 tmp = g_strndup (name, strlen (name) - 3);
328 
329                 if (link_type == DH_LINK_TYPE_KEYWORD) {
330                         link_type = DH_LINK_TYPE_FUNCTION;
331                 }
332                 name = tmp;
333         }
334         else if (g_str_has_suffix (name, "()")) {
335                 tmp = g_strndup (name, strlen (name) - 2);
336 
337                 /* With old devhelp format, take a guess that this is a
338                  * macro.
339                  */
340                 if (link_type == DH_LINK_TYPE_KEYWORD) {
341                         link_type = DH_LINK_TYPE_MACRO;
342                 }
343                 name = tmp;
344         } else {
345                 tmp = NULL;
346         }
347 
348         /* Strip out prefixing "struct", "union", "enum", to make searching
349          * easier. Also fix up the link type (only applies for old devhelp
350          * format).
351          */
352         if (g_str_has_prefix (name, "struct ")) {
353                 name = name + 7;
354                 if (link_type == DH_LINK_TYPE_KEYWORD) {
355                         link_type = DH_LINK_TYPE_STRUCT;
356                 }
357         }
358         else if (g_str_has_prefix (name, "union ")) {
359                 name = name + 6;
360                 if (link_type == DH_LINK_TYPE_KEYWORD) {
361                         link_type = DH_LINK_TYPE_STRUCT;
362                 }
363         }
364         else if (g_str_has_prefix (name, "enum ")) {
365                 name = name + 5;
366                 if (link_type == DH_LINK_TYPE_KEYWORD) {
367                         link_type = DH_LINK_TYPE_ENUM;
368                 }
369         }
370 
371         link = dh_link_new (link_type,
372                             NULL,
373                             NULL,
374                             name,
375                             parser->book_node->data,
376                             parser->parent->data,
377                             uri);
378 
379         g_free (tmp);
380 
381         if (deprecated) {
382                 dh_link_set_flags (
383                         link,
384                         dh_link_get_flags (link) | DH_LINK_FLAGS_DEPRECATED);
385         }
386 
387         *parser->keywords = g_list_prepend (*parser->keywords, link);
388 }
389 
390 static void
parser_start_node_cb(GMarkupParseContext * context,const gchar * node_name,const gchar ** attribute_names,const gchar ** attribute_values,gpointer user_data,GError ** error)391 parser_start_node_cb (GMarkupParseContext  *context,
392 		      const gchar          *node_name,
393 		      const gchar         **attribute_names,
394 		      const gchar         **attribute_values,
395 		      gpointer              user_data,
396 		      GError              **error)
397 {
398 	DhParser *parser = user_data;
399 
400         if (parser->parsing_keywords) {
401                 parser_start_node_keyword (parser,
402                                            context,
403                                            node_name,
404                                            attribute_names,
405                                            attribute_values,
406                                            error);
407                 return;
408         }
409         else if (parser->parsing_chapters) {
410                 parser_start_node_chapter (parser,
411                                            context,
412                                            node_name,
413                                            attribute_names,
414                                            attribute_values,
415                                            error);
416                 return;
417         }
418 	else if (g_ascii_strcasecmp (node_name, "functions") == 0) {
419 		parser->parsing_keywords = TRUE;
420 	}
421 	else if (g_ascii_strcasecmp (node_name, "chapters") == 0) {
422 		parser->parsing_chapters = TRUE;
423 	}
424 	if (!parser->book_node) {
425                 parser_start_node_book (parser,
426                                         context,
427                                         node_name,
428                                         attribute_names,
429                                         attribute_values,
430                                         error);
431 		return;
432 	}
433 }
434 
435 static void
parser_end_node_cb(GMarkupParseContext * context,const gchar * node_name,gpointer user_data,GError ** error)436 parser_end_node_cb (GMarkupParseContext  *context,
437 		    const gchar          *node_name,
438 		    gpointer              user_data,
439 		    GError              **error)
440 {
441 	DhParser *parser = user_data;
442 
443         if (parser->parsing_keywords) {
444                 if (g_ascii_strcasecmp (node_name, "functions") == 0) {
445 			parser->parsing_keywords = FALSE;
446 		}
447 	}
448 	else if (parser->parsing_chapters) {
449 		g_node_reverse_children (parser->parent);
450 		if (g_ascii_strcasecmp (node_name, "sub") == 0) {
451 			parser->parent = parser->parent->parent;
452 			/* Move up in the tree */
453 		}
454 		else if (g_ascii_strcasecmp (node_name, "chapters") == 0) {
455 			parser->parsing_chapters = FALSE;
456 		}
457 	}
458 }
459 
460 static void
parser_error_cb(GMarkupParseContext * context,GError * error,gpointer user_data)461 parser_error_cb (GMarkupParseContext *context,
462 		 GError              *error,
463 		 gpointer             user_data)
464 {
465 	DhParser *parser = user_data;
466 
467 	g_markup_parse_context_free (parser->context);
468  	parser->context = NULL;
469 }
470 
471 static gboolean
parser_read_gz_file(DhParser * parser,const gchar * path,GError ** error)472 parser_read_gz_file (DhParser     *parser,
473                      const gchar  *path,
474 		     GError      **error)
475 {
476 	gchar  buf[BYTES_PER_READ];
477 	gzFile file;
478 
479 	file = gzopen (path, "r");
480 	if (!file) {
481 		g_set_error (error,
482 			     DH_ERROR,
483 			     DH_ERROR_FILE_NOT_FOUND,
484 			     "%s", g_strerror (errno));
485 		return FALSE;
486 	}
487 
488 	while (TRUE) {
489 		gssize bytes_read;
490 
491 		bytes_read = gzread (file, buf, BYTES_PER_READ);
492 		if (bytes_read == -1) {
493 			gint         err;
494 			const gchar *message;
495 
496 			message = gzerror (file, &err);
497 			g_set_error (error,
498 				     DH_ERROR,
499 				     DH_ERROR_INTERNAL_ERROR,
500 				     _("Cannot uncompress book '%s': %s"),
501 				     path, message);
502 			return FALSE;
503 		}
504 
505 		g_markup_parse_context_parse (parser->context, buf,
506 					      bytes_read, error);
507 		if (error != NULL && *error != NULL) {
508 			return FALSE;
509 		}
510 		if (bytes_read < BYTES_PER_READ) {
511 			break;
512 		}
513 	}
514 
515 	gzclose (file);
516 
517 	return TRUE;
518 }
519 
520 gboolean
dh_parser_read_file(const gchar * path,GNode ** book_tree,GList ** keywords,GError ** error)521 dh_parser_read_file (const gchar  *path,
522 		     GNode       **book_tree,
523 		     GList       **keywords,
524 		     GError      **error)
525 {
526 	DhParser   *parser;
527         gboolean    gz;
528 	GIOChannel *io = NULL;
529 	gchar       buf[BYTES_PER_READ];
530 	gboolean    result = TRUE;
531 
532 	parser = g_new0 (DhParser, 1);
533 
534 	if (g_str_has_suffix (path, ".devhelp2")) {
535 		parser->version = 2;
536                 gz = FALSE;
537         }
538         else if (g_str_has_suffix (path, ".devhelp")) {
539 		parser->version = 1;
540                 gz = FALSE;
541         }
542         else if (g_str_has_suffix (path, ".devhelp2.gz")) {
543 		parser->version = 2;
544                 gz = TRUE;
545         } else {
546 		parser->version = 1;
547                 gz = TRUE;
548         }
549 
550 	parser->m_parser = g_new0 (GMarkupParser, 1);
551 
552 	parser->m_parser->start_element = parser_start_node_cb;
553 	parser->m_parser->end_element = parser_end_node_cb;
554 	parser->m_parser->error = parser_error_cb;
555 
556 	parser->context = g_markup_parse_context_new (parser->m_parser, 0,
557 						      parser, NULL);
558 
559 	parser->path = path;
560 	parser->book_tree = book_tree;
561 	parser->keywords = keywords;
562 
563         if (gz) {
564                 if (!parser_read_gz_file (parser,
565                                           path,
566                                           error)) {
567                         result = FALSE;
568                 }
569                 goto exit;
570 	} else {
571                 io = g_io_channel_new_file (path, "r", error);
572                 if (!io) {
573                         result = FALSE;
574                         goto exit;
575                 }
576 
577                 while (TRUE) {
578                         GIOStatus io_status;
579                         gsize     bytes_read;
580 
581                         io_status = g_io_channel_read_chars (io, buf, BYTES_PER_READ,
582                                                              &bytes_read, error);
583                         if (io_status == G_IO_STATUS_ERROR) {
584                                 result = FALSE;
585                                 goto exit;
586                         }
587                         if (io_status != G_IO_STATUS_NORMAL) {
588                                 break;
589                         }
590 
591                         g_markup_parse_context_parse (parser->context, buf,
592                                                       bytes_read, error);
593                         if (error != NULL && *error != NULL) {
594                                 result = FALSE;
595                                 goto exit;
596                         }
597 
598                         if (bytes_read < BYTES_PER_READ) {
599                                 break;
600                         }
601                 }
602         }
603 
604  exit:
605 	if (io) {
606                 g_io_channel_unref (io);
607         }
608 	dh_parser_free (parser);
609 
610 	return result;
611 }
612