1 /* -*- Mode: C; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 8 -*- */
2 /*
3 * Copyright (c) 2002-2003 Mikael Hallendal <micke@imendio.com>
4 * Copyright (c) 2002-2003 CodeFactory AB
5 * Copyright (C) 2005,2008 Imendio AB
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License as
9 * published by the Free Software Foundation; either version 2 of the
10 * License, or (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public
18 * License along with this program; if not, write to the
19 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 * Boston, MA 02111-1307, USA.
21 */
22
23 #include "config.h"
24 #include <string.h>
25 #include <errno.h>
26 #include <zlib.h>
27 #include <glib/gi18n-lib.h>
28
29 #include "dh-error.h"
30 #include "dh-link.h"
31 #include "dh-parser.h"
32
/* XML namespace that the "xmlns" attribute of a <book> element is compared
 * against (case-insensitively) when one is present. */
#define NAMESPACE "http://www.devhelp.net/book"
/* Size, in bytes, of the buffer used for each read from a book file. */
#define BYTES_PER_READ 4096
35
36 typedef struct {
37 GMarkupParser *m_parser;
38 GMarkupParseContext *context;
39
40 const gchar *path;
41
42 /* Top node of book */
43 GNode *book_node;
44
45 /* Current sub section node */
46 GNode *parent;
47
48 gboolean parsing_chapters;
49 gboolean parsing_keywords;
50
51 GNode **book_tree;
52 GList **keywords;
53
54 /* Version 2 uses <keyword> instead of <function>. */
55 gint version;
56 } DhParser;
57
58 static void
dh_parser_free(DhParser * parser)59 dh_parser_free (DhParser *parser)
60 {
61 // NOTE: priv->book_tree and priv->keywords do not need to be freed
62 // because they're only used to store the locations for the return
63 // params of dh_parser_read_file()
64
65 g_markup_parse_context_free (parser->context);
66 g_free (parser->m_parser);
67 g_free (parser);
68 }
69
70 static void
parser_start_node_book(DhParser * parser,GMarkupParseContext * context,const gchar * node_name,const gchar ** attribute_names,const gchar ** attribute_values,GError ** error)71 parser_start_node_book (DhParser *parser,
72 GMarkupParseContext *context,
73 const gchar *node_name,
74 const gchar **attribute_names,
75 const gchar **attribute_values,
76 GError **error)
77 {
78 gint i, j;
79 gint line, col;
80 gchar *title = NULL;
81 gchar *base = NULL;
82 const gchar *name = NULL;
83 const gchar *uri = NULL;
84 DhLink *link;
85
86 if (g_ascii_strcasecmp (node_name, "book") != 0) {
87 g_markup_parse_context_get_position (context, &line, &col);
88 g_set_error (error,
89 DH_ERROR,
90 DH_ERROR_MALFORMED_BOOK,
91 _("Expected '%s', got '%s' at line %d, column %d"),
92 "book", node_name, line, col);
93 return;
94 }
95
96 for (i = 0; attribute_names[i]; ++i) {
97 const gchar *xmlns;
98
99 if (g_ascii_strcasecmp (attribute_names[i], "xmlns") == 0) {
100 xmlns = attribute_values[i];
101 if (g_ascii_strcasecmp (xmlns, NAMESPACE) != 0) {
102 g_markup_parse_context_get_position (context,
103 &line,
104 &col);
105 g_set_error (error,
106 DH_ERROR,
107 DH_ERROR_MALFORMED_BOOK,
108 _("Invalid namespace '%s' at"
109 " line %d, column %d"),
110 xmlns, line, col);
111 return;
112 }
113 }
114 else if (g_ascii_strcasecmp (attribute_names[i], "name") == 0) {
115 name = attribute_values[i];
116 }
117 else if (g_ascii_strcasecmp (attribute_names[i], "title") == 0) {
118 title = g_strdup(attribute_values[i]);
119 for (j = 0; title[j]; j++) {
120 if (title[j] == '\n') title[j] = ' ';
121 }
122 }
123 else if (g_ascii_strcasecmp (attribute_names[i], "base") == 0) {
124 base = g_strdup (attribute_values[i]);
125 }
126 else if (g_ascii_strcasecmp (attribute_names[i], "link") == 0) {
127 uri = attribute_values[i];
128 }
129 }
130
131 if (!title || !name || !uri) {
132 g_markup_parse_context_get_position (context, &line, &col);
133 g_set_error (error,
134 DH_ERROR,
135 DH_ERROR_MALFORMED_BOOK,
136 _("\"title\", \"name\" and \"link\" elements are "
137 "required at line %d, column %d"),
138 line, col);
139 g_free (title);
140 return;
141 }
142
143 if (!base) {
144 base = g_path_get_dirname (parser->path);
145 }
146
147 link = dh_link_new (DH_LINK_TYPE_BOOK,
148 base,
149 name,
150 title,
151 NULL,
152 NULL,
153 uri);
154 g_free (base);
155
156 *parser->keywords = g_list_prepend (*parser->keywords, dh_link_ref (link));
157
158 parser->book_node = g_node_new (dh_link_ref (link));
159 *parser->book_tree = parser->book_node;
160 parser->parent = parser->book_node;
161 g_free (title);
162 dh_link_unref (link);
163 }
164
165 static void
parser_start_node_chapter(DhParser * parser,GMarkupParseContext * context,const gchar * node_name,const gchar ** attribute_names,const gchar ** attribute_values,GError ** error)166 parser_start_node_chapter (DhParser *parser,
167 GMarkupParseContext *context,
168 const gchar *node_name,
169 const gchar **attribute_names,
170 const gchar **attribute_values,
171 GError **error)
172 {
173 gint i;
174 gint line, col;
175 const gchar *name = NULL;
176 const gchar *uri = NULL;
177 DhLink *link;
178 GNode *node;
179
180 if (g_ascii_strcasecmp (node_name, "sub") != 0) {
181 g_markup_parse_context_get_position (context, &line, &col);
182 g_set_error (error,
183 DH_ERROR,
184 DH_ERROR_MALFORMED_BOOK,
185 _("Expected '%s', got '%s' at line %d, column %d"),
186 "sub", node_name, line, col);
187 return;
188 }
189
190 for (i = 0; attribute_names[i]; ++i) {
191 if (g_ascii_strcasecmp (attribute_names[i], "name") == 0) {
192 name = attribute_values[i];
193 }
194 else if (g_ascii_strcasecmp (attribute_names[i], "link") == 0) {
195 uri = attribute_values[i];
196 }
197 }
198
199 if (!name || !uri) {
200 g_markup_parse_context_get_position (context, &line, &col);
201 g_set_error (error,
202 DH_ERROR,
203 DH_ERROR_MALFORMED_BOOK,
204 _("\"name\" and \"link\" elements are required "
205 "inside <sub> on line %d, column %d"),
206 line, col);
207 return;
208 }
209
210 link = dh_link_new (DH_LINK_TYPE_PAGE,
211 NULL,
212 NULL,
213 name,
214 parser->book_node->data,
215 NULL,
216 uri);
217
218 *parser->keywords = g_list_prepend (*parser->keywords, link);
219
220 node = g_node_new (link);
221 g_node_prepend (parser->parent, node);
222 parser->parent = node;
223 }
224
225 static void
parser_start_node_keyword(DhParser * parser,GMarkupParseContext * context,const gchar * node_name,const gchar ** attribute_names,const gchar ** attribute_values,GError ** error)226 parser_start_node_keyword (DhParser *parser,
227 GMarkupParseContext *context,
228 const gchar *node_name,
229 const gchar **attribute_names,
230 const gchar **attribute_values,
231 GError **error)
232 {
233 gint i;
234 gint line, col;
235 const gchar *name = NULL;
236 const gchar *uri = NULL;
237 const gchar *type = NULL;
238 const gchar *deprecated = NULL;
239 DhLinkType link_type;
240 DhLink *link;
241 gchar *tmp;
242
243 if (parser->version == 2 &&
244 g_ascii_strcasecmp (node_name, "keyword") != 0) {
245 g_markup_parse_context_get_position (context, &line, &col);
246 g_set_error (error,
247 DH_ERROR,
248 DH_ERROR_MALFORMED_BOOK,
249 _("Expected '%s', got '%s' at line %d, column %d"),
250 "keyword", node_name, line, col);
251 return;
252 }
253 else if (parser->version == 1 &&
254 g_ascii_strcasecmp (node_name, "function") != 0) {
255 g_markup_parse_context_get_position (context, &line, &col);
256 g_set_error (error,
257 DH_ERROR,
258 DH_ERROR_MALFORMED_BOOK,
259 _("Expected '%s', got '%s' at line %d, column %d"),
260 "function", node_name, line, col);
261 return;
262 }
263
264 for (i = 0; attribute_names[i]; ++i) {
265 if (g_ascii_strcasecmp (attribute_names[i], "type") == 0) {
266 type = attribute_values[i];
267 }
268 else if (g_ascii_strcasecmp (attribute_names[i], "name") == 0) {
269 name = attribute_values[i];
270 }
271 else if (g_ascii_strcasecmp (attribute_names[i], "link") == 0) {
272 uri = attribute_values[i];
273 }
274 else if (g_ascii_strcasecmp (attribute_names[i], "deprecated") == 0) {
275 deprecated = attribute_values[i];
276 }
277 }
278
279 if (!name || !uri) {
280 g_markup_parse_context_get_position (context, &line, &col);
281 g_set_error (error,
282 DH_ERROR,
283 DH_ERROR_MALFORMED_BOOK,
284 _("\"name\" and \"link\" elements are required "
285 "inside '%s' on line %d, column %d"),
286 parser->version == 2 ? "keyword" : "function",
287 line, col);
288 return;
289 }
290
291 if (parser->version == 2 && !type) {
292 /* Required */
293 g_markup_parse_context_get_position (context, &line, &col);
294 g_set_error (error,
295 DH_ERROR,
296 DH_ERROR_MALFORMED_BOOK,
297 _("\"type\" element is required "
298 "inside <keyword> on line %d, column %d"),
299 line, col);
300 return;
301 }
302
303 if (parser->version == 2) {
304 if (strcmp (type, "function") == 0) {
305 link_type = DH_LINK_TYPE_FUNCTION;
306 }
307 else if (strcmp (type, "struct") == 0) {
308 link_type = DH_LINK_TYPE_STRUCT;
309 }
310 else if (strcmp (type, "macro") == 0) {
311 link_type = DH_LINK_TYPE_MACRO;
312 }
313 else if (strcmp (type, "enum") == 0) {
314 link_type = DH_LINK_TYPE_ENUM;
315 }
316 else if (strcmp (type, "typedef") == 0) {
317 link_type = DH_LINK_TYPE_TYPEDEF;
318 } else {
319 link_type = DH_LINK_TYPE_KEYWORD;
320 }
321 } else {
322 link_type = DH_LINK_TYPE_KEYWORD;
323 }
324
325 /* Strip out trailing " () or "()". */
326 if (g_str_has_suffix (name, " ()")) {
327 tmp = g_strndup (name, strlen (name) - 3);
328
329 if (link_type == DH_LINK_TYPE_KEYWORD) {
330 link_type = DH_LINK_TYPE_FUNCTION;
331 }
332 name = tmp;
333 }
334 else if (g_str_has_suffix (name, "()")) {
335 tmp = g_strndup (name, strlen (name) - 2);
336
337 /* With old devhelp format, take a guess that this is a
338 * macro.
339 */
340 if (link_type == DH_LINK_TYPE_KEYWORD) {
341 link_type = DH_LINK_TYPE_MACRO;
342 }
343 name = tmp;
344 } else {
345 tmp = NULL;
346 }
347
348 /* Strip out prefixing "struct", "union", "enum", to make searching
349 * easier. Also fix up the link type (only applies for old devhelp
350 * format).
351 */
352 if (g_str_has_prefix (name, "struct ")) {
353 name = name + 7;
354 if (link_type == DH_LINK_TYPE_KEYWORD) {
355 link_type = DH_LINK_TYPE_STRUCT;
356 }
357 }
358 else if (g_str_has_prefix (name, "union ")) {
359 name = name + 6;
360 if (link_type == DH_LINK_TYPE_KEYWORD) {
361 link_type = DH_LINK_TYPE_STRUCT;
362 }
363 }
364 else if (g_str_has_prefix (name, "enum ")) {
365 name = name + 5;
366 if (link_type == DH_LINK_TYPE_KEYWORD) {
367 link_type = DH_LINK_TYPE_ENUM;
368 }
369 }
370
371 link = dh_link_new (link_type,
372 NULL,
373 NULL,
374 name,
375 parser->book_node->data,
376 parser->parent->data,
377 uri);
378
379 g_free (tmp);
380
381 if (deprecated) {
382 dh_link_set_flags (
383 link,
384 dh_link_get_flags (link) | DH_LINK_FLAGS_DEPRECATED);
385 }
386
387 *parser->keywords = g_list_prepend (*parser->keywords, link);
388 }
389
390 static void
parser_start_node_cb(GMarkupParseContext * context,const gchar * node_name,const gchar ** attribute_names,const gchar ** attribute_values,gpointer user_data,GError ** error)391 parser_start_node_cb (GMarkupParseContext *context,
392 const gchar *node_name,
393 const gchar **attribute_names,
394 const gchar **attribute_values,
395 gpointer user_data,
396 GError **error)
397 {
398 DhParser *parser = user_data;
399
400 if (parser->parsing_keywords) {
401 parser_start_node_keyword (parser,
402 context,
403 node_name,
404 attribute_names,
405 attribute_values,
406 error);
407 return;
408 }
409 else if (parser->parsing_chapters) {
410 parser_start_node_chapter (parser,
411 context,
412 node_name,
413 attribute_names,
414 attribute_values,
415 error);
416 return;
417 }
418 else if (g_ascii_strcasecmp (node_name, "functions") == 0) {
419 parser->parsing_keywords = TRUE;
420 }
421 else if (g_ascii_strcasecmp (node_name, "chapters") == 0) {
422 parser->parsing_chapters = TRUE;
423 }
424 if (!parser->book_node) {
425 parser_start_node_book (parser,
426 context,
427 node_name,
428 attribute_names,
429 attribute_values,
430 error);
431 return;
432 }
433 }
434
435 static void
parser_end_node_cb(GMarkupParseContext * context,const gchar * node_name,gpointer user_data,GError ** error)436 parser_end_node_cb (GMarkupParseContext *context,
437 const gchar *node_name,
438 gpointer user_data,
439 GError **error)
440 {
441 DhParser *parser = user_data;
442
443 if (parser->parsing_keywords) {
444 if (g_ascii_strcasecmp (node_name, "functions") == 0) {
445 parser->parsing_keywords = FALSE;
446 }
447 }
448 else if (parser->parsing_chapters) {
449 g_node_reverse_children (parser->parent);
450 if (g_ascii_strcasecmp (node_name, "sub") == 0) {
451 parser->parent = parser->parent->parent;
452 /* Move up in the tree */
453 }
454 else if (g_ascii_strcasecmp (node_name, "chapters") == 0) {
455 parser->parsing_chapters = FALSE;
456 }
457 }
458 }
459
460 static void
parser_error_cb(GMarkupParseContext * context,GError * error,gpointer user_data)461 parser_error_cb (GMarkupParseContext *context,
462 GError *error,
463 gpointer user_data)
464 {
465 DhParser *parser = user_data;
466
467 g_markup_parse_context_free (parser->context);
468 parser->context = NULL;
469 }
470
471 static gboolean
parser_read_gz_file(DhParser * parser,const gchar * path,GError ** error)472 parser_read_gz_file (DhParser *parser,
473 const gchar *path,
474 GError **error)
475 {
476 gchar buf[BYTES_PER_READ];
477 gzFile file;
478
479 file = gzopen (path, "r");
480 if (!file) {
481 g_set_error (error,
482 DH_ERROR,
483 DH_ERROR_FILE_NOT_FOUND,
484 "%s", g_strerror (errno));
485 return FALSE;
486 }
487
488 while (TRUE) {
489 gssize bytes_read;
490
491 bytes_read = gzread (file, buf, BYTES_PER_READ);
492 if (bytes_read == -1) {
493 gint err;
494 const gchar *message;
495
496 message = gzerror (file, &err);
497 g_set_error (error,
498 DH_ERROR,
499 DH_ERROR_INTERNAL_ERROR,
500 _("Cannot uncompress book '%s': %s"),
501 path, message);
502 return FALSE;
503 }
504
505 g_markup_parse_context_parse (parser->context, buf,
506 bytes_read, error);
507 if (error != NULL && *error != NULL) {
508 return FALSE;
509 }
510 if (bytes_read < BYTES_PER_READ) {
511 break;
512 }
513 }
514
515 gzclose (file);
516
517 return TRUE;
518 }
519
520 gboolean
dh_parser_read_file(const gchar * path,GNode ** book_tree,GList ** keywords,GError ** error)521 dh_parser_read_file (const gchar *path,
522 GNode **book_tree,
523 GList **keywords,
524 GError **error)
525 {
526 DhParser *parser;
527 gboolean gz;
528 GIOChannel *io = NULL;
529 gchar buf[BYTES_PER_READ];
530 gboolean result = TRUE;
531
532 parser = g_new0 (DhParser, 1);
533
534 if (g_str_has_suffix (path, ".devhelp2")) {
535 parser->version = 2;
536 gz = FALSE;
537 }
538 else if (g_str_has_suffix (path, ".devhelp")) {
539 parser->version = 1;
540 gz = FALSE;
541 }
542 else if (g_str_has_suffix (path, ".devhelp2.gz")) {
543 parser->version = 2;
544 gz = TRUE;
545 } else {
546 parser->version = 1;
547 gz = TRUE;
548 }
549
550 parser->m_parser = g_new0 (GMarkupParser, 1);
551
552 parser->m_parser->start_element = parser_start_node_cb;
553 parser->m_parser->end_element = parser_end_node_cb;
554 parser->m_parser->error = parser_error_cb;
555
556 parser->context = g_markup_parse_context_new (parser->m_parser, 0,
557 parser, NULL);
558
559 parser->path = path;
560 parser->book_tree = book_tree;
561 parser->keywords = keywords;
562
563 if (gz) {
564 if (!parser_read_gz_file (parser,
565 path,
566 error)) {
567 result = FALSE;
568 }
569 goto exit;
570 } else {
571 io = g_io_channel_new_file (path, "r", error);
572 if (!io) {
573 result = FALSE;
574 goto exit;
575 }
576
577 while (TRUE) {
578 GIOStatus io_status;
579 gsize bytes_read;
580
581 io_status = g_io_channel_read_chars (io, buf, BYTES_PER_READ,
582 &bytes_read, error);
583 if (io_status == G_IO_STATUS_ERROR) {
584 result = FALSE;
585 goto exit;
586 }
587 if (io_status != G_IO_STATUS_NORMAL) {
588 break;
589 }
590
591 g_markup_parse_context_parse (parser->context, buf,
592 bytes_read, error);
593 if (error != NULL && *error != NULL) {
594 result = FALSE;
595 goto exit;
596 }
597
598 if (bytes_read < BYTES_PER_READ) {
599 break;
600 }
601 }
602 }
603
604 exit:
605 if (io) {
606 g_io_channel_unref (io);
607 }
608 dh_parser_free (parser);
609
610 return result;
611 }
612