1 /********************************************************************
2  * sixtp.c -- functions for XML parsing                             *
3  * Copyright (c) 2001 Gnumatic, Inc.                                *
4  *                                                                  *
5  * This program is free software; you can redistribute it and/or    *
6  * modify it under the terms of the GNU General Public License as   *
7  * published by the Free Software Foundation; either version 2 of   *
8  * the License, or (at your option) any later version.              *
9  *                                                                  *
10  * This program is distributed in the hope that it will be useful,  *
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of   *
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the    *
13  * GNU General Public License for more details.                     *
14  *                                                                  *
15  * You should have received a copy of the GNU General Public License*
16  * along with this program; if not, contact:                        *
17  *                                                                  *
18  * Free Software Foundation           Voice:  +1-617-542-5942       *
19  * 51 Franklin Street, Fifth Floor    Fax:    +1-617-542-2652       *
20  * Boston, MA  02110-1301,  USA       gnu@gnu.org                   *
21  *                                                                  *
22  ********************************************************************/
23 #include <glib.h>
24 #include <glib/gstdio.h>
25 
26 extern "C"
27 {
28 #include <config.h>
29 #include <string.h>
30 #include <ctype.h>
31 #include <stdarg.h>
32 #include <stdio.h>
33 #include <sys/types.h>
34 #include <qoflog.h>
35 #ifdef _MSC_VER
36     typedef int ssize_t;
37 # define g_fopen fopen
38 #endif
39 }
40 
41 #include "sixtp.h"
42 #include "sixtp-parsers.h"
43 #include "sixtp-stack.h"
44 
45 #undef G_LOG_DOMAIN
46 #define G_LOG_DOMAIN "gnc.backend.file.sixtp"
47 static QofLogModule log_module = "gnc.backend.file.sixtp";
48 
49 extern const gchar* gnc_v2_xml_version_string;        /* see io-gncxml-v2.c */
50 
51 /************************************************************************/
52 gboolean
is_child_result_from_node_named(sixtp_child_result * cr,const char * tag)53 is_child_result_from_node_named (sixtp_child_result* cr, const char* tag)
54 {
55     return ((cr->type == SIXTP_CHILD_RESULT_NODE)
56             &&
57             (g_strcmp0 (cr->tag, tag) == 0));
58 }
59 
60 void
sixtp_child_free_data(sixtp_child_result * result)61 sixtp_child_free_data (sixtp_child_result* result)
62 {
63     if (result->data) g_free (result->data);
64 }
65 
66 void
sixtp_child_result_destroy(sixtp_child_result * r)67 sixtp_child_result_destroy (sixtp_child_result* r)
68 {
69     if (r->should_cleanup && r->cleanup_handler)
70     {
71         r->cleanup_handler (r);
72     }
73     if (r->type == SIXTP_CHILD_RESULT_NODE) g_free (r->tag);
74     g_free (r);
75 }
76 
77 void
sixtp_child_result_print(sixtp_child_result * cr,FILE * f)78 sixtp_child_result_print (sixtp_child_result* cr, FILE* f)
79 {
80     fprintf (f, "((tag %s) (data %p))",
81              cr->tag ? cr->tag : "(null)",
82              cr->data);
83 }
84 
85 /************************************************************************/
86 
87 
88 void
sixtp_set_start(sixtp * parser,sixtp_start_handler start_handler)89 sixtp_set_start (sixtp* parser, sixtp_start_handler start_handler)
90 {
91     parser->start_handler = start_handler;
92 }
93 
94 void
sixtp_set_before_child(sixtp * parser,sixtp_before_child_handler handler)95 sixtp_set_before_child (sixtp* parser, sixtp_before_child_handler handler)
96 {
97     parser->before_child = handler;
98 }
99 
100 void
sixtp_set_after_child(sixtp * parser,sixtp_after_child_handler handler)101 sixtp_set_after_child (sixtp* parser, sixtp_after_child_handler handler)
102 {
103     parser->after_child = handler;
104 }
105 
106 void
sixtp_set_end(sixtp * parser,sixtp_end_handler end_handler)107 sixtp_set_end (sixtp* parser, sixtp_end_handler end_handler)
108 {
109     parser->end_handler = end_handler;
110 }
111 
112 void
sixtp_set_chars(sixtp * parser,sixtp_characters_handler char_handler)113 sixtp_set_chars (sixtp* parser, sixtp_characters_handler char_handler)
114 {
115     parser->characters_handler = char_handler;
116 }
117 
118 void
sixtp_set_cleanup_result(sixtp * parser,sixtp_result_handler handler)119 sixtp_set_cleanup_result (sixtp* parser, sixtp_result_handler handler)
120 {
121     parser->cleanup_result = handler;
122 }
123 
124 void
sixtp_set_cleanup_chars(sixtp * parser,sixtp_result_handler handler)125 sixtp_set_cleanup_chars (sixtp* parser, sixtp_result_handler handler)
126 {
127     parser->cleanup_chars = handler;
128 }
129 
130 void
sixtp_set_fail(sixtp * parser,sixtp_fail_handler handler)131 sixtp_set_fail (sixtp* parser,
132                 sixtp_fail_handler handler)
133 {
134     parser->fail_handler = handler;
135 }
136 
137 void
sixtp_set_result_fail(sixtp * parser,sixtp_result_handler handler)138 sixtp_set_result_fail (sixtp* parser, sixtp_result_handler handler)
139 {
140     parser->result_fail_handler = handler;
141 }
142 
143 void
sixtp_set_chars_fail(sixtp * parser,sixtp_result_handler handler)144 sixtp_set_chars_fail (sixtp* parser, sixtp_result_handler handler)
145 {
146     parser->chars_fail_handler = handler;
147 }
148 
149 sixtp*
sixtp_new(void)150 sixtp_new (void)
151 {
152     sixtp* s = g_new0 (sixtp, 1);
153 
154     if (s)
155     {
156         s->child_parsers = g_hash_table_new (g_str_hash, g_str_equal);
157         if (!s->child_parsers)
158         {
159             g_free (s);
160             s = NULL;
161         }
162     }
163     return (s);
164 }
165 
166 sixtp*
sixtp_set_any(sixtp * tochange,int cleanup,...)167 sixtp_set_any (sixtp* tochange, int cleanup, ...)
168 {
169     va_list ap;
170     sixtp_handler_type type;
171 
172     if (!tochange)
173     {
174         PWARN ("Null tochange passed");
175         return NULL;
176     }
177 
178     va_start (ap, cleanup);
179 
180     do
181     {
182         type = static_cast<decltype (type)> (va_arg (ap, int));
183 
184         switch (type)
185         {
186         case SIXTP_NO_MORE_HANDLERS:
187             va_end (ap);
188             return tochange;
189 
190         case SIXTP_START_HANDLER_ID:
191             sixtp_set_start (tochange, va_arg (ap, sixtp_start_handler));
192             break;
193 
194         case SIXTP_BEFORE_CHILD_HANDLER_ID:
195             sixtp_set_before_child (tochange,
196                                     va_arg (ap, sixtp_before_child_handler));
197             break;
198 
199         case SIXTP_AFTER_CHILD_HANDLER_ID:
200             sixtp_set_after_child (tochange,
201                                    va_arg (ap, sixtp_after_child_handler));
202             break;
203 
204         case SIXTP_END_HANDLER_ID:
205             sixtp_set_end (tochange, va_arg (ap, sixtp_end_handler));
206             break;
207 
208         case SIXTP_CHARACTERS_HANDLER_ID:
209             sixtp_set_chars (tochange, va_arg (ap, sixtp_characters_handler));
210             break;
211 
212         case SIXTP_FAIL_HANDLER_ID:
213             sixtp_set_fail (tochange, va_arg (ap, sixtp_fail_handler));
214             break;
215 
216         case SIXTP_CLEANUP_RESULT_ID:
217             sixtp_set_cleanup_result (tochange,
218                                       va_arg (ap, sixtp_result_handler));
219             break;
220 
221         case SIXTP_CLEANUP_CHARS_ID:
222             sixtp_set_cleanup_chars (tochange,
223                                      va_arg (ap, sixtp_result_handler));
224             break;
225 
226         case SIXTP_RESULT_FAIL_ID:
227             sixtp_set_result_fail (tochange, va_arg (ap, sixtp_result_handler));
228             break;
229 
230         case SIXTP_CHARS_FAIL_ID:
231             sixtp_set_chars_fail (tochange, va_arg (ap, sixtp_result_handler));
232             break;
233 
234         default:
235             va_end (ap);
236             g_critical ("Bogus sixtp type %d", type);
237             if (cleanup)
238             {
239                 sixtp_destroy (tochange);
240             }
241             return NULL;
242         }
243     }
244     while (1);
245 
246     va_end (ap);
247     return tochange;
248 }
249 
250 static void sixtp_destroy_child (gpointer key, gpointer value,
251                                  gpointer user_data);
252 
253 static void
sixtp_destroy_node(sixtp * sp,GHashTable * corpses)254 sixtp_destroy_node (sixtp* sp, GHashTable* corpses)
255 {
256     g_return_if_fail (sp);
257     g_return_if_fail (corpses);
258     g_hash_table_foreach (sp->child_parsers, sixtp_destroy_child, corpses);
259     g_hash_table_destroy (sp->child_parsers);
260     g_free (sp);
261 }
262 
263 static void
sixtp_destroy_child(gpointer key,gpointer value,gpointer user_data)264 sixtp_destroy_child (gpointer key, gpointer value, gpointer user_data)
265 {
266     GHashTable* corpses = (GHashTable*) user_data;
267     sixtp* child = (sixtp*) value;
268     gpointer lookup_key;
269     gpointer lookup_value;
270 
271     DEBUG ("Killing sixtp child under key <%s>", key ? (char*) key : "(null)");
272 
273     if (!corpses)
274     {
275         g_critical ("no corpses in sixtp_destroy_child <%s>",
276                     key ? (char*) key : "(null)");
277         g_free (key);
278         return;
279     }
280     if (!child)
281     {
282         g_critical ("no child in sixtp_destroy_child <%s>",
283                     key ? (char*) key : "");
284         g_free (key);
285         return;
286     }
287     g_free (key);
288 
289     if (!g_hash_table_lookup_extended (corpses, (gconstpointer) child,
290                                        &lookup_key, &lookup_value))
291     {
292         /* haven't killed this one yet. */
293         g_hash_table_insert (corpses, child, (gpointer) 1);
294         sixtp_destroy_node (child, corpses);
295     }
296 }
297 
298 void
sixtp_destroy(sixtp * sp)299 sixtp_destroy (sixtp* sp)
300 {
301     GHashTable* corpses;
302     g_return_if_fail (sp);
303     corpses = g_hash_table_new (g_direct_hash, g_direct_equal);
304     sixtp_destroy_node (sp, corpses);
305     g_hash_table_destroy (corpses);
306 }
307 
308 
309 /***********************************************************************/
310 
311 gboolean
sixtp_add_sub_parser(sixtp * parser,const gchar * tag,sixtp * sub_parser)312 sixtp_add_sub_parser (sixtp* parser, const gchar* tag, sixtp* sub_parser)
313 {
314     g_return_val_if_fail (parser, FALSE);
315     g_return_val_if_fail (tag, FALSE);
316     g_return_val_if_fail (sub_parser, FALSE);
317 
318     g_hash_table_insert (parser->child_parsers,
319                          g_strdup (tag), (gpointer) sub_parser);
320     return (TRUE);
321 }
322 
323 /*
324  * This is a bit complex because of having to make sure to
325  * cleanup things we haven't looked at on an error condition
326  */
327 sixtp*
sixtp_add_some_sub_parsers(sixtp * tochange,int cleanup,...)328 sixtp_add_some_sub_parsers (sixtp* tochange, int cleanup, ...)
329 {
330     int have_error;
331     va_list ap;
332     char* tag;
333     sixtp* handler;
334 
335     va_start (ap, cleanup);
336 
337     have_error = 0;
338 
339     if (!tochange)
340     {
341         have_error = 1;
342     }
343 
344     do
345     {
346         tag = va_arg (ap, char*);
347         if (!tag)
348         {
349             break;
350         }
351 
352         handler = va_arg (ap, sixtp*);
353         if (!handler)
354         {
355             PWARN ("Handler for tag %s is null",
356                        tag ? tag : "(null)");
357 
358             if (cleanup)
359             {
360                 sixtp_destroy (tochange);
361                 tochange = NULL;
362                 have_error = 1;
363             }
364             else
365             {
366                 va_end (ap);
367                 return NULL;
368             }
369         }
370 
371         if (have_error)
372         {
373             sixtp_destroy (handler);
374         }
375         else
376         {
377             sixtp_add_sub_parser (tochange, tag, handler);
378         }
379     }
380     while (1);
381 
382     va_end (ap);
383     return tochange;
384 }
385 
386 /************************************************************************/
387 
388 void
sixtp_sax_start_handler(void * user_data,const xmlChar * name,const xmlChar ** attrs)389 sixtp_sax_start_handler (void* user_data,
390                          const xmlChar* name,
391                          const xmlChar** attrs)
392 {
393     sixtp_sax_data* pdata = (sixtp_sax_data*) user_data;
394     sixtp_stack_frame* current_frame = NULL;
395     sixtp* current_parser = NULL;
396     sixtp* next_parser = NULL;
397     gchar* next_parser_tag = NULL;
398     gboolean lookup_success = FALSE;
399     sixtp_stack_frame* new_frame = NULL;
400 
401     current_frame = (sixtp_stack_frame*) pdata->stack->data;
402     current_parser = current_frame->parser;
403 
404     /* Use an extended lookup so we can get *our* copy of the key.
405        Since we've strduped it, we know its lifetime... */
406     lookup_success =
407         g_hash_table_lookup_extended (current_parser->child_parsers,
408                                       name,
409                                       reinterpret_cast<void**> (&next_parser_tag),
410                                       reinterpret_cast<void**> (&next_parser));
411 
412 
413     if (!lookup_success)
414     {
415         /* magic catch all value */
416         lookup_success = g_hash_table_lookup_extended (
417                              current_parser->child_parsers, SIXTP_MAGIC_CATCHER,
418                              reinterpret_cast<void**> (&next_parser_tag),
419                              reinterpret_cast<void**> (&next_parser));
420         if (!lookup_success)
421         {
422             g_critical ("Tag <%s> not allowed in current context.",
423                         name ? (char*) name : "(null)");
424             pdata->parsing_ok = FALSE;
425             next_parser = pdata->bad_xml_parser;
426         }
427     }
428 
429     if (current_frame->parser->before_child)
430     {
431         GSList* parent_data_from_children = NULL;
432         gpointer parent_data_for_children = NULL;
433 
434         if (g_slist_length (pdata->stack) > 1)
435         {
436             /* we're not in the top level node */
437             sixtp_stack_frame* parent_frame =
438                 (sixtp_stack_frame*) pdata->stack->next->data;
439             parent_data_from_children = static_cast<decltype (parent_data_from_children)>
440                                         (parent_frame->data_from_children);
441         }
442 
443         pdata->parsing_ok &=
444             current_frame->parser->before_child (current_frame->data_for_children,
445                                                  current_frame->data_from_children,
446                                                  parent_data_from_children,
447                                                  parent_data_for_children,
448                                                  pdata->global_data,
449                                                  & (current_frame->frame_data),
450                                                  current_frame->tag,
451                                                  (gchar*) name);
452     }
453 
454     /* now allocate the new stack frame and shift to it */
455     new_frame = sixtp_stack_frame_new (next_parser, g_strdup ((char*) name));
456 
457     new_frame->line = xmlSAX2GetLineNumber (pdata->saxParserCtxt);
458     new_frame->col  = xmlSAX2GetColumnNumber (pdata->saxParserCtxt);
459 
460     pdata->stack = g_slist_prepend (pdata->stack, (gpointer) new_frame);
461 
462     if (next_parser->start_handler)
463     {
464         pdata->parsing_ok &=
465             next_parser->start_handler (current_frame->data_from_children,
466                                         current_frame->data_for_children,
467                                         pdata->global_data,
468                                         &new_frame->data_for_children,
469                                         &new_frame->frame_data,
470                                         (gchar*) name,
471                                         (gchar**)attrs);
472     }
473 }
474 
475 void
sixtp_sax_characters_handler(void * user_data,const xmlChar * text,int len)476 sixtp_sax_characters_handler (void* user_data, const xmlChar* text, int len)
477 {
478     sixtp_sax_data* pdata = (sixtp_sax_data*) user_data;
479     sixtp_stack_frame* frame;
480 
481     frame = (sixtp_stack_frame*) pdata->stack->data;
482     if (frame->parser->characters_handler)
483     {
484         gpointer result = NULL;
485 
486         pdata->parsing_ok &=
487             frame->parser->characters_handler (frame->data_from_children,
488                                                frame->data_for_children,
489                                                pdata->global_data,
490                                                &result,
491                                                (gchar*) text,
492                                                len);
493         if (pdata->parsing_ok && result)
494         {
495             /* push the result onto the current "child" list. */
496             sixtp_child_result* child_data = g_new0 (sixtp_child_result, 1);
497 
498             child_data->type = SIXTP_CHILD_RESULT_CHARS;
499             child_data->tag = NULL;
500             child_data->data = result;
501             child_data->should_cleanup = TRUE;
502             child_data->cleanup_handler = frame->parser->cleanup_chars;
503             child_data->fail_handler = frame->parser->chars_fail_handler;
504             frame->data_from_children = g_slist_prepend (frame->data_from_children,
505                                                          child_data);
506         }
507     }
508 }
509 
510 void
sixtp_sax_end_handler(void * user_data,const xmlChar * name)511 sixtp_sax_end_handler (void* user_data, const xmlChar* name)
512 {
513     sixtp_sax_data* pdata = (sixtp_sax_data*) user_data;
514     sixtp_stack_frame* current_frame;
515     sixtp_stack_frame* parent_frame;
516     sixtp_child_result* child_result_data = NULL;
517     gchar* end_tag = NULL;
518 
519     current_frame = (sixtp_stack_frame*) pdata->stack->data;
520     parent_frame = (sixtp_stack_frame*) pdata->stack->next->data;
521 
522     /* time to make sure we got the right closing tag.  Is this really
523        necessary? */
524     if (g_strcmp0 (current_frame->tag, (gchar*) name) != 0)
525     {
526         PWARN ("bad closing tag (start <%s>, end <%s>)", current_frame->tag, name);
527         pdata->parsing_ok = FALSE;
528 
529         /* See if we're just off by one and try to recover */
530         if (g_strcmp0 (parent_frame->tag, (gchar*) name) == 0)
531         {
532             pdata->stack = sixtp_pop_and_destroy_frame (pdata->stack);
533             current_frame = (sixtp_stack_frame*) pdata->stack->data;
534             parent_frame = (sixtp_stack_frame*) pdata->stack->next->data;
535             PWARN ("found matching start <%s> tag up one level", name);
536         }
537     }
538 
539     /* tag's OK, proceed. */
540     if (current_frame->parser->end_handler)
541     {
542         pdata->parsing_ok &=
543             current_frame->parser->end_handler (current_frame->data_for_children,
544                                                 current_frame->data_from_children,
545                                                 parent_frame->data_from_children,
546                                                 parent_frame->data_for_children,
547                                                 pdata->global_data,
548                                                 &current_frame->frame_data,
549                                                 current_frame->tag);
550     }
551 
552     if (current_frame->frame_data)
553     {
554         /* push the result onto the parent's child result list. */
555         child_result_data = g_new (sixtp_child_result, 1);
556 
557         child_result_data->type = SIXTP_CHILD_RESULT_NODE;
558         child_result_data->tag = g_strdup (current_frame->tag);
559         child_result_data->data = current_frame->frame_data;
560         child_result_data->should_cleanup = TRUE;
561         child_result_data->cleanup_handler = current_frame->parser->cleanup_result;
562         child_result_data->fail_handler =
563             current_frame->parser->result_fail_handler;
564         parent_frame->data_from_children =
565             g_slist_prepend (parent_frame->data_from_children, child_result_data);
566     }
567 
568     /* grab it before it goes away - we own the reference */
569     end_tag = current_frame->tag;
570 
571     DEBUG ("Finished with end of <%s>", end_tag ? end_tag : "(null)");
572 
573     /*sixtp_print_frame_stack(pdata->stack, stderr);*/
574 
575     pdata->stack = sixtp_pop_and_destroy_frame (pdata->stack);
576 
577     /* reset pointer after stack pop */
578     current_frame = (sixtp_stack_frame*) pdata->stack->data;
579     /* reset the parent, checking to see if we're at the top level node */
580     parent_frame = (sixtp_stack_frame*)
581                    ((g_slist_length (pdata->stack) > 1) ? (pdata->stack->next->data) : NULL);
582 
583     if (current_frame->parser->after_child)
584     {
585         /* reset pointer after stack pop */
586         GSList* parent_data_from_children = NULL;
587         gpointer parent_data_for_children = NULL;
588 
589         if (parent_frame)
590         {
591             /* we're not in the top level node */
592             sixtp_stack_frame* parent_frame =
593                 (sixtp_stack_frame*) pdata->stack->next->data;
594             parent_data_from_children = static_cast<decltype (parent_data_from_children)>
595                                         (parent_frame->data_for_children);
596         }
597 
598         pdata->parsing_ok &=
599             current_frame->parser->after_child (current_frame->data_for_children,
600                                                 current_frame->data_from_children,
601                                                 parent_data_from_children,
602                                                 parent_data_for_children,
603                                                 pdata->global_data,
604                                                 & (current_frame->frame_data),
605                                                 current_frame->tag,
606                                                 end_tag,
607                                                 child_result_data);
608     }
609 
610     g_free (end_tag);
611 }
612 
613 xmlEntityPtr
sixtp_sax_get_entity_handler(void * user_data,const xmlChar * name)614 sixtp_sax_get_entity_handler (void* user_data, const xmlChar* name)
615 {
616     return xmlGetPredefinedEntity (name);
617 }
618 
619 
620 void
sixtp_handle_catastrophe(sixtp_sax_data * sax_data)621 sixtp_handle_catastrophe (sixtp_sax_data* sax_data)
622 {
623     /* Something has gone wrong.  To handle it, we have to traverse the
624        stack, calling, at each level, the frame failure handler (the
625        handler for the current, unfinished block) and then the sibling
626        handlers.  The order is reverse chronological - oldest child
627        results cleaned up last.  This holds overall as well, stack
628        frames are cleaned up in their order on the stack which will be
629        youngest to oldest.  */
630 
631     GSList* lp;
632     GSList** stack = & (sax_data->stack);
633 
634     g_critical ("parse failed at:");
635     sixtp_print_frame_stack (sax_data->stack, stderr);
636 
637     while (*stack)
638     {
639         sixtp_stack_frame* current_frame = (sixtp_stack_frame*) (*stack)->data;
640 
641         /* cleanup the current frame */
642         if (current_frame->parser->fail_handler)
643         {
644             GSList* sibling_data;
645             gpointer parent_data;
646 
647             if ((*stack)->next == NULL)
648             {
649                 /* This is the top of the stack... */
650                 parent_data = NULL;
651                 sibling_data = NULL;
652             }
653             else
654             {
655                 sixtp_stack_frame* parent_frame =
656                     (sixtp_stack_frame*) (*stack)->next->data;
657                 parent_data = parent_frame->data_for_children;
658                 sibling_data = parent_frame->data_from_children;
659             }
660 
661             current_frame->parser->fail_handler (current_frame->data_for_children,
662                                                  current_frame->data_from_children,
663                                                  sibling_data,
664                                                  parent_data,
665                                                  sax_data->global_data,
666                                                  &current_frame->frame_data,
667                                                  current_frame->tag);
668         }
669 
670         /* now cleanup any children's results */
671         for (lp = current_frame->data_from_children; lp; lp = lp->next)
672         {
673             sixtp_child_result* cresult = (sixtp_child_result*) lp->data;
674             if (cresult->fail_handler)
675             {
676                 cresult->fail_handler (cresult);
677             }
678         }
679 
680         if ((*stack)->next == NULL)
681         {
682             /* This is the top of the stack. The top frame seems to want to
683              * be destroyed by sixtp_context_destroy. */
684             break;
685         }
686 
687         *stack = sixtp_pop_and_destroy_frame (*stack);
688     }
689 }
690 
691 static gboolean
gnc_bad_xml_end_handler(gpointer data_for_children,GSList * data_from_children,GSList * sibling_data,gpointer parent_data,gpointer global_data,gpointer * result,const gchar * tag)692 gnc_bad_xml_end_handler (gpointer data_for_children,
693                          GSList* data_from_children, GSList* sibling_data,
694                          gpointer parent_data, gpointer global_data,
695                          gpointer* result, const gchar* tag)
696 {
697     return TRUE;
698 }
699 
700 static gboolean
sixtp_parse_file_common(sixtp * sixtp,xmlParserCtxtPtr xml_context,gpointer data_for_top_level,gpointer global_data,gpointer * parse_result)701 sixtp_parse_file_common (sixtp* sixtp,
702                          xmlParserCtxtPtr xml_context,
703                          gpointer data_for_top_level,
704                          gpointer global_data,
705                          gpointer* parse_result)
706 {
707     sixtp_parser_context* ctxt;
708     int parse_ret;
709 
710     if (! (ctxt = sixtp_context_new (sixtp, global_data, data_for_top_level)))
711     {
712         g_critical ("sixtp_context_new returned null");
713         return FALSE;
714     }
715 
716     ctxt->data.saxParserCtxt = xml_context;
717     ctxt->data.saxParserCtxt->sax = &ctxt->handler;
718     ctxt->data.saxParserCtxt->userData = &ctxt->data;
719     ctxt->data.bad_xml_parser = sixtp_dom_parser_new (gnc_bad_xml_end_handler,
720                                                       NULL, NULL);
721     parse_ret = xmlParseDocument (ctxt->data.saxParserCtxt);
722     //xmlSAXUserParseFile(&ctxt->handler, &ctxt->data, filename);
723 
724     sixtp_context_run_end_handler (ctxt);
725 
726     if (parse_ret == 0 && ctxt->data.parsing_ok)
727     {
728         if (parse_result)
729             *parse_result = ctxt->top_frame->frame_data;
730         sixtp_context_destroy (ctxt);
731         return TRUE;
732     }
733     else
734     {
735         if (parse_result)
736             *parse_result = NULL;
737         if (g_slist_length (ctxt->data.stack) > 1)
738             sixtp_handle_catastrophe (&ctxt->data);
739         sixtp_context_destroy (ctxt);
740         return FALSE;
741     }
742 }
743 
744 gboolean
sixtp_parse_file(sixtp * sixtp,const char * filename,gpointer data_for_top_level,gpointer global_data,gpointer * parse_result)745 sixtp_parse_file (sixtp* sixtp,
746                   const char* filename,
747                   gpointer data_for_top_level,
748                   gpointer global_data,
749                   gpointer* parse_result)
750 {
751     gboolean ret;
752     xmlParserCtxtPtr context;
753 
754 #ifdef G_OS_WIN32
755     {
756         gchar* conv_name = g_win32_locale_filename_from_utf8 (filename);
757         if (!conv_name)
758         {
759             PWARN ("Could not convert '%s' to system codepage", filename);
760             return FALSE;
761         }
762         context = xmlCreateFileParserCtxt (conv_name);
763         g_free (conv_name);
764     }
765 #else
766     context = xmlCreateFileParserCtxt (filename);
767 #endif
768     ret = sixtp_parse_file_common (sixtp, context, data_for_top_level,
769                                    global_data, parse_result);
770     return ret;
771 }
772 
773 /* Call back function for libxml2 to read from compressed or uncompressed stream */
774 static int
sixtp_parser_read(void * context,char * buffer,int len)775 sixtp_parser_read (void* context, char* buffer, int len)
776 {
777     int ret;
778 
779     ret = fread (&buffer[0], sizeof (char), len, (FILE*) context);
780     if (ret < 0)
781         PWARN ("Error reading XML file");
782     return ret;
783 }
784 
785 gboolean
sixtp_parse_fd(sixtp * sixtp,FILE * fd,gpointer data_for_top_level,gpointer global_data,gpointer * parse_result)786 sixtp_parse_fd (sixtp* sixtp,
787                 FILE* fd,
788                 gpointer data_for_top_level,
789                 gpointer global_data,
790                 gpointer* parse_result)
791 {
792     gboolean ret;
793     xmlParserCtxtPtr context = xmlCreateIOParserCtxt (NULL, NULL,
794                                                       sixtp_parser_read, NULL /*no close */, fd,
795                                                       XML_CHAR_ENCODING_NONE);
796     ret = sixtp_parse_file_common (sixtp, context, data_for_top_level,
797                                    global_data, parse_result);
798     return ret;
799 }
800 
801 gboolean
sixtp_parse_buffer(sixtp * sixtp,char * bufp,int bufsz,gpointer data_for_top_level,gpointer global_data,gpointer * parse_result)802 sixtp_parse_buffer (sixtp* sixtp,
803                     char* bufp,
804                     int bufsz,
805                     gpointer data_for_top_level,
806                     gpointer global_data,
807                     gpointer* parse_result)
808 {
809     gboolean ret;
810     xmlParserCtxtPtr context = xmlCreateMemoryParserCtxt (bufp, bufsz);
811     ret = sixtp_parse_file_common (sixtp, context, data_for_top_level,
812                                    global_data, parse_result);
813     return ret;
814 }
815 
816 gboolean
sixtp_parse_push(sixtp * sixtp,sixtp_push_handler push_handler,gpointer push_user_data,gpointer data_for_top_level,gpointer global_data,gpointer * parse_result)817 sixtp_parse_push (sixtp* sixtp,
818                   sixtp_push_handler push_handler,
819                   gpointer push_user_data,
820                   gpointer data_for_top_level,
821                   gpointer global_data,
822                   gpointer* parse_result)
823 {
824     sixtp_parser_context* ctxt;
825     xmlParserCtxtPtr xml_context;
826 
827     if (!push_handler)
828     {
829         g_critical ("No push handler specified");
830         return FALSE;
831     }
832 
833     if (! (ctxt = sixtp_context_new (sixtp, global_data, data_for_top_level)))
834     {
835         g_critical ("sixtp_context_new returned null");
836         return FALSE;
837     }
838 
839     xml_context = xmlCreatePushParserCtxt (&ctxt->handler, &ctxt->data,
840                                            NULL, 0, NULL);
841     ctxt->data.saxParserCtxt = xml_context;
842     ctxt->data.bad_xml_parser = sixtp_dom_parser_new (gnc_bad_xml_end_handler,
843                                                       NULL, NULL);
844 
845     (*push_handler) (xml_context, push_user_data);
846 
847     sixtp_context_run_end_handler (ctxt);
848 
849     if (ctxt->data.parsing_ok)
850     {
851         if (parse_result)
852             *parse_result = ctxt->top_frame->frame_data;
853         sixtp_context_destroy (ctxt);
854         return TRUE;
855     }
856     else
857     {
858         if (parse_result)
859             *parse_result = NULL;
860         if (g_slist_length (ctxt->data.stack) > 1)
861             sixtp_handle_catastrophe (&ctxt->data);
862         sixtp_context_destroy (ctxt);
863         return FALSE;
864     }
865 }
866 
867 /***********************************************************************/
868 static gboolean
eat_whitespace(char ** cursor)869 eat_whitespace (char** cursor)
870 {
871     while (**cursor && isspace (**cursor))
872     {
873         (*cursor)++;
874     }
875 
876     if (**cursor == '\0')
877     {
878         return FALSE;
879     }
880     else
881     {
882         return TRUE;
883     }
884 }
885 
886 static gboolean
search_for(unsigned char marker,char ** cursor)887 search_for (unsigned char marker, char** cursor)
888 {
889     while (**cursor &&** cursor != marker)
890     {
891         (*cursor)++;
892     }
893 
894     if (**cursor == '\0')
895     {
896         return FALSE;
897     }
898     else
899     {
900         (*cursor)++;
901         return TRUE;
902     }
903 }
904 
905 QofBookFileType
gnc_is_our_xml_file(const char * filename,gboolean * with_encoding)906 gnc_is_our_xml_file (const char* filename, gboolean* with_encoding)
907 {
908     FILE* f = NULL;
909     char first_chunk[256];
910     ssize_t num_read;
911 
912     g_return_val_if_fail (filename, GNC_BOOK_NOT_OURS);
913 
914     f = g_fopen (filename, "r");
915     if (f == NULL)
916     {
917         return GNC_BOOK_NOT_OURS;
918     }
919 
920     num_read = fread (first_chunk, sizeof (char), sizeof (first_chunk) - 1, f);
921     fclose (f);
922 
923     if (num_read == 0)
924     {
925         return GNC_BOOK_NOT_OURS;
926     }
927 
928     first_chunk[num_read] = '\0';
929 
930     return gnc_is_our_first_xml_chunk (first_chunk, with_encoding);
931 }
932 
933 QofBookFileType
gnc_is_our_first_xml_chunk(char * chunk,gboolean * with_encoding)934 gnc_is_our_first_xml_chunk (char* chunk, gboolean* with_encoding)
935 {
936     char* cursor = NULL;
937     size_t n;
938 
939     if (with_encoding)
940     {
941         *with_encoding = FALSE;
942     }
943 
944     cursor = chunk;
945 
946     if (!eat_whitespace (&cursor))
947     {
948         return GNC_BOOK_NOT_OURS;
949     }
950 
951     if (strncmp (cursor, "<?xml", 5) == 0)
952     {
953         if (!search_for ('>', &cursor))
954         {
955             return GNC_BOOK_NOT_OURS;
956         }
957 
958         if (!eat_whitespace (&cursor))
959         {
960             return GNC_BOOK_NOT_OURS;
961         }
962 
963         if (*cursor != '<')
964         {
965             return GNC_BOOK_NOT_OURS;
966         }
967 
968         n = strlen (gnc_v2_xml_version_string);
969         if ((strncmp (cursor + 1, gnc_v2_xml_version_string, n) == 0)
970             && isspace (* (cursor + 1 + n)))
971         {
972             if (with_encoding)
973             {
974                 *cursor = '\0';
975                 cursor = chunk;
976                 while (search_for ('e', &cursor))
977                 {
978                     if (strncmp (cursor, "ncoding=", 8) == 0)
979                     {
980                         *with_encoding = TRUE;
981                         break;
982                     }
983                 }
984             }
985             return GNC_BOOK_XML2_FILE;
986         }
987 
988         if (strncmp (cursor, "<gnc>", strlen ("<gnc>")) == 0)
989             return GNC_BOOK_XML1_FILE;
990 
991         /* If it doesn't match any of the above but has '<gnc-v...', it must */
992         /* be a later version */
993         if (strncmp (cursor, "<gnc-v", strlen ("<gnc-v")) == 0)
994             return GNC_BOOK_POST_XML2_0_0_FILE;
995 
996         return GNC_BOOK_NOT_OURS;
997     }
998 
999     return GNC_BOOK_NOT_OURS;
1000 }
1001 
1002 void
sixtp_run_callback(sixtp_gdv2 * data,const char * type)1003 sixtp_run_callback (sixtp_gdv2* data, const char* type)
1004 {
1005     if (data->countCallback)
1006     {
1007         data->countCallback (data, type);
1008     }
1009 }
1010 
1011 /************************* END OF FILE *********************************/
1012