1 /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * Copyright (C) 2003 Imendio AB
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU Lesser General Public License as
7  * published by the Free Software Foundation; either version 2 of the
8  * License, or (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * Lesser General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public
16  * License along with this program; if not, see <https://www.gnu.org/licenses>
17  */
18 
19 #include <config.h>
20 #include <string.h>
21 
22 #include <glib.h>
23 
24 #include "lm-debug.h"
25 #include "lm-internals.h"
26 #include "lm-message-node.h"
27 #include "lm-parser.h"
28 
29 #define SHORT_END_TAG "/>"
30 #define XML_MAX_DEPTH 5
31 
32 #define LM_PARSER(o) ((LmParser *) o)
33 
34 struct LmParser {
35     LmParserMessageFunction  function;
36     gpointer                 user_data;
37     GDestroyNotify           notify;
38 
39     LmMessageNode           *cur_root;
40     LmMessageNode           *cur_node;
41 
42     GMarkupParser           *m_parser;
43     GMarkupParseContext     *context;
44     gchar                   *incomplete; /* incomplete utf-8 character
45                                             found at the end of buffer */
46 };
47 
48 
49 /* Used while parsing */
50 static void    parser_start_node_cb (GMarkupParseContext  *context,
51                                      const gchar          *node_name,
52                                      const gchar         **attribute_names,
53                                      const gchar         **attribute_values,
54                                      gpointer              user_data,
55                                      GError              **error);
56 static void    parser_end_node_cb   (GMarkupParseContext  *context,
57                                      const gchar          *node_name,
58                                      gpointer              user_data,
59                                      GError              **error);
60 static void    parser_text_cb       (GMarkupParseContext  *context,
61                                      const gchar          *text,
62                                      gsize                 text_len,
63                                      gpointer              user_data,
64                                      GError              **error);
65 static void    parser_error_cb      (GMarkupParseContext  *context,
66                                      GError               *error,
67                                      gpointer              user_data);
68 
69 static void
parser_start_node_cb(GMarkupParseContext * context,const gchar * node_name,const gchar ** attribute_names,const gchar ** attribute_values,gpointer user_data,GError ** error)70 parser_start_node_cb (GMarkupParseContext  *context,
71                       const gchar          *node_name,
72                       const gchar         **attribute_names,
73                       const gchar         **attribute_values,
74                       gpointer              user_data,
75                       GError              **error)
76 {
77     LmParser     *parser;
78     gint          i;
79     const gchar  *node_name_unq;
80     const gchar  *xmlns = NULL;
81 
82     parser = LM_PARSER (user_data);;
83 
84 
85 /*  parser->cur_depth++; */
86 
87     //strip namespace prefix other than "stream:" from node_name
88     node_name_unq = strrchr(node_name, ':');
89     if (!node_name_unq || !strncmp(node_name, "stream:", 7))
90         node_name_unq = node_name;
91     else
92         ++node_name_unq;
93 
94     if (!parser->cur_root) {
95         /* New toplevel element */
96         parser->cur_root = _lm_message_node_new (node_name_unq);
97         parser->cur_node = parser->cur_root;
98     } else {
99         LmMessageNode *parent_node;
100 
101         parent_node = parser->cur_node;
102 
103         parser->cur_node = _lm_message_node_new (node_name_unq);
104         _lm_message_node_add_child_node (parent_node,
105                                          parser->cur_node);
106     }
107 
108     for (i = 0; attribute_names[i]; ++i) {
109         g_log (LM_LOG_DOMAIN, LM_LOG_LEVEL_PARSER,
110                "ATTRIBUTE: %s = %s\n",
111                attribute_names[i],
112                attribute_values[i]);
113         //FIXME: strip namespace suffix from xmlns: attribute if exists
114 
115         lm_message_node_set_attributes (parser->cur_node,
116                                         attribute_names[i],
117                                         attribute_values[i],
118                                         NULL);
119         if (!strncmp(attribute_names[i], "xmlns:", 6))
120             xmlns = attribute_values[i];
121     }
122     if (xmlns && !lm_message_node_get_attribute(parser->cur_node, "xmlns")) {
123         lm_message_node_set_attribute (parser->cur_node, "xmlns", xmlns);
124         g_log (LM_LOG_DOMAIN, LM_LOG_LEVEL_PARSER,
125                "ATTRIBUTE: %s = %s\n",
126                "xmlns", xmlns);
127     }
128 
129     if (strcmp ("stream:stream", node_name) == 0) {
130         parser_end_node_cb (context,
131                             "stream:stream",
132                             user_data,
133                             error);
134     }
135 }
136 
137 static void
parser_end_node_cb(GMarkupParseContext * context,const gchar * node_name,gpointer user_data,GError ** error)138 parser_end_node_cb (GMarkupParseContext  *context,
139                     const gchar          *node_name,
140                     gpointer              user_data,
141                     GError              **error)
142 {
143     LmParser     *parser;
144     const gchar  *node_name_unq;
145 
146     parser = LM_PARSER (user_data);
147 
148     node_name_unq = strrchr(node_name, ':');
149     if (!node_name_unq || !strncmp(node_name, "stream:", 7))
150         node_name_unq = node_name;
151     else
152         ++node_name_unq;
153 
154     g_log (LM_LOG_DOMAIN, LM_LOG_LEVEL_PARSER,
155            "Trying to close node: %s\n", node_name_unq);
156 
157     if (!parser->cur_node) {
158         /* FIXME: LM-1 should look at this */
159         return;
160     }
161 
162     //cur_node->name doesn't have namespace prefix anymore, node_name does.
163     if (strcmp (parser->cur_node->name, node_name_unq) != 0) {
164         g_log (LM_LOG_DOMAIN, LM_LOG_LEVEL_PARSER,
165                "Trying to close node that isn't open: %s",
166                node_name_unq);
167         return;
168     }
169 
170     if (parser->cur_node == parser->cur_root) {
171         LmMessage *m;
172 
173         m = _lm_message_new_from_node (parser->cur_root);
174 
175         if (!m) {
176             g_log (LM_LOG_DOMAIN, LM_LOG_LEVEL_PARSER,
177                    "Couldn't create message: %s\n",
178                    parser->cur_root->name);
179         } else {
180             g_log (LM_LOG_DOMAIN, LM_LOG_LEVEL_PARSER,
181                "Have a new message\n");
182             if (parser->function) {
183                 (* parser->function) (parser, m, parser->user_data);
184             }
185             lm_message_unref (m);
186         }
187 
188         lm_message_node_unref (parser->cur_root);
189         parser->cur_node = parser->cur_root = NULL;
190     } else {
191         LmMessageNode *tmp_node;
192         tmp_node = parser->cur_node;
193         parser->cur_node = parser->cur_node->parent;
194 
195         lm_message_node_unref (tmp_node);
196     }
197 }
198 
199 static void
parser_text_cb(GMarkupParseContext * context,const gchar * text,gsize text_len,gpointer user_data,GError ** error)200 parser_text_cb (GMarkupParseContext   *context,
201                 const gchar           *text,
202                 gsize                  text_len,
203                 gpointer               user_data,
204                 GError               **error)
205 {
206     LmParser *parser;
207 
208     g_return_if_fail (user_data != NULL);
209 
210     parser = LM_PARSER (user_data);
211 
212     if (parser->cur_node && strcmp (text, "") != 0) {
213         lm_message_node_set_value (parser->cur_node, text);
214     }
215 }
216 
217 static void
parser_error_cb(GMarkupParseContext * context,GError * error,gpointer user_data)218 parser_error_cb (GMarkupParseContext *context,
219                  GError              *error,
220                  gpointer             user_data)
221 {
222     g_return_if_fail (user_data != NULL);
223     g_return_if_fail (error != NULL);
224 
225     g_log (LM_LOG_DOMAIN, LM_LOG_LEVEL_VERBOSE,
226            "Parsing failed: %s\n", error->message);
227 }
228 
229 LmParser *
lm_parser_new(LmParserMessageFunction function,gpointer user_data,GDestroyNotify notify)230 lm_parser_new (LmParserMessageFunction function,
231                gpointer                user_data,
232                GDestroyNotify          notify)
233 {
234     LmParser *parser;
235 
236     parser = g_new0 (LmParser, 1);
237     if (!parser) {
238         return NULL;
239     }
240 
241     parser->m_parser = g_new0 (GMarkupParser, 1);
242     if (!parser->m_parser) {
243         g_free (parser);
244         return NULL;
245     }
246 
247     parser->function  = function;
248     parser->user_data = user_data;
249     parser->notify    = notify;
250 
251     parser->m_parser->start_element = parser_start_node_cb;
252     parser->m_parser->end_element   = parser_end_node_cb;
253     parser->m_parser->text          = parser_text_cb;
254     parser->m_parser->error         = parser_error_cb;
255 
256     parser->context = g_markup_parse_context_new (parser->m_parser, 0,
257                                                   parser, NULL);
258 
259     parser->cur_root = NULL;
260     parser->cur_node = NULL;
261 
262     parser->incomplete = NULL;
263 
264     return parser;
265 }
266 
267 static gchar *
_lm_parser_make_valid(const gchar * buffer,gchar ** incomplete)268 _lm_parser_make_valid (const gchar *buffer, gchar **incomplete)
269 {
270     GString *string;
271     const gchar *remainder, *invalid;
272     gint remaining_bytes, valid_bytes;
273     gunichar code; /*error code for invalid character*/
274 
275     g_return_val_if_fail (buffer != NULL, NULL);
276 
277     string = NULL;
278     remainder = buffer;
279     remaining_bytes = strlen (buffer);
280 
281     while (remaining_bytes != 0)
282     {
283         if (g_utf8_validate (remainder, remaining_bytes, &invalid))
284             break;
285         valid_bytes = invalid - remainder;
286 
287         if (string == NULL)
288             string = g_string_sized_new (remaining_bytes);
289 
290         g_string_append_len (string, remainder, valid_bytes);
291 
292         remainder = g_utf8_find_next_char(invalid, NULL);
293         remaining_bytes -= valid_bytes + (remainder - invalid);
294 
295         code = g_utf8_get_char_validated (invalid, -1);
296 
297         if (code == -1) {
298             /* A complete but invalid codepoint */
299             /* append U+FFFD REPLACEMENT CHARACTER */
300             g_string_append (string, "\357\277\275");
301             g_log (LM_LOG_DOMAIN, LM_LOG_LEVEL_VERBOSE, "invalid character!\n");
302         } else if (code == -2) {
303             /* Beginning of what could be a character */
304             *incomplete = g_strdup (invalid);
305             g_log (LM_LOG_DOMAIN, LM_LOG_LEVEL_VERBOSE,
306                            "incomplete character: %s\n", *incomplete);
307 
308             g_assert (remaining_bytes == 0);
309             g_assert (*(g_utf8_find_next_char(invalid, NULL)) == '\0');
310         }
311     }
312 
313     if (string == NULL)
314         return g_strdup (buffer);
315 
316     g_string_append (string, remainder);
317 
318     g_assert (g_utf8_validate (string->str, -1, NULL));
319 
320     return g_string_free (string, FALSE);
321 }
322 
323 
324 gboolean
lm_parser_parse(LmParser * parser,const gchar * string)325 lm_parser_parse (LmParser *parser, const gchar *string)
326 {
327     gboolean parsed;
328     gchar *valid, *completed;
329     g_return_val_if_fail (parser != NULL, FALSE);
330 
331     if (!parser->context) {
332         parser->context = g_markup_parse_context_new (parser->m_parser, 0,
333                                                       parser, NULL);
334     }
335     if (parser->incomplete) {
336         completed = g_strdup_printf("%s%s", parser->incomplete, string);
337         g_free(parser->incomplete);
338         parser->incomplete = NULL;
339     } else {
340         completed = g_strdup(string);
341     }
342     valid = _lm_parser_make_valid (completed, &parser->incomplete);
343     g_free(completed);
344     if (g_markup_parse_context_parse (parser->context, valid,
345                                       (gssize)strlen (valid), NULL)) {
346         parsed = TRUE;
347     } else {
348         g_markup_parse_context_free (parser->context);
349         parser->context = NULL;
350         parsed = FALSE;
351     }
352     g_free(valid);
353     return parsed;
354 }
355 
356 void
lm_parser_free(LmParser * parser)357 lm_parser_free (LmParser *parser)
358 {
359     if (parser->notify) {
360         (* parser->notify) (parser->user_data);
361     }
362 
363     if (parser->context) {
364         g_markup_parse_context_free (parser->context);
365     }
366     g_free (parser->incomplete);
367     g_free (parser->m_parser);
368     g_free (parser);
369 }
370 
371