1 /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * Copyright (C) 2003 Imendio AB
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public License as
7 * published by the Free Software Foundation; either version 2 of the
8 * License, or (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
14 *
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this program; if not, see <https://www.gnu.org/licenses>
17 */
18
19 #include <config.h>
20 #include <string.h>
21
22 #include <glib.h>
23
24 #include "lm-debug.h"
25 #include "lm-internals.h"
26 #include "lm-message-node.h"
27 #include "lm-parser.h"
28
#define SHORT_END_TAG "/>"
#define XML_MAX_DEPTH 5

/* Cast an untyped pointer (e.g. a GMarkup user_data argument) to LmParser *.
 * The argument is parenthesized so that expressions such as `base + offset`
 * are evaluated before the cast is applied. */
#define LM_PARSER(o) ((LmParser *) (o))
33
/* State of one incremental XML parser instance. */
struct LmParser {
    /* Invoked as function (parser, message, user_data) each time a
     * top-level element has been closed and turned into a message. */
    LmParserMessageFunction  function;
    /* Opaque pointer handed back to `function` and to `notify`. */
    gpointer                 user_data;
    /* If set, called with `user_data` by lm_parser_free(). */
    GDestroyNotify           notify;

    /* Top-level element currently being built, NULL between messages. */
    LmMessageNode           *cur_root;
    /* Innermost element that is still open, NULL between messages. */
    LmMessageNode           *cur_node;

    /* Callback table passed to GMarkup; allocated in lm_parser_new(). */
    GMarkupParser           *m_parser;
    /* GMarkup context; lm_parser_parse() frees it and sets it to NULL when
     * parsing fails, and creates a new one on the following call. */
    GMarkupParseContext     *context;
    gchar                   *incomplete; /* incomplete utf-8 character
                                            found at the end of buffer;
                                            prepended to the next chunk */
};
47
48
/* Used while parsing.
 *
 * GMarkupParser callbacks; lm_parser_new() installs them in the parser's
 * callback table. They are declared ahead of their definitions:
 * parser_start_node_cb() calls parser_end_node_cb(), which is defined
 * after it. */
static void    parser_start_node_cb (GMarkupParseContext  *context,
                                     const gchar          *node_name,
                                     const gchar         **attribute_names,
                                     const gchar         **attribute_values,
                                     gpointer              user_data,
                                     GError              **error);
static void    parser_end_node_cb   (GMarkupParseContext  *context,
                                     const gchar          *node_name,
                                     gpointer              user_data,
                                     GError              **error);
static void    parser_text_cb       (GMarkupParseContext  *context,
                                     const gchar          *text,
                                     gsize                 text_len,
                                     gpointer              user_data,
                                     GError              **error);
static void    parser_error_cb      (GMarkupParseContext  *context,
                                     GError               *error,
                                     gpointer              user_data);
68
69 static void
parser_start_node_cb(GMarkupParseContext * context,const gchar * node_name,const gchar ** attribute_names,const gchar ** attribute_values,gpointer user_data,GError ** error)70 parser_start_node_cb (GMarkupParseContext *context,
71 const gchar *node_name,
72 const gchar **attribute_names,
73 const gchar **attribute_values,
74 gpointer user_data,
75 GError **error)
76 {
77 LmParser *parser;
78 gint i;
79 const gchar *node_name_unq;
80 const gchar *xmlns = NULL;
81
82 parser = LM_PARSER (user_data);;
83
84
85 /* parser->cur_depth++; */
86
87 //strip namespace prefix other than "stream:" from node_name
88 node_name_unq = strrchr(node_name, ':');
89 if (!node_name_unq || !strncmp(node_name, "stream:", 7))
90 node_name_unq = node_name;
91 else
92 ++node_name_unq;
93
94 if (!parser->cur_root) {
95 /* New toplevel element */
96 parser->cur_root = _lm_message_node_new (node_name_unq);
97 parser->cur_node = parser->cur_root;
98 } else {
99 LmMessageNode *parent_node;
100
101 parent_node = parser->cur_node;
102
103 parser->cur_node = _lm_message_node_new (node_name_unq);
104 _lm_message_node_add_child_node (parent_node,
105 parser->cur_node);
106 }
107
108 for (i = 0; attribute_names[i]; ++i) {
109 g_log (LM_LOG_DOMAIN, LM_LOG_LEVEL_PARSER,
110 "ATTRIBUTE: %s = %s\n",
111 attribute_names[i],
112 attribute_values[i]);
113 //FIXME: strip namespace suffix from xmlns: attribute if exists
114
115 lm_message_node_set_attributes (parser->cur_node,
116 attribute_names[i],
117 attribute_values[i],
118 NULL);
119 if (!strncmp(attribute_names[i], "xmlns:", 6))
120 xmlns = attribute_values[i];
121 }
122 if (xmlns && !lm_message_node_get_attribute(parser->cur_node, "xmlns")) {
123 lm_message_node_set_attribute (parser->cur_node, "xmlns", xmlns);
124 g_log (LM_LOG_DOMAIN, LM_LOG_LEVEL_PARSER,
125 "ATTRIBUTE: %s = %s\n",
126 "xmlns", xmlns);
127 }
128
129 if (strcmp ("stream:stream", node_name) == 0) {
130 parser_end_node_cb (context,
131 "stream:stream",
132 user_data,
133 error);
134 }
135 }
136
137 static void
parser_end_node_cb(GMarkupParseContext * context,const gchar * node_name,gpointer user_data,GError ** error)138 parser_end_node_cb (GMarkupParseContext *context,
139 const gchar *node_name,
140 gpointer user_data,
141 GError **error)
142 {
143 LmParser *parser;
144 const gchar *node_name_unq;
145
146 parser = LM_PARSER (user_data);
147
148 node_name_unq = strrchr(node_name, ':');
149 if (!node_name_unq || !strncmp(node_name, "stream:", 7))
150 node_name_unq = node_name;
151 else
152 ++node_name_unq;
153
154 g_log (LM_LOG_DOMAIN, LM_LOG_LEVEL_PARSER,
155 "Trying to close node: %s\n", node_name_unq);
156
157 if (!parser->cur_node) {
158 /* FIXME: LM-1 should look at this */
159 return;
160 }
161
162 //cur_node->name doesn't have namespace prefix anymore, node_name does.
163 if (strcmp (parser->cur_node->name, node_name_unq) != 0) {
164 g_log (LM_LOG_DOMAIN, LM_LOG_LEVEL_PARSER,
165 "Trying to close node that isn't open: %s",
166 node_name_unq);
167 return;
168 }
169
170 if (parser->cur_node == parser->cur_root) {
171 LmMessage *m;
172
173 m = _lm_message_new_from_node (parser->cur_root);
174
175 if (!m) {
176 g_log (LM_LOG_DOMAIN, LM_LOG_LEVEL_PARSER,
177 "Couldn't create message: %s\n",
178 parser->cur_root->name);
179 } else {
180 g_log (LM_LOG_DOMAIN, LM_LOG_LEVEL_PARSER,
181 "Have a new message\n");
182 if (parser->function) {
183 (* parser->function) (parser, m, parser->user_data);
184 }
185 lm_message_unref (m);
186 }
187
188 lm_message_node_unref (parser->cur_root);
189 parser->cur_node = parser->cur_root = NULL;
190 } else {
191 LmMessageNode *tmp_node;
192 tmp_node = parser->cur_node;
193 parser->cur_node = parser->cur_node->parent;
194
195 lm_message_node_unref (tmp_node);
196 }
197 }
198
199 static void
parser_text_cb(GMarkupParseContext * context,const gchar * text,gsize text_len,gpointer user_data,GError ** error)200 parser_text_cb (GMarkupParseContext *context,
201 const gchar *text,
202 gsize text_len,
203 gpointer user_data,
204 GError **error)
205 {
206 LmParser *parser;
207
208 g_return_if_fail (user_data != NULL);
209
210 parser = LM_PARSER (user_data);
211
212 if (parser->cur_node && strcmp (text, "") != 0) {
213 lm_message_node_set_value (parser->cur_node, text);
214 }
215 }
216
217 static void
parser_error_cb(GMarkupParseContext * context,GError * error,gpointer user_data)218 parser_error_cb (GMarkupParseContext *context,
219 GError *error,
220 gpointer user_data)
221 {
222 g_return_if_fail (user_data != NULL);
223 g_return_if_fail (error != NULL);
224
225 g_log (LM_LOG_DOMAIN, LM_LOG_LEVEL_VERBOSE,
226 "Parsing failed: %s\n", error->message);
227 }
228
229 LmParser *
lm_parser_new(LmParserMessageFunction function,gpointer user_data,GDestroyNotify notify)230 lm_parser_new (LmParserMessageFunction function,
231 gpointer user_data,
232 GDestroyNotify notify)
233 {
234 LmParser *parser;
235
236 parser = g_new0 (LmParser, 1);
237 if (!parser) {
238 return NULL;
239 }
240
241 parser->m_parser = g_new0 (GMarkupParser, 1);
242 if (!parser->m_parser) {
243 g_free (parser);
244 return NULL;
245 }
246
247 parser->function = function;
248 parser->user_data = user_data;
249 parser->notify = notify;
250
251 parser->m_parser->start_element = parser_start_node_cb;
252 parser->m_parser->end_element = parser_end_node_cb;
253 parser->m_parser->text = parser_text_cb;
254 parser->m_parser->error = parser_error_cb;
255
256 parser->context = g_markup_parse_context_new (parser->m_parser, 0,
257 parser, NULL);
258
259 parser->cur_root = NULL;
260 parser->cur_node = NULL;
261
262 parser->incomplete = NULL;
263
264 return parser;
265 }
266
267 static gchar *
_lm_parser_make_valid(const gchar * buffer,gchar ** incomplete)268 _lm_parser_make_valid (const gchar *buffer, gchar **incomplete)
269 {
270 GString *string;
271 const gchar *remainder, *invalid;
272 gint remaining_bytes, valid_bytes;
273 gunichar code; /*error code for invalid character*/
274
275 g_return_val_if_fail (buffer != NULL, NULL);
276
277 string = NULL;
278 remainder = buffer;
279 remaining_bytes = strlen (buffer);
280
281 while (remaining_bytes != 0)
282 {
283 if (g_utf8_validate (remainder, remaining_bytes, &invalid))
284 break;
285 valid_bytes = invalid - remainder;
286
287 if (string == NULL)
288 string = g_string_sized_new (remaining_bytes);
289
290 g_string_append_len (string, remainder, valid_bytes);
291
292 remainder = g_utf8_find_next_char(invalid, NULL);
293 remaining_bytes -= valid_bytes + (remainder - invalid);
294
295 code = g_utf8_get_char_validated (invalid, -1);
296
297 if (code == -1) {
298 /* A complete but invalid codepoint */
299 /* append U+FFFD REPLACEMENT CHARACTER */
300 g_string_append (string, "\357\277\275");
301 g_log (LM_LOG_DOMAIN, LM_LOG_LEVEL_VERBOSE, "invalid character!\n");
302 } else if (code == -2) {
303 /* Beginning of what could be a character */
304 *incomplete = g_strdup (invalid);
305 g_log (LM_LOG_DOMAIN, LM_LOG_LEVEL_VERBOSE,
306 "incomplete character: %s\n", *incomplete);
307
308 g_assert (remaining_bytes == 0);
309 g_assert (*(g_utf8_find_next_char(invalid, NULL)) == '\0');
310 }
311 }
312
313 if (string == NULL)
314 return g_strdup (buffer);
315
316 g_string_append (string, remainder);
317
318 g_assert (g_utf8_validate (string->str, -1, NULL));
319
320 return g_string_free (string, FALSE);
321 }
322
323
324 gboolean
lm_parser_parse(LmParser * parser,const gchar * string)325 lm_parser_parse (LmParser *parser, const gchar *string)
326 {
327 gboolean parsed;
328 gchar *valid, *completed;
329 g_return_val_if_fail (parser != NULL, FALSE);
330
331 if (!parser->context) {
332 parser->context = g_markup_parse_context_new (parser->m_parser, 0,
333 parser, NULL);
334 }
335 if (parser->incomplete) {
336 completed = g_strdup_printf("%s%s", parser->incomplete, string);
337 g_free(parser->incomplete);
338 parser->incomplete = NULL;
339 } else {
340 completed = g_strdup(string);
341 }
342 valid = _lm_parser_make_valid (completed, &parser->incomplete);
343 g_free(completed);
344 if (g_markup_parse_context_parse (parser->context, valid,
345 (gssize)strlen (valid), NULL)) {
346 parsed = TRUE;
347 } else {
348 g_markup_parse_context_free (parser->context);
349 parser->context = NULL;
350 parsed = FALSE;
351 }
352 g_free(valid);
353 return parsed;
354 }
355
356 void
lm_parser_free(LmParser * parser)357 lm_parser_free (LmParser *parser)
358 {
359 if (parser->notify) {
360 (* parser->notify) (parser->user_data);
361 }
362
363 if (parser->context) {
364 g_markup_parse_context_free (parser->context);
365 }
366 g_free (parser->incomplete);
367 g_free (parser->m_parser);
368 g_free (parser);
369 }
370
371