1 /*
2 * Copyright (C) 2002-2021 ProcessOne, SARL. All Rights Reserved.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 *
16 */
17
18 #include <erl_nif.h>
19 #include <string.h>
20 #include <stdio.h>
21 #include <expat.h>
22
/* Flag passed to XML_StopParser(): a stopped parse is never resumed. */
#define PARSING_NOT_RESUMABLE XML_FALSE

/*
 * ASSERT(x): return 0 from the enclosing function when x is false.
 * Wrapped in do { } while (0) so that it expands to exactly one statement
 * and cannot capture an `else` that follows it.
 */
#define ASSERT(x) do { if (!(x)) return 0; } while (0)

/*
 * PARSER_ASSERT(X, E): for use inside Expat callbacks that have a `state`
 * variable in scope and return void.  When X is false it records E as the
 * error, stops the parser and returns from the callback.
 */
#define PARSER_ASSERT(X, E) do { if (!(X)) { state->error = (E); XML_StopParser(state->parser, PARSING_NOT_RESUMABLE); return; } } while(0)

/* PARSER_ASSERT specialised for allocation failures ("enomem"). */
#define PARSER_MEM_ASSERT(x) PARSER_ASSERT((x), "enomem")
28
/*
 * One child of an element that is still being parsed.  Children are kept
 * as a singly linked list; new nodes are pushed at the head.
 */
typedef struct children_list_t {
  union {
    ERL_NIF_TERM term;    /* valid when is_cdata == 0: finished child element */
    ErlNifBinary cdata;   /* valid when is_cdata != 0: accumulated text */
  };
  struct children_list_t *next;
  char is_cdata;          /* selects which union member is in use */
} children_list_t;
37
/*
 * Linked-list node describing one namespace declaration.
 * Nodes created by the namespace-declaration handler carry the attribute
 * name ("xmlns" or "xmlns:<prefix>") in `name` and the URI in `value`;
 * nodes kept in state_t.top_xmlns_attrs carry the bare prefix in `name`.
 * Field order matters: stream_stream_ns_attr is initialised positionally.
 */
typedef struct attrs_list_t {
  ErlNifBinary name;
  ErlNifBinary value;
  struct attrs_list_t *next;
} attrs_list_t;
43
/* Stack entry for an element whose end tag has not been seen yet. */
typedef struct xmlel_stack_t {
  ERL_NIF_TERM name;            /* element name binary (not set for the stream root) */
  ERL_NIF_TERM attrs;           /* attribute list or map built by the start handler */
  children_list_t *children;    /* children collected so far, newest first */
  struct xmlel_stack_t *next;   /* enclosing element */
  char *namespace_str;          /* NUL-terminated namespace; may be the very same
                                   pointer as the parent's (then owned by the parent) */
  int redefined_top_prefix;     /* non-zero once a top-level prefix was re-declared
                                   at or above this element */
} xmlel_stack_t;
52
53
/* Parser state stored inside the NIF resource. */
typedef struct {
  ErlNifEnv *env;                 /* env of the NIF call in progress (set by parse_nif) */
  ErlNifEnv *send_env;            /* env in which event terms are built */
  ErlNifPid *pid;                 /* receiver of stream events; NULL in parse_element mode */
  size_t depth;                   /* current element nesting depth */
  size_t size;                    /* bytes fed since the last event was sent */
  size_t max_size;                /* limit checked against `size` in parse_nif */
  XML_Parser parser;
  xmlel_stack_t *elements_stack;  /* open elements, innermost first */
  attrs_list_t *xmlns_attrs;      /* declarations waiting for the next start tag */
  attrs_list_t *top_xmlns_attrs;  /* prefixes declared on the stream root */
  const char *error;              /* error name set by PARSER_ASSERT, or NULL */
  char normalize_ns:1;
  char gen_server:1;              /* wrap events in {'$gen_event', _} tuples */
  char use_maps:1;                /* emit maps (Elixir structs) instead of tuples */
} state_t;
70
/*
 * Result of encode_name(): what the start handler has to do with the
 * element's namespace declarations.  OP_ERROR is 0 so that the result can
 * be tested directly as a boolean.
 */
typedef enum xmlns_op {
  OP_ERROR = 0,       /* encoding failed */
  OP_REMOVE_PREFIX,   /* prefix is known from the stream root: drop its xmlns:<prefix> */
  OP_REMOVE_XMLNS,    /* same namespace as the parent: drop the xmlns attribute */
  OP_REPLACE_XMLNS,   /* emit an xmlns attribute carrying the element's namespace */
  OP_NOP              /* leave the declarations untouched */
} xmlns_op;
78
/* Makes Expat allocate through the NIF allocator (see XML_ParserCreate_MM). */
static XML_Memory_Handling_Suite ms = {
  .malloc_fcn = enif_alloc,
  .realloc_fcn = enif_realloc,
  .free_fcn = enif_free
};
84
/* Resource type of state_t objects; opened in load(). */
static ErlNifResourceType *parser_state_t = NULL;

/*
 * Brace initialiser for an ErlNifBinary that points at a string literal.
 * NOTE(review): relies on ErlNifBinary starting with {size, data} - confirm
 * against erl_nif.h.  Such a binary must never be released.
 */
#define FAKE_BIN(STR) { sizeof(STR)-1, (unsigned char*)STR }

/*
 * Static sentinel node; `next` is implicitly NULL.  It is compared by
 * address in the cleanup code and must never be passed to enif_free().
 */
static attrs_list_t stream_stream_ns_attr = {
  FAKE_BIN("stream:stream"),
  FAKE_BIN("http://etherx.jabber.org/streams")
};
93
/*
 * Compare a NUL-terminated string with a length-delimited buffer.
 * Returns 1 when both hold the same bytes, 0 otherwise.  `buf` is not
 * touched when buf_len is 0, so it may be NULL in that case.
 */
static int same_str_buf(const char *str, const char *buf, size_t buf_len)
{
  const size_t str_len = strlen(str);

  if (str_len != buf_len)
    return 0;

  /* Equal lengths: two empty strings match without looking at buf. */
  return buf_len == 0 || memcmp(str, buf, buf_len) == 0;
}
102
/*
 * Copy buf_len bytes of `buf` into a freshly allocated, NUL-terminated
 * string.  Returns NULL when the allocation fails.  The caller owns the
 * result and releases it with enif_free().
 */
static char *dup_buf(const char *buf, size_t buf_len)
{
  char *copy = enif_alloc(buf_len + 1);

  if (copy != NULL) {
    /* buf is not read for an empty copy, so it may be NULL then */
    if (buf_len > 0)
      memcpy(copy, buf, buf_len);
    copy[buf_len] = '\0';
  }

  return copy;
}
115
/*
 * Allocate `bin` with room for buf_len bytes and fill it from `buf`.
 * Returns 1 on success, 0 when the binary could not be allocated.
 */
static int dup_to_bin(ErlNifBinary *bin, const char *buf, size_t buf_len)
{
  if (enif_alloc_binary(buf_len, bin)) {
    memcpy(bin->data, buf, buf_len);
    return 1;
  }

  return 0;
}
125
dup_to_term(ErlNifEnv * env,const char * buf,size_t buf_len)126 static ERL_NIF_TERM dup_to_term(ErlNifEnv *env, const char *buf, size_t buf_len)
127 {
128 ERL_NIF_TERM term;
129
130 unsigned char *str = enif_make_new_binary(env, buf_len, &term);
131 memcpy(str, buf, buf_len);
132
133 return term;
134 }
135
/*
 * Search `list` for an entry matching a prefix and/or a namespace.
 * A NULL `pfx` (or `ns`) acts as a wildcard for that field.  Nothing
 * matches when pfx_len is 0.  Returns 1 on the first match, else 0.
 */
static int has_prefix_ns_from_list(attrs_list_t*list, const char *pfx, size_t pfx_len,
                                   const char *ns, size_t ns_len)
{
  attrs_list_t *cur;

  if (pfx_len == 0)
    return 0;

  for (cur = list; cur != NULL; cur = cur->next) {
    int pfx_ok = pfx == NULL ||
                 (cur->name.size == pfx_len &&
                  memcmp(cur->name.data, pfx, pfx_len) == 0);
    if (!pfx_ok)
      continue;

    int ns_ok = ns == NULL ||
                (cur->value.size == ns_len &&
                 memcmp(cur->value.data, ns, ns_len) == 0);
    if (ns_ok)
      return 1;
  }

  return 0;
}
151
/*
 * Like has_prefix_ns_from_list() on the stream root's prefixes, but
 * answers 0 as soon as the current element sits below a re-declaration of
 * a root prefix, or when the prefix is empty.
 */
static int has_prefix_ns_from_top(state_t *state, const char *pfx, size_t pfx_len,
                                  const char *ns, size_t ns_len)
{
  const xmlel_stack_t *top = state->elements_stack;
  int lookup_allowed = !top->redefined_top_prefix && pfx_len != 0;

  return lookup_allowed
    ? has_prefix_ns_from_list(state->top_xmlns_attrs, pfx, pfx_len, ns, ns_len)
    : 0;
}
160
encode_name(state_t * state,const char * xml_name,ErlNifBinary * buf,char ** ns_str,char ** pfx_str,int top_element)161 static xmlns_op encode_name(state_t *state, const char *xml_name, ErlNifBinary *buf,
162 char **ns_str, char **pfx_str, int top_element)
163 {
164 const char *parts[3];
165 int i, idx = 0;
166
167 for (i = 0; ; i++) {
168 if (!xml_name[i] || xml_name[i] == '\n') {
169 parts[idx++] = xml_name + i;
170 if (!xml_name[i])
171 break;
172 }
173 if (idx >= 3)
174 return OP_ERROR;
175 }
176 const char *ns = NULL, *name = NULL, *prefix = NULL;
177 size_t ns_len = 0, name_len = 0, prefix_len = 0;
178
179 if (idx == 1) {
180 name = xml_name;
181 name_len = parts[0] - xml_name;
182 } else {
183 ns = xml_name;
184 ns_len = parts[0] - xml_name;
185 name = parts[0] + 1;
186 name_len = parts[1] - parts[0] - 1;
187 if (idx == 3) {
188 prefix = parts[1] + 1;
189 prefix_len = parts[2] - parts[1] - 1;
190 }
191 }
192
193 int with_prefix = prefix_len && (top_element || !ns_str);
194 xmlns_op res = OP_REPLACE_XMLNS;
195
196 if (state->normalize_ns && !top_element) {
197 if (ns_str) {
198 if (!state->elements_stack->redefined_top_prefix && prefix_len &&
199 has_prefix_ns_from_top(state, prefix, prefix_len, ns, ns_len))
200 {
201 res = OP_REMOVE_PREFIX;
202 with_prefix = 1;
203 } else if (same_str_buf(state->elements_stack->namespace_str, ns, ns_len)) {
204 res = OP_REMOVE_XMLNS;
205 with_prefix = 0;
206 }
207 }
208 } else
209 res = OP_NOP;
210
211 if (with_prefix) {
212 ASSERT(enif_alloc_binary(name_len + prefix_len + 1, buf));
213 memcpy(buf->data, prefix, prefix_len);
214 buf->data[prefix_len] = ':';
215 memcpy(buf->data + prefix_len + 1, name, name_len);
216 } else {
217 ASSERT(dup_to_bin(buf, name, name_len));
218 }
219
220 if (ns_str) {
221 if (top_element && prefix_len > 0)
222 *ns_str = NULL;
223 else {
224 *ns_str = top_element ? dup_buf(ns, ns_len) :
225 res == OP_REMOVE_PREFIX ?
226 state->elements_stack->namespace_str :
227 dup_buf(ns, ns_len);
228
229 if (!*ns_str) {
230 enif_release_binary(buf);
231 return OP_ERROR;
232 }
233 }
234 if (pfx_str) {
235 if (res == OP_REMOVE_PREFIX) {
236 *pfx_str = dup_buf(prefix, prefix_len);
237 if (!*pfx_str) {
238 enif_release_binary(buf);
239 if (ns_str && *ns_str)
240 enif_free(*ns_str);
241 return OP_ERROR;
242 }
243 } else
244 *pfx_str = NULL;
245 }
246 }
247
248 return res;
249 }
250
str2bin(ErlNifEnv * env,const char * s)251 static ERL_NIF_TERM str2bin(ErlNifEnv *env, const char *s)
252 {
253 return dup_to_term(env, s, strlen(s));
254 }
255
/*
 * Send `el` to the callback process, wrapped as {'$gen_event', el} when
 * the gen_server flag is set.  Also resets the byte counter and clears
 * send_env, so terms built in send_env are invalid afterwards.
 */
static void send_event(state_t *state, ERL_NIF_TERM el)
{
  ERL_NIF_TERM msg = el;

  state->size = 0;

  if (state->gen_server) {
    ERL_NIF_TERM tag = enif_make_atom(state->send_env, "$gen_event");
    msg = enif_make_tuple2(state->send_env, tag, el);
  }

  enif_send(state->env, state->pid, state->send_env, msg);
  enif_clear_env(state->send_env);
}
269
/*
 * Same as send_event(), except that the gen_server wrapper tag is
 * '$gen_all_state_event'.
 */
static void send_all_state_event(state_t *state, ERL_NIF_TERM el)
{
  ERL_NIF_TERM msg = el;

  state->size = 0;

  if (state->gen_server) {
    ERL_NIF_TERM tag = enif_make_atom(state->send_env, "$gen_all_state_event");
    msg = enif_make_tuple2(state->send_env, tag, el);
  }

  enif_send(state->env, state->pid, state->send_env, msg);
  enif_clear_env(state->send_env);
}
283
/*
 * Add one attribute to `root` and return the new collection: a map put in
 * maps mode, otherwise a {name, value} tuple prepended to the list.
 */
static ERL_NIF_TERM append_attr(state_t *state, ERL_NIF_TERM root, ERL_NIF_TERM name, ERL_NIF_TERM value) {
  ErlNifEnv *env = state->send_env;

  if (!state->use_maps) {
    ERL_NIF_TERM pair = enif_make_tuple2(env, name, value);
    return enif_make_list_cell(env, pair, root);
  }

  ERL_NIF_TERM updated;
  enif_make_map_put(env, root, name, value, &updated);
  return updated;
}
295
/*
 * Expat start-tag callback.
 *
 * Builds the attribute collection (list or map), merges in the namespace
 * declarations queued by erlXML_StartNamespaceDeclHandler(), pushes a new
 * entry on the element stack and, for the stream root in streaming mode,
 * sends the stream-start event.
 *
 *   state  parser state (Expat user data)
 *   name   element name as delivered by Expat (see encode_name())
 *   atts   NULL-terminated array of alternating attribute names and values
 *
 * On allocation failure state->error is set to "enomem" and the parser is
 * stopped (PARSER_MEM_ASSERT).
 *
 * NOTE(review): allocation failures inside the namespace loop still leave
 * the detached list node and the already encoded name unreleased; only the
 * failure paths with unambiguous ownership are cleaned up here.
 */
void erlXML_StartElementHandler(state_t *state,
                                const XML_Char *name,
                                const XML_Char **atts)
{
  int i = 0;
  ErlNifEnv* env = state->send_env;
  ERL_NIF_TERM attrs_term;
  ErlNifBinary name_bin;

  /* Nothing to do once an error was recorded; check before building terms. */
  if (state->error)
    return;

  if (state->use_maps) {
    attrs_term = enif_make_new_map(env);
  } else {
    attrs_term = enif_make_list(env, 0);
  }

  state->depth++;

  /* Walk the attributes back to front so that prepending keeps their order. */
  while (atts[i])
    i += 2;

  i -= 2;

  while (i >= 0) {
    ErlNifBinary attr_name;
    ERL_NIF_TERM val;
    unsigned char *val_str;

    PARSER_MEM_ASSERT(encode_name(state, atts[i], &attr_name, NULL, NULL, 0));

    size_t val_len = strlen(atts[i+1]);
    val_str = enif_make_new_binary(env, val_len, &val);
    if (!val_str) {
      /* attr_name is still owned by us at this point */
      enif_release_binary(&attr_name);
      PARSER_MEM_ASSERT(0);
    }
    memcpy(val_str, atts[i+1], val_len);

    attrs_term = append_attr(state, attrs_term, enif_make_binary(env, &attr_name), val);
    i -= 2;
  }

  char *ns = NULL, *pfx = NULL;
  int redefined_top_prefix = state->depth > 1 ? state->elements_stack->redefined_top_prefix : 0;
  int xmlns_op;

  if (state->normalize_ns)
    xmlns_op = encode_name(state, name, &name_bin, &ns, &pfx, state->depth == 1);
  else
    xmlns_op = encode_name(state, name, &name_bin, NULL, NULL, state->depth == 1);

  PARSER_MEM_ASSERT(xmlns_op);

  if (!state->normalize_ns)
    xmlns_op = OP_NOP;

  int non_xmpp_ns = -1;
  int had_stream_stream = 0;

  /* Consume the namespace declarations queued for this start tag. */
  while (state->xmlns_attrs) {
    ERL_NIF_TERM tuple = 0;
    ERL_NIF_TERM tuple_name = 0, tuple_val = 0;
    attrs_list_t *c = state->xmlns_attrs;
    ErlNifBinary new_prefix, new_ns;

    state->xmlns_attrs = c->next;

    /* Prefixed declaration on the stream root: remember it for later lookups. */
    if (state->depth == 1 && state->normalize_ns && c->name.size > 6) {
      if (non_xmpp_ns != 1 || !has_prefix_ns_from_list(&stream_stream_ns_attr,
                                                       (char*)c->name.data+6, c->name.size-6,
                                                       (char*)c->value.data, c->value.size))
      {
        if (had_stream_stream) {
          PARSER_MEM_ASSERT(dup_to_bin(&new_prefix, (char*)stream_stream_ns_attr.name.data,
                                       stream_stream_ns_attr.name.size));
          PARSER_MEM_ASSERT(dup_to_bin(&new_ns, (char*)stream_stream_ns_attr.value.data,
                                       stream_stream_ns_attr.value.size));
          c->name = new_prefix;
          c->value = new_ns;
          c->next = state->top_xmlns_attrs;
          state->top_xmlns_attrs = c;
          had_stream_stream = 0;
        }
        non_xmpp_ns = 1;
        PARSER_MEM_ASSERT(dup_to_bin(&new_prefix, (char*)c->name.data+6, c->name.size-6));
        PARSER_MEM_ASSERT(dup_to_bin(&new_ns, (char*)c->value.data, c->value.size));
      } else {
        had_stream_stream = 1;
        non_xmpp_ns = 0;
      }
    }

    if (c->name.size == 5) { // xmlns
      if (xmlns_op == OP_REMOVE_XMLNS) {
        enif_release_binary(&c->name);
        enif_release_binary(&c->value);
        enif_free(c);
        continue;
      } else if (xmlns_op == OP_REPLACE_XMLNS) {
        enif_release_binary(&c->value);
        if (state->use_maps) {
          tuple_name = enif_make_binary(env, &c->name);
          tuple_val = dup_to_term(env, ns, strlen(ns));
        } else {
          tuple = enif_make_tuple2(env, enif_make_binary(env, &c->name),
                                   dup_to_term(env, ns, strlen(ns)));
        }
        xmlns_op = OP_NOP;
      }
      if (!ns && state->normalize_ns)
        PARSER_MEM_ASSERT(ns = dup_buf((char *) c->value.data, c->value.size));
    } else if (xmlns_op == OP_REMOVE_PREFIX &&
               same_str_buf(pfx, (char*)c->name.data + 6, c->name.size - 6)) {
      enif_release_binary(&c->name);
      enif_release_binary(&c->value);
      enif_free(c);
      continue;
    } else if (!redefined_top_prefix && state->depth > 1 && c->name.size > 6 &&
               has_prefix_ns_from_top(state, (char*)c->name.data + 6, c->name.size - 6, NULL, 0)) {
      redefined_top_prefix = 1;
    }

    if (state->use_maps) {
      if (!tuple_name) {
        enif_make_map_update(env, attrs_term, enif_make_binary(env, &c->name),
                             enif_make_binary(env, &c->value), &attrs_term);
      } else
        enif_make_map_update(env, attrs_term, tuple_name, tuple_val, &attrs_term);
    } else {
      if (!tuple) {
        tuple = enif_make_tuple2(env, enif_make_binary(env, &c->name),
                                 enif_make_binary(env, &c->value));
      }
      attrs_term = enif_make_list_cell(env, tuple, attrs_term);
    }

    /* c's binaries now belong to terms; reuse the node for the root prefix list. */
    if (non_xmpp_ns && state->depth == 1 && state->normalize_ns && c->name.size > 6) {
      c->name = new_prefix;
      c->value = new_ns;
      c->next = state->top_xmlns_attrs;
      state->top_xmlns_attrs = c;
    } else
      enif_free(c);
  }

  if (!non_xmpp_ns && state->depth == 1 && state->normalize_ns) {
    state->top_xmlns_attrs = &stream_stream_ns_attr;
  }

  if (xmlns_op == OP_REPLACE_XMLNS) {
    attrs_term = append_attr(state, attrs_term, dup_to_term(env, "xmlns", 5),
                             dup_to_term(env, ns, strlen(ns)));
  } else if (xmlns_op == OP_REMOVE_PREFIX) {
    enif_free(pfx);
  }

  if (!ns && state->normalize_ns) {
    ns = dup_buf("", 0);
    if (!ns) {
      /* name_bin has not been turned into a term yet */
      enif_release_binary(&name_bin);
      PARSER_MEM_ASSERT(0);
    }
  }

  xmlel_stack_t *xmlel = enif_alloc(sizeof(xmlel_stack_t));
  if (!xmlel) {
    enif_release_binary(&name_bin);
    /* with OP_REMOVE_PREFIX ns is borrowed from the parent element */
    if (ns && xmlns_op != OP_REMOVE_PREFIX)
      enif_free(ns);
    PARSER_MEM_ASSERT(0);
  }

  xmlel->next = state->elements_stack;
  xmlel->attrs = attrs_term;
  xmlel->namespace_str = ns;
  xmlel->children = NULL;
  xmlel->redefined_top_prefix = redefined_top_prefix;

  state->elements_stack = xmlel;

  if (state->pid && state->depth == 1) {
    /* Stream root in streaming mode: announce it immediately. */
    if (state->use_maps) {
      ERL_NIF_TERM map = enif_make_new_map(env);
      enif_make_map_put(env, map, enif_make_atom(env, "__struct__"),
                        enif_make_atom(env, "Elixir.FastXML.StreamStart"), &map);
      enif_make_map_put(env, map, enif_make_atom(env, "name"),
                        enif_make_binary(env, &name_bin), &map);
      enif_make_map_put(env, map, enif_make_atom(env, "attrs"),
                        attrs_term, &map);
      send_event(state, map);
    } else {
      send_event(state,
                 enif_make_tuple3(env,
                                  enif_make_atom(env, "xmlstreamstart"),
                                  enif_make_binary(env, &name_bin),
                                  attrs_term));
    }
  } else {
    xmlel->name = enif_make_binary(env, &name_bin);
  }
}
486
/*
 * Expat character-data callback.
 *
 * Text outside any element is ignored.  In streaming mode, text directly
 * under the stream root is sent at once as {xmlstreamcdata, Bin}.
 * Otherwise the text is appended to the newest child of the current
 * element if that child is text, or pushed as a new text child.
 *
 *   s    text chunk (not NUL-terminated)
 *   len  number of bytes in s
 */
void erlXML_CharacterDataHandler(state_t *state, const XML_Char *s, int len)
{
  ErlNifEnv *env = state->send_env;

  if (state->error)
    return;

  if (state->depth == 0)
    return;

  if (state->pid && state->depth == 1) {
    ErlNifBinary cdata;
    PARSER_MEM_ASSERT(enif_alloc_binary(len, &cdata));
    memcpy(cdata.data, s, len);
    send_all_state_event(state,
                         enif_make_tuple2(env,
                                          enif_make_atom(env, "xmlstreamcdata"),
                                          enif_make_binary(env, &cdata)));
    return;
  }

  children_list_t *children = state->elements_stack->children;

  if (children && children->is_cdata) {
    /* size_t, not int: the binary size must not be narrowed */
    size_t old_size = children->cdata.size;
    PARSER_MEM_ASSERT(enif_realloc_binary(&children->cdata, old_size + len));
    memcpy(children->cdata.data + old_size, s, len);
  } else {
    children = enif_alloc(sizeof(children_list_t));
    PARSER_MEM_ASSERT(children);
    if (!enif_alloc_binary(len, &children->cdata)) {
      enif_free(children);
      PARSER_MEM_ASSERT(0);
    }
    children->is_cdata = 1;
    memcpy(children->cdata.data, s, len);
    children->next = state->elements_stack->children;
    state->elements_stack->children = children;
  }

  return;
}
529
530 ERL_NIF_TERM
make_xmlel_children_list(state_t * state,children_list_t * list)531 make_xmlel_children_list(state_t *state, children_list_t *list) {
532 ErlNifEnv *env = state->send_env;
533 ERL_NIF_TERM children_list = enif_make_list(env, 0);
534
535 while (list) {
536 if (list->is_cdata) {
537 ERL_NIF_TERM data;
538 if (state->use_maps) {
539 data = enif_make_binary(env, &list->cdata);
540 } else {
541 data = enif_make_tuple2(env,
542 enif_make_atom(env, "xmlcdata"),
543 enif_make_binary(env, &list->cdata));
544 }
545 children_list = enif_make_list_cell(env, data, children_list);
546 } else
547 children_list = enif_make_list_cell(env, list->term, children_list);
548
549 children_list_t *old_head = list;
550 list = list->next;
551
552 enif_free(old_head);
553 }
554
555 return children_list;
556 }
557
/*
 * Expat end-tag callback.
 *
 * In streaming mode the end of the stream root sends the stream-end
 * event.  For any other element the finished element term is built and
 * then either attached to its parent as a child (no callback process, or
 * nested deeper than a direct child of the root) or sent as a stream
 * element event.
 */
void erlXML_EndElementHandler(state_t *state, const XML_Char *name)
{
  ErlNifEnv *env = state->send_env;

  if (state->error)
    return;

  state->depth--;

  if (state->pid && state->depth == 0) {
    ErlNifBinary name_bin;

    PARSER_MEM_ASSERT(encode_name(state, name, &name_bin, NULL, NULL, 0));

    if (state->use_maps) {
      ERL_NIF_TERM map = enif_make_new_map(env);
      enif_make_map_put(env, map, enif_make_atom(env, "__struct__"),
                        enif_make_atom(env, "Elixir.FastXML.StreamEnd"), &map);
      enif_make_map_put(env, map, enif_make_atom(env, "name"),
                        enif_make_binary(env, &name_bin), &map);
      send_event(state, map);
    } else {
      send_event(state,
                 enif_make_tuple2(env,
                                  enif_make_atom(env, "xmlstreamend"),
                                  enif_make_binary(env, &name_bin)));
    }
    return;
  }

  /*
   * make_xmlel_children_list() frees every node of the list.  Detach the
   * list from the stack entry first so that a later failure in this
   * function cannot leave a dangling pointer for the cleanup code.
   */
  children_list_t *children = state->elements_stack->children;
  state->elements_stack->children = NULL;
  ERL_NIF_TERM children_term = make_xmlel_children_list(state, children);

  ERL_NIF_TERM xmlel_term;

  if (state->use_maps) {
    xmlel_term = enif_make_new_map(env);
    enif_make_map_put(env, xmlel_term, enif_make_atom(env, "__struct__"),
                      enif_make_atom(env, "Elixir.FastXML.El"), &xmlel_term);
    enif_make_map_put(env, xmlel_term, enif_make_atom(env, "name"), state->elements_stack->name, &xmlel_term);
    enif_make_map_put(env, xmlel_term, enif_make_atom(env, "attrs"), state->elements_stack->attrs, &xmlel_term);
    enif_make_map_put(env, xmlel_term, enif_make_atom(env, "children"),
                      children_term, &xmlel_term);
  } else {
    xmlel_term = enif_make_tuple4(env, enif_make_atom(env, "xmlel"),
                                  state->elements_stack->name,
                                  state->elements_stack->attrs,
                                  children_term);
  }

  if (!state->pid || state->depth > 1) {
    /* Nested element: pop it and hang it on its parent. */
    children_list_t *el;
    xmlel_stack_t *cur_el = state->elements_stack;

    PARSER_MEM_ASSERT(el = enif_alloc(sizeof(children_list_t)));

    state->elements_stack = state->elements_stack->next;

    el->is_cdata = 0;
    el->term = xmlel_term;
    el->next = state->elements_stack->children;
    state->elements_stack->children = el;
    /* a namespace string shared with the parent is owned by the parent */
    if (cur_el->namespace_str != state->elements_stack->namespace_str)
      enif_free(cur_el->namespace_str);
    enif_free(cur_el);
  } else {
    /* Direct child of the stream root: pop it and deliver it. */
    xmlel_stack_t *cur_el = state->elements_stack;
    state->elements_stack = cur_el->next;
    if (!state->elements_stack || cur_el->namespace_str != state->elements_stack->namespace_str)
      enif_free(cur_el->namespace_str);
    enif_free(cur_el);
    if (state->use_maps) {
      enif_make_map_put(env, xmlel_term, enif_make_atom(env, "__struct__"),
                        enif_make_atom(env, "Elixir.FastXML.El"), &xmlel_term);
      send_event(state, xmlel_term);
    } else {
      send_event(state,
                 enif_make_tuple2(state->send_env,
                                  enif_make_atom(state->send_env, "xmlstreamelement"),
                                  xmlel_term));
    }
  }

  return;
}
640
/*
 * Expat namespace-declaration callback: queue the declaration on
 * state->xmlns_attrs so that the following start-tag callback can turn it
 * into an attribute.  The node's name is "xmlns" for a default namespace
 * and "xmlns:<prefix>" otherwise; its value is the URI.
 */
void erlXML_StartNamespaceDeclHandler(state_t *state,
                                      const XML_Char *prefix,
                                      const XML_Char *uri)
{
  /* Per the Expat documentation, `prefix` is NULL for a default namespace
     declaration (xmlns='...') and `uri` is NULL when the default
     namespace is being unset.  The latter case is ignored here. */
  if (uri == NULL || state->error)
    return;

  attrs_list_t *decl = enif_alloc(sizeof(attrs_list_t));
  PARSER_MEM_ASSERT(decl);

  size_t prefix_len = prefix ? strlen(prefix) : 0;
  size_t name_len = prefix ? prefix_len + 6 : 5;

  if (!enif_alloc_binary(name_len, &decl->name)) {
    enif_free(decl);
    PARSER_MEM_ASSERT(0);
  }

  if (prefix) {
    memcpy(decl->name.data, "xmlns:", 6);
    memcpy(decl->name.data + 6, prefix, prefix_len);
  } else {
    memcpy(decl->name.data, "xmlns", 5);
  }

  size_t uri_len = strlen(uri);

  if (!enif_alloc_binary(uri_len, &decl->value)) {
    enif_release_binary(&decl->name);
    enif_free(decl);
    PARSER_MEM_ASSERT(0);
  }
  memcpy(decl->value.data, uri, uri_len);

  /* push at the head of the pending list */
  decl->next = state->xmlns_attrs;
  state->xmlns_attrs = decl;
}
692
693 /*
694 * Prevent entity expansion attacks (CVE-2013-1664) by refusing
695 * to process any XML that contains a DTD.
696 */
erlXML_StartDoctypeDeclHandler(state_t * state,const XML_Char * doctypeName,const XML_Char * doctypeSysid,const XML_Char * doctypePubid,int hasInternalSubset)697 void erlXML_StartDoctypeDeclHandler(state_t *state,
698 const XML_Char *doctypeName,
699 const XML_Char *doctypeSysid,
700 const XML_Char *doctypePubid,
701 int hasInternalSubset)
702 {
703 XML_StopParser(state->parser, PARSING_NOT_RESUMABLE);
704 return;
705 }
706
707 /*
708 * Prevent entity expansion attacks (CVE-2013-1664) by having an explicit
709 * default handler. According to the documentation,
710 *
711 * "Setting the handler with this call has the side effect of turning off
712 * expansion of references to internally defined general entities. Instead
713 * these references are passed to the default handler."
714 */
erlXML_DefaultHandler(state_t * state,const XML_Char * s,int len)715 void erlXML_DefaultHandler(state_t *state, const XML_Char *s, int len)
716 {
717 return;
718 }
719
/*
 * Free everything the parser callbacks have hung on `state`: the pending
 * namespace declarations, the stack of open elements with their children,
 * and the list of root prefixes.  All three list heads are NULL afterwards,
 * so the state can be reused (reset) or destroyed.
 */
static void free_parser_allocated_structs(state_t *state) {
  while (state->xmlns_attrs) {
    attrs_list_t *c = state->xmlns_attrs;
    state->xmlns_attrs = c->next;

    enif_release_binary(&c->name);
    enif_release_binary(&c->value);
    enif_free(c);
  }
  while (state->elements_stack) {
    xmlel_stack_t *c = state->elements_stack;
    while (c->children) {
      children_list_t *cc = c->children;
      /* element children are terms and need no release */
      if (cc->is_cdata)
        enif_release_binary(&cc->cdata);
      c->children = cc->next;
      enif_free(cc);
    }
    /* a namespace string shared with the parent is freed with the parent */
    if (!c->next || c->namespace_str != c->next->namespace_str)
      enif_free(c->namespace_str);
    state->elements_stack = c->next;
    enif_free(c);
  }
  if (state->top_xmlns_attrs == &stream_stream_ns_attr) {
    /*
     * The static sentinel is never freed, but the reference must be
     * dropped: if it survived a reset, later heap nodes would be chained
     * in front of it and the next cleanup would try to free static memory.
     */
    state->top_xmlns_attrs = NULL;
  } else {
    while (state->top_xmlns_attrs) {
      attrs_list_t *c = state->top_xmlns_attrs;
      state->top_xmlns_attrs = c->next;
      enif_release_binary(&c->name);
      enif_release_binary(&c->value);
      enif_free(c);
    }
  }
}
752
/*
 * Resource destructor, also called directly by close_nif().  Releases the
 * parser, the pid copy, the send environment and all parser-owned lists,
 * then zeroes the state so that a second call finds nothing left to free.
 */
static void destroy_parser_state(ErlNifEnv *env, void *data)
{
  state_t *state = data;

  if (state == NULL)
    return;

  if (state->parser)
    XML_ParserFree(state->parser);
  if (state->pid)
    enif_free(state->pid);
  if (state->send_env)
    enif_free_env(state->send_env);

  free_parser_allocated_structs(state);

  memset(state, 0, sizeof(*state));
}
766
/*
 * Install `state` as user data and register every callback on its parser.
 * Called after the parser is created and again after each XML_ParserReset().
 */
static void setup_parser(state_t *state)
{
  XML_Parser p = state->parser;

  XML_SetUserData(p, state);

  XML_SetStartElementHandler(
      p, (XML_StartElementHandler) erlXML_StartElementHandler);
  XML_SetEndElementHandler(
      p, (XML_EndElementHandler) erlXML_EndElementHandler);
  XML_SetCharacterDataHandler(
      p, (XML_CharacterDataHandler) erlXML_CharacterDataHandler);
  XML_SetStartNamespaceDeclHandler(
      p, (XML_StartNamespaceDeclHandler) erlXML_StartNamespaceDeclHandler);
  XML_SetStartDoctypeDeclHandler(
      p, (XML_StartDoctypeDeclHandler) erlXML_StartDoctypeDeclHandler);

  /* names are then delivered as "ns\nname\nprefix" (see encode_name) */
  XML_SetReturnNSTriplet(p, 1);

  XML_SetDefaultHandler(p, (XML_DefaultHandler) erlXML_DefaultHandler);
}
785
init_parser_state(ErlNifPid * pid)786 static state_t *init_parser_state(ErlNifPid *pid)
787 {
788 state_t *state = enif_alloc_resource(parser_state_t, sizeof(state_t));
789 ASSERT(state);
790 memset(state, 0, sizeof(state_t));
791 if (pid) {
792 state->send_env = enif_alloc_env();
793 ASSERT(state->send_env);
794 state->pid = enif_alloc(sizeof(ErlNifPid));
795 ASSERT(state->pid);
796 memcpy(state->pid, pid, sizeof(ErlNifPid));
797 }
798 state->parser = XML_ParserCreate_MM("UTF-8", &ms, "\n");
799 setup_parser(state);
800 return state;
801 }
802
/*
 * NIF load callback: open (or take over) the parser state resource type.
 * Returns 0 on success and a non-zero value when the resource type could
 * not be opened, which makes loading the library fail instead of leaving
 * parser_state_t NULL for later calls.
 */
static int load(ErlNifEnv* env, void** priv, ERL_NIF_TERM load_info)
{
  ErlNifResourceFlags flags = ERL_NIF_RT_CREATE | ERL_NIF_RT_TAKEOVER;
  parser_state_t = enif_open_resource_type(env, NULL, "parser_state_t",
                                           destroy_parser_state,
                                           flags, NULL);

  return parser_state_t ? 0 : -1;
}
812
make_parse_error(ErlNifEnv * env,XML_Parser parser)813 static ERL_NIF_TERM make_parse_error(ErlNifEnv *env, XML_Parser parser)
814 {
815 enum XML_Error errcode = XML_GetErrorCode(parser);
816 const char *errstring;
817
818 if (errcode == XML_ERROR_EXTERNAL_ENTITY_HANDLING)
819 errstring = "DTDs are not allowed";
820 else
821 errstring = XML_ErrorString(errcode);
822
823 return enif_make_tuple2(env, enif_make_uint(env, errcode),
824 str2bin(env, errstring));
825 }
826
/*
 * reset(State): reset the Expat parser, free everything collected for the
 * current document and clear the counters and the error.
 * Returns the state term.  Raises badarg for a wrong argument, for a
 * state that was already closed, and when Expat refuses to reset.
 */
static ERL_NIF_TERM reset_nif(ErlNifEnv* env, int argc,
                              const ERL_NIF_TERM argv[])
{
  state_t *state = NULL;

  if (argc != 1)
    return enif_make_badarg(env);

  if (!enif_get_resource(env, argv[0], parser_state_t, (void *) &state))
    return enif_make_badarg(env);

  /* close_nif() zeroes the state; there is nothing left to reset then */
  if (!state->parser || !state->send_env)
    return enif_make_badarg(env);

  /* a plain `return 0` would hand an invalid term back to the VM */
  if (!XML_ParserReset(state->parser, "UTF-8"))
    return enif_make_badarg(env);
  setup_parser(state);

  free_parser_allocated_structs(state);

  enif_clear_env(state->send_env);

  state->size = 0;
  state->depth = 0;
  state->error = NULL;

  return argv[0];
}
851
/*
 * parse_element(Binary): parse a complete document in one go.
 * Returns the root element term, {error, Atom} for errors recorded by the
 * callbacks, or {error, {Code, Description}} for Expat errors.  Raises
 * badarg for a non-binary argument and when memory cannot be allocated.
 *
 * A temporary state without callback process is used; terms are built
 * directly in the caller's environment.
 */
static ERL_NIF_TERM parse_element_nif(ErlNifEnv* env, int argc,
                                      const ERL_NIF_TERM argv[])
{
  ERL_NIF_TERM el;
  ErlNifBinary bin;

  if (argc != 1)
    return enif_make_badarg(env);

  if (!enif_inspect_binary(env, argv[0], &bin))
    return enif_make_badarg(env);

  state_t *state = init_parser_state(NULL);
  if (!state)
    return enif_make_badarg(env);

  /* Bottom stack entry that will receive the root element as its child. */
  xmlel_stack_t *xmlel = enif_alloc(sizeof(xmlel_stack_t));
  if (!xmlel) {
    /* send_env is still NULL here, so the destructor leaves `env` alone */
    enif_release_resource(state);
    return enif_make_badarg(env);
  }

  memset(xmlel, 0, sizeof(xmlel_stack_t));

  xmlel->next = state->elements_stack;
  xmlel->children = NULL;

  state->elements_stack = xmlel;

  /* Borrow the caller's env only for the duration of the parse. */
  state->send_env = env;

  int res = XML_Parse(state->parser, (char *)bin.data, bin.size, 1);
  if (res == XML_STATUS_OK && state->elements_stack->children &&
      !state->elements_stack->children->is_cdata)
    el = state->elements_stack->children->term;
  else if (state->error)
    el = enif_make_tuple2(env, enif_make_atom(env, "error"),
                          enif_make_atom(env, state->error));
  else
    el = enif_make_tuple2(env, enif_make_atom(env, "error"),
                          make_parse_error(env, state->parser));

  /* give the env back before the destructor can see it */
  state->send_env = NULL;

  enif_release_resource(state);

  return el;
}
900
/*
 * Send a stream error carrying `msg` to the callback process:
 * {xmlstreamerror, Msg}, or a FastXML.StreamError struct in maps mode.
 */
static void send_error(state_t *state, ERL_NIF_TERM msg) {
  ErlNifEnv *env = state->send_env;
  ERL_NIF_TERM event;

  if (!state->use_maps) {
    event = enif_make_tuple2(env,
                             enif_make_atom(env, "xmlstreamerror"),
                             msg);
  } else {
    event = enif_make_new_map(env);
    enif_make_map_put(env, event, enif_make_atom(env, "__struct__"),
                      enif_make_atom(env, "Elixir.FastXML.StreamError"), &event);
    enif_make_map_put(env, event, enif_make_atom(env, "desc"),
                      msg, &event);
  }

  send_event(state, event);
}
919
/*
 * parse(State, Binary): feed one chunk to a streaming parser.  Events and
 * errors are delivered as messages to the callback process; the return
 * value is always the state term.  Once the bytes accumulated since the
 * last event reach max_size, the chunk is not parsed and a "too big"
 * error is sent instead.
 */
static ERL_NIF_TERM parse_nif(ErlNifEnv* env, int argc,
                              const ERL_NIF_TERM argv[])
{
  state_t *state = NULL;
  ErlNifBinary bin;

  if (argc != 2)
    return enif_make_badarg(env);

  if (!enif_get_resource(env, argv[0], parser_state_t, (void *) &state))
    return enif_make_badarg(env);

  if (!enif_inspect_binary(env, argv[1], &bin))
    return enif_make_badarg(env);

  if (!state->parser || !state->pid || !state->send_env)
    return enif_make_badarg(env);

  state->size += bin.size;
  state->env = env;

  if (state->size < state->max_size) {
    if (!XML_Parse(state->parser, (char *)bin.data, bin.size, 0)) {
      ERL_NIF_TERM reason = state->error
        ? str2bin(state->send_env, state->error)
        : make_parse_error(state->send_env, state->parser);
      send_error(state, reason);
    }
    return argv[0];
  }

  /* send_event() zeroes the counter; keep it so the limit stays tripped */
  size_t accumulated = state->size;
  send_error(state, str2bin(state->send_env, "XML stanza is too big"));
  state->size = accumulated;

  return argv[0];
}
956
/*
 * change_callback_pid(State, Pid): redirect all further events to Pid.
 * Returns a term for the same resource; badarg for a closed state or a
 * pid that is not local.
 */
static ERL_NIF_TERM change_callback_pid_nif(ErlNifEnv* env, int argc,
                                            const ERL_NIF_TERM argv[])
{
  state_t *state = NULL;
  ErlNifPid new_pid;

  if (argc != 2)
    return enif_make_badarg(env);

  if (!enif_get_resource(env, argv[0], parser_state_t, (void *) &state))
    return enif_make_badarg(env);

  int usable = state->parser && state->pid && state->send_env;
  if (!usable)
    return enif_make_badarg(env);

  if (!enif_get_local_pid(env, argv[1], &new_pid))
    return enif_make_badarg(env);

  memcpy(state->pid, &new_pid, sizeof(new_pid));

  return enif_make_resource(env, state);
}
979
/*
 * close(State): free the parser and everything attached to the state
 * right away instead of waiting for garbage collection.  Returns `true`;
 * badarg when the state has no parser or no callback pid (for instance
 * because it was closed before).
 */
static ERL_NIF_TERM close_nif(ErlNifEnv* env, int argc,
                              const ERL_NIF_TERM argv[])
{
  state_t *state = NULL;

  if (argc != 1)
    return enif_make_badarg(env);

  if (!enif_get_resource(env, argv[0], parser_state_t, (void *) &state))
    return enif_make_badarg(env);

  int is_open = state->parser && state->pid;
  if (!is_open)
    return enif_make_badarg(env);

  /* leaves the state zeroed, so the resource destructor is a no-op later */
  destroy_parser_state(env, state);

  return enif_make_atom(env, "true");
}
998
/*
 * new(Pid, MaxSize) / new(Pid, MaxSize, Options): create a streaming
 * parser that reports to the local process Pid.
 *
 *   MaxSize  unsigned integer, or the atom `infinity`
 *   Options  list; the atoms `no_gen_server` and `use_maps` are
 *            recognised, every other element is ignored
 *
 * Returns the parser resource term, or raises badarg.
 */
static ERL_NIF_TERM new_nif(ErlNifEnv* env, int argc,
                            const ERL_NIF_TERM argv[])
{
  int gen_srv = 1;
  int use_maps = 0;

  if (argc < 2 || argc > 3)
    return enif_make_badarg(env);

  if (argc == 3) {
    if (!enif_is_list(env, argv[2]))
      return enif_make_badarg(env);

    ERL_NIF_TERM opt, rest = argv[2];
    while (enif_get_list_cell(env, rest, &opt, &rest)) {
      char atom[16];

      /* non-atoms and atoms too long for the buffer are skipped */
      if (!enif_get_atom(env, opt, atom, sizeof(atom), ERL_NIF_LATIN1))
        continue;

      if (!strcmp("no_gen_server", atom))
        gen_srv = 0;
      else if (!strcmp("use_maps", atom))
        use_maps = 1;
    }
  }

  ErlNifPid pid;
  if (!enif_get_local_pid(env, argv[0], &pid))
    return enif_make_badarg(env);

  state_t *state = init_parser_state(&pid);
  if (state == NULL)
    return enif_make_badarg(env);

  state->normalize_ns = 1;
  state->use_maps = use_maps;
  state->gen_server = gen_srv;

  /* From here on the term keeps the resource alive. */
  ERL_NIF_TERM result = enif_make_resource(env, state);
  enif_release_resource(state);

  ErlNifUInt64 limit;
  if (enif_get_uint64(env, argv[1], &limit)) {
    state->max_size = (size_t) limit;
    return result;
  }

  if (enif_compare(argv[1], enif_make_atom(env, "infinity")) == 0) {
    state->max_size = (size_t) - 1;
    return result;
  }

  return enif_make_badarg(env);
}
1048
/* Exported functions: {Erlang name, arity, C implementation}. */
static ErlNifFunc nif_funcs[] =
  {
    {"new", 2, new_nif},
    {"new", 3, new_nif},
    {"parse", 2, parse_nif},
    {"parse_element", 1, parse_element_nif},
    {"reset", 1, reset_nif},
    {"close", 1, close_nif},
    {"change_callback_pid", 2, change_callback_pid_nif}
  };

/* Registers the table for module fxml_stream; only the load callback is set. */
ERL_NIF_INIT(fxml_stream, nif_funcs, load, NULL, NULL, NULL)
1061