1 /*
2  * Copyright (C) 2002-2021 ProcessOne, SARL. All Rights Reserved.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  *
16  */
17 
18 #include <erl_nif.h>
19 #include <string.h>
20 #include <stdio.h>
21 #include <expat.h>
22 
/* Second argument for XML_StopParser(): a stopped parse is never resumed. */
#define PARSING_NOT_RESUMABLE XML_FALSE

/*
 * ASSERT(x): make the enclosing function return 0 when `x` is false.
 * Wrapped in do { } while (0) so that it behaves as a single statement and
 * cannot capture an `else` that follows it.
 */
#define ASSERT(x) do { if (!(x)) return 0; } while (0)
/*
 * PARSER_ASSERT(X, E): for use inside void parser callbacks that have a
 * `state_t *state` in scope. When `X` is false it records the error string
 * `E` in state->error, stops the Expat parser and returns from the callback.
 */
#define PARSER_ASSERT(X, E) do { if (!(X)) { state->error = (E); XML_StopParser(state->parser, PARSING_NOT_RESUMABLE); return; } } while(0)
/* PARSER_ASSERT specialised for allocation failures ("enomem"). */
#define PARSER_MEM_ASSERT(x) PARSER_ASSERT((x), "enomem")
28 
/*
 * Singly linked list node describing one child of an element that is being
 * assembled. A node carries either an already built Erlang term or a
 * character-data binary; `is_cdata` tells which union member is valid.
 */
typedef struct children_list_t {
  union {
    ERL_NIF_TERM term;   /* valid when is_cdata == 0 */
    ErlNifBinary cdata;  /* valid when is_cdata != 0 */
  };
  struct children_list_t *next;
  char is_cdata;
} children_list_t;
37 
/*
 * Singly linked list node holding a name/value pair as binaries.
 * Used both for pending namespace declarations (state_t.xmlns_attrs) and
 * for the prefixes recorded on the top-level element
 * (state_t.top_xmlns_attrs).
 */
typedef struct attrs_list_t {
  ErlNifBinary name;
  ErlNifBinary value;
  struct attrs_list_t *next;
} attrs_list_t;
43 
/*
 * One entry of the stack of currently open elements.
 */
typedef struct xmlel_stack_t {
  ERL_NIF_TERM name;            /* element name term */
  ERL_NIF_TERM attrs;           /* attribute list or map term */
  children_list_t *children;    /* children collected so far, newest first */
  struct xmlel_stack_t *next;   /* enclosing (parent) element */
  /* May be the very same pointer as the parent's namespace_str; the
     cleanup code compares pointers before freeing it. */
  char *namespace_str;
  int redefined_top_prefix;     /* non-zero once a top-level prefix was redeclared */
} xmlel_stack_t;
52 
53 
/*
 * Parser state stored in the NIF resource and handed to every Expat
 * callback as user data.
 */
typedef struct {
  ErlNifEnv *env;                 /* caller environment, set by parse_nif before parsing */
  ErlNifEnv *send_env;            /* environment in which result/message terms are built */
  ErlNifPid *pid;                 /* receiver of stream events; NULL when parsing a single element */
  size_t depth;                   /* number of currently open elements */
  size_t size;                    /* bytes fed since the last sent event (reset by send_event) */
  size_t max_size;                /* parse_nif reports an error once size >= max_size */
  XML_Parser parser;
  xmlel_stack_t *elements_stack;  /* innermost open element first */
  attrs_list_t *xmlns_attrs;      /* namespace declarations awaiting the next start tag */
  attrs_list_t *top_xmlns_attrs;  /* prefixes recorded for the top-level element */
  const char *error;              /* set by PARSER_ASSERT; callbacks do nothing once non-NULL */
  char normalize_ns:1;
  char gen_server:1;
  char use_maps:1;
} state_t;
70 
/*
 * Result of encode_name(): what the start-element handler should do with
 * namespace information. OP_ERROR must stay 0 because callers test the
 * result for truthiness (PARSER_MEM_ASSERT).
 */
typedef enum xmlns_op {
  OP_ERROR = 0,
  OP_REMOVE_PREFIX,
  OP_REMOVE_XMLNS,
  OP_REPLACE_XMLNS,
  OP_NOP
} xmlns_op;
78 
/* Memory handling suite passed to Expat so that it allocates through the
   NIF allocator functions. */
static XML_Memory_Handling_Suite ms = {
  .malloc_fcn = enif_alloc,
  .realloc_fcn = enif_realloc,
  .free_fcn = enif_free
};
84 
/* Resource type for state_t objects; opened in load(). */
static ErlNifResourceType *parser_state_t = NULL;
86 
/* Positional initializer for an ErlNifBinary that points at a string
   literal: size first, data second. Such a binary must never be released. */
#define FAKE_BIN(STR) { sizeof(STR)-1, (unsigned char*)STR }

/* Statically allocated list node (next == NULL). Cleanup code compares
   against its address so that it is never freed. */
static attrs_list_t stream_stream_ns_attr = {
  FAKE_BIN("stream:stream"),
  FAKE_BIN("http://etherx.jabber.org/streams")
};
93 
/*
 * Compare the NUL-terminated string `str` with the `buf_len` bytes at `buf`.
 * Returns 1 when both have the same length and content, 0 otherwise.
 * `buf` is not dereferenced when `buf_len` is 0.
 */
static int same_str_buf(const char *str, const char *buf, size_t buf_len)
{
  size_t str_len = strlen(str);

  if (str_len != buf_len)
    return 0;

  return buf_len == 0 || memcmp(str, buf, buf_len) == 0;
}
102 
/*
 * Copy `buf_len` bytes from `buf` into a freshly allocated, NUL-terminated
 * C string. Returns NULL when the allocation fails. The result must be
 * released with enif_free().
 */
static char *dup_buf(const char *buf, size_t buf_len)
{
  char *copy = enif_alloc(buf_len+1);

  if (copy) {
    if (buf_len > 0)
      memcpy(copy, buf, buf_len);
    copy[buf_len] = '\0';
  }

  return copy;
}
115 
/*
 * Allocate `bin` with room for `buf_len` bytes and fill it from `buf`.
 * Returns 1 on success, 0 when the binary could not be allocated.
 */
static int dup_to_bin(ErlNifBinary *bin, const char *buf, size_t buf_len)
{
  if (enif_alloc_binary(buf_len, bin)) {
    memcpy(bin->data, buf, buf_len);
    return 1;
  }

  return 0;
}
125 
dup_to_term(ErlNifEnv * env,const char * buf,size_t buf_len)126 static ERL_NIF_TERM dup_to_term(ErlNifEnv *env, const char *buf, size_t buf_len)
127 {
128   ERL_NIF_TERM term;
129 
130   unsigned char *str = enif_make_new_binary(env, buf_len, &term);
131   memcpy(str, buf, buf_len);
132 
133   return term;
134 }
135 
/*
 * Search `list` for an entry whose name equals the prefix and whose value
 * equals the namespace. A NULL `pfx` or a NULL `ns` acts as a wildcard for
 * that field. Nothing matches when `pfx_len` is 0.
 * Returns 1 if a matching entry exists, 0 otherwise.
 */
static int has_prefix_ns_from_list(attrs_list_t*list, const char *pfx, size_t pfx_len,
                                   const char *ns, size_t ns_len)
{
  attrs_list_t *cur;

  if (pfx_len == 0)
    return 0;

  for (cur = list; cur != NULL; cur = cur->next) {
    int pfx_ok = pfx == NULL ||
                 (cur->name.size == pfx_len && memcmp(cur->name.data, pfx, pfx_len) == 0);

    if (!pfx_ok)
      continue;

    if (ns == NULL ||
        (cur->value.size == ns_len && memcmp(cur->value.data, ns, ns_len) == 0))
      return 1;
  }

  return 0;
}
151 
/*
 * Look the prefix/namespace pair up among the prefixes recorded for the
 * top-level element. Always 0 when the current element has
 * redefined_top_prefix set or when the prefix is empty.
 */
static int has_prefix_ns_from_top(state_t *state, const char *pfx, size_t pfx_len,
                                  const char *ns, size_t ns_len)
{
  int lookup_allowed = !state->elements_stack->redefined_top_prefix && pfx_len != 0;

  return lookup_allowed
    ? has_prefix_ns_from_list(state->top_xmlns_attrs, pfx, pfx_len, ns, ns_len)
    : 0;
}
160 
encode_name(state_t * state,const char * xml_name,ErlNifBinary * buf,char ** ns_str,char ** pfx_str,int top_element)161 static xmlns_op encode_name(state_t *state, const char *xml_name, ErlNifBinary *buf,
162                             char **ns_str, char **pfx_str, int top_element)
163 {
164   const char *parts[3];
165   int i, idx = 0;
166 
167   for (i = 0; ; i++) {
168     if (!xml_name[i] || xml_name[i] == '\n') {
169       parts[idx++] = xml_name + i;
170       if (!xml_name[i])
171         break;
172     }
173     if (idx >= 3)
174       return OP_ERROR;
175   }
176   const char *ns = NULL, *name = NULL, *prefix = NULL;
177   size_t ns_len = 0, name_len = 0, prefix_len = 0;
178 
179   if (idx == 1) {
180     name = xml_name;
181     name_len = parts[0] - xml_name;
182   } else {
183     ns = xml_name;
184     ns_len = parts[0] - xml_name;
185     name = parts[0] + 1;
186     name_len = parts[1] - parts[0] - 1;
187     if (idx == 3) {
188       prefix = parts[1] + 1;
189       prefix_len = parts[2] - parts[1] - 1;
190     }
191   }
192 
193   int with_prefix = prefix_len && (top_element || !ns_str);
194   xmlns_op res = OP_REPLACE_XMLNS;
195 
196   if (state->normalize_ns && !top_element) {
197     if (ns_str) {
198       if (!state->elements_stack->redefined_top_prefix && prefix_len &&
199           has_prefix_ns_from_top(state, prefix, prefix_len, ns, ns_len))
200       {
201           res = OP_REMOVE_PREFIX;
202           with_prefix = 1;
203       } else if (same_str_buf(state->elements_stack->namespace_str, ns, ns_len)) {
204         res = OP_REMOVE_XMLNS;
205         with_prefix = 0;
206       }
207     }
208   } else
209     res = OP_NOP;
210 
211   if (with_prefix) {
212     ASSERT(enif_alloc_binary(name_len + prefix_len + 1, buf));
213     memcpy(buf->data, prefix, prefix_len);
214     buf->data[prefix_len] = ':';
215     memcpy(buf->data + prefix_len + 1, name, name_len);
216   } else {
217     ASSERT(dup_to_bin(buf, name, name_len));
218   }
219 
220   if (ns_str) {
221     if (top_element && prefix_len > 0)
222       *ns_str = NULL;
223     else {
224       *ns_str = top_element ? dup_buf(ns, ns_len) :
225                 res == OP_REMOVE_PREFIX ?
226                 state->elements_stack->namespace_str :
227                 dup_buf(ns, ns_len);
228 
229       if (!*ns_str) {
230         enif_release_binary(buf);
231         return OP_ERROR;
232       }
233     }
234     if (pfx_str) {
235       if (res == OP_REMOVE_PREFIX) {
236         *pfx_str = dup_buf(prefix, prefix_len);
237         if (!*pfx_str) {
238           enif_release_binary(buf);
239           if (ns_str && *ns_str)
240             enif_free(*ns_str);
241           return OP_ERROR;
242         }
243       } else
244         *pfx_str = NULL;
245     }
246   }
247 
248   return res;
249 }
250 
str2bin(ErlNifEnv * env,const char * s)251 static ERL_NIF_TERM str2bin(ErlNifEnv *env, const char *s)
252 {
253   return dup_to_term(env, s, strlen(s));
254 }
255 
/*
 * Send `el` to state->pid, wrapped as {'$gen_event', El} when
 * state->gen_server is set. Resets the size counter and clears send_env
 * afterwards, so terms built in send_env are no longer usable.
 */
static void send_event(state_t *state, ERL_NIF_TERM el)
{
  ERL_NIF_TERM msg = el;

  state->size = 0;

  if (state->gen_server)
    msg = enif_make_tuple2(state->send_env,
                           enif_make_atom(state->send_env, "$gen_event"),
                           el);

  enif_send(state->env, state->pid, state->send_env, msg);
  enif_clear_env(state->send_env);
}
269 
/*
 * Same as send_event(), except that the wrapper tuple used when
 * state->gen_server is set is {'$gen_all_state_event', El}.
 */
static void send_all_state_event(state_t *state, ERL_NIF_TERM el)
{
  ERL_NIF_TERM msg = el;

  state->size = 0;

  if (state->gen_server)
    msg = enif_make_tuple2(state->send_env,
                           enif_make_atom(state->send_env, "$gen_all_state_event"),
                           el);

  enif_send(state->env, state->pid, state->send_env, msg);
  enif_clear_env(state->send_env);
}
283 
/*
 * Add the attribute name/value to the collection `root` and return the new
 * collection. With state->use_maps the collection is a map (the pair is
 * put into it); otherwise it is a list and {Name, Value} is prepended.
 * If the map insertion fails, `root` is returned unchanged instead of an
 * uninitialized term.
 */
static ERL_NIF_TERM append_attr(state_t *state, ERL_NIF_TERM root, ERL_NIF_TERM name, ERL_NIF_TERM value) {
  ErlNifEnv* env = state->send_env;

  if (state->use_maps) {
    ERL_NIF_TERM updated = root;

    if (!enif_make_map_put(env, root, name, value, &updated))
      return root;
    return updated;
  }

  return enif_make_list_cell(env, enif_make_tuple2(env, name, value), root);
}
295 
/*
 * Expat start-element callback.
 *
 * Builds the attribute term from `atts`, encodes the element name with
 * encode_name(), folds the namespace declarations queued in
 * state->xmlns_attrs into the attributes, and pushes a new entry on
 * state->elements_stack. For the outermost element of a stream
 * (state->pid set and depth == 1) a stream-start event is sent instead of
 * storing the name on the stack entry.
 *
 * On allocation failure PARSER_MEM_ASSERT records "enomem", stops the
 * parser and returns from this function immediately.
 */
void erlXML_StartElementHandler(state_t *state,
                                const XML_Char *name,
                                const XML_Char **atts)
{
  int i = 0;
  ErlNifEnv* env = state->send_env;
  ERL_NIF_TERM attrs_term;
  ErlNifBinary name_bin;

  if (state->use_maps) {
    attrs_term = enif_make_new_map(env);
  } else {
    attrs_term = enif_make_list(env, 0);
  }

  if (state->error)
    return;

  state->depth++;

  /* atts is a NULL-terminated array of name/value pairs; find its end ... */
  while (atts[i])
    i += 2;

  i -= 2;

  /* ... and walk it backwards, so that prepending to the list keeps the
     original attribute order. */
  while (i >= 0) {
    ErlNifBinary attr_name;
    ERL_NIF_TERM val;
    unsigned char *val_str;

    PARSER_MEM_ASSERT(encode_name(state, atts[i], &attr_name, NULL, NULL, 0));

    size_t val_len = strlen(atts[i+1]);
    val_str = enif_make_new_binary(env, val_len, &val);
    /* NOTE(review): if this assertion fires, attr_name is not released. */
    PARSER_MEM_ASSERT(val_str);
    memcpy(val_str, atts[i+1], val_len);

    attrs_term = append_attr(state, attrs_term, enif_make_binary(env, &attr_name), val);
    i -= 2;
  }

  char *ns = NULL, *pfx = NULL;
  /* Inherit the flag from the parent element (none at depth 1). */
  int redefined_top_prefix = state->depth > 1 ? state->elements_stack->redefined_top_prefix : 0;
  int xmlns_op;

  if (state->normalize_ns)
      xmlns_op = encode_name(state, name, &name_bin, &ns, &pfx, state->depth == 1);
  else
    xmlns_op = encode_name(state, name, &name_bin, NULL, NULL, state->depth == 1);

  /* OP_ERROR is 0, so this catches encode_name() failures. */
  PARSER_MEM_ASSERT(xmlns_op);

  if (!state->normalize_ns)
    xmlns_op = OP_NOP;

  /* -1: no prefixed declaration seen yet on the top element. */
  int non_xmpp_ns = -1;
  int had_stream_stream = 0;

  /* Consume the namespace declarations queued by
     erlXML_StartNamespaceDeclHandler for this start tag.
     NOTE(review): each node is unlinked before any PARSER_MEM_ASSERT below;
     when one of them fires the node `c` appears to be leaked - confirm. */
  while (state->xmlns_attrs) {
    ERL_NIF_TERM tuple = 0;
    ERL_NIF_TERM tuple_name = 0, tuple_val = 0;
    attrs_list_t *c = state->xmlns_attrs;
    ErlNifBinary new_prefix, new_ns;

    state->xmlns_attrs = c->next;

    /* Prefixed declaration ("xmlns:" + prefix) on the top-level element:
       prepare copies of prefix and namespace for state->top_xmlns_attrs. */
    if (state->depth == 1 && state->normalize_ns && c->name.size > 6) {
      if (non_xmpp_ns != 1 || !has_prefix_ns_from_list(&stream_stream_ns_attr,
                                                       (char*)c->name.data+6, c->name.size-6,
                                                       (char*)c->value.data, c->value.size))
      {
        if (had_stream_stream) {
          PARSER_MEM_ASSERT(dup_to_bin(&new_prefix, (char*)stream_stream_ns_attr.name.data,
                                       stream_stream_ns_attr.name.size));
          PARSER_MEM_ASSERT(dup_to_bin(&new_ns, (char*)stream_stream_ns_attr.value.data,
                                       stream_stream_ns_attr.value.size));
          c->name = new_prefix;
          c->value = new_ns;
          c->next = state->top_xmlns_attrs;
          state->top_xmlns_attrs = c;
          had_stream_stream = 0;
        }
        non_xmpp_ns = 1;
        PARSER_MEM_ASSERT(dup_to_bin(&new_prefix, (char*)c->name.data+6, c->name.size-6));
        PARSER_MEM_ASSERT(dup_to_bin(&new_ns, (char*)c->value.data, c->value.size));
      } else {
        had_stream_stream = 1;
        non_xmpp_ns = 0;
      }
    }

    if (c->name.size == 5) { // xmlns
      if (xmlns_op == OP_REMOVE_XMLNS) {
        /* Default namespace equals the parent's: drop the declaration. */
        enif_release_binary(&c->name);
        enif_release_binary(&c->value);
        enif_free(c);
        continue;
      } else if (xmlns_op == OP_REPLACE_XMLNS) {
        /* Emit xmlns with the element's own namespace instead of the
           declared value. */
        enif_release_binary(&c->value);
        if (state->use_maps) {
          tuple_name = enif_make_binary(env, &c->name);
          tuple_val = dup_to_term(env, ns, strlen(ns));
        } else {
          tuple = enif_make_tuple2(env, enif_make_binary(env, &c->name),
                                   dup_to_term(env, ns, strlen(ns)));
        }
        xmlns_op = OP_NOP;
      }
      if (!ns && state->normalize_ns)
        PARSER_MEM_ASSERT(ns = dup_buf((char *) c->value.data, c->value.size));
    } else if (xmlns_op == OP_REMOVE_PREFIX &&
        same_str_buf(pfx, (char*)c->name.data + 6, c->name.size - 6)) {
      /* Declaration of the prefix that is being removed from the name. */
      enif_release_binary(&c->name);
      enif_release_binary(&c->value);
      enif_free(c);
      continue;
    } else if (!redefined_top_prefix && state->depth > 1 && c->name.size > 6 &&
        has_prefix_ns_from_top(state, (char*)c->name.data + 6, c->name.size - 6, NULL, 0)) {
      redefined_top_prefix = 1;
    }

    if (state->use_maps) {
      /* NOTE(review): enif_make_map_update is documented to fail when the
         key is not already present in the map; the return value is ignored
         here - confirm this is the intended behavior for xmlns keys. */
      if (!tuple_name) {
        enif_make_map_update(env, attrs_term, enif_make_binary(env, &c->name),
                             enif_make_binary(env, &c->value), &attrs_term);
      } else
        enif_make_map_update(env, attrs_term, tuple_name, tuple_val, &attrs_term);
    } else {
      if (!tuple) {
        tuple = enif_make_tuple2(env, enif_make_binary(env, &c->name),
                                 enif_make_binary(env, &c->value));
      }
      attrs_term = enif_make_list_cell(env, tuple, attrs_term);
    }

    /* Reuse the node for the top-level prefix list, now carrying the
       copies made above; otherwise the node itself is no longer needed. */
    if (non_xmpp_ns && state->depth == 1 && state->normalize_ns && c->name.size > 6) {
      c->name = new_prefix;
      c->value = new_ns;
      c->next = state->top_xmlns_attrs;
      state->top_xmlns_attrs = c;
    } else
      enif_free(c);
  }

  if (!non_xmpp_ns && state->depth == 1 && state->normalize_ns) {
    state->top_xmlns_attrs = &stream_stream_ns_attr;
  }

  if (xmlns_op == OP_REPLACE_XMLNS) {
    /* No xmlns declaration was present: add one explicitly. */
    attrs_term = append_attr(state, attrs_term, dup_to_term(env, "xmlns", 5),
                             dup_to_term(env, ns, strlen(ns)));
  } else if (xmlns_op == OP_REMOVE_PREFIX) {
    enif_free(pfx);
  }

  if (!ns && state->normalize_ns)
    PARSER_MEM_ASSERT(ns = dup_buf("", 0));

  xmlel_stack_t *xmlel = enif_alloc(sizeof(xmlel_stack_t));
  /* NOTE(review): name_bin (and possibly ns) are not released when this
     assertion fires - confirm. */
  PARSER_MEM_ASSERT(xmlel);

  xmlel->next = state->elements_stack;
  xmlel->attrs = attrs_term;
  xmlel->namespace_str = ns;
  xmlel->children = NULL;
  xmlel->redefined_top_prefix = redefined_top_prefix;

  state->elements_stack = xmlel;

  if (state->pid && state->depth == 1) {
    /* Outermost element of a stream: report it right away. xmlel->name is
       left unset in this branch. */
    if (state->use_maps) {
      ERL_NIF_TERM map = enif_make_new_map(env);
      enif_make_map_put(env, map, enif_make_atom(env, "__struct__"),
                        enif_make_atom(env, "Elixir.FastXML.StreamStart"), &map);
      enif_make_map_put(env, map, enif_make_atom(env, "name"),
                        enif_make_binary(env, &name_bin), &map);
      enif_make_map_put(env, map, enif_make_atom(env, "attrs"),
                        attrs_term, &map);
      send_event(state, map);
    } else {
      send_event(state,
                 enif_make_tuple3(env,
                                  enif_make_atom(env, "xmlstreamstart"),
                                  enif_make_binary(env, &name_bin),
                                  attrs_term));
    }
  } else {
    xmlel->name = enif_make_binary(env, &name_bin);
  }
}
486 
/*
 * Expat character-data callback.
 *
 * Text outside any element is ignored. Text directly inside the outermost
 * stream element (state->pid set, depth == 1) is sent at once as
 * {xmlstreamcdata, Data}. Any other text is attached to the current
 * element: appended to the newest child when that child is character data,
 * otherwise stored as a new child node.
 */
void erlXML_CharacterDataHandler(state_t *state, const XML_Char *s, int len)
{
  ErlNifEnv *env = state->send_env;

  if (state->error || state->depth == 0)
    return;

  if (state->pid && state->depth == 1) {
    ErlNifBinary chunk;

    PARSER_MEM_ASSERT(enif_alloc_binary(len, &chunk));
    memcpy(chunk.data, s, len);
    send_all_state_event(state,
                         enif_make_tuple2(env,
                                          enif_make_atom(env, "xmlstreamcdata"),
                                          enif_make_binary(env, &chunk)));
    return;
  }

  children_list_t *head = state->elements_stack->children;

  if (head && head->is_cdata) {
    /* Grow the existing text node in place. */
    int prev_len = head->cdata.size;

    PARSER_MEM_ASSERT(enif_realloc_binary(&head->cdata, prev_len + len));
    memcpy(head->cdata.data+prev_len, s, len);
    return;
  }

  children_list_t *node = enif_alloc(sizeof(children_list_t));
  PARSER_MEM_ASSERT(node);

  if (!enif_alloc_binary(len, &node->cdata)) {
    enif_free(node);
    PARSER_MEM_ASSERT(0);
  }

  node->is_cdata = 1;
  memcpy(node->cdata.data, s, len);
  node->next = head;
  state->elements_stack->children = node;
}
529 
530 ERL_NIF_TERM
make_xmlel_children_list(state_t * state,children_list_t * list)531 make_xmlel_children_list(state_t *state, children_list_t *list) {
532   ErlNifEnv *env = state->send_env;
533   ERL_NIF_TERM children_list = enif_make_list(env, 0);
534 
535   while (list) {
536     if (list->is_cdata) {
537       ERL_NIF_TERM data;
538       if (state->use_maps) {
539         data = enif_make_binary(env, &list->cdata);
540       } else {
541         data = enif_make_tuple2(env,
542                          enif_make_atom(env, "xmlcdata"),
543                          enif_make_binary(env, &list->cdata));
544       }
545       children_list = enif_make_list_cell(env, data, children_list);
546     } else
547       children_list = enif_make_list_cell(env, list->term, children_list);
548 
549     children_list_t *old_head = list;
550     list = list->next;
551 
552     enif_free(old_head);
553   }
554 
555   return children_list;
556 }
557 
/*
 * Expat end-element callback.
 *
 * - Closing the outermost stream element (state->pid set, depth reaches 0)
 *   sends a stream-end event carrying the encoded element name.
 * - Otherwise the finished element is turned into a term (an `xmlel` tuple,
 *   or a map with state->use_maps) and popped from the element stack. It is
 *   either attached as a child of its parent, or, for a direct child of the
 *   stream root when state->pid is set, sent as a stream element.
 *
 * On allocation failure PARSER_MEM_ASSERT records "enomem", stops the
 * parser and returns.
 */
void erlXML_EndElementHandler(state_t *state, const XML_Char *name)
{
  ErlNifEnv *env = state->send_env;

  if (state->error)
    return;

  state->depth--;

  if (state->pid && state->depth == 0) {
    ErlNifBinary name_bin;

    PARSER_MEM_ASSERT(encode_name(state, name, &name_bin, NULL, NULL, 0));

    if (state->use_maps) {
      ERL_NIF_TERM map = enif_make_new_map(env);
      enif_make_map_put(env, map, enif_make_atom(env, "__struct__"),
                        enif_make_atom(env, "Elixir.FastXML.StreamEnd"), &map);
      enif_make_map_put(env, map, enif_make_atom(env, "name"),
                        enif_make_binary(env, &name_bin), &map);
      send_event(state, map);
    } else {
      send_event(state,
                 enif_make_tuple2(env,
                                  enif_make_atom(env, "xmlstreamend"),
                                  enif_make_binary(env, &name_bin)));
    }
    return;
  }

  xmlel_stack_t *cur_el = state->elements_stack;

  /* make_xmlel_children_list() frees the child nodes. Detach them from the
     stack entry right away so that an early return below cannot leave a
     dangling pointer for the cleanup code to walk. */
  ERL_NIF_TERM children_term = make_xmlel_children_list(state, cur_el->children);
  cur_el->children = NULL;

  ERL_NIF_TERM xmlel_term;

  if (state->use_maps) {
    xmlel_term = enif_make_new_map(env);
    enif_make_map_put(env, xmlel_term, enif_make_atom(env, "__struct__"),
                      enif_make_atom(env, "Elixir.FastXML.El"), &xmlel_term);
    enif_make_map_put(env, xmlel_term, enif_make_atom(env, "name"), cur_el->name, &xmlel_term);
    enif_make_map_put(env, xmlel_term, enif_make_atom(env, "attrs"), cur_el->attrs, &xmlel_term);
    enif_make_map_put(env, xmlel_term, enif_make_atom(env, "children"),
                      children_term, &xmlel_term);
  } else {
    xmlel_term = enif_make_tuple4(env, enif_make_atom(env, "xmlel"),
                                  cur_el->name,
                                  cur_el->attrs,
                                  children_term);
  }

  if (!state->pid || state->depth > 1) {
    /* Nested element: becomes the newest child of its parent. */
    children_list_t *el;

    PARSER_MEM_ASSERT(el = enif_alloc(sizeof(children_list_t)));

    state->elements_stack = cur_el->next;

    el->is_cdata = 0;
    el->term = xmlel_term;
    el->next = state->elements_stack->children;
    state->elements_stack->children = el;
    /* namespace_str may be shared with the parent; free only private copies. */
    if (cur_el->namespace_str != state->elements_stack->namespace_str)
      enif_free(cur_el->namespace_str);
    enif_free(cur_el);
  } else {
    /* Direct child of the stream root: deliver it to the receiver. */
    state->elements_stack = cur_el->next;
    if (!state->elements_stack || cur_el->namespace_str != state->elements_stack->namespace_str)
      enif_free(cur_el->namespace_str);
    enif_free(cur_el);
    if (state->use_maps) {
      /* The map already carries its "__struct__" key from above. */
      send_event(state, xmlel_term);
    } else {
      send_event(state,
                 enif_make_tuple2(state->send_env,
                                  enif_make_atom(state->send_env, "xmlstreamelement"),
                                  xmlel_term));
    }
  }
}
640 
/*
 * Expat namespace-declaration callback.
 *
 * Queues the declaration on state->xmlns_attrs as an attribute named
 * "xmlns" (NULL prefix, i.e. default namespace) or "xmlns:<prefix>", with
 * the URI as value. Declarations with a NULL URI (the default namespace
 * being unset, per the Expat documentation) are ignored.
 */
void erlXML_StartNamespaceDeclHandler(state_t *state,
                                      const XML_Char *prefix,
                                      const XML_Char *uri)
{
  if (uri == NULL || state->error)
    return;

  attrs_list_t *decl = enif_alloc(sizeof(attrs_list_t));
  PARSER_MEM_ASSERT(decl);

  size_t prefix_len = prefix ? strlen(prefix) : 0;
  size_t name_len = prefix ? prefix_len + 6 : 5;

  if (!enif_alloc_binary(name_len, &decl->name)) {
    enif_free(decl);
    PARSER_MEM_ASSERT(0);
  }

  if (prefix) {
    memcpy(decl->name.data, "xmlns:", 6);
    memcpy(decl->name.data + 6, prefix, prefix_len);
  } else {
    memcpy(decl->name.data, "xmlns", 5);
  }

  size_t uri_len = strlen(uri);

  if (!enif_alloc_binary(uri_len, &decl->value)) {
    enif_release_binary(&decl->name);
    enif_free(decl);
    PARSER_MEM_ASSERT(0);
  }
  memcpy(decl->value.data, uri, uri_len);

  /* Newest declaration goes to the front of the queue. */
  decl->next = state->xmlns_attrs;
  state->xmlns_attrs = decl;
}
692 
693 /*
694  * Prevent entity expansion attacks (CVE-2013-1664) by refusing
695  * to process any XML that contains a DTD.
696  */
erlXML_StartDoctypeDeclHandler(state_t * state,const XML_Char * doctypeName,const XML_Char * doctypeSysid,const XML_Char * doctypePubid,int hasInternalSubset)697 void erlXML_StartDoctypeDeclHandler(state_t *state,
698                                     const XML_Char *doctypeName,
699                                     const XML_Char *doctypeSysid,
700                                     const XML_Char *doctypePubid,
701                                     int hasInternalSubset)
702 {
703   XML_StopParser(state->parser, PARSING_NOT_RESUMABLE);
704   return;
705 }
706 
707 /*
708  * Prevent entity expansion attacks (CVE-2013-1664) by having an explicit
709  * default handler. According to the documentation,
710  *
711  * "Setting the handler with this call has the side effect of turning off
712  *  expansion of references to internally defined general entities. Instead
713  *  these references are passed to the default handler."
714  */
erlXML_DefaultHandler(state_t * state,const XML_Char * s,int len)715 void erlXML_DefaultHandler(state_t *state, const XML_Char *s, int len)
716 {
717   return;
718 }
719 
/*
 * Release everything the parser callbacks have allocated so far: queued
 * namespace declarations, the stack of open elements with their children,
 * and the list of top-level prefixes. All three list heads in `state` are
 * NULL afterwards, so the state can be reused for a fresh document.
 */
static void free_parser_allocated_structs(state_t *state) {
  while (state->xmlns_attrs) {
    attrs_list_t *c = state->xmlns_attrs;
    state->xmlns_attrs = c->next;

    enif_release_binary(&c->name);
    enif_release_binary(&c->value);
    enif_free(c);
  }
  while (state->elements_stack) {
    xmlel_stack_t *c = state->elements_stack;
    while (c->children) {
      children_list_t *cc = c->children;
      /* Only character data owns a binary; element children are terms. */
      if (cc->is_cdata)
        enif_release_binary(&cc->cdata);
      c->children = cc->next;
      enif_free(cc);
    }
    /* namespace_str may be shared with the parent entry; free it only
       with the last entry that refers to it. */
    if (!c->next || c->namespace_str != c->next->namespace_str)
      enif_free(c->namespace_str);
    state->elements_stack = c->next;
    enif_free(c);
  }

  /* The statically allocated stream_stream_ns_attr node must never be
     released. Stop when it is reached and always clear the list head, so
     that no stale pointer survives a reset. */
  attrs_list_t *top = state->top_xmlns_attrs;
  state->top_xmlns_attrs = NULL;
  while (top && top != &stream_stream_ns_attr) {
    attrs_list_t *following = top->next;
    enif_release_binary(&top->name);
    enif_release_binary(&top->value);
    enif_free(top);
    top = following;
  }
}
752 
/*
 * Resource destructor for state_t: frees the Expat parser, the stored pid,
 * the send environment and all parser-allocated structures, then zeroes
 * the state. Fields that are NULL are skipped.
 */
static void destroy_parser_state(ErlNifEnv *env, void *data)
{
  state_t *st = (state_t *) data;

  if (!st)
    return;

  if (st->parser)
    XML_ParserFree(st->parser);
  if (st->pid)
    enif_free(st->pid);
  if (st->send_env)
    enif_free_env(st->send_env);

  free_parser_allocated_structs(st);

  memset(st, 0, sizeof(state_t));
}
766 
/*
 * Register `state` as user data and install all callbacks on
 * state->parser. Namespace triplets are requested so that element and
 * attribute names also carry their prefix.
 */
static void setup_parser(state_t *state)
{
  XML_Parser p = state->parser;

  XML_SetUserData(p, state);
  XML_SetStartElementHandler(
      p, (XML_StartElementHandler) erlXML_StartElementHandler);
  XML_SetEndElementHandler(
      p, (XML_EndElementHandler) erlXML_EndElementHandler);
  XML_SetCharacterDataHandler(
      p, (XML_CharacterDataHandler) erlXML_CharacterDataHandler);
  XML_SetStartNamespaceDeclHandler(
      p, (XML_StartNamespaceDeclHandler) erlXML_StartNamespaceDeclHandler);
  XML_SetStartDoctypeDeclHandler(
      p, (XML_StartDoctypeDeclHandler) erlXML_StartDoctypeDeclHandler);
  XML_SetReturnNSTriplet(p, 1);
  XML_SetDefaultHandler(p, (XML_DefaultHandler) erlXML_DefaultHandler);
}
785 
init_parser_state(ErlNifPid * pid)786 static state_t *init_parser_state(ErlNifPid *pid)
787 {
788   state_t *state = enif_alloc_resource(parser_state_t, sizeof(state_t));
789   ASSERT(state);
790   memset(state, 0, sizeof(state_t));
791   if (pid) {
792     state->send_env = enif_alloc_env();
793     ASSERT(state->send_env);
794     state->pid = enif_alloc(sizeof(ErlNifPid));
795     ASSERT(state->pid);
796     memcpy(state->pid, pid, sizeof(ErlNifPid));
797   }
798   state->parser = XML_ParserCreate_MM("UTF-8", &ms, "\n");
799   setup_parser(state);
800   return state;
801 }
802 
/*
 * NIF load callback: opens (or takes over) the "parser_state_t" resource
 * type. Returns 0 on success and a non-zero value when the resource type
 * could not be opened, so that loading the library fails instead of
 * leaving parser_state_t NULL.
 */
static int load(ErlNifEnv* env, void** priv, ERL_NIF_TERM load_info)
{
  ErlNifResourceFlags flags = ERL_NIF_RT_CREATE | ERL_NIF_RT_TAKEOVER;

  (void) priv;
  (void) load_info;

  parser_state_t = enif_open_resource_type(env, NULL, "parser_state_t",
					   destroy_parser_state,
					   flags, NULL);

  return parser_state_t ? 0 : 1;
}
812 
make_parse_error(ErlNifEnv * env,XML_Parser parser)813 static ERL_NIF_TERM make_parse_error(ErlNifEnv *env, XML_Parser parser)
814 {
815   enum XML_Error errcode = XML_GetErrorCode(parser);
816   const char *errstring;
817 
818   if (errcode == XML_ERROR_EXTERNAL_ENTITY_HANDLING)
819     errstring = "DTDs are not allowed";
820   else
821     errstring = XML_ErrorString(errcode);
822 
823   return enif_make_tuple2(env, enif_make_uint(env, errcode),
824 			  str2bin(env, errstring));
825 }
826 
/*
 * reset(State) -> State
 *
 * Puts the parser back into its initial condition: resets the Expat
 * parser, reinstalls the callbacks, frees all partially built structures,
 * clears the send environment and zeroes size, depth and error.
 *
 * Raises badarg when the argument is not a parser state, when the state
 * has no parser or send environment, or when Expat refuses to reset the
 * parser.
 */
static ERL_NIF_TERM reset_nif(ErlNifEnv* env, int argc,
			      const ERL_NIF_TERM argv[])
{
  state_t *state = NULL;

  if (argc != 1)
    return enif_make_badarg(env);

  if (!enif_get_resource(env, argv[0], parser_state_t, (void *) &state))
    return enif_make_badarg(env);

  /* enif_clear_env() below needs a real environment. */
  if (!state->parser || !state->send_env)
    return enif_make_badarg(env);

  /* A plain 0 is not a valid term to hand back to the VM. */
  if (!XML_ParserReset(state->parser, "UTF-8"))
    return enif_make_badarg(env);
  setup_parser(state);

  free_parser_allocated_structs(state);

  enif_clear_env(state->send_env);

  state->size = 0;
  state->depth = 0;
  state->error = NULL;

  return argv[0];
}
851 
/*
 * parse_element(Binary) -> Element | {error, Reason}
 *
 * Parses a complete XML document held in one binary using a temporary
 * parser state without receiver pid. A placeholder stack entry is pushed
 * first so that the document's root element ends up as that entry's child.
 * Result terms are built directly in the caller's environment.
 *
 * Reason is an atom taken from state->error when a callback failed,
 * otherwise the {Code, Description} tuple from make_parse_error().
 */
static ERL_NIF_TERM parse_element_nif(ErlNifEnv* env, int argc,
				      const ERL_NIF_TERM argv[])
{
  ERL_NIF_TERM el;
  ErlNifBinary bin;

  if (argc != 1)
    return enif_make_badarg(env);

  if (!enif_inspect_binary(env, argv[0], &bin))
    return enif_make_badarg(env);

  state_t *state = init_parser_state(NULL);
  if (!state)
    return enif_make_badarg(env);

  xmlel_stack_t *xmlel = enif_alloc(sizeof(xmlel_stack_t));
  if (!xmlel) {
    /* send_env is still NULL here, so the destructor will not try to free
       the caller's environment. */
    enif_release_resource(state);
    return enif_make_badarg(env);
  }

  memset(xmlel, 0, sizeof(xmlel_stack_t));

  xmlel->next = state->elements_stack;
  xmlel->children = NULL;

  state->elements_stack = xmlel;

  /* Borrow the caller's environment only for the duration of the parse. */
  state->send_env = env;

  int res = XML_Parse(state->parser, (char *)bin.data, bin.size, 1);
  if (res == XML_STATUS_OK && state->elements_stack->children &&
          !state->elements_stack->children->is_cdata)
    el = state->elements_stack->children->term;
  else if (state->error)
    el = enif_make_tuple2(env, enif_make_atom(env, "error"),
                          enif_make_atom(env, state->error));
  else
    el = enif_make_tuple2(env, enif_make_atom(env, "error"),
			  make_parse_error(env, state->parser));

  /* The environment belongs to the caller; detach it before the
     destructor runs. */
  state->send_env = NULL;

  enif_release_resource(state);

  return el;
}
900 
/*
 * Send a stream error carrying `msg` to the receiver: a
 * FastXML.StreamError struct map with state->use_maps, otherwise
 * {xmlstreamerror, Msg}.
 */
static void send_error(state_t *state, ERL_NIF_TERM msg) {
  ErlNifEnv *env = state->send_env;
  ERL_NIF_TERM event;

  if (!state->use_maps) {
    event = enif_make_tuple2(env,
                             enif_make_atom(env, "xmlstreamerror"),
                             msg);
  } else {
    event = enif_make_new_map(env);
    enif_make_map_put(env, event, enif_make_atom(env, "__struct__"),
                      enif_make_atom(env, "Elixir.FastXML.StreamError"), &event);
    enif_make_map_put(env, event, enif_make_atom(env, "desc"),
                      msg, &event);
  }

  send_event(state, event);
}
919 
/*
 * NIF parse/2: feed one chunk of data to a streaming parser.
 *
 * argv[0] is the parser resource, argv[1] the binary chunk. The chunk
 * length is added to the running state->size; once that reaches
 * state->max_size the chunk is not parsed and an "XML stanza is too big"
 * error event is sent instead. A failed XML_Parse() also results in an
 * error event (state->error if set, otherwise the parser's own error).
 *
 * Returns argv[0], or badarg for bad arguments or a state that has no
 * parser, pid or send environment.
 */
static ERL_NIF_TERM parse_nif(ErlNifEnv* env, int argc,
			      const ERL_NIF_TERM argv[])
{
  state_t *st = NULL;
  ErlNifBinary chunk;

  if (argc != 2 ||
      !enif_get_resource(env, argv[0], parser_state_t, (void *) &st) ||
      !enif_inspect_binary(env, argv[1], &chunk) ||
      !st->parser || !st->pid || !st->send_env)
    return enif_make_badarg(env);

  st->size += chunk.size;
  st->env = env;

  if (st->size < st->max_size) {
    int status = XML_Parse(st->parser, (char *)chunk.data, chunk.size, 0);

    if (!status) {
      ERL_NIF_TERM reason;

      if (st->error)
        reason = str2bin(st->send_env, st->error);
      else
        reason = make_parse_error(st->send_env, st->parser);

      send_error(st, reason);
    }
  } else {
    /* send_event() would reset the counter to zero; remember it so the
       limit stays exceeded after the error has been reported. */
    size_t accumulated = st->size;

    send_error(st, str2bin(st->send_env, "XML stanza is too big"));
    st->size = accumulated;
  }

  return argv[0];
}
956 
/*
 * NIF change_callback_pid/2: replace the pid stored in a parser state.
 *
 * argv[0] is the parser resource, argv[1] the new local pid. The pid is
 * copied into the storage state->pid already points at.
 *
 * Returns a resource term for the same state, or badarg for bad arguments
 * or a state that has no parser, pid or send environment.
 */
static ERL_NIF_TERM change_callback_pid_nif(ErlNifEnv* env, int argc,
					    const ERL_NIF_TERM argv[])
{
  state_t *st = NULL;
  ErlNifPid new_pid;

  if (argc != 2 ||
      !enif_get_resource(env, argv[0], parser_state_t, (void *) &st))
    return enif_make_badarg(env);

  int usable = st->parser && st->pid && st->send_env;
  if (!usable || !enif_get_local_pid(env, argv[1], &new_pid))
    return enif_make_badarg(env);

  *st->pid = new_pid;

  return enif_make_resource(env, st);
}
979 
/*
 * NIF close/1: tear down a streaming parser.
 *
 * argv[0] is the parser resource. The state must still have both a parser
 * and a pid; it is then handed to destroy_parser_state().
 *
 * Returns the atom `true`, or badarg otherwise.
 */
static ERL_NIF_TERM close_nif(ErlNifEnv* env, int argc,
			      const ERL_NIF_TERM argv[])
{
  state_t *st = NULL;

  if (argc != 1 ||
      !enif_get_resource(env, argv[0], parser_state_t, (void *) &st))
    return enif_make_badarg(env);

  if (st->parser && st->pid) {
    destroy_parser_state(env, st);
    return enif_make_atom(env, "true");
  }

  return enif_make_badarg(env);
}
998 
/*
 * NIF new/2 and new/3: create a streaming parser resource.
 *
 * argv[0]  local pid stored in the new state.
 * argv[1]  size limit: an unsigned integer, or the atom `infinity`
 *          (stored as the largest size_t value).
 * argv[2]  optional list of option atoms; `no_gen_server` clears the
 *          gen_server flag and `use_maps` sets the use_maps flag. Other
 *          list elements are ignored.
 *
 * All arguments are validated before the parser state is allocated, so a
 * bad argument never creates a parser that is then thrown away.
 *
 * Returns the resource term, or badarg on invalid arguments or when the
 * state cannot be allocated.
 */
static ERL_NIF_TERM new_nif(ErlNifEnv* env, int argc,
			    const ERL_NIF_TERM argv[])
{
  int gen_srv = 1;
  int use_maps = 0;

  if (argc != 2 && argc != 3)
    return enif_make_badarg(env);

  if (argc == 3) {
    if (!enif_is_list(env, argv[2]))
      return enif_make_badarg(env);
    ERL_NIF_TERM head, tail = argv[2];
    while (enif_get_list_cell(env, tail, &head, &tail)) {
      /* Large enough for the longest recognised option plus the NUL;
         longer atoms fail enif_get_atom() and are skipped. */
      char buf[16];
      if (enif_get_atom(env, head, buf, sizeof(buf), ERL_NIF_LATIN1)) {
        if (strcmp("no_gen_server", buf) == 0)
          gen_srv = 0;
        else if (strcmp("use_maps", buf) == 0)
          use_maps = 1;
      }
    }
  }

  ErlNifPid pid;
  if (!enif_get_local_pid(env, argv[0], &pid))
    return enif_make_badarg(env);

  size_t max_size;
  ErlNifUInt64 limit;
  if (enif_get_uint64(env, argv[1], &limit)) {
    /* Clamp instead of truncating when size_t is narrower than 64 bits. */
    if (limit > (ErlNifUInt64) ((size_t) -1))
      max_size = (size_t) -1;
    else
      max_size = (size_t) limit;
  } else if (!enif_compare(argv[1], enif_make_atom(env, "infinity"))) {
    max_size = (size_t) -1;
  } else {
    return enif_make_badarg(env);
  }

  state_t *state = init_parser_state(&pid);
  if (!state)
    return enif_make_badarg(env);

  state->normalize_ns = 1;
  state->use_maps = use_maps;
  state->gen_server = gen_srv;
  state->max_size = max_size;

  /* The state is fully initialised; the returned term now keeps it alive,
     so our own reference can be dropped. */
  ERL_NIF_TERM result = enif_make_resource(env, state);
  enif_release_resource(state);

  return result;
}
1048 
1049 static ErlNifFunc nif_funcs[] =
1050   {
1051     {"new", 2, new_nif},
1052     {"new", 3, new_nif},
1053     {"parse", 2, parse_nif},
1054     {"parse_element", 1, parse_element_nif},
1055     {"reset", 1, reset_nif},
1056     {"close", 1, close_nif},
1057     {"change_callback_pid", 2, change_callback_pid_nif}
1058   };
1059 
1060 ERL_NIF_INIT(fxml_stream, nif_funcs, load, NULL, NULL, NULL)
1061