1 /*
2 * IXmlReader implementation
3 *
4 * Copyright 2010, 2012-2013, 2016-2017 Nikolay Sivov
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
19 */
20
21 #define COBJMACROS
22
23 #include <stdio.h>
24 #include <stdarg.h>
25 #include <assert.h>
26 #include "windef.h"
27 #include "winbase.h"
28 #include "initguid.h"
29 #include "objbase.h"
30 #include "xmllite.h"
31 #include "xmllite_private.h"
32 #ifdef __REACTOS__
33 #include <winnls.h>
34 #endif
35
36 #include "wine/debug.h"
37 #include "wine/list.h"
38
39 WINE_DEFAULT_DEBUG_CHANNEL(xmllite);
40
/* IID of the IXmlReaderInput interface. It is not defined in public headers,
   so the GUID is defined locally. */
DEFINE_GUID(IID_IXmlReaderInput, 0x0b3ccc9b, 0x9214, 0x428b, 0xa2, 0xae, 0xef, 0x3a, 0xa8, 0x71, 0xaf, 0xda);
43
/* Internal parser state, kept in xmlreader.instate. */
typedef enum
{
    XmlReadInState_Initial,
    XmlReadInState_XmlDecl,
    XmlReadInState_Misc_DTD,
    XmlReadInState_DTD,
    XmlReadInState_DTD_Misc,
    XmlReadInState_Element,
    XmlReadInState_Content,
    XmlReadInState_MiscEnd, /* optional Misc at the end of a document,
                               entered once the root element has been popped */
    XmlReadInState_Eof
} XmlReaderInternalState;
56
/* This state denotes where parsing was interrupted by an input problem.
   The reader resumes parsing using this information; it is kept in
   xmlreader.resumestate. */
typedef enum
{
    XmlReadResumeState_Initial,
    XmlReadResumeState_PITarget,
    XmlReadResumeState_PIBody,
    XmlReadResumeState_CDATA,
    XmlReadResumeState_Comment,
    XmlReadResumeState_STag,
    XmlReadResumeState_CharData,
    XmlReadResumeState_Whitespace
} XmlReaderResumeState;
70
/* Index of a saved input offset used to resume from a particular input position;
   these values index the xmlreader.resume[] array. */
typedef enum
{
    XmlReadResume_Name,  /* PITarget, name for NCName, prefix for QName */
    XmlReadResume_Local, /* local for QName */
    XmlReadResume_Body,  /* PI body, comment text, CDATA text, CharData text */
    XmlReadResume_Last   /* number of entries, used as the array size */
} XmlReaderResume;
79
/* Kinds of string values the reader keeps for the current node;
   these values index the xmlreader.strvalues[] array. */
typedef enum
{
    StringValue_LocalName,
    StringValue_Prefix,
    StringValue_QualifiedName,
    StringValue_Value,
    StringValue_Last   /* number of entries, used as the array size */
} XmlReaderStringValue;
88
/* encoding names, referenced from xml_encoding_map */
static const WCHAR usasciiW[] = {'U','S','-','A','S','C','I','I',0};
static const WCHAR utf16W[] = {'U','T','F','-','1','6',0};
static const WCHAR utf8W[] = {'U','T','F','-','8',0};

/* markup literals */
static const WCHAR dblquoteW[] = {'\"',0};
static const WCHAR quoteW[] = {'\'',0};
static const WCHAR ltW[] = {'<',0};
static const WCHAR gtW[] = {'>',0};
static const WCHAR commentW[] = {'<','!','-','-',0};
static const WCHAR piW[] = {'<','?',0};

/* forward declaration; used by readerinput_detectencoding() before its definition,
   which is not in this part of the file */
BOOL is_namestartchar(WCHAR ch);
101
/* Returns a printable name for a node type, meant for trace output.
   Values above _XmlNodeType_Last are formatted as "unknown type=N". */
static const char *debugstr_nodetype(XmlNodeType nodetype)
{
    /* indexed by node type value; values without a name map to empty strings */
    static const char * const names[] =
    {
        "None",
        "Element",
        "Attribute",
        "Text",
        "CDATA",
        "",
        "",
        "ProcessingInstruction",
        "Comment",
        "",
        "DocumentType",
        "",
        "",
        "Whitespace",
        "",
        "EndElement",
        "",
        "XmlDeclaration"
    };

    return nodetype <= _XmlNodeType_Last
            ? names[nodetype]
            : wine_dbg_sprintf("unknown type=%d", nodetype);
}
131
/* Returns a printable name for a reader property, meant for trace output.
   Values above _XmlReaderProperty_Last are formatted as "unknown property=N". */
static const char *debugstr_reader_prop(XmlReaderProperty prop)
{
    /* indexed by property value */
    static const char * const names[] =
    {
        "MultiLanguage",
        "ConformanceLevel",
        "RandomAccess",
        "XmlResolver",
        "DtdProcessing",
        "ReadState",
        "MaxElementDepth",
        "MaxEntityExpansion"
    };

    return prop <= _XmlReaderProperty_Last
            ? names[prop]
            : wine_dbg_sprintf("unknown property=%d", prop);
}
151
/* Describes one supported encoding: its name, enum value and Windows code page. */
struct xml_encoding_data
{
    const WCHAR *name;
    xml_encoding enc;
    UINT cp;
};

/* Table of supported encodings.
   parse_encoding_name() does a binary search on 'name', and get_encoding_name() /
   get_code_page() index the table directly with an xml_encoding value, so entries
   have to stay sorted by name and their position has to match the enum value. */
static const struct xml_encoding_data xml_encoding_map[] = {
    { usasciiW, XmlEncoding_USASCII, 20127 },
    { utf16W, XmlEncoding_UTF16, 1200 },
    { utf8W, XmlEncoding_UTF8, CP_UTF8 },
};
164
get_encoding_name(xml_encoding encoding)165 const WCHAR *get_encoding_name(xml_encoding encoding)
166 {
167 return xml_encoding_map[encoding].name;
168 }
169
get_encoding_from_codepage(UINT codepage)170 xml_encoding get_encoding_from_codepage(UINT codepage)
171 {
172 int i;
173 for (i = 0; i < ARRAY_SIZE(xml_encoding_map); i++)
174 {
175 if (xml_encoding_map[i].cp == codepage) return xml_encoding_map[i].enc;
176 }
177 return XmlEncoding_Unknown;
178 }
179
/* Growable data buffer. Two instances exist per input: one for raw (encoded)
   stream bytes and one for the converted UTF-16 text, see struct input_buffer. */
typedef struct
{
    char *data;             /* buffer memory */
    UINT cur;               /* current position: a byte offset for the raw buffer,
                               a WCHAR index for the UTF-16 buffer */
    unsigned int allocated; /* size of 'data' in bytes */
    unsigned int written;   /* number of valid bytes in 'data' */
    BOOL prev_cr;           /* last processed char was '\r'; carried between
                               fixup_buffer_cr() calls */
} encoded_buffer;
188
typedef struct input_buffer input_buffer;

/* IXmlReaderInput implementation; wraps the caller supplied input object
   together with the buffers used to read and convert it. */
typedef struct
{
    IXmlReaderInput IXmlReaderInput_iface;
    LONG ref;
    /* reference passed on IXmlReaderInput creation, is kept when input is created */
    IUnknown *input;
    IMalloc *imalloc;        /* allocator used by the readerinput_alloc() family */
    xml_encoding encoding;
    BOOL hint;
    WCHAR *baseuri;
    /* stream reference set after SetInput() call from reader,
       stored as a sequential stream, because optimizations possible
       with IStream aren't currently implemented */
    ISequentialStream *stream;
    input_buffer *buffer;    /* allocated by alloc_input_buffer() */
    unsigned int pending : 1; /* set when the last stream read returned E_PENDING */
} xmlreaderinput;

static const struct IUnknownVtbl xmlreaderinputvtbl;
210
/* Structure to hold parsed string of specific length.

   Reader stores node value as 'start' pointer, on request
   a null-terminated version of it is allocated.

   To init a strval variable use reader_init_strvalue(),
   to set strval as a reader value use reader_set_strvalue().
 */
typedef struct
{
    WCHAR *str;     /* allocated null-terminated string */
    UINT len;       /* length in WCHARs, altered after ReadValueChunk */
    UINT start;     /* input position where value starts */
} strval;

/* Shared constant values. strval_empty.str is also used as a sentinel: helpers
   compare the pointer against it to tell that nothing has to be copied or freed. */
static WCHAR emptyW[] = {0};
static WCHAR xmlW[] = {'x','m','l',0};
static WCHAR xmlnsW[] = {'x','m','l','n','s',0};
static const strval strval_empty = { emptyW };
static const strval strval_xml = { xmlW, 3 };
static const strval strval_xmlns = { xmlnsW, 5 };
232
/* Line/column pair identifying a position in the input. */
struct reader_position
{
    UINT line_number;
    UINT line_position;
};

/* Bit flags stored in attribute.flags. */
enum attribute_flags
{
    ATTRIBUTE_NS_DEFINITION = 0x1,
    ATTRIBUTE_DEFAULT_NS_DEFINITION = 0x2,
};
244
/* Attribute of the current node, linked into xmlreader.attrs.
   localname and value are duplicated by reader_add_attr() and freed by
   reader_clear_attrs(); prefix and qname are stored as passed in. */
struct attribute
{
    struct list entry;
    strval prefix;
    strval localname;
    strval qname;
    strval value;
    struct reader_position position;
    unsigned int flags;     /* enum attribute_flags bits */
};

/* Open element, linked into xmlreader.elements with the most recently
   opened element at the head of the list. */
struct element
{
    struct list entry;
    strval prefix;
    strval localname;
    strval qname;
    struct reader_position position;
};

/* Namespace definition, linked into xmlreader.ns or xmlreader.nsdef. */
struct ns
{
    struct list entry;
    strval prefix;
    strval uri;
    struct element *element;  /* NULL until reader_mark_ns_nodes() assigns the
                                 element the definition belongs to */
};
272
/* IXmlReader implementation state. */
typedef struct
{
    IXmlReader IXmlReader_iface;
    LONG ref;
    xmlreaderinput *input;
    IMalloc *imalloc;           /* allocator used by the reader_alloc() family */
    XmlReadState state;
    HRESULT error; /* error set on XmlReadState_Error */
    XmlReaderInternalState instate;
    XmlReaderResumeState resumestate;
    XmlNodeType nodetype;
    DtdProcessing dtdmode;
    IXmlResolver *resolver;
    IUnknown *mlang;
    struct reader_position position;
    struct list attrs; /* attributes list for current node */
    struct attribute *attr; /* current attribute */
    UINT attr_count;
    struct list nsdef;          /* default namespace definitions */
    struct list ns;             /* prefixed namespace definitions */
    struct list elements;       /* open elements, innermost first */
    int chunk_read_off;
    strval strvalues[StringValue_Last]; /* indexed by XmlReaderStringValue */
    UINT depth;
    UINT max_depth;             /* 0 disables the check in reader_inc_depth() */
    BOOL is_empty_element;
    struct element empty_element; /* used for empty elements without end tag <a />,
                                     and to keep <?xml reader position */
    UINT resume[XmlReadResume_Last]; /* offsets used to resume reader */
} xmlreader;
303
/* Pair of buffers owned by a reader input: raw stream bytes and their
   UTF-16 conversion. */
struct input_buffer
{
    encoded_buffer utf16;    /* converted text, consumed by the parser */
    encoded_buffer encoded;  /* raw bytes as read from the stream */
    UINT code_page;          /* code page of 'encoded'; ~0 until it is known */
    xmlreaderinput *input;   /* owner, provides the allocator */
};
311
impl_from_IXmlReader(IXmlReader * iface)312 static inline xmlreader *impl_from_IXmlReader(IXmlReader *iface)
313 {
314 return CONTAINING_RECORD(iface, xmlreader, IXmlReader_iface);
315 }
316
impl_from_IXmlReaderInput(IXmlReaderInput * iface)317 static inline xmlreaderinput *impl_from_IXmlReaderInput(IXmlReaderInput *iface)
318 {
319 return CONTAINING_RECORD(iface, xmlreaderinput, IXmlReaderInput_iface);
320 }
321
322 /* reader memory allocation functions */
reader_alloc(xmlreader * reader,size_t len)323 static inline void *reader_alloc(xmlreader *reader, size_t len)
324 {
325 return m_alloc(reader->imalloc, len);
326 }
327
/* Same as reader_alloc(), but the returned block is zero-filled.
   Returns NULL if the allocation fails. */
static inline void *reader_alloc_zero(xmlreader *reader, size_t len)
{
    void *mem = reader_alloc(reader, len);

    if (!mem)
        return NULL;

    memset(mem, 0, len);
    return mem;
}
335
/* Releases memory obtained through the reader's allocator. */
static inline void reader_free(xmlreader *reader, void *mem)
{
    IMalloc *allocator = reader->imalloc;
    m_free(allocator, mem);
}
340
341 /* Just return pointer from offset, no attempt to read more. */
reader_get_ptr2(const xmlreader * reader,UINT offset)342 static inline WCHAR *reader_get_ptr2(const xmlreader *reader, UINT offset)
343 {
344 encoded_buffer *buffer = &reader->input->buffer->utf16;
345 return (WCHAR*)buffer->data + offset;
346 }
347
reader_get_strptr(const xmlreader * reader,const strval * v)348 static inline WCHAR *reader_get_strptr(const xmlreader *reader, const strval *v)
349 {
350 return v->str ? v->str : reader_get_ptr2(reader, v->start);
351 }
352
reader_strvaldup(xmlreader * reader,const strval * src,strval * dest)353 static HRESULT reader_strvaldup(xmlreader *reader, const strval *src, strval *dest)
354 {
355 *dest = *src;
356
357 if (src->str != strval_empty.str)
358 {
359 dest->str = reader_alloc(reader, (dest->len+1)*sizeof(WCHAR));
360 if (!dest->str) return E_OUTOFMEMORY;
361 memcpy(dest->str, reader_get_strptr(reader, src), dest->len*sizeof(WCHAR));
362 dest->str[dest->len] = 0;
363 dest->start = 0;
364 }
365
366 return S_OK;
367 }
368
369 /* reader input memory allocation functions */
readerinput_alloc(xmlreaderinput * input,size_t len)370 static inline void *readerinput_alloc(xmlreaderinput *input, size_t len)
371 {
372 return m_alloc(input->imalloc, len);
373 }
374
/* Resizes a block through the input's allocator. */
static inline void *readerinput_realloc(xmlreaderinput *input, void *mem, size_t len)
{
    IMalloc *allocator = input->imalloc;
    return m_realloc(allocator, mem, len);
}
379
/* Releases memory obtained through the input's allocator. */
static inline void readerinput_free(xmlreaderinput *input, void *mem)
{
    IMalloc *allocator = input->imalloc;
    m_free(allocator, mem);
}
384
/* Duplicates a null-terminated wide string using the input's allocator.
   Returns NULL for a NULL source or when the allocation fails. */
static inline WCHAR *readerinput_strdupW(xmlreaderinput *input, const WCHAR *str)
{
    LPWSTR copy;
    DWORD size;

    if (!str)
        return NULL;

    /* size in bytes, terminator included */
    size = (lstrlenW(str)+1)*sizeof(WCHAR);
    copy = readerinput_alloc(input, size);
    if (copy)
        memcpy(copy, str, size);

    return copy;
}
399
400 /* This one frees stored string value if needed */
reader_free_strvalued(xmlreader * reader,strval * v)401 static void reader_free_strvalued(xmlreader *reader, strval *v)
402 {
403 if (v->str != strval_empty.str)
404 {
405 reader_free(reader, v->str);
406 *v = strval_empty;
407 }
408 }
409
/* Frees every attribute of the current node and resets the attribute list,
   the attribute count and the current attribute. Only localname and value
   own memory, see reader_add_attr(). */
static void reader_clear_attrs(xmlreader *reader)
{
    struct list *cursor, *next;

    LIST_FOR_EACH_SAFE(cursor, next, &reader->attrs)
    {
        struct attribute *attr = LIST_ENTRY(cursor, struct attribute, entry);

        reader_free_strvalued(reader, &attr->localname);
        reader_free_strvalued(reader, &attr->value);
        reader_free(reader, attr);
    }

    list_init(&reader->attrs);
    reader->attr = NULL;
    reader->attr_count = 0;
}
423
424 /* attribute data holds pointers to buffer data, so buffer shrink is not possible
425 while we are on a node with attributes */
reader_add_attr(xmlreader * reader,strval * prefix,strval * localname,strval * qname,strval * value,const struct reader_position * position,unsigned int flags)426 static HRESULT reader_add_attr(xmlreader *reader, strval *prefix, strval *localname, strval *qname,
427 strval *value, const struct reader_position *position, unsigned int flags)
428 {
429 struct attribute *attr;
430 HRESULT hr;
431
432 attr = reader_alloc(reader, sizeof(*attr));
433 if (!attr) return E_OUTOFMEMORY;
434
435 hr = reader_strvaldup(reader, localname, &attr->localname);
436 if (hr == S_OK)
437 {
438 hr = reader_strvaldup(reader, value, &attr->value);
439 if (hr != S_OK)
440 reader_free_strvalued(reader, &attr->value);
441 }
442 if (hr != S_OK)
443 {
444 reader_free(reader, attr);
445 return hr;
446 }
447
448 if (prefix)
449 attr->prefix = *prefix;
450 else
451 memset(&attr->prefix, 0, sizeof(attr->prefix));
452 attr->qname = qname ? *qname : *localname;
453 attr->position = *position;
454 attr->flags = flags;
455 list_add_tail(&reader->attrs, &attr->entry);
456 reader->attr_count++;
457
458 return S_OK;
459 }
460
461 /* Returns current element, doesn't check if reader is actually positioned on it. */
reader_get_element(xmlreader * reader)462 static struct element *reader_get_element(xmlreader *reader)
463 {
464 if (reader->is_empty_element)
465 return &reader->empty_element;
466
467 return LIST_ENTRY(list_head(&reader->elements), struct element, entry);
468 }
469
/* Initializes a strval that refers to 'len' WCHARs of the input buffer
   starting at offset 'start'; no string is allocated. */
static inline void reader_init_strvalue(UINT start, UINT len, strval *v)
{
    v->str = NULL;
    v->len = len;
    v->start = start;
}
476
debug_strval(const xmlreader * reader,const strval * v)477 static inline const char* debug_strval(const xmlreader *reader, const strval *v)
478 {
479 return debugstr_wn(reader_get_strptr(reader, v), v->len);
480 }
481
482 /* used to initialize from constant string */
reader_init_cstrvalue(WCHAR * str,UINT len,strval * v)483 static inline void reader_init_cstrvalue(WCHAR *str, UINT len, strval *v)
484 {
485 v->start = 0;
486 v->len = len;
487 v->str = str;
488 }
489
/* Frees one of the reader's node string values. */
static void reader_free_strvalue(xmlreader *reader, XmlReaderStringValue type)
{
    strval *slot = &reader->strvalues[type];
    reader_free_strvalued(reader, slot);
}
494
/* Frees all of the reader's node string values. */
static void reader_free_strvalues(xmlreader *reader)
{
    int type = 0;

    while (type < StringValue_Last)
    {
        reader_free_strvalue(reader, type);
        type++;
    }
}
501
502 /* This helper should only be used to test if strings are the same,
503 it doesn't try to sort. */
strval_eq(const xmlreader * reader,const strval * str1,const strval * str2)504 static inline int strval_eq(const xmlreader *reader, const strval *str1, const strval *str2)
505 {
506 if (str1->len != str2->len) return 0;
507 return !memcmp(reader_get_strptr(reader, str1), reader_get_strptr(reader, str2), str1->len*sizeof(WCHAR));
508 }
509
/* Frees all open elements, resets the elements list and releases the strings
   held by the empty-element slot. */
static void reader_clear_elements(xmlreader *reader)
{
    struct list *cursor, *next;

    LIST_FOR_EACH_SAFE(cursor, next, &reader->elements)
    {
        struct element *elem = LIST_ENTRY(cursor, struct element, entry);

        reader_free_strvalued(reader, &elem->prefix);
        reader_free_strvalued(reader, &elem->localname);
        reader_free_strvalued(reader, &elem->qname);
        reader_free(reader, elem);
    }
    list_init(&reader->elements);

    reader_free_strvalued(reader, &reader->empty_element.localname);
    reader_free_strvalued(reader, &reader->empty_element.qname);
    reader->is_empty_element = FALSE;
}
525
/* Looks up a namespace definition by prefix.

   A non-NULL prefix selects the reader->ns list, a NULL prefix selects
   reader->nsdef. The selected list is walked from tail to head and the first
   entry whose prefix compares equal is returned; NULL means no match.

   NOTE(review): prefix is handed to strval_eq() unconditionally, and strval_eq()
   reads prefix->len, so a NULL prefix looks unsafe whenever nsdef is not empty -
   confirm that callers never pass NULL in that situation. */
static struct ns *reader_lookup_ns(xmlreader *reader, const strval *prefix)
{
    struct list *nslist = prefix ? &reader->ns : &reader->nsdef;
    struct ns *ns;

    LIST_FOR_EACH_ENTRY_REV(ns, nslist, struct ns, entry) {
        if (strval_eq(reader, prefix, &ns->prefix))
            return ns;
    }

    return NULL;
}
538
reader_inc_depth(xmlreader * reader)539 static HRESULT reader_inc_depth(xmlreader *reader)
540 {
541 return (++reader->depth >= reader->max_depth && reader->max_depth) ? SC_E_MAXELEMENTDEPTH : S_OK;
542 }
543
/* Decrements the element depth, never going below zero. */
static void reader_dec_depth(xmlreader *reader)
{
    if (reader->depth == 0)
        return;

    --reader->depth;
}
549
reader_push_ns(xmlreader * reader,const strval * prefix,const strval * uri,BOOL def)550 static HRESULT reader_push_ns(xmlreader *reader, const strval *prefix, const strval *uri, BOOL def)
551 {
552 struct ns *ns;
553 HRESULT hr;
554
555 ns = reader_alloc(reader, sizeof(*ns));
556 if (!ns) return E_OUTOFMEMORY;
557
558 if (def)
559 memset(&ns->prefix, 0, sizeof(ns->prefix));
560 else {
561 hr = reader_strvaldup(reader, prefix, &ns->prefix);
562 if (FAILED(hr)) {
563 reader_free(reader, ns);
564 return hr;
565 }
566 }
567
568 hr = reader_strvaldup(reader, uri, &ns->uri);
569 if (FAILED(hr)) {
570 reader_free_strvalued(reader, &ns->prefix);
571 reader_free(reader, ns);
572 return hr;
573 }
574
575 ns->element = NULL;
576 list_add_head(def ? &reader->nsdef : &reader->ns, &ns->entry);
577 return hr;
578 }
579
/* Frees an element together with its name strings. The element is expected
   to be unlinked from any list already. */
static void reader_free_element(xmlreader *reader, struct element *element)
{
    strval *names[3];
    unsigned int i;

    names[0] = &element->prefix;
    names[1] = &element->localname;
    names[2] = &element->qname;

    for (i = 0; i < 3; i++)
        reader_free_strvalued(reader, names[i]);

    reader_free(reader, element);
}
587
/* Assigns 'element' as the owner of namespace definitions that have no owner
   yet. Both lists are walked from the head, and each walk stops at the first
   entry that already has an owner. */
static void reader_mark_ns_nodes(xmlreader *reader, struct element *element)
{
    struct list *lists[2];
    struct ns *ns;
    int i;

    lists[0] = &reader->ns;
    lists[1] = &reader->nsdef;

    for (i = 0; i < 2; i++)
    {
        LIST_FOR_EACH_ENTRY(ns, lists[i], struct ns, entry)
        {
            if (ns->element)
                break;
            ns->element = element;
        }
    }
}
604
reader_push_element(xmlreader * reader,strval * prefix,strval * localname,strval * qname,const struct reader_position * position)605 static HRESULT reader_push_element(xmlreader *reader, strval *prefix, strval *localname,
606 strval *qname, const struct reader_position *position)
607 {
608 struct element *element;
609 HRESULT hr;
610
611 element = reader_alloc_zero(reader, sizeof(*element));
612 if (!element)
613 return E_OUTOFMEMORY;
614
615 if ((hr = reader_strvaldup(reader, prefix, &element->prefix)) == S_OK &&
616 (hr = reader_strvaldup(reader, localname, &element->localname)) == S_OK &&
617 (hr = reader_strvaldup(reader, qname, &element->qname)) == S_OK)
618 {
619 list_add_head(&reader->elements, &element->entry);
620 reader_mark_ns_nodes(reader, element);
621 reader->is_empty_element = FALSE;
622 element->position = *position;
623 }
624 else
625 reader_free_element(reader, element);
626
627 return hr;
628 }
629
/* Removes and frees namespace definitions owned by 'element'.

   The prefixed list (reader->ns) is walked from tail to head, entries are
   removed until the first one owned by a different element is met. For the
   default namespace list (reader->nsdef) only the head entry is looked at and
   removed if it is owned by 'element'. */
static void reader_pop_ns_nodes(xmlreader *reader, struct element *element)
{
    struct ns *ns, *ns2;

    LIST_FOR_EACH_ENTRY_SAFE_REV(ns, ns2, &reader->ns, struct ns, entry) {
        /* stop at the first definition that belongs to another element */
        if (ns->element != element)
            break;

        list_remove(&ns->entry);
        reader_free_strvalued(reader, &ns->prefix);
        reader_free_strvalued(reader, &ns->uri);
        reader_free(reader, ns);
    }

    if (!list_empty(&reader->nsdef)) {
        ns = LIST_ENTRY(list_head(&reader->nsdef), struct ns, entry);
        if (ns->element == element) {
            list_remove(&ns->entry);
            reader_free_strvalued(reader, &ns->prefix);
            reader_free_strvalued(reader, &ns->uri);
            reader_free(reader, ns);
        }
    }
}
654
/* Pops the innermost open element, releasing it and the namespace definitions
   it owns. Does nothing when no element is open. Once the last element has
   been popped the reader expects only trailing Misc content. */
static void reader_pop_element(xmlreader *reader)
{
    struct list *head = list_head(&reader->elements);
    struct element *element;

    /* list_head() returns NULL for an empty list */
    if (!head)
        return;

    element = LIST_ENTRY(head, struct element, entry);
    list_remove(&element->entry);

    reader_pop_ns_nodes(reader, element);
    reader_free_element(reader, element);

    /* It was a root element, the rest is expected as Misc */
    if (list_empty(&reader->elements))
        reader->instate = XmlReadInState_MiscEnd;
}
672
673 /* Always make a copy, cause strings are supposed to be null terminated. Null pointer for 'value'
674 means node value is to be determined. */
reader_set_strvalue(xmlreader * reader,XmlReaderStringValue type,const strval * value)675 static void reader_set_strvalue(xmlreader *reader, XmlReaderStringValue type, const strval *value)
676 {
677 strval *v = &reader->strvalues[type];
678
679 reader_free_strvalue(reader, type);
680 if (!value)
681 {
682 v->str = NULL;
683 v->start = 0;
684 v->len = 0;
685 return;
686 }
687
688 if (value->str == strval_empty.str)
689 *v = *value;
690 else
691 {
692 if (type == StringValue_Value)
693 {
694 /* defer allocation for value string */
695 v->str = NULL;
696 v->start = value->start;
697 v->len = value->len;
698 }
699 else
700 {
701 v->str = reader_alloc(reader, (value->len + 1)*sizeof(WCHAR));
702 memcpy(v->str, reader_get_strptr(reader, value), value->len*sizeof(WCHAR));
703 v->str[value->len] = 0;
704 v->len = value->len;
705 }
706 }
707 }
708
/* Returns non-zero when the last read from the input stream reported
   E_PENDING. */
static inline int is_reader_pending(xmlreader *reader)
{
    const xmlreaderinput *input = reader->input;
    return input->pending;
}
713
init_encoded_buffer(xmlreaderinput * input,encoded_buffer * buffer)714 static HRESULT init_encoded_buffer(xmlreaderinput *input, encoded_buffer *buffer)
715 {
716 const int initial_len = 0x2000;
717 buffer->data = readerinput_alloc(input, initial_len);
718 if (!buffer->data) return E_OUTOFMEMORY;
719
720 memset(buffer->data, 0, 4);
721 buffer->cur = 0;
722 buffer->allocated = initial_len;
723 buffer->written = 0;
724 buffer->prev_cr = FALSE;
725
726 return S_OK;
727 }
728
/* Releases the storage of an encoded_buffer; the structure itself is not
   touched. */
static void free_encoded_buffer(xmlreaderinput *input, encoded_buffer *buffer)
{
    char *data = buffer->data;
    readerinput_free(input, data);
}
733
get_code_page(xml_encoding encoding,UINT * cp)734 HRESULT get_code_page(xml_encoding encoding, UINT *cp)
735 {
736 if (encoding == XmlEncoding_Unknown)
737 {
738 FIXME("unsupported encoding %d\n", encoding);
739 return E_NOTIMPL;
740 }
741
742 *cp = xml_encoding_map[encoding].cp;
743
744 return S_OK;
745 }
746
parse_encoding_name(const WCHAR * name,int len)747 xml_encoding parse_encoding_name(const WCHAR *name, int len)
748 {
749 int min, max, n, c;
750
751 if (!name) return XmlEncoding_Unknown;
752
753 min = 0;
754 max = ARRAY_SIZE(xml_encoding_map) - 1;
755
756 while (min <= max)
757 {
758 n = (min+max)/2;
759
760 if (len != -1)
761 c = _wcsnicmp(xml_encoding_map[n].name, name, len);
762 else
763 c = wcsicmp(xml_encoding_map[n].name, name);
764 if (!c)
765 return xml_encoding_map[n].enc;
766
767 if (c > 0)
768 max = n-1;
769 else
770 min = n+1;
771 }
772
773 return XmlEncoding_Unknown;
774 }
775
alloc_input_buffer(xmlreaderinput * input)776 static HRESULT alloc_input_buffer(xmlreaderinput *input)
777 {
778 input_buffer *buffer;
779 HRESULT hr;
780
781 input->buffer = NULL;
782
783 buffer = readerinput_alloc(input, sizeof(*buffer));
784 if (!buffer) return E_OUTOFMEMORY;
785
786 buffer->input = input;
787 buffer->code_page = ~0; /* code page is unknown at this point */
788 hr = init_encoded_buffer(input, &buffer->utf16);
789 if (hr != S_OK) {
790 readerinput_free(input, buffer);
791 return hr;
792 }
793
794 hr = init_encoded_buffer(input, &buffer->encoded);
795 if (hr != S_OK) {
796 free_encoded_buffer(input, &buffer->utf16);
797 readerinput_free(input, buffer);
798 return hr;
799 }
800
801 input->buffer = buffer;
802 return S_OK;
803 }
804
/* Frees both encoded buffers and then the input_buffer itself, using the
   allocator of the owning input. */
static void free_input_buffer(input_buffer *buffer)
{
    xmlreaderinput *owner = buffer->input;

    free_encoded_buffer(owner, &buffer->encoded);
    free_encoded_buffer(owner, &buffer->utf16);
    readerinput_free(owner, buffer);
}
811
/* Releases the stream reference held by the input, if any, and clears it. */
static void readerinput_release_stream(xmlreaderinput *readerinput)
{
    if (!readerinput->stream)
        return;

    ISequentialStream_Release(readerinput->stream);
    readerinput->stream = NULL;
}
819
820 /* Queries already stored interface for IStream/ISequentialStream.
821 Interface supplied on creation will be overwritten */
readerinput_query_for_stream(xmlreaderinput * readerinput)822 static inline HRESULT readerinput_query_for_stream(xmlreaderinput *readerinput)
823 {
824 HRESULT hr;
825
826 readerinput_release_stream(readerinput);
827 hr = IUnknown_QueryInterface(readerinput->input, &IID_IStream, (void**)&readerinput->stream);
828 if (hr != S_OK)
829 hr = IUnknown_QueryInterface(readerinput->input, &IID_ISequentialStream, (void**)&readerinput->stream);
830
831 return hr;
832 }
833
834 /* reads a chunk to raw buffer */
readerinput_growraw(xmlreaderinput * readerinput)835 static HRESULT readerinput_growraw(xmlreaderinput *readerinput)
836 {
837 encoded_buffer *buffer = &readerinput->buffer->encoded;
838 /* to make sure aligned length won't exceed allocated length */
839 ULONG len = buffer->allocated - buffer->written - 4;
840 ULONG read;
841 HRESULT hr;
842
843 /* always try to get aligned to 4 bytes, so the only case we can get partially read characters is
844 variable width encodings like UTF-8 */
845 len = (len + 3) & ~3;
846 /* try to use allocated space or grow */
847 if (buffer->allocated - buffer->written < len)
848 {
849 buffer->allocated *= 2;
850 buffer->data = readerinput_realloc(readerinput, buffer->data, buffer->allocated);
851 len = buffer->allocated - buffer->written;
852 }
853
854 read = 0;
855 hr = ISequentialStream_Read(readerinput->stream, buffer->data + buffer->written, len, &read);
856 TRACE("written=%d, alloc=%d, requested=%d, read=%d, ret=0x%08x\n", buffer->written, buffer->allocated, len, read, hr);
857 readerinput->pending = hr == E_PENDING;
858 if (FAILED(hr)) return hr;
859 buffer->written += read;
860
861 return hr;
862 }
863
864 /* grows UTF-16 buffer so it has at least 'length' WCHAR chars free on return */
readerinput_grow(xmlreaderinput * readerinput,int length)865 static void readerinput_grow(xmlreaderinput *readerinput, int length)
866 {
867 encoded_buffer *buffer = &readerinput->buffer->utf16;
868
869 length *= sizeof(WCHAR);
870 /* grow if needed, plus 4 bytes to be sure null terminator will fit in */
871 if (buffer->allocated < buffer->written + length + 4)
872 {
873 int grown_size = max(2*buffer->allocated, buffer->allocated + length);
874 buffer->data = readerinput_realloc(readerinput, buffer->data, grown_size);
875 buffer->allocated = grown_size;
876 }
877 }
878
readerinput_is_utf8(xmlreaderinput * readerinput)879 static inline BOOL readerinput_is_utf8(xmlreaderinput *readerinput)
880 {
881 static const char startA[] = {'<','?'};
882 static const char commentA[] = {'<','!'};
883 encoded_buffer *buffer = &readerinput->buffer->encoded;
884 unsigned char *ptr = (unsigned char*)buffer->data;
885
886 return !memcmp(buffer->data, startA, sizeof(startA)) ||
887 !memcmp(buffer->data, commentA, sizeof(commentA)) ||
888 /* test start byte */
889 (ptr[0] == '<' &&
890 (
891 (ptr[1] && (ptr[1] <= 0x7f)) ||
892 (buffer->data[1] >> 5) == 0x6 || /* 2 bytes */
893 (buffer->data[1] >> 4) == 0xe || /* 3 bytes */
894 (buffer->data[1] >> 3) == 0x1e) /* 4 bytes */
895 );
896 }
897
/* Tries to detect the input encoding from the first raw bytes.

   *enc is set to XmlEncoding_UTF8, XmlEncoding_UTF16, or left as
   XmlEncoding_Unknown when nothing matched. When a byte order mark is
   recognized, the raw buffer position is advanced past it.

   Returns S_OK, a failure from reading the stream, or MX_E_INPUTEND when
   fewer than 3 bytes are available. */
static HRESULT readerinput_detectencoding(xmlreaderinput *readerinput, xml_encoding *enc)
{
    encoded_buffer *buffer = &readerinput->buffer->encoded;
    static const char utf8bom[] = {0xef,0xbb,0xbf};
    static const char utf16lebom[] = {0xff,0xfe};
    WCHAR *ptrW;

    *enc = XmlEncoding_Unknown;

    /* make sure there is enough data to look at */
    if (buffer->written <= 3)
    {
        HRESULT hr = readerinput_growraw(readerinput);
        if (FAILED(hr)) return hr;
        if (buffer->written < 3) return MX_E_INPUTEND;
    }

    ptrW = (WCHAR *)buffer->data;
    /* try start symbols if we have enough data to do that, input buffer should contain
       first chunk already */
    if (readerinput_is_utf8(readerinput))
        *enc = XmlEncoding_UTF8;
    else if (*ptrW == '<')
    {
        /* first 16-bit unit is '<', check what follows it */
        ptrW++;
        if (*ptrW == '?' || *ptrW == '!' || is_namestartchar(*ptrW))
            *enc = XmlEncoding_UTF16;
    }
    /* try with BOM now */
    else if (!memcmp(buffer->data, utf8bom, sizeof(utf8bom)))
    {
        buffer->cur += sizeof(utf8bom);
        *enc = XmlEncoding_UTF8;
    }
    else if (!memcmp(buffer->data, utf16lebom, sizeof(utf16lebom)))
    {
        buffer->cur += sizeof(utf16lebom);
        *enc = XmlEncoding_UTF16;
    }

    return S_OK;
}
939
/* Returns the number of bytes, counted from the start of the raw buffer, that
   form complete UTF-8 sequences.

   The result is the full written length unless the data ends in the middle of
   a multibyte sequence, in which case it is the offset of that sequence's lead
   byte. Malformed tails (no lead byte within the last 4 bytes, or an invalid
   lead byte) are not held back, they are left to the converter. Returns 0 for
   an empty buffer. */
static int readerinput_get_utf8_convlen(xmlreaderinput *readerinput)
{
    encoded_buffer *buffer = &readerinput->buffer->encoded;
    const unsigned char *data = (const unsigned char *)buffer->data;
    int len = buffer->written;
    int start, seq_len;

    if (len <= 0) return 0;

    /* complete single byte char */
    if (!(data[len-1] & 0x80)) return len;

    /* find start byte of multibyte char: skip continuation bytes (10xxxxxx),
       a sequence is 4 bytes long at most */
    start = len - 1;
    while (start > 0 && len - start < 4 && (data[start] & 0xc0) == 0x80)
        start--;

    if ((data[start] & 0xe0) == 0xc0)
        seq_len = 2;
    else if ((data[start] & 0xf0) == 0xe0)
        seq_len = 3;
    else if ((data[start] & 0xf8) == 0xf0)
        seq_len = 4;
    else
        return len; /* not a lead byte, malformed input */

    /* hold the trailing sequence back only if it is incomplete */
    return (len - start >= seq_len) ? len : start;
}
954
955 /* Returns byte length of complete char sequence for buffer code page,
956 it's relative to current buffer position which is currently used for BOM handling
957 only. */
readerinput_get_convlen(xmlreaderinput * readerinput)958 static int readerinput_get_convlen(xmlreaderinput *readerinput)
959 {
960 encoded_buffer *buffer = &readerinput->buffer->encoded;
961 int len;
962
963 if (readerinput->buffer->code_page == CP_UTF8)
964 len = readerinput_get_utf8_convlen(readerinput);
965 else
966 len = buffer->written;
967
968 TRACE("%d\n", len - buffer->cur);
969 return len - buffer->cur;
970 }
971
972 /* It's possible that raw buffer has some leftovers from last conversion - some char
973 sequence that doesn't represent a full code point. Length argument should be calculated with
974 readerinput_get_convlen(), if it's -1 it will be calculated here. */
readerinput_shrinkraw(xmlreaderinput * readerinput,int len)975 static void readerinput_shrinkraw(xmlreaderinput *readerinput, int len)
976 {
977 encoded_buffer *buffer = &readerinput->buffer->encoded;
978
979 if (len == -1)
980 len = readerinput_get_convlen(readerinput);
981
982 memmove(buffer->data, buffer->data + buffer->cur + (buffer->written - len), len);
983 /* everything below cur is lost too */
984 buffer->written -= len + buffer->cur;
985 /* after this point we don't need cur offset really,
986 it's used only to mark where actual data begins when first chunk is read */
987 buffer->cur = 0;
988 }
989
/* Normalizes line endings in place in a UTF-16 buffer, starting at WCHAR
   offset 'off': every '\r' becomes '\n', and a '\n' that directly follows a
   '\r' is dropped. The '\r' state is carried over between calls through
   buffer->prev_cr, so a "\r\n" pair split across two calls is still handled.
   The written length is updated and the data is null-terminated. */
static void fixup_buffer_cr(encoded_buffer *buffer, int off)
{
    const char *end = buffer->data + buffer->written;
    BOOL after_cr = buffer->prev_cr;
    WCHAR *out = (WCHAR*)buffer->data + off;
    const WCHAR *in = out;

    while ((const char*)in < end)
    {
        WCHAR ch = *in++;

        if (ch == '\r')
        {
            *out++ = '\n';
            after_cr = TRUE;
        }
        else
        {
            /* skip the '\n' of a "\r\n" pair */
            if (!(after_cr && ch == '\n'))
                *out++ = ch;
            after_cr = FALSE;
        }
    }

    buffer->written = (char*)out - buffer->data;
    buffer->prev_cr = after_cr;
    *out = 0;
}
1017
1018 /* note that raw buffer content is kept */
readerinput_switchencoding(xmlreaderinput * readerinput,xml_encoding enc)1019 static void readerinput_switchencoding(xmlreaderinput *readerinput, xml_encoding enc)
1020 {
1021 encoded_buffer *src = &readerinput->buffer->encoded;
1022 encoded_buffer *dest = &readerinput->buffer->utf16;
1023 int len, dest_len;
1024 UINT cp = ~0u;
1025 HRESULT hr;
1026 WCHAR *ptr;
1027
1028 hr = get_code_page(enc, &cp);
1029 if (FAILED(hr)) return;
1030
1031 readerinput->buffer->code_page = cp;
1032 len = readerinput_get_convlen(readerinput);
1033
1034 TRACE("switching to cp %d\n", cp);
1035
1036 /* just copy in this case */
1037 if (enc == XmlEncoding_UTF16)
1038 {
1039 readerinput_grow(readerinput, len);
1040 memcpy(dest->data, src->data + src->cur, len);
1041 dest->written += len*sizeof(WCHAR);
1042 }
1043 else
1044 {
1045 dest_len = MultiByteToWideChar(cp, 0, src->data + src->cur, len, NULL, 0);
1046 readerinput_grow(readerinput, dest_len);
1047 ptr = (WCHAR*)dest->data;
1048 MultiByteToWideChar(cp, 0, src->data + src->cur, len, ptr, dest_len);
1049 ptr[dest_len] = 0;
1050 dest->written += dest_len*sizeof(WCHAR);
1051 }
1052
1053 fixup_buffer_cr(dest, 0);
1054 }
1055
1056 /* shrinks parsed data a buffer begins with */
reader_shrink(xmlreader * reader)1057 static void reader_shrink(xmlreader *reader)
1058 {
1059 encoded_buffer *buffer = &reader->input->buffer->utf16;
1060
1061 /* avoid to move too often using threshold shrink length */
1062 if (buffer->cur*sizeof(WCHAR) > buffer->written / 2)
1063 {
1064 buffer->written -= buffer->cur*sizeof(WCHAR);
1065 memmove(buffer->data, (WCHAR*)buffer->data + buffer->cur, buffer->written);
1066 buffer->cur = 0;
1067 *(WCHAR*)&buffer->data[buffer->written] = 0;
1068 }
1069 }
1070
1071 /* This is a normal way for reader to get new data converted from raw buffer to utf16 buffer.
1072 It won't attempt to shrink but will grow destination buffer if needed */
reader_more(xmlreader * reader)1073 static HRESULT reader_more(xmlreader *reader)
1074 {
1075 xmlreaderinput *readerinput = reader->input;
1076 encoded_buffer *src = &readerinput->buffer->encoded;
1077 encoded_buffer *dest = &readerinput->buffer->utf16;
1078 UINT cp = readerinput->buffer->code_page;
1079 int len, dest_len, prev_len;
1080 HRESULT hr;
1081 WCHAR *ptr;
1082
1083 /* get some raw data from stream first */
1084 hr = readerinput_growraw(readerinput);
1085 len = readerinput_get_convlen(readerinput);
1086 prev_len = dest->written / sizeof(WCHAR);
1087
1088 /* just copy for UTF-16 case */
1089 if (cp == 1200)
1090 {
1091 readerinput_grow(readerinput, len);
1092 memcpy(dest->data + dest->written, src->data + src->cur, len);
1093 dest->written += len*sizeof(WCHAR);
1094 }
1095 else
1096 {
1097 dest_len = MultiByteToWideChar(cp, 0, src->data + src->cur, len, NULL, 0);
1098 readerinput_grow(readerinput, dest_len);
1099 ptr = (WCHAR*)(dest->data + dest->written);
1100 MultiByteToWideChar(cp, 0, src->data + src->cur, len, ptr, dest_len);
1101 ptr[dest_len] = 0;
1102 dest->written += dest_len*sizeof(WCHAR);
1103 /* get rid of processed data */
1104 readerinput_shrinkraw(readerinput, len);
1105 }
1106
1107 fixup_buffer_cr(dest, prev_len);
1108 return hr;
1109 }
1110
reader_get_cur(xmlreader * reader)1111 static inline UINT reader_get_cur(xmlreader *reader)
1112 {
1113 return reader->input->buffer->utf16.cur;
1114 }
1115
reader_get_ptr(xmlreader * reader)1116 static inline WCHAR *reader_get_ptr(xmlreader *reader)
1117 {
1118 encoded_buffer *buffer = &reader->input->buffer->utf16;
1119 WCHAR *ptr = (WCHAR*)buffer->data + buffer->cur;
1120 if (!*ptr) reader_more(reader);
1121 return (WCHAR*)buffer->data + buffer->cur;
1122 }
1123
/* Compares null terminated 'str' with input data at current position, without moving
   the cursor. More data is requested when input ends before 'str' does. Returns 0 on
   match, or a difference between first mismatching input and 'str' characters. */
static int reader_cmp(xmlreader *reader, const WCHAR *str)
{
    const WCHAR *data = reader_get_ptr(reader);
    int pos;

    for (pos = 0; str[pos]; pos++)
    {
        if (!data[pos])
        {
            reader_more(reader);
            data = reader_get_ptr(reader);
        }

        if (data[pos] != str[pos])
            return data[pos] - str[pos];
    }

    return 0;
}
1141
/* Updates line number and line position for a character that is being consumed:
   '\n' starts next line, '\r' only resets position within a line, anything else
   moves position one step forward. */
static void reader_update_position(xmlreader *reader, WCHAR ch)
{
    switch (ch)
    {
    case '\n':
        reader->position.line_number++;
        /* fallthrough */
    case '\r':
        reader->position.line_position = 1;
        break;
    default:
        reader->position.line_position++;
        break;
    }
}
1154
1155 /* moves cursor n WCHARs forward */
reader_skipn(xmlreader * reader,int n)1156 static void reader_skipn(xmlreader *reader, int n)
1157 {
1158 encoded_buffer *buffer = &reader->input->buffer->utf16;
1159 const WCHAR *ptr;
1160
1161 while (*(ptr = reader_get_ptr(reader)) && n--)
1162 {
1163 reader_update_position(reader, *ptr);
1164 buffer->cur++;
1165 }
1166 }
1167
/* Tells if character is one of whitespace characters, see production [3]. */
static inline BOOL is_wchar_space(WCHAR ch)
{
    switch (ch)
    {
    case ' ':
    case '\t':
    case '\r':
    case '\n':
        return TRUE;
    default:
        return FALSE;
    }
}
1172
1173 /* [3] S ::= (#x20 | #x9 | #xD | #xA)+ */
reader_skipspaces(xmlreader * reader)1174 static int reader_skipspaces(xmlreader *reader)
1175 {
1176 const WCHAR *ptr = reader_get_ptr(reader);
1177 UINT start = reader_get_cur(reader);
1178
1179 while (is_wchar_space(*ptr))
1180 {
1181 reader_skipn(reader, 1);
1182 ptr = reader_get_ptr(reader);
1183 }
1184
1185 return reader_get_cur(reader) - start;
1186 }
1187
1188 /* [26] VersionNum ::= '1.' [0-9]+ */
reader_parse_versionnum(xmlreader * reader,strval * val)1189 static HRESULT reader_parse_versionnum(xmlreader *reader, strval *val)
1190 {
1191 static const WCHAR onedotW[] = {'1','.',0};
1192 WCHAR *ptr, *ptr2;
1193 UINT start;
1194
1195 if (reader_cmp(reader, onedotW)) return WC_E_XMLDECL;
1196
1197 start = reader_get_cur(reader);
1198 /* skip "1." */
1199 reader_skipn(reader, 2);
1200
1201 ptr2 = ptr = reader_get_ptr(reader);
1202 while (*ptr >= '0' && *ptr <= '9')
1203 {
1204 reader_skipn(reader, 1);
1205 ptr = reader_get_ptr(reader);
1206 }
1207
1208 if (ptr2 == ptr) return WC_E_DIGIT;
1209 reader_init_strvalue(start, reader_get_cur(reader)-start, val);
1210 TRACE("version=%s\n", debug_strval(reader, val));
1211 return S_OK;
1212 }
1213
1214 /* [25] Eq ::= S? '=' S? */
reader_parse_eq(xmlreader * reader)1215 static HRESULT reader_parse_eq(xmlreader *reader)
1216 {
1217 static const WCHAR eqW[] = {'=',0};
1218 reader_skipspaces(reader);
1219 if (reader_cmp(reader, eqW)) return WC_E_EQUAL;
1220 /* skip '=' */
1221 reader_skipn(reader, 1);
1222 reader_skipspaces(reader);
1223 return S_OK;
1224 }
1225
1226 /* [24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"') */
reader_parse_versioninfo(xmlreader * reader)1227 static HRESULT reader_parse_versioninfo(xmlreader *reader)
1228 {
1229 static const WCHAR versionW[] = {'v','e','r','s','i','o','n',0};
1230 struct reader_position position;
1231 strval val, name;
1232 HRESULT hr;
1233
1234 if (!reader_skipspaces(reader)) return WC_E_WHITESPACE;
1235
1236 position = reader->position;
1237 if (reader_cmp(reader, versionW)) return WC_E_XMLDECL;
1238 reader_init_strvalue(reader_get_cur(reader), 7, &name);
1239 /* skip 'version' */
1240 reader_skipn(reader, 7);
1241
1242 hr = reader_parse_eq(reader);
1243 if (FAILED(hr)) return hr;
1244
1245 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1246 return WC_E_QUOTE;
1247 /* skip "'"|'"' */
1248 reader_skipn(reader, 1);
1249
1250 hr = reader_parse_versionnum(reader, &val);
1251 if (FAILED(hr)) return hr;
1252
1253 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1254 return WC_E_QUOTE;
1255
1256 /* skip "'"|'"' */
1257 reader_skipn(reader, 1);
1258
1259 return reader_add_attr(reader, NULL, &name, NULL, &val, &position, 0);
1260 }
1261
/* ([A-Za-z0-9._] | '-'), characters allowed in encoding name past its first character */
static inline BOOL is_wchar_encname(WCHAR ch)
{
    switch (ch)
    {
    case '.':
    case '_':
    case '-':
        return TRUE;
    default:
        break;
    }

    if (ch >= '0' && ch <= '9') return TRUE;
    if (ch >= 'a' && ch <= 'z') return TRUE;
    return ch >= 'A' && ch <= 'Z';
}
1271
1272 /* [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* */
reader_parse_encname(xmlreader * reader,strval * val)1273 static HRESULT reader_parse_encname(xmlreader *reader, strval *val)
1274 {
1275 WCHAR *start = reader_get_ptr(reader), *ptr;
1276 xml_encoding enc;
1277 int len;
1278
1279 if ((*start < 'A' || *start > 'Z') && (*start < 'a' || *start > 'z'))
1280 return WC_E_ENCNAME;
1281
1282 val->start = reader_get_cur(reader);
1283
1284 ptr = start;
1285 while (is_wchar_encname(*++ptr))
1286 ;
1287
1288 len = ptr - start;
1289 enc = parse_encoding_name(start, len);
1290 TRACE("encoding name %s\n", debugstr_wn(start, len));
1291 val->str = start;
1292 val->len = len;
1293
1294 if (enc == XmlEncoding_Unknown)
1295 return WC_E_ENCNAME;
1296
1297 /* skip encoding name */
1298 reader_skipn(reader, len);
1299 return S_OK;
1300 }
1301
1302 /* [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" ) */
reader_parse_encdecl(xmlreader * reader)1303 static HRESULT reader_parse_encdecl(xmlreader *reader)
1304 {
1305 static const WCHAR encodingW[] = {'e','n','c','o','d','i','n','g',0};
1306 struct reader_position position;
1307 strval name, val;
1308 HRESULT hr;
1309
1310 if (!reader_skipspaces(reader)) return S_FALSE;
1311
1312 position = reader->position;
1313 if (reader_cmp(reader, encodingW)) return S_FALSE;
1314 name.str = reader_get_ptr(reader);
1315 name.start = reader_get_cur(reader);
1316 name.len = 8;
1317 /* skip 'encoding' */
1318 reader_skipn(reader, 8);
1319
1320 hr = reader_parse_eq(reader);
1321 if (FAILED(hr)) return hr;
1322
1323 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1324 return WC_E_QUOTE;
1325 /* skip "'"|'"' */
1326 reader_skipn(reader, 1);
1327
1328 hr = reader_parse_encname(reader, &val);
1329 if (FAILED(hr)) return hr;
1330
1331 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1332 return WC_E_QUOTE;
1333
1334 /* skip "'"|'"' */
1335 reader_skipn(reader, 1);
1336
1337 return reader_add_attr(reader, NULL, &name, NULL, &val, &position, 0);
1338 }
1339
1340 /* [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"')) */
reader_parse_sddecl(xmlreader * reader)1341 static HRESULT reader_parse_sddecl(xmlreader *reader)
1342 {
1343 static const WCHAR standaloneW[] = {'s','t','a','n','d','a','l','o','n','e',0};
1344 static const WCHAR yesW[] = {'y','e','s',0};
1345 static const WCHAR noW[] = {'n','o',0};
1346 struct reader_position position;
1347 strval name, val;
1348 UINT start;
1349 HRESULT hr;
1350
1351 if (!reader_skipspaces(reader)) return S_FALSE;
1352
1353 position = reader->position;
1354 if (reader_cmp(reader, standaloneW)) return S_FALSE;
1355 reader_init_strvalue(reader_get_cur(reader), 10, &name);
1356 /* skip 'standalone' */
1357 reader_skipn(reader, 10);
1358
1359 hr = reader_parse_eq(reader);
1360 if (FAILED(hr)) return hr;
1361
1362 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1363 return WC_E_QUOTE;
1364 /* skip "'"|'"' */
1365 reader_skipn(reader, 1);
1366
1367 if (reader_cmp(reader, yesW) && reader_cmp(reader, noW))
1368 return WC_E_XMLDECL;
1369
1370 start = reader_get_cur(reader);
1371 /* skip 'yes'|'no' */
1372 reader_skipn(reader, reader_cmp(reader, yesW) ? 2 : 3);
1373 reader_init_strvalue(start, reader_get_cur(reader)-start, &val);
1374 TRACE("standalone=%s\n", debug_strval(reader, &val));
1375
1376 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1377 return WC_E_QUOTE;
1378 /* skip "'"|'"' */
1379 reader_skipn(reader, 1);
1380
1381 return reader_add_attr(reader, NULL, &name, NULL, &val, &position, 0);
1382 }
1383
1384 /* [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' */
reader_parse_xmldecl(xmlreader * reader)1385 static HRESULT reader_parse_xmldecl(xmlreader *reader)
1386 {
1387 static const WCHAR xmldeclW[] = {'<','?','x','m','l',' ',0};
1388 static const WCHAR declcloseW[] = {'?','>',0};
1389 struct reader_position position;
1390 HRESULT hr;
1391
1392 /* check if we have "<?xml " */
1393 if (reader_cmp(reader, xmldeclW))
1394 return S_FALSE;
1395
1396 reader_skipn(reader, 2);
1397 position = reader->position;
1398 reader_skipn(reader, 3);
1399 hr = reader_parse_versioninfo(reader);
1400 if (FAILED(hr))
1401 return hr;
1402
1403 hr = reader_parse_encdecl(reader);
1404 if (FAILED(hr))
1405 return hr;
1406
1407 hr = reader_parse_sddecl(reader);
1408 if (FAILED(hr))
1409 return hr;
1410
1411 reader_skipspaces(reader);
1412 if (reader_cmp(reader, declcloseW))
1413 return WC_E_XMLDECL;
1414
1415 /* skip '?>' */
1416 reader_skipn(reader, 2);
1417
1418 reader->nodetype = XmlNodeType_XmlDeclaration;
1419 reader->empty_element.position = position;
1420 reader_set_strvalue(reader, StringValue_LocalName, &strval_xml);
1421 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_xml);
1422
1423 return S_OK;
1424 }
1425
1426 /* [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' */
reader_parse_comment(xmlreader * reader)1427 static HRESULT reader_parse_comment(xmlreader *reader)
1428 {
1429 WCHAR *ptr;
1430 UINT start;
1431
1432 if (reader->resumestate == XmlReadResumeState_Comment)
1433 {
1434 start = reader->resume[XmlReadResume_Body];
1435 ptr = reader_get_ptr(reader);
1436 }
1437 else
1438 {
1439 /* skip '<!--' */
1440 reader_skipn(reader, 4);
1441 reader_shrink(reader);
1442 ptr = reader_get_ptr(reader);
1443 start = reader_get_cur(reader);
1444 reader->nodetype = XmlNodeType_Comment;
1445 reader->resume[XmlReadResume_Body] = start;
1446 reader->resumestate = XmlReadResumeState_Comment;
1447 reader_set_strvalue(reader, StringValue_Value, NULL);
1448 }
1449
1450 /* will exit when there's no more data, it won't attempt to
1451 read more from stream */
1452 while (*ptr)
1453 {
1454 if (ptr[0] == '-')
1455 {
1456 if (ptr[1] == '-')
1457 {
1458 if (ptr[2] == '>')
1459 {
1460 strval value;
1461
1462 reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
1463 TRACE("%s\n", debug_strval(reader, &value));
1464
1465 /* skip rest of markup '->' */
1466 reader_skipn(reader, 3);
1467
1468 reader_set_strvalue(reader, StringValue_Value, &value);
1469 reader->resume[XmlReadResume_Body] = 0;
1470 reader->resumestate = XmlReadResumeState_Initial;
1471 return S_OK;
1472 }
1473 else
1474 return WC_E_COMMENT;
1475 }
1476 }
1477
1478 reader_skipn(reader, 1);
1479 ptr++;
1480 }
1481
1482 return S_OK;
1483 }
1484
/* [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]

   Check works on a single UTF-16 unit, so both halves of a surrogate pair are accepted. */
static inline BOOL is_char(WCHAR ch)
{
    /* tab, line feed and carriage return are the only ones allowed below a space */
    if (ch < 0x20)
        return ch == '\t' || ch == '\n' || ch == '\r';

    /* [0x20,0xd7ff], surrogates [0xd800,0xdfff] and [0xe000,0xfffd] make a single range */
    return ch <= 0xfffd;
}
1494
/* [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]

   Returns TRUE if character is allowed in a public identifier literal. Note that
   apostrophe is accepted here, a caller using it as a delimiter has to check
   for that separately. */
BOOL is_pubchar(WCHAR ch)
{
    return (ch == ' ') ||
           (ch >= 'a' && ch <= 'z') ||
           (ch >= 'A' && ch <= 'Z') ||
           (ch >= '0' && ch <= '9') ||
           /* range has to start with an apostrophe (0x27), not with '-' (0x2d),
              to cover all of punctuation listed below */
           (ch >= '\'' && ch <= ';') || /* '()*+,-./:; */
           (ch == '=') || (ch == '?') ||
           (ch == '@') || (ch == '!') ||
           (ch >= '#' && ch <= '%') || /* #$% */
           (ch == '_') || (ch == '\r') || (ch == '\n');
}
1508
/* Tells if character can start a Name, see production [4] NameStartChar. Check works
   on a single UTF-16 unit, both halves of a surrogate pair are accepted. */
BOOL is_namestartchar(WCHAR ch)
{
    /* inclusive ranges, in ascending order */
    static const struct
    {
        WCHAR first;
        WCHAR last;
    }
    ranges[] =
    {
        { ':',    ':'    },
        { 'A',    'Z'    },
        { '_',    '_'    },
        { 'a',    'z'    },
        { 0xc0,   0xd6   },
        { 0xd8,   0xf6   },
        { 0xf8,   0x2ff  },
        { 0x370,  0x37d  },
        { 0x37f,  0x1fff },
        { 0x200c, 0x200d },
        { 0x2070, 0x218f },
        { 0x2c00, 0x2fef },
        { 0x3001, 0xd7ff },
        { 0xd800, 0xdbff }, /* high surrogate */
        { 0xdc00, 0xdfff }, /* low surrogate */
        { 0xf900, 0xfdcf },
        { 0xfdf0, 0xfffd },
    };
    unsigned int i;

    for (i = 0; i < sizeof(ranges)/sizeof(ranges[0]); i++)
    {
        if (ch >= ranges[i].first && ch <= ranges[i].last)
            return TRUE;
    }

    return FALSE;
}
1527
/* [4 NS] NCName ::= Name - (Char* ':' Char*)

   Tells if character is allowed in a name that can't have colons. Check works
   on a single UTF-16 unit, both halves of a surrogate pair are accepted. */
BOOL is_ncnamechar(WCHAR ch)
{
    /* ASCII part: letters, digits and '_', '-', '.' */
    if (ch < 0x80)
    {
        if (ch >= 'a' && ch <= 'z') return TRUE;
        if (ch >= 'A' && ch <= 'Z') return TRUE;
        if (ch >= '0' && ch <= '9') return TRUE;
        return ch == '_' || ch == '-' || ch == '.';
    }

    /* up to 0x2ff: 0xb7, then everything from 0xc0 except 0xd7 and 0xf7 */
    if (ch <= 0x2ff)
        return ch == 0xb7 || (ch >= 0xc0 && ch != 0xd7 && ch != 0xf7);

    /* [0x300,0x1fff] is allowed as a whole, with a single exception of 0x37e */
    if (ch <= 0x1fff)
        return ch != 0x37e;

    return (ch >= 0x200c && ch <= 0x200d) ||
           (ch >= 0x203f && ch <= 0x2040) ||
           (ch >= 0x2070 && ch <= 0x218f) ||
           (ch >= 0x2c00 && ch <= 0x2fef) ||
           (ch >= 0x3001 && ch <= 0xdfff) || /* includes high and low surrogates */
           (ch >= 0xf900 && ch <= 0xfdcf) ||
           (ch >= 0xfdf0 && ch <= 0xfffd);
}
1552
is_namechar(WCHAR ch)1553 BOOL is_namechar(WCHAR ch)
1554 {
1555 return (ch == ':') || is_ncnamechar(ch);
1556 }
1557
reader_get_nodetype(const xmlreader * reader)1558 static XmlNodeType reader_get_nodetype(const xmlreader *reader)
1559 {
1560 /* When we're on attribute always return attribute type, container node type is kept.
1561 Note that container is not necessarily an element, and attribute doesn't mean it's
1562 an attribute in XML spec terms. */
1563 return reader->attr ? XmlNodeType_Attribute : reader->nodetype;
1564 }
1565
1566 /* [4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] |
1567 [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] |
1568 [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
1569 [4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]
1570 [5] Name ::= NameStartChar (NameChar)* */
reader_parse_name(xmlreader * reader,strval * name)1571 static HRESULT reader_parse_name(xmlreader *reader, strval *name)
1572 {
1573 WCHAR *ptr;
1574 UINT start;
1575
1576 if (reader->resume[XmlReadResume_Name])
1577 {
1578 start = reader->resume[XmlReadResume_Name];
1579 ptr = reader_get_ptr(reader);
1580 }
1581 else
1582 {
1583 ptr = reader_get_ptr(reader);
1584 start = reader_get_cur(reader);
1585 if (!is_namestartchar(*ptr)) return WC_E_NAMECHARACTER;
1586 }
1587
1588 while (is_namechar(*ptr))
1589 {
1590 reader_skipn(reader, 1);
1591 ptr = reader_get_ptr(reader);
1592 }
1593
1594 if (is_reader_pending(reader))
1595 {
1596 reader->resume[XmlReadResume_Name] = start;
1597 return E_PENDING;
1598 }
1599 else
1600 reader->resume[XmlReadResume_Name] = 0;
1601
1602 reader_init_strvalue(start, reader_get_cur(reader)-start, name);
1603 TRACE("name %s:%d\n", debug_strval(reader, name), name->len);
1604
1605 return S_OK;
1606 }
1607
1608 /* [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) */
reader_parse_pitarget(xmlreader * reader,strval * target)1609 static HRESULT reader_parse_pitarget(xmlreader *reader, strval *target)
1610 {
1611 static const WCHAR xmlW[] = {'x','m','l'};
1612 static const strval xmlval = { (WCHAR*)xmlW, 3 };
1613 strval name;
1614 WCHAR *ptr;
1615 HRESULT hr;
1616 UINT i;
1617
1618 hr = reader_parse_name(reader, &name);
1619 if (FAILED(hr)) return is_reader_pending(reader) ? E_PENDING : WC_E_PI;
1620
1621 /* now that we got name check for illegal content */
1622 if (strval_eq(reader, &name, &xmlval))
1623 return WC_E_LEADINGXML;
1624
1625 /* PITarget can't be a qualified name */
1626 ptr = reader_get_strptr(reader, &name);
1627 for (i = 0; i < name.len; i++)
1628 if (ptr[i] == ':')
1629 return i ? NC_E_NAMECOLON : WC_E_PI;
1630
1631 TRACE("pitarget %s:%d\n", debug_strval(reader, &name), name.len);
1632 *target = name;
1633 return S_OK;
1634 }
1635
/* [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'

   Parsing is done in steps tracked with reader->resumestate, so it could be continued
   after interruption: Initial -> PITarget -> PIBody. Steps that are already done
   are skipped on next call. When data ends before closing '?>' is found S_OK is returned
   with resume state left as XmlReadResumeState_PIBody; once '?>' is reached node type
   and value are set and resume state is reset to Initial. Failures of target parsing
   are returned as is. */
static HRESULT reader_parse_pi(xmlreader *reader)
{
    strval target;
    WCHAR *ptr;
    UINT start;
    HRESULT hr;

    switch (reader->resumestate)
    {
    case XmlReadResumeState_Initial:
        /* skip '<?' */
        reader_skipn(reader, 2);
        reader_shrink(reader);
        reader->resumestate = XmlReadResumeState_PITarget;
        /* fallthrough */
    case XmlReadResumeState_PITarget:
        hr = reader_parse_pitarget(reader, &target);
        if (FAILED(hr)) return hr;
        reader_set_strvalue(reader, StringValue_LocalName, &target);
        reader_set_strvalue(reader, StringValue_QualifiedName, &target);
        reader_set_strvalue(reader, StringValue_Value, &strval_empty);
        reader->resumestate = XmlReadResumeState_PIBody;
        /* body starts right after the target, leading whitespace is stripped later */
        reader->resume[XmlReadResume_Body] = reader_get_cur(reader);
        /* fallthrough */
    default:
        ;
    }

    /* body offset comes from resume data, so it's valid for continued parsing too */
    start = reader->resume[XmlReadResume_Body];
    ptr = reader_get_ptr(reader);
    while (*ptr)
    {
        if (ptr[0] == '?')
        {
            if (ptr[1] == '>')
            {
                UINT cur = reader_get_cur(reader);
                strval value;

                /* strip all leading whitespace chars */
                while (start < cur)
                {
                    ptr = reader_get_ptr2(reader, start);
                    if (!is_wchar_space(*ptr)) break;
                    start++;
                }

                reader_init_strvalue(start, cur-start, &value);

                /* skip '?>' */
                reader_skipn(reader, 2);
                TRACE("%s\n", debug_strval(reader, &value));
                reader->nodetype = XmlNodeType_ProcessingInstruction;
                reader->resumestate = XmlReadResumeState_Initial;
                reader->resume[XmlReadResume_Body] = 0;
                reader_set_strvalue(reader, StringValue_Value, &value);
                return S_OK;
            }
        }

        reader_skipn(reader, 1);
        ptr = reader_get_ptr(reader);
    }

    /* ran out of data before '?>', resume state is kept as is */
    return S_OK;
}
1701
1702 /* This one is used to parse significant whitespace nodes, like in Misc production */
reader_parse_whitespace(xmlreader * reader)1703 static HRESULT reader_parse_whitespace(xmlreader *reader)
1704 {
1705 switch (reader->resumestate)
1706 {
1707 case XmlReadResumeState_Initial:
1708 reader_shrink(reader);
1709 reader->resumestate = XmlReadResumeState_Whitespace;
1710 reader->resume[XmlReadResume_Body] = reader_get_cur(reader);
1711 reader->nodetype = XmlNodeType_Whitespace;
1712 reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
1713 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
1714 reader_set_strvalue(reader, StringValue_Value, &strval_empty);
1715 /* fallthrough */
1716 case XmlReadResumeState_Whitespace:
1717 {
1718 strval value;
1719 UINT start;
1720
1721 reader_skipspaces(reader);
1722 if (is_reader_pending(reader)) return S_OK;
1723
1724 start = reader->resume[XmlReadResume_Body];
1725 reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
1726 reader_set_strvalue(reader, StringValue_Value, &value);
1727 TRACE("%s\n", debug_strval(reader, &value));
1728 reader->resumestate = XmlReadResumeState_Initial;
1729 }
1730 default:
1731 ;
1732 }
1733
1734 return S_OK;
1735 }
1736
1737 /* [27] Misc ::= Comment | PI | S */
reader_parse_misc(xmlreader * reader)1738 static HRESULT reader_parse_misc(xmlreader *reader)
1739 {
1740 HRESULT hr = S_FALSE;
1741
1742 if (reader->resumestate != XmlReadResumeState_Initial)
1743 {
1744 hr = reader_more(reader);
1745 if (FAILED(hr)) return hr;
1746
1747 /* finish current node */
1748 switch (reader->resumestate)
1749 {
1750 case XmlReadResumeState_PITarget:
1751 case XmlReadResumeState_PIBody:
1752 return reader_parse_pi(reader);
1753 case XmlReadResumeState_Comment:
1754 return reader_parse_comment(reader);
1755 case XmlReadResumeState_Whitespace:
1756 return reader_parse_whitespace(reader);
1757 default:
1758 ERR("unknown resume state %d\n", reader->resumestate);
1759 }
1760 }
1761
1762 while (1)
1763 {
1764 const WCHAR *cur = reader_get_ptr(reader);
1765
1766 if (is_wchar_space(*cur))
1767 hr = reader_parse_whitespace(reader);
1768 else if (!reader_cmp(reader, commentW))
1769 hr = reader_parse_comment(reader);
1770 else if (!reader_cmp(reader, piW))
1771 hr = reader_parse_pi(reader);
1772 else
1773 break;
1774
1775 if (hr != S_FALSE) return hr;
1776 }
1777
1778 return hr;
1779 }
1780
1781 /* [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") */
reader_parse_sys_literal(xmlreader * reader,strval * literal)1782 static HRESULT reader_parse_sys_literal(xmlreader *reader, strval *literal)
1783 {
1784 WCHAR *cur = reader_get_ptr(reader), quote;
1785 UINT start;
1786
1787 if (*cur != '"' && *cur != '\'') return WC_E_QUOTE;
1788
1789 quote = *cur;
1790 reader_skipn(reader, 1);
1791
1792 cur = reader_get_ptr(reader);
1793 start = reader_get_cur(reader);
1794 while (is_char(*cur) && *cur != quote)
1795 {
1796 reader_skipn(reader, 1);
1797 cur = reader_get_ptr(reader);
1798 }
1799 reader_init_strvalue(start, reader_get_cur(reader)-start, literal);
1800 if (*cur == quote) reader_skipn(reader, 1);
1801
1802 TRACE("%s\n", debug_strval(reader, literal));
1803 return S_OK;
1804 }
1805
1806 /* [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
1807 [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] */
reader_parse_pub_literal(xmlreader * reader,strval * literal)1808 static HRESULT reader_parse_pub_literal(xmlreader *reader, strval *literal)
1809 {
1810 WCHAR *cur = reader_get_ptr(reader), quote;
1811 UINT start;
1812
1813 if (*cur != '"' && *cur != '\'') return WC_E_QUOTE;
1814
1815 quote = *cur;
1816 reader_skipn(reader, 1);
1817
1818 start = reader_get_cur(reader);
1819 cur = reader_get_ptr(reader);
1820 while (is_pubchar(*cur) && *cur != quote)
1821 {
1822 reader_skipn(reader, 1);
1823 cur = reader_get_ptr(reader);
1824 }
1825 reader_init_strvalue(start, reader_get_cur(reader)-start, literal);
1826 if (*cur == quote) reader_skipn(reader, 1);
1827
1828 TRACE("%s\n", debug_strval(reader, literal));
1829 return S_OK;
1830 }
1831
1832 /* [75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral */
reader_parse_externalid(xmlreader * reader)1833 static HRESULT reader_parse_externalid(xmlreader *reader)
1834 {
1835 static WCHAR systemW[] = {'S','Y','S','T','E','M',0};
1836 static WCHAR publicW[] = {'P','U','B','L','I','C',0};
1837 struct reader_position position = reader->position;
1838 strval name, sys;
1839 HRESULT hr;
1840 int cnt;
1841
1842 if (!reader_cmp(reader, publicW)) {
1843 strval pub;
1844
1845 /* public id */
1846 reader_skipn(reader, 6);
1847 cnt = reader_skipspaces(reader);
1848 if (!cnt) return WC_E_WHITESPACE;
1849
1850 hr = reader_parse_pub_literal(reader, &pub);
1851 if (FAILED(hr)) return hr;
1852
1853 reader_init_cstrvalue(publicW, lstrlenW(publicW), &name);
1854 hr = reader_add_attr(reader, NULL, &name, NULL, &pub, &position, 0);
1855 if (FAILED(hr)) return hr;
1856
1857 cnt = reader_skipspaces(reader);
1858 if (!cnt) return S_OK;
1859
1860 /* optional system id */
1861 hr = reader_parse_sys_literal(reader, &sys);
1862 if (FAILED(hr)) return S_OK;
1863
1864 reader_init_cstrvalue(systemW, lstrlenW(systemW), &name);
1865 hr = reader_add_attr(reader, NULL, &name, NULL, &sys, &position, 0);
1866 if (FAILED(hr)) return hr;
1867
1868 return S_OK;
1869 } else if (!reader_cmp(reader, systemW)) {
1870 /* system id */
1871 reader_skipn(reader, 6);
1872 cnt = reader_skipspaces(reader);
1873 if (!cnt) return WC_E_WHITESPACE;
1874
1875 hr = reader_parse_sys_literal(reader, &sys);
1876 if (FAILED(hr)) return hr;
1877
1878 reader_init_cstrvalue(systemW, lstrlenW(systemW), &name);
1879 return reader_add_attr(reader, NULL, &name, NULL, &sys, &position, 0);
1880 }
1881
1882 return S_FALSE;
1883 }
1884
1885 /* [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>' */
reader_parse_dtd(xmlreader * reader)1886 static HRESULT reader_parse_dtd(xmlreader *reader)
1887 {
1888 static const WCHAR doctypeW[] = {'<','!','D','O','C','T','Y','P','E',0};
1889 strval name;
1890 WCHAR *cur;
1891 HRESULT hr;
1892
1893 /* check if we have "<!DOCTYPE" */
1894 if (reader_cmp(reader, doctypeW)) return S_FALSE;
1895 reader_shrink(reader);
1896
1897 /* DTD processing is not allowed by default */
1898 if (reader->dtdmode == DtdProcessing_Prohibit) return WC_E_DTDPROHIBITED;
1899
1900 reader_skipn(reader, 9);
1901 if (!reader_skipspaces(reader)) return WC_E_WHITESPACE;
1902
1903 /* name */
1904 hr = reader_parse_name(reader, &name);
1905 if (FAILED(hr)) return WC_E_DECLDOCTYPE;
1906
1907 reader_skipspaces(reader);
1908
1909 hr = reader_parse_externalid(reader);
1910 if (FAILED(hr)) return hr;
1911
1912 reader_skipspaces(reader);
1913
1914 cur = reader_get_ptr(reader);
1915 if (*cur != '>')
1916 {
1917 FIXME("internal subset parsing not implemented\n");
1918 return E_NOTIMPL;
1919 }
1920
1921 /* skip '>' */
1922 reader_skipn(reader, 1);
1923
1924 reader->nodetype = XmlNodeType_DocumentType;
1925 reader_set_strvalue(reader, StringValue_LocalName, &name);
1926 reader_set_strvalue(reader, StringValue_QualifiedName, &name);
1927
1928 return S_OK;
1929 }
1930
1931 /* [11 NS] LocalPart ::= NCName */
reader_parse_local(xmlreader * reader,strval * local,BOOL check_for_separator)1932 static HRESULT reader_parse_local(xmlreader *reader, strval *local, BOOL check_for_separator)
1933 {
1934 WCHAR *ptr;
1935 UINT start;
1936
1937 if (reader->resume[XmlReadResume_Local])
1938 {
1939 start = reader->resume[XmlReadResume_Local];
1940 ptr = reader_get_ptr(reader);
1941 }
1942 else
1943 {
1944 ptr = reader_get_ptr(reader);
1945 start = reader_get_cur(reader);
1946 }
1947
1948 while (is_ncnamechar(*ptr))
1949 {
1950 reader_skipn(reader, 1);
1951 ptr = reader_get_ptr(reader);
1952 }
1953
1954 if (check_for_separator && *ptr == ':')
1955 return NC_E_QNAMECOLON;
1956
1957 if (is_reader_pending(reader))
1958 {
1959 reader->resume[XmlReadResume_Local] = start;
1960 return E_PENDING;
1961 }
1962 else
1963 reader->resume[XmlReadResume_Local] = 0;
1964
1965 reader_init_strvalue(start, reader_get_cur(reader)-start, local);
1966
1967 return S_OK;
1968 }
1969
/* [7 NS]  QName ::= PrefixedName | UnprefixedName
   [8 NS]  PrefixedName ::= Prefix ':' LocalPart
   [9 NS]  UnprefixedName ::= LocalPart
   [10 NS] Prefix ::= NCName

   On success 'prefix' and 'local' are set to corresponding name parts, with empty prefix
   for unprefixed name, and 'qname' covers the whole name, colon included.

   Parsing could be continued after interruption: offset of the name is kept in
   resume[XmlReadResume_Name], offset of the local part is managed by reader_parse_local()
   in resume[XmlReadResume_Local]. Both are reset once name is complete. Returns
   NC_E_QNAMECHARACTER if name doesn't start with NCName character, E_PENDING when
   reader is pending, or a failure from reader_parse_local(). */
static HRESULT reader_parse_qname(xmlreader *reader, strval *prefix, strval *local, strval *qname)
{
    WCHAR *ptr;
    UINT start;
    HRESULT hr;

    if (reader->resume[XmlReadResume_Name])
    {
        /* continue with a name that was started earlier */
        start = reader->resume[XmlReadResume_Name];
        ptr = reader_get_ptr(reader);
    }
    else
    {
        ptr = reader_get_ptr(reader);
        start = reader_get_cur(reader);
        /* saved before first character is checked, it's used below for unprefixed name */
        reader->resume[XmlReadResume_Name] = start;
        if (!is_ncnamechar(*ptr)) return NC_E_QNAMECHARACTER;
    }

    if (reader->resume[XmlReadResume_Local])
    {
        /* interrupted within a local part, so prefix and colon were consumed already */
        hr = reader_parse_local(reader, local, FALSE);
        if (FAILED(hr)) return hr;

        /* prefix is everything from name start up to a colon before local part */
        reader_init_strvalue(reader->resume[XmlReadResume_Name],
                             local->start - reader->resume[XmlReadResume_Name] - 1,
                             prefix);
    }
    else
    {
        /* skip prefix part */
        while (is_ncnamechar(*ptr))
        {
            reader_skipn(reader, 1);
            ptr = reader_get_ptr(reader);
        }

        /* name offset stays in resume data for the next attempt */
        if (is_reader_pending(reader)) return E_PENDING;

        /* got a qualified name */
        if (*ptr == ':')
        {
            reader_init_strvalue(start, reader_get_cur(reader)-start, prefix);

            /* skip ':' */
            reader_skipn(reader, 1);
            hr = reader_parse_local(reader, local, TRUE);
            if (FAILED(hr)) return hr;
        }
        else
        {
            /* no colon, what was skipped as a prefix is actually a local name */
            reader_init_strvalue(reader->resume[XmlReadResume_Name], reader_get_cur(reader)-reader->resume[XmlReadResume_Name], local);
            reader_init_strvalue(0, 0, prefix);
        }
    }

    if (prefix->len)
        TRACE("qname %s:%s\n", debug_strval(reader, prefix), debug_strval(reader, local));
    else
        TRACE("ncname %s\n", debug_strval(reader, local));

    reader_init_strvalue(prefix->len ? prefix->start : local->start,
                        /* count ':' too */
                        (prefix->len ? prefix->len + 1 : 0) + local->len,
                         qname);

    /* name is complete, nothing to resume */
    reader->resume[XmlReadResume_Name] = 0;
    reader->resume[XmlReadResume_Local] = 0;

    return S_OK;
}
2045
get_predefined_entity(const xmlreader * reader,const strval * name)2046 static WCHAR get_predefined_entity(const xmlreader *reader, const strval *name)
2047 {
2048 static const WCHAR entltW[] = {'l','t'};
2049 static const WCHAR entgtW[] = {'g','t'};
2050 static const WCHAR entampW[] = {'a','m','p'};
2051 static const WCHAR entaposW[] = {'a','p','o','s'};
2052 static const WCHAR entquotW[] = {'q','u','o','t'};
2053 static const strval lt = { (WCHAR*)entltW, 2 };
2054 static const strval gt = { (WCHAR*)entgtW, 2 };
2055 static const strval amp = { (WCHAR*)entampW, 3 };
2056 static const strval apos = { (WCHAR*)entaposW, 4 };
2057 static const strval quot = { (WCHAR*)entquotW, 4 };
2058 WCHAR *str = reader_get_strptr(reader, name);
2059
2060 switch (*str)
2061 {
2062 case 'l':
2063 if (strval_eq(reader, name, <)) return '<';
2064 break;
2065 case 'g':
2066 if (strval_eq(reader, name, >)) return '>';
2067 break;
2068 case 'a':
2069 if (strval_eq(reader, name, &))
2070 return '&';
2071 else if (strval_eq(reader, name, &apos))
2072 return '\'';
2073 break;
2074 case 'q':
2075 if (strval_eq(reader, name, ")) return '\"';
2076 break;
2077 default:
2078 ;
2079 }
2080
2081 return 0;
2082 }
2083
2084 /* [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
2085 [67] Reference ::= EntityRef | CharRef
2086 [68] EntityRef ::= '&' Name ';' */
reader_parse_reference(xmlreader * reader)2087 static HRESULT reader_parse_reference(xmlreader *reader)
2088 {
2089 encoded_buffer *buffer = &reader->input->buffer->utf16;
2090 WCHAR *start = reader_get_ptr(reader), *ptr;
2091 UINT cur = reader_get_cur(reader);
2092 WCHAR ch = 0;
2093 int len;
2094
2095 /* skip '&' */
2096 reader_skipn(reader, 1);
2097 ptr = reader_get_ptr(reader);
2098
2099 if (*ptr == '#')
2100 {
2101 reader_skipn(reader, 1);
2102 ptr = reader_get_ptr(reader);
2103
2104 /* hex char or decimal */
2105 if (*ptr == 'x')
2106 {
2107 reader_skipn(reader, 1);
2108 ptr = reader_get_ptr(reader);
2109
2110 while (*ptr != ';')
2111 {
2112 if ((*ptr >= '0' && *ptr <= '9'))
2113 ch = ch*16 + *ptr - '0';
2114 else if ((*ptr >= 'a' && *ptr <= 'f'))
2115 ch = ch*16 + *ptr - 'a' + 10;
2116 else if ((*ptr >= 'A' && *ptr <= 'F'))
2117 ch = ch*16 + *ptr - 'A' + 10;
2118 else
2119 return ch ? WC_E_SEMICOLON : WC_E_HEXDIGIT;
2120 reader_skipn(reader, 1);
2121 ptr = reader_get_ptr(reader);
2122 }
2123 }
2124 else
2125 {
2126 while (*ptr != ';')
2127 {
2128 if ((*ptr >= '0' && *ptr <= '9'))
2129 {
2130 ch = ch*10 + *ptr - '0';
2131 reader_skipn(reader, 1);
2132 ptr = reader_get_ptr(reader);
2133 }
2134 else
2135 return ch ? WC_E_SEMICOLON : WC_E_DIGIT;
2136 }
2137 }
2138
2139 if (!is_char(ch)) return WC_E_XMLCHARACTER;
2140
2141 /* normalize */
2142 if (is_wchar_space(ch)) ch = ' ';
2143
2144 ptr = reader_get_ptr(reader);
2145 start = reader_get_ptr2(reader, cur);
2146 len = buffer->written - ((char *)ptr - buffer->data);
2147 memmove(start + 1, ptr + 1, len);
2148
2149 buffer->written -= (reader_get_cur(reader) - cur) * sizeof(WCHAR);
2150 buffer->cur = cur + 1;
2151
2152 *start = ch;
2153 }
2154 else
2155 {
2156 strval name;
2157 HRESULT hr;
2158
2159 hr = reader_parse_name(reader, &name);
2160 if (FAILED(hr)) return hr;
2161
2162 ptr = reader_get_ptr(reader);
2163 if (*ptr != ';') return WC_E_SEMICOLON;
2164
2165 /* predefined entities resolve to a single character */
2166 ch = get_predefined_entity(reader, &name);
2167 if (ch)
2168 {
2169 len = buffer->written - ((char*)ptr - buffer->data) - sizeof(WCHAR);
2170 memmove(start+1, ptr+1, len);
2171 buffer->cur = cur + 1;
2172 buffer->written -= (ptr - start) * sizeof(WCHAR);
2173
2174 *start = ch;
2175 }
2176 else
2177 {
2178 FIXME("undeclared entity %s\n", debug_strval(reader, &name));
2179 return WC_E_UNDECLAREDENTITY;
2180 }
2181
2182 }
2183
2184 return S_OK;
2185 }
2186
2187 /* [10 NS] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'" */
reader_parse_attvalue(xmlreader * reader,strval * value)2188 static HRESULT reader_parse_attvalue(xmlreader *reader, strval *value)
2189 {
2190 WCHAR *ptr, quote;
2191 UINT start;
2192
2193 ptr = reader_get_ptr(reader);
2194
2195 /* skip opening quote */
2196 quote = *ptr;
2197 if (quote != '\"' && quote != '\'') return WC_E_QUOTE;
2198 reader_skipn(reader, 1);
2199
2200 ptr = reader_get_ptr(reader);
2201 start = reader_get_cur(reader);
2202 while (*ptr)
2203 {
2204 if (*ptr == '<') return WC_E_LESSTHAN;
2205
2206 if (*ptr == quote)
2207 {
2208 reader_init_strvalue(start, reader_get_cur(reader)-start, value);
2209 /* skip closing quote */
2210 reader_skipn(reader, 1);
2211 return S_OK;
2212 }
2213
2214 if (*ptr == '&')
2215 {
2216 HRESULT hr = reader_parse_reference(reader);
2217 if (FAILED(hr)) return hr;
2218 }
2219 else
2220 {
2221 /* replace all whitespace chars with ' ' */
2222 if (is_wchar_space(*ptr)) *ptr = ' ';
2223 reader_skipn(reader, 1);
2224 }
2225 ptr = reader_get_ptr(reader);
2226 }
2227
2228 return WC_E_QUOTE;
2229 }
2230
2231 /* [1 NS] NSAttName ::= PrefixedAttName | DefaultAttName
2232 [2 NS] PrefixedAttName ::= 'xmlns:' NCName
2233 [3 NS] DefaultAttName ::= 'xmlns'
2234 [15 NS] Attribute ::= NSAttName Eq AttValue | QName Eq AttValue */
reader_parse_attribute(xmlreader * reader)2235 static HRESULT reader_parse_attribute(xmlreader *reader)
2236 {
2237 struct reader_position position = reader->position;
2238 strval prefix, local, qname, value;
2239 enum attribute_flags flags = 0;
2240 HRESULT hr;
2241
2242 hr = reader_parse_qname(reader, &prefix, &local, &qname);
2243 if (FAILED(hr)) return hr;
2244
2245 if (strval_eq(reader, &prefix, &strval_xmlns))
2246 flags |= ATTRIBUTE_NS_DEFINITION;
2247
2248 if (strval_eq(reader, &qname, &strval_xmlns))
2249 flags |= ATTRIBUTE_DEFAULT_NS_DEFINITION;
2250
2251 hr = reader_parse_eq(reader);
2252 if (FAILED(hr)) return hr;
2253
2254 hr = reader_parse_attvalue(reader, &value);
2255 if (FAILED(hr)) return hr;
2256
2257 if (flags & (ATTRIBUTE_NS_DEFINITION | ATTRIBUTE_DEFAULT_NS_DEFINITION))
2258 reader_push_ns(reader, &local, &value, !!(flags & ATTRIBUTE_DEFAULT_NS_DEFINITION));
2259
2260 TRACE("%s=%s\n", debug_strval(reader, &local), debug_strval(reader, &value));
2261 return reader_add_attr(reader, &prefix, &local, &qname, &value, &position, flags);
2262 }
2263
2264 /* [12 NS] STag ::= '<' QName (S Attribute)* S? '>'
2265 [14 NS] EmptyElemTag ::= '<' QName (S Attribute)* S? '/>' */
reader_parse_stag(xmlreader * reader,strval * prefix,strval * local,strval * qname)2266 static HRESULT reader_parse_stag(xmlreader *reader, strval *prefix, strval *local, strval *qname)
2267 {
2268 struct reader_position position = reader->position;
2269 HRESULT hr;
2270
2271 hr = reader_parse_qname(reader, prefix, local, qname);
2272 if (FAILED(hr)) return hr;
2273
2274 for (;;)
2275 {
2276 static const WCHAR endW[] = {'/','>',0};
2277
2278 reader_skipspaces(reader);
2279
2280 /* empty element */
2281 if ((reader->is_empty_element = !reader_cmp(reader, endW)))
2282 {
2283 struct element *element = &reader->empty_element;
2284
2285 /* skip '/>' */
2286 reader_skipn(reader, 2);
2287
2288 reader_free_strvalued(reader, &element->qname);
2289 reader_free_strvalued(reader, &element->localname);
2290
2291 element->prefix = *prefix;
2292 reader_strvaldup(reader, qname, &element->qname);
2293 reader_strvaldup(reader, local, &element->localname);
2294 element->position = position;
2295 reader_mark_ns_nodes(reader, element);
2296 return S_OK;
2297 }
2298
2299 /* got a start tag */
2300 if (!reader_cmp(reader, gtW))
2301 {
2302 /* skip '>' */
2303 reader_skipn(reader, 1);
2304 return reader_push_element(reader, prefix, local, qname, &position);
2305 }
2306
2307 hr = reader_parse_attribute(reader);
2308 if (FAILED(hr)) return hr;
2309 }
2310
2311 return S_OK;
2312 }
2313
2314 /* [39] element ::= EmptyElemTag | STag content ETag */
reader_parse_element(xmlreader * reader)2315 static HRESULT reader_parse_element(xmlreader *reader)
2316 {
2317 HRESULT hr;
2318
2319 switch (reader->resumestate)
2320 {
2321 case XmlReadResumeState_Initial:
2322 /* check if we are really on element */
2323 if (reader_cmp(reader, ltW)) return S_FALSE;
2324
2325 /* skip '<' */
2326 reader_skipn(reader, 1);
2327
2328 reader_shrink(reader);
2329 reader->resumestate = XmlReadResumeState_STag;
2330 case XmlReadResumeState_STag:
2331 {
2332 strval qname, prefix, local;
2333
2334 /* this handles empty elements too */
2335 hr = reader_parse_stag(reader, &prefix, &local, &qname);
2336 if (FAILED(hr)) return hr;
2337
2338 /* FIXME: need to check for defined namespace to reject invalid prefix */
2339
2340 /* if we got empty element and stack is empty go straight to Misc */
2341 if (reader->is_empty_element && list_empty(&reader->elements))
2342 reader->instate = XmlReadInState_MiscEnd;
2343 else
2344 reader->instate = XmlReadInState_Content;
2345
2346 reader->nodetype = XmlNodeType_Element;
2347 reader->resumestate = XmlReadResumeState_Initial;
2348 reader_set_strvalue(reader, StringValue_Prefix, &prefix);
2349 reader_set_strvalue(reader, StringValue_QualifiedName, &qname);
2350 reader_set_strvalue(reader, StringValue_Value, &strval_empty);
2351 break;
2352 }
2353 default:
2354 hr = E_FAIL;
2355 }
2356
2357 return hr;
2358 }
2359
2360 /* [13 NS] ETag ::= '</' QName S? '>' */
reader_parse_endtag(xmlreader * reader)2361 static HRESULT reader_parse_endtag(xmlreader *reader)
2362 {
2363 struct reader_position position;
2364 strval prefix, local, qname;
2365 struct element *element;
2366 HRESULT hr;
2367
2368 /* skip '</' */
2369 reader_skipn(reader, 2);
2370
2371 position = reader->position;
2372 hr = reader_parse_qname(reader, &prefix, &local, &qname);
2373 if (FAILED(hr)) return hr;
2374
2375 reader_skipspaces(reader);
2376
2377 if (reader_cmp(reader, gtW)) return WC_E_GREATERTHAN;
2378
2379 /* skip '>' */
2380 reader_skipn(reader, 1);
2381
2382 /* Element stack should never be empty at this point, cause we shouldn't get to
2383 content parsing if it's empty. */
2384 element = LIST_ENTRY(list_head(&reader->elements), struct element, entry);
2385 if (!strval_eq(reader, &element->qname, &qname)) return WC_E_ELEMENTMATCH;
2386
2387 /* update position stored for start tag, we won't be using it */
2388 element->position = position;
2389
2390 reader->nodetype = XmlNodeType_EndElement;
2391 reader->is_empty_element = FALSE;
2392 reader_set_strvalue(reader, StringValue_Prefix, &prefix);
2393
2394 return S_OK;
2395 }
2396
2397 /* [18] CDSect ::= CDStart CData CDEnd
2398 [19] CDStart ::= '<![CDATA['
2399 [20] CData ::= (Char* - (Char* ']]>' Char*))
2400 [21] CDEnd ::= ']]>' */
reader_parse_cdata(xmlreader * reader)2401 static HRESULT reader_parse_cdata(xmlreader *reader)
2402 {
2403 WCHAR *ptr;
2404 UINT start;
2405
2406 if (reader->resumestate == XmlReadResumeState_CDATA)
2407 {
2408 start = reader->resume[XmlReadResume_Body];
2409 ptr = reader_get_ptr(reader);
2410 }
2411 else
2412 {
2413 /* skip markup '<![CDATA[' */
2414 reader_skipn(reader, 9);
2415 reader_shrink(reader);
2416 ptr = reader_get_ptr(reader);
2417 start = reader_get_cur(reader);
2418 reader->nodetype = XmlNodeType_CDATA;
2419 reader->resume[XmlReadResume_Body] = start;
2420 reader->resumestate = XmlReadResumeState_CDATA;
2421 reader_set_strvalue(reader, StringValue_Value, NULL);
2422 }
2423
2424 while (*ptr)
2425 {
2426 if (*ptr == ']' && *(ptr+1) == ']' && *(ptr+2) == '>')
2427 {
2428 strval value;
2429
2430 reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
2431
2432 /* skip ']]>' */
2433 reader_skipn(reader, 3);
2434 TRACE("%s\n", debug_strval(reader, &value));
2435
2436 reader_set_strvalue(reader, StringValue_Value, &value);
2437 reader->resume[XmlReadResume_Body] = 0;
2438 reader->resumestate = XmlReadResumeState_Initial;
2439 return S_OK;
2440 }
2441 else
2442 {
2443 reader_skipn(reader, 1);
2444 ptr++;
2445 }
2446 }
2447
2448 return S_OK;
2449 }
2450
2451 /* [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) */
reader_parse_chardata(xmlreader * reader)2452 static HRESULT reader_parse_chardata(xmlreader *reader)
2453 {
2454 struct reader_position position;
2455 WCHAR *ptr;
2456 UINT start;
2457
2458 if (reader->resumestate == XmlReadResumeState_CharData)
2459 {
2460 start = reader->resume[XmlReadResume_Body];
2461 ptr = reader_get_ptr(reader);
2462 }
2463 else
2464 {
2465 reader_shrink(reader);
2466 ptr = reader_get_ptr(reader);
2467 start = reader_get_cur(reader);
2468 /* There's no text */
2469 if (!*ptr || *ptr == '<') return S_OK;
2470 reader->nodetype = is_wchar_space(*ptr) ? XmlNodeType_Whitespace : XmlNodeType_Text;
2471 reader->resume[XmlReadResume_Body] = start;
2472 reader->resumestate = XmlReadResumeState_CharData;
2473 reader_set_strvalue(reader, StringValue_Value, NULL);
2474 }
2475
2476 position = reader->position;
2477 while (*ptr)
2478 {
2479 static const WCHAR ampW[] = {'&',0};
2480
2481 /* CDATA closing sequence ']]>' is not allowed */
2482 if (ptr[0] == ']' && ptr[1] == ']' && ptr[2] == '>')
2483 return WC_E_CDSECTEND;
2484
2485 /* Found next markup part */
2486 if (ptr[0] == '<')
2487 {
2488 strval value;
2489
2490 reader->empty_element.position = position;
2491 reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
2492 reader_set_strvalue(reader, StringValue_Value, &value);
2493 reader->resume[XmlReadResume_Body] = 0;
2494 reader->resumestate = XmlReadResumeState_Initial;
2495 return S_OK;
2496 }
2497
2498 /* this covers a case when text has leading whitespace chars */
2499 if (!is_wchar_space(*ptr)) reader->nodetype = XmlNodeType_Text;
2500
2501 if (!reader_cmp(reader, ampW))
2502 reader_parse_reference(reader);
2503 else
2504 reader_skipn(reader, 1);
2505
2506 ptr = reader_get_ptr(reader);
2507 }
2508
2509 return S_OK;
2510 }
2511
2512 /* [43] content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)* */
reader_parse_content(xmlreader * reader)2513 static HRESULT reader_parse_content(xmlreader *reader)
2514 {
2515 static const WCHAR cdstartW[] = {'<','!','[','C','D','A','T','A','[',0};
2516 static const WCHAR etagW[] = {'<','/',0};
2517
2518 if (reader->resumestate != XmlReadResumeState_Initial)
2519 {
2520 switch (reader->resumestate)
2521 {
2522 case XmlReadResumeState_CDATA:
2523 return reader_parse_cdata(reader);
2524 case XmlReadResumeState_Comment:
2525 return reader_parse_comment(reader);
2526 case XmlReadResumeState_PIBody:
2527 case XmlReadResumeState_PITarget:
2528 return reader_parse_pi(reader);
2529 case XmlReadResumeState_CharData:
2530 return reader_parse_chardata(reader);
2531 default:
2532 ERR("unknown resume state %d\n", reader->resumestate);
2533 }
2534 }
2535
2536 reader_shrink(reader);
2537
2538 /* handle end tag here, it indicates end of content as well */
2539 if (!reader_cmp(reader, etagW))
2540 return reader_parse_endtag(reader);
2541
2542 if (!reader_cmp(reader, commentW))
2543 return reader_parse_comment(reader);
2544
2545 if (!reader_cmp(reader, piW))
2546 return reader_parse_pi(reader);
2547
2548 if (!reader_cmp(reader, cdstartW))
2549 return reader_parse_cdata(reader);
2550
2551 if (!reader_cmp(reader, ltW))
2552 return reader_parse_element(reader);
2553
2554 /* what's left must be CharData */
2555 return reader_parse_chardata(reader);
2556 }
2557
reader_parse_nextnode(xmlreader * reader)2558 static HRESULT reader_parse_nextnode(xmlreader *reader)
2559 {
2560 XmlNodeType nodetype = reader_get_nodetype(reader);
2561 HRESULT hr;
2562
2563 if (!is_reader_pending(reader))
2564 {
2565 reader->chunk_read_off = 0;
2566 reader_clear_attrs(reader);
2567 }
2568
2569 /* When moving from EndElement or empty element, pop its own namespace definitions */
2570 switch (nodetype)
2571 {
2572 case XmlNodeType_Attribute:
2573 reader_dec_depth(reader);
2574 /* fallthrough */
2575 case XmlNodeType_Element:
2576 if (reader->is_empty_element)
2577 reader_pop_ns_nodes(reader, &reader->empty_element);
2578 else if (FAILED(hr = reader_inc_depth(reader)))
2579 return hr;
2580 break;
2581 case XmlNodeType_EndElement:
2582 reader_pop_element(reader);
2583 reader_dec_depth(reader);
2584 break;
2585 default:
2586 ;
2587 }
2588
2589 for (;;)
2590 {
2591 switch (reader->instate)
2592 {
2593 /* if it's a first call for a new input we need to detect stream encoding */
2594 case XmlReadInState_Initial:
2595 {
2596 xml_encoding enc;
2597
2598 hr = readerinput_growraw(reader->input);
2599 if (FAILED(hr)) return hr;
2600
2601 reader->position.line_number = 1;
2602 reader->position.line_position = 1;
2603
2604 /* try to detect encoding by BOM or data and set input code page */
2605 hr = readerinput_detectencoding(reader->input, &enc);
2606 TRACE("detected encoding %s, 0x%08x\n", enc == XmlEncoding_Unknown ? "(unknown)" :
2607 debugstr_w(xml_encoding_map[enc].name), hr);
2608 if (FAILED(hr)) return hr;
2609
2610 /* always switch first time cause we have to put something in */
2611 readerinput_switchencoding(reader->input, enc);
2612
2613 /* parse xml declaration */
2614 hr = reader_parse_xmldecl(reader);
2615 if (FAILED(hr)) return hr;
2616
2617 readerinput_shrinkraw(reader->input, -1);
2618 reader->instate = XmlReadInState_Misc_DTD;
2619 if (hr == S_OK) return hr;
2620 }
2621 break;
2622 case XmlReadInState_Misc_DTD:
2623 hr = reader_parse_misc(reader);
2624 if (FAILED(hr)) return hr;
2625
2626 if (hr == S_FALSE)
2627 reader->instate = XmlReadInState_DTD;
2628 else
2629 return hr;
2630 break;
2631 case XmlReadInState_DTD:
2632 hr = reader_parse_dtd(reader);
2633 if (FAILED(hr)) return hr;
2634
2635 if (hr == S_OK)
2636 {
2637 reader->instate = XmlReadInState_DTD_Misc;
2638 return hr;
2639 }
2640 else
2641 reader->instate = XmlReadInState_Element;
2642 break;
2643 case XmlReadInState_DTD_Misc:
2644 hr = reader_parse_misc(reader);
2645 if (FAILED(hr)) return hr;
2646
2647 if (hr == S_FALSE)
2648 reader->instate = XmlReadInState_Element;
2649 else
2650 return hr;
2651 break;
2652 case XmlReadInState_Element:
2653 return reader_parse_element(reader);
2654 case XmlReadInState_Content:
2655 return reader_parse_content(reader);
2656 case XmlReadInState_MiscEnd:
2657 hr = reader_parse_misc(reader);
2658 if (hr != S_FALSE) return hr;
2659
2660 if (*reader_get_ptr(reader))
2661 {
2662 WARN("found garbage in the end of XML\n");
2663 return WC_E_SYNTAX;
2664 }
2665
2666 reader->instate = XmlReadInState_Eof;
2667 reader->state = XmlReadState_EndOfFile;
2668 reader->nodetype = XmlNodeType_None;
2669 return hr;
2670 case XmlReadInState_Eof:
2671 return S_FALSE;
2672 default:
2673 FIXME("internal state %d not handled\n", reader->instate);
2674 return E_NOTIMPL;
2675 }
2676 }
2677
2678 return E_NOTIMPL;
2679 }
2680
xmlreader_QueryInterface(IXmlReader * iface,REFIID riid,void ** ppvObject)2681 static HRESULT WINAPI xmlreader_QueryInterface(IXmlReader *iface, REFIID riid, void** ppvObject)
2682 {
2683 xmlreader *This = impl_from_IXmlReader(iface);
2684
2685 TRACE("(%p)->(%s %p)\n", This, debugstr_guid(riid), ppvObject);
2686
2687 if (IsEqualGUID(riid, &IID_IUnknown) ||
2688 IsEqualGUID(riid, &IID_IXmlReader))
2689 {
2690 *ppvObject = iface;
2691 }
2692 else
2693 {
2694 FIXME("interface %s not implemented\n", debugstr_guid(riid));
2695 *ppvObject = NULL;
2696 return E_NOINTERFACE;
2697 }
2698
2699 IXmlReader_AddRef(iface);
2700
2701 return S_OK;
2702 }
2703
xmlreader_AddRef(IXmlReader * iface)2704 static ULONG WINAPI xmlreader_AddRef(IXmlReader *iface)
2705 {
2706 xmlreader *This = impl_from_IXmlReader(iface);
2707 ULONG ref = InterlockedIncrement(&This->ref);
2708 TRACE("(%p)->(%d)\n", This, ref);
2709 return ref;
2710 }
2711
/* Frees every entry of the prefixed (reader->ns) and default (reader->nsdef)
   namespace lists. Entries on the default list have only their URI freed. */
static void reader_clear_ns(xmlreader *reader)
{
    struct list *item;

    /* prefixed namespace definitions */
    while ((item = list_head(&reader->ns)))
    {
        struct ns *def = LIST_ENTRY(item, struct ns, entry);

        list_remove(&def->entry);
        reader_free_strvalued(reader, &def->prefix);
        reader_free_strvalued(reader, &def->uri);
        reader_free(reader, def);
    }

    /* default namespace definitions */
    while ((item = list_head(&reader->nsdef)))
    {
        struct ns *def = LIST_ENTRY(item, struct ns, entry);

        list_remove(&def->entry);
        reader_free_strvalued(reader, &def->uri);
        reader_free(reader, def);
    }
}
2729
/* Puts the parsing state back to what it is before any input was read:
   position zeroed, element/attribute/namespace lists and string values
   released, depth, node type and resume information cleared. */
static void reader_reset_parser(xmlreader *reader)
{
    reader->position.line_position = 0;
    reader->position.line_number = 0;

    /* release everything collected while parsing */
    reader_clear_elements(reader);
    reader_clear_attrs(reader);
    reader_clear_ns(reader);
    reader_free_strvalues(reader);

    /* back to a pristine, non-resuming state */
    memset(reader->resume, 0, sizeof(reader->resume));
    reader->resumestate = XmlReadResumeState_Initial;
    reader->is_empty_element = FALSE;
    reader->nodetype = XmlNodeType_None;
    reader->depth = 0;
}
2746
xmlreader_Release(IXmlReader * iface)2747 static ULONG WINAPI xmlreader_Release(IXmlReader *iface)
2748 {
2749 xmlreader *This = impl_from_IXmlReader(iface);
2750 LONG ref = InterlockedDecrement(&This->ref);
2751
2752 TRACE("(%p)->(%d)\n", This, ref);
2753
2754 if (ref == 0)
2755 {
2756 IMalloc *imalloc = This->imalloc;
2757 reader_reset_parser(This);
2758 if (This->input) IUnknown_Release(&This->input->IXmlReaderInput_iface);
2759 if (This->resolver) IXmlResolver_Release(This->resolver);
2760 if (This->mlang) IUnknown_Release(This->mlang);
2761 reader_free(This, This);
2762 if (imalloc) IMalloc_Release(imalloc);
2763 }
2764
2765 return ref;
2766 }
2767
xmlreader_SetInput(IXmlReader * iface,IUnknown * input)2768 static HRESULT WINAPI xmlreader_SetInput(IXmlReader* iface, IUnknown *input)
2769 {
2770 xmlreader *This = impl_from_IXmlReader(iface);
2771 IXmlReaderInput *readerinput;
2772 HRESULT hr;
2773
2774 TRACE("(%p)->(%p)\n", This, input);
2775
2776 if (This->input)
2777 {
2778 readerinput_release_stream(This->input);
2779 IUnknown_Release(&This->input->IXmlReaderInput_iface);
2780 This->input = NULL;
2781 }
2782
2783 reader_reset_parser(This);
2784
2785 /* just reset current input */
2786 if (!input)
2787 {
2788 This->state = XmlReadState_Initial;
2789 return S_OK;
2790 }
2791
2792 /* now try IXmlReaderInput, ISequentialStream, IStream */
2793 hr = IUnknown_QueryInterface(input, &IID_IXmlReaderInput, (void**)&readerinput);
2794 if (hr == S_OK)
2795 {
2796 if (readerinput->lpVtbl == &xmlreaderinputvtbl)
2797 This->input = impl_from_IXmlReaderInput(readerinput);
2798 else
2799 {
2800 ERR("got external IXmlReaderInput implementation: %p, vtbl=%p\n",
2801 readerinput, readerinput->lpVtbl);
2802 IUnknown_Release(readerinput);
2803 return E_FAIL;
2804
2805 }
2806 }
2807
2808 if (hr != S_OK || !readerinput)
2809 {
2810 /* create IXmlReaderInput basing on supplied interface */
2811 hr = CreateXmlReaderInputWithEncodingName(input,
2812 This->imalloc, NULL, FALSE, NULL, &readerinput);
2813 if (hr != S_OK) return hr;
2814 This->input = impl_from_IXmlReaderInput(readerinput);
2815 }
2816
2817 /* set stream for supplied IXmlReaderInput */
2818 hr = readerinput_query_for_stream(This->input);
2819 if (hr == S_OK)
2820 {
2821 This->state = XmlReadState_Initial;
2822 This->instate = XmlReadInState_Initial;
2823 }
2824 return hr;
2825 }
2826
xmlreader_GetProperty(IXmlReader * iface,UINT property,LONG_PTR * value)2827 static HRESULT WINAPI xmlreader_GetProperty(IXmlReader* iface, UINT property, LONG_PTR *value)
2828 {
2829 xmlreader *This = impl_from_IXmlReader(iface);
2830
2831 TRACE("(%p)->(%s %p)\n", This, debugstr_reader_prop(property), value);
2832
2833 if (!value) return E_INVALIDARG;
2834
2835 switch (property)
2836 {
2837 case XmlReaderProperty_MultiLanguage:
2838 *value = (LONG_PTR)This->mlang;
2839 if (This->mlang)
2840 IUnknown_AddRef(This->mlang);
2841 break;
2842 case XmlReaderProperty_XmlResolver:
2843 *value = (LONG_PTR)This->resolver;
2844 if (This->resolver)
2845 IXmlResolver_AddRef(This->resolver);
2846 break;
2847 case XmlReaderProperty_DtdProcessing:
2848 *value = This->dtdmode;
2849 break;
2850 case XmlReaderProperty_ReadState:
2851 *value = This->state;
2852 break;
2853 case XmlReaderProperty_MaxElementDepth:
2854 *value = This->max_depth;
2855 break;
2856 default:
2857 FIXME("Unimplemented property (%u)\n", property);
2858 return E_NOTIMPL;
2859 }
2860
2861 return S_OK;
2862 }
2863
xmlreader_SetProperty(IXmlReader * iface,UINT property,LONG_PTR value)2864 static HRESULT WINAPI xmlreader_SetProperty(IXmlReader* iface, UINT property, LONG_PTR value)
2865 {
2866 xmlreader *This = impl_from_IXmlReader(iface);
2867
2868 TRACE("(%p)->(%s 0x%lx)\n", This, debugstr_reader_prop(property), value);
2869
2870 switch (property)
2871 {
2872 case XmlReaderProperty_MultiLanguage:
2873 if (This->mlang)
2874 IUnknown_Release(This->mlang);
2875 This->mlang = (IUnknown*)value;
2876 if (This->mlang)
2877 IUnknown_AddRef(This->mlang);
2878 if (This->mlang)
2879 FIXME("Ignoring MultiLanguage %p\n", This->mlang);
2880 break;
2881 case XmlReaderProperty_XmlResolver:
2882 if (This->resolver)
2883 IXmlResolver_Release(This->resolver);
2884 This->resolver = (IXmlResolver*)value;
2885 if (This->resolver)
2886 IXmlResolver_AddRef(This->resolver);
2887 break;
2888 case XmlReaderProperty_DtdProcessing:
2889 if (value < 0 || value > _DtdProcessing_Last) return E_INVALIDARG;
2890 This->dtdmode = value;
2891 break;
2892 case XmlReaderProperty_MaxElementDepth:
2893 This->max_depth = value;
2894 break;
2895 default:
2896 FIXME("Unimplemented property (%u)\n", property);
2897 return E_NOTIMPL;
2898 }
2899
2900 return S_OK;
2901 }
2902
xmlreader_Read(IXmlReader * iface,XmlNodeType * nodetype)2903 static HRESULT WINAPI xmlreader_Read(IXmlReader* iface, XmlNodeType *nodetype)
2904 {
2905 xmlreader *This = impl_from_IXmlReader(iface);
2906 XmlNodeType oldtype = This->nodetype;
2907 XmlNodeType type;
2908 HRESULT hr;
2909
2910 TRACE("(%p)->(%p)\n", This, nodetype);
2911
2912 if (!nodetype)
2913 nodetype = &type;
2914
2915 switch (This->state)
2916 {
2917 case XmlReadState_Closed:
2918 hr = S_FALSE;
2919 break;
2920 case XmlReadState_Error:
2921 hr = This->error;
2922 break;
2923 default:
2924 hr = reader_parse_nextnode(This);
2925 if (SUCCEEDED(hr) && oldtype == XmlNodeType_None && This->nodetype != oldtype)
2926 This->state = XmlReadState_Interactive;
2927
2928 if (FAILED(hr))
2929 {
2930 This->state = XmlReadState_Error;
2931 This->nodetype = XmlNodeType_None;
2932 This->depth = 0;
2933 This->error = hr;
2934 }
2935 }
2936
2937 TRACE("node type %s\n", debugstr_nodetype(This->nodetype));
2938 *nodetype = This->nodetype;
2939
2940 return hr;
2941 }
2942
xmlreader_GetNodeType(IXmlReader * iface,XmlNodeType * node_type)2943 static HRESULT WINAPI xmlreader_GetNodeType(IXmlReader* iface, XmlNodeType *node_type)
2944 {
2945 xmlreader *This = impl_from_IXmlReader(iface);
2946
2947 TRACE("(%p)->(%p)\n", This, node_type);
2948
2949 if (!node_type)
2950 return E_INVALIDARG;
2951
2952 *node_type = reader_get_nodetype(This);
2953 return This->state == XmlReadState_Closed ? S_FALSE : S_OK;
2954 }
2955
/* Makes 'attr' the current node: records it in the reader, restarts chunked
   value reading and publishes its prefix, qualified name and value. */
static void reader_set_current_attribute(xmlreader *reader, struct attribute *attr)
{
    reader->chunk_read_off = 0;
    reader->attr = attr;

    reader_set_strvalue(reader, StringValue_Prefix, &attr->prefix);
    reader_set_strvalue(reader, StringValue_QualifiedName, &attr->qname);
    reader_set_strvalue(reader, StringValue_Value, &attr->value);
}
2964
reader_move_to_first_attribute(xmlreader * reader)2965 static HRESULT reader_move_to_first_attribute(xmlreader *reader)
2966 {
2967 if (!reader->attr_count)
2968 return S_FALSE;
2969
2970 if (!reader->attr)
2971 reader_inc_depth(reader);
2972
2973 reader_set_current_attribute(reader, LIST_ENTRY(list_head(&reader->attrs), struct attribute, entry));
2974
2975 return S_OK;
2976 }
2977
xmlreader_MoveToFirstAttribute(IXmlReader * iface)2978 static HRESULT WINAPI xmlreader_MoveToFirstAttribute(IXmlReader* iface)
2979 {
2980 xmlreader *This = impl_from_IXmlReader(iface);
2981
2982 TRACE("(%p)\n", This);
2983
2984 return reader_move_to_first_attribute(This);
2985 }
2986
xmlreader_MoveToNextAttribute(IXmlReader * iface)2987 static HRESULT WINAPI xmlreader_MoveToNextAttribute(IXmlReader* iface)
2988 {
2989 xmlreader *This = impl_from_IXmlReader(iface);
2990 const struct list *next;
2991
2992 TRACE("(%p)\n", This);
2993
2994 if (!This->attr_count) return S_FALSE;
2995
2996 if (!This->attr)
2997 return reader_move_to_first_attribute(This);
2998
2999 next = list_next(&This->attrs, &This->attr->entry);
3000 if (next)
3001 reader_set_current_attribute(This, LIST_ENTRY(next, struct attribute, entry));
3002
3003 return next ? S_OK : S_FALSE;
3004 }
3005
/* Determines the namespace URI of an attribute. The reserved 'xmlns' and
   'xml' names map to their fixed URIs; any other prefix is looked up among
   the declared namespaces, with an empty string as the result when nothing
   is found. The URI and its length are returned through 'uri' and 'len'. */
static void reader_get_attribute_ns_uri(xmlreader *reader, struct attribute *attr, const WCHAR **uri, UINT *len)
{
    static const WCHAR xmlns_uriW[] = {'h','t','t','p',':','/','/','w','w','w','.','w','3','.','o','r','g','/',
            '2','0','0','0','/','x','m','l','n','s','/',0};
    static const WCHAR xml_uriW[] = {'h','t','t','p',':','/','/','w','w','w','.','w','3','.','o','r','g','/',
            'X','M','L','/','1','9','9','8','/','n','a','m','e','s','p','a','c','e',0};
    struct ns *declared;

    /* reserved names come first: a bare 'xmlns' attribute or an 'xmlns:' prefix */
    if ((strval_eq(reader, &attr->prefix, &strval_empty) && strval_eq(reader, &attr->localname, &strval_xmlns)) ||
            strval_eq(reader, &attr->prefix, &strval_xmlns))
    {
        *uri = xmlns_uriW;
        *len = ARRAY_SIZE(xmlns_uriW) - 1;
        return;
    }

    if (strval_eq(reader, &attr->prefix, &strval_xml))
    {
        *uri = xml_uriW;
        *len = ARRAY_SIZE(xml_uriW) - 1;
        return;
    }

    /* ordinary prefix: use the declaration in scope, if there is one */
    declared = reader_lookup_ns(reader, &attr->prefix);
    if (declared)
    {
        *uri = declared->uri.str;
        *len = declared->uri.len;
        return;
    }

    *uri = emptyW;
    *len = 0;
}
3047
/* Returns, through 'name' and 'len', the local name of an attribute.

   A default namespace declaration ('xmlns') reports "xmlns". A prefixed
   declaration ('xmlns:foo') reports the prefix stored with the namespace
   that was registered for it. Any other attribute reports its own local
   name. */
static void reader_get_attribute_local_name(xmlreader *reader, struct attribute *attr, const WCHAR **name, UINT *len)
{
    if (attr->flags & ATTRIBUTE_DEFAULT_NS_DEFINITION)
    {
        *name = xmlnsW;
        *len = 5;
    }
    else if (attr->flags & ATTRIBUTE_NS_DEFINITION)
    {
        const struct ns *ns = reader_lookup_ns(reader, &attr->localname);

        if (ns)
        {
            *name = ns->prefix.str;
            *len = ns->prefix.len;
        }
        else
        {
            /* The lookup can come back empty (reader_get_attribute_ns_uri()
               checks for that too). The attribute's own local name is the
               declared prefix, so use it instead of dereferencing NULL. */
            *name = attr->localname.str;
            *len = attr->localname.len;
        }
    }
    else
    {
        *name = attr->localname.str;
        *len = attr->localname.len;
    }
}
3067
xmlreader_MoveToAttributeByName(IXmlReader * iface,const WCHAR * local_name,const WCHAR * namespace_uri)3068 static HRESULT WINAPI xmlreader_MoveToAttributeByName(IXmlReader* iface,
3069 const WCHAR *local_name, const WCHAR *namespace_uri)
3070 {
3071 xmlreader *This = impl_from_IXmlReader(iface);
3072 UINT target_name_len, target_uri_len;
3073 struct attribute *attr;
3074
3075 TRACE("(%p)->(%s %s)\n", This, debugstr_w(local_name), debugstr_w(namespace_uri));
3076
3077 if (!local_name)
3078 return E_INVALIDARG;
3079
3080 if (!This->attr_count)
3081 return S_FALSE;
3082
3083 if (!namespace_uri)
3084 namespace_uri = emptyW;
3085
3086 target_name_len = lstrlenW(local_name);
3087 target_uri_len = lstrlenW(namespace_uri);
3088
3089 LIST_FOR_EACH_ENTRY(attr, &This->attrs, struct attribute, entry)
3090 {
3091 UINT name_len, uri_len;
3092 const WCHAR *name, *uri;
3093
3094 reader_get_attribute_local_name(This, attr, &name, &name_len);
3095 reader_get_attribute_ns_uri(This, attr, &uri, &uri_len);
3096
3097 if (name_len == target_name_len && uri_len == target_uri_len &&
3098 !wcscmp(name, local_name) && !wcscmp(uri, namespace_uri))
3099 {
3100 reader_set_current_attribute(This, attr);
3101 return S_OK;
3102 }
3103 }
3104
3105 return S_FALSE;
3106 }
3107
xmlreader_MoveToElement(IXmlReader * iface)3108 static HRESULT WINAPI xmlreader_MoveToElement(IXmlReader* iface)
3109 {
3110 xmlreader *This = impl_from_IXmlReader(iface);
3111
3112 TRACE("(%p)\n", This);
3113
3114 if (!This->attr_count) return S_FALSE;
3115
3116 if (This->attr)
3117 reader_dec_depth(This);
3118
3119 This->attr = NULL;
3120
3121 /* FIXME: support other node types with 'attributes' like DTD */
3122 if (This->is_empty_element) {
3123 reader_set_strvalue(This, StringValue_Prefix, &This->empty_element.prefix);
3124 reader_set_strvalue(This, StringValue_QualifiedName, &This->empty_element.qname);
3125 }
3126 else {
3127 struct element *element = LIST_ENTRY(list_head(&This->elements), struct element, entry);
3128 if (element) {
3129 reader_set_strvalue(This, StringValue_Prefix, &element->prefix);
3130 reader_set_strvalue(This, StringValue_QualifiedName, &element->qname);
3131 }
3132 }
3133 This->chunk_read_off = 0;
3134 reader_set_strvalue(This, StringValue_Value, &strval_empty);
3135
3136 return S_OK;
3137 }
3138
xmlreader_GetQualifiedName(IXmlReader * iface,LPCWSTR * name,UINT * len)3139 static HRESULT WINAPI xmlreader_GetQualifiedName(IXmlReader* iface, LPCWSTR *name, UINT *len)
3140 {
3141 xmlreader *This = impl_from_IXmlReader(iface);
3142 struct attribute *attribute = This->attr;
3143 struct element *element;
3144 UINT length;
3145
3146 TRACE("(%p)->(%p %p)\n", This, name, len);
3147
3148 if (!len)
3149 len = &length;
3150
3151 switch (reader_get_nodetype(This))
3152 {
3153 case XmlNodeType_Text:
3154 case XmlNodeType_CDATA:
3155 case XmlNodeType_Comment:
3156 case XmlNodeType_Whitespace:
3157 *name = emptyW;
3158 *len = 0;
3159 break;
3160 case XmlNodeType_Element:
3161 case XmlNodeType_EndElement:
3162 element = reader_get_element(This);
3163 if (element->prefix.len)
3164 {
3165 *name = element->qname.str;
3166 *len = element->qname.len;
3167 }
3168 else
3169 {
3170 *name = element->localname.str;
3171 *len = element->localname.len;
3172 }
3173 break;
3174 case XmlNodeType_Attribute:
3175 if (attribute->flags & ATTRIBUTE_DEFAULT_NS_DEFINITION)
3176 {
3177 *name = xmlnsW;
3178 *len = 5;
3179 } else if (attribute->prefix.len)
3180 {
3181 *name = This->strvalues[StringValue_QualifiedName].str;
3182 *len = This->strvalues[StringValue_QualifiedName].len;
3183 }
3184 else
3185 {
3186 *name = attribute->localname.str;
3187 *len = attribute->localname.len;
3188 }
3189 break;
3190 default:
3191 *name = This->strvalues[StringValue_QualifiedName].str;
3192 *len = This->strvalues[StringValue_QualifiedName].len;
3193 break;
3194 }
3195
3196 return S_OK;
3197 }
3198
/* Returns the first entry of the reader's default namespace list, or NULL
   when that list is empty. */
static struct ns *reader_lookup_nsdef(xmlreader *reader)
{
    struct list *first = list_head(&reader->nsdef);

    return first ? LIST_ENTRY(first, struct ns, entry) : NULL;
}
3206
xmlreader_GetNamespaceUri(IXmlReader * iface,const WCHAR ** uri,UINT * len)3207 static HRESULT WINAPI xmlreader_GetNamespaceUri(IXmlReader* iface, const WCHAR **uri, UINT *len)
3208 {
3209 xmlreader *This = impl_from_IXmlReader(iface);
3210 const strval *prefix = &This->strvalues[StringValue_Prefix];
3211 XmlNodeType nodetype;
3212 struct ns *ns;
3213 UINT length;
3214
3215 TRACE("(%p %p %p)\n", iface, uri, len);
3216
3217 if (!len)
3218 len = &length;
3219
3220 switch ((nodetype = reader_get_nodetype(This)))
3221 {
3222 case XmlNodeType_Attribute:
3223 reader_get_attribute_ns_uri(This, This->attr, uri, len);
3224 break;
3225 case XmlNodeType_Element:
3226 case XmlNodeType_EndElement:
3227 {
3228 ns = reader_lookup_ns(This, prefix);
3229
3230 /* pick top default ns if any */
3231 if (!ns)
3232 ns = reader_lookup_nsdef(This);
3233
3234 if (ns) {
3235 *uri = ns->uri.str;
3236 *len = ns->uri.len;
3237 }
3238 else {
3239 *uri = emptyW;
3240 *len = 0;
3241 }
3242 }
3243 break;
3244 case XmlNodeType_Text:
3245 case XmlNodeType_CDATA:
3246 case XmlNodeType_ProcessingInstruction:
3247 case XmlNodeType_Comment:
3248 case XmlNodeType_Whitespace:
3249 case XmlNodeType_XmlDeclaration:
3250 *uri = emptyW;
3251 *len = 0;
3252 break;
3253 default:
3254 FIXME("Unhandled node type %d\n", nodetype);
3255 *uri = NULL;
3256 *len = 0;
3257 return E_NOTIMPL;
3258 }
3259
3260 return S_OK;
3261 }
3262
xmlreader_GetLocalName(IXmlReader * iface,LPCWSTR * name,UINT * len)3263 static HRESULT WINAPI xmlreader_GetLocalName(IXmlReader* iface, LPCWSTR *name, UINT *len)
3264 {
3265 xmlreader *This = impl_from_IXmlReader(iface);
3266 struct element *element;
3267 UINT length;
3268
3269 TRACE("(%p)->(%p %p)\n", This, name, len);
3270
3271 if (!len)
3272 len = &length;
3273
3274 switch (reader_get_nodetype(This))
3275 {
3276 case XmlNodeType_Text:
3277 case XmlNodeType_CDATA:
3278 case XmlNodeType_Comment:
3279 case XmlNodeType_Whitespace:
3280 *name = emptyW;
3281 *len = 0;
3282 break;
3283 case XmlNodeType_Element:
3284 case XmlNodeType_EndElement:
3285 element = reader_get_element(This);
3286 *name = element->localname.str;
3287 *len = element->localname.len;
3288 break;
3289 case XmlNodeType_Attribute:
3290 reader_get_attribute_local_name(This, This->attr, name, len);
3291 break;
3292 default:
3293 *name = This->strvalues[StringValue_LocalName].str;
3294 *len = This->strvalues[StringValue_LocalName].len;
3295 break;
3296 }
3297
3298 return S_OK;
3299 }
3300
xmlreader_GetPrefix(IXmlReader * iface,const WCHAR ** ret,UINT * len)3301 static HRESULT WINAPI xmlreader_GetPrefix(IXmlReader* iface, const WCHAR **ret, UINT *len)
3302 {
3303 xmlreader *This = impl_from_IXmlReader(iface);
3304 XmlNodeType nodetype;
3305 UINT length;
3306
3307 TRACE("(%p)->(%p %p)\n", This, ret, len);
3308
3309 if (!len)
3310 len = &length;
3311
3312 *ret = emptyW;
3313 *len = 0;
3314
3315 switch ((nodetype = reader_get_nodetype(This)))
3316 {
3317 case XmlNodeType_Element:
3318 case XmlNodeType_EndElement:
3319 case XmlNodeType_Attribute:
3320 {
3321 const strval *prefix = &This->strvalues[StringValue_Prefix];
3322 struct ns *ns;
3323
3324 if (strval_eq(This, prefix, &strval_xml))
3325 {
3326 *ret = xmlW;
3327 *len = 3;
3328 }
3329 else if (strval_eq(This, prefix, &strval_xmlns))
3330 {
3331 *ret = xmlnsW;
3332 *len = 5;
3333 }
3334 else if ((ns = reader_lookup_ns(This, prefix)))
3335 {
3336 *ret = ns->prefix.str;
3337 *len = ns->prefix.len;
3338 }
3339
3340 break;
3341 }
3342 default:
3343 ;
3344 }
3345
3346 return S_OK;
3347 }
3348
reader_get_value(xmlreader * reader,BOOL ensure_allocated)3349 static const strval *reader_get_value(xmlreader *reader, BOOL ensure_allocated)
3350 {
3351 strval *val;
3352
3353 switch (reader_get_nodetype(reader))
3354 {
3355 case XmlNodeType_XmlDeclaration:
3356 case XmlNodeType_EndElement:
3357 case XmlNodeType_None:
3358 return &strval_empty;
3359 case XmlNodeType_Attribute:
3360 /* For namespace definition attributes return values from namespace list */
3361 if (reader->attr->flags & (ATTRIBUTE_NS_DEFINITION | ATTRIBUTE_DEFAULT_NS_DEFINITION))
3362 {
3363 struct ns *ns;
3364
3365 if (!(ns = reader_lookup_ns(reader, &reader->attr->localname)))
3366 ns = reader_lookup_nsdef(reader);
3367
3368 return &ns->uri;
3369 }
3370 return &reader->attr->value;
3371 default:
3372 break;
3373 }
3374
3375 val = &reader->strvalues[StringValue_Value];
3376 if (!val->str && ensure_allocated)
3377 {
3378 WCHAR *ptr = reader_alloc(reader, (val->len+1)*sizeof(WCHAR));
3379 if (!ptr) return NULL;
3380 memcpy(ptr, reader_get_strptr(reader, val), val->len*sizeof(WCHAR));
3381 ptr[val->len] = 0;
3382 val->str = ptr;
3383 }
3384
3385 return val;
3386 }
3387
xmlreader_GetValue(IXmlReader * iface,const WCHAR ** value,UINT * len)3388 static HRESULT WINAPI xmlreader_GetValue(IXmlReader* iface, const WCHAR **value, UINT *len)
3389 {
3390 xmlreader *reader = impl_from_IXmlReader(iface);
3391 const strval *val = &reader->strvalues[StringValue_Value];
3392 UINT off;
3393
3394 TRACE("(%p)->(%p %p)\n", reader, value, len);
3395
3396 *value = NULL;
3397
3398 if ((reader->nodetype == XmlNodeType_Comment && !val->str && !val->len) || is_reader_pending(reader))
3399 {
3400 XmlNodeType type;
3401 HRESULT hr;
3402
3403 hr = IXmlReader_Read(iface, &type);
3404 if (FAILED(hr)) return hr;
3405
3406 /* return if still pending, partially read values are not reported */
3407 if (is_reader_pending(reader)) return E_PENDING;
3408 }
3409
3410 val = reader_get_value(reader, TRUE);
3411 if (!val)
3412 return E_OUTOFMEMORY;
3413
3414 off = abs(reader->chunk_read_off);
3415 assert(off <= val->len);
3416 *value = val->str + off;
3417 if (len) *len = val->len - off;
3418 reader->chunk_read_off = -off;
3419 return S_OK;
3420 }
3421
xmlreader_ReadValueChunk(IXmlReader * iface,WCHAR * buffer,UINT chunk_size,UINT * read)3422 static HRESULT WINAPI xmlreader_ReadValueChunk(IXmlReader* iface, WCHAR *buffer, UINT chunk_size, UINT *read)
3423 {
3424 xmlreader *reader = impl_from_IXmlReader(iface);
3425 const strval *val;
3426 UINT len = 0;
3427
3428 TRACE("(%p)->(%p %u %p)\n", reader, buffer, chunk_size, read);
3429
3430 val = reader_get_value(reader, FALSE);
3431
3432 /* If value is already read by GetValue, chunk_read_off is negative and chunked reads are not possible. */
3433 if (reader->chunk_read_off >= 0)
3434 {
3435 assert(reader->chunk_read_off <= val->len);
3436 len = min(val->len - reader->chunk_read_off, chunk_size);
3437 }
3438 if (read) *read = len;
3439
3440 if (len)
3441 {
3442 memcpy(buffer, reader_get_strptr(reader, val) + reader->chunk_read_off, len*sizeof(WCHAR));
3443 reader->chunk_read_off += len;
3444 }
3445
3446 return len || !chunk_size ? S_OK : S_FALSE;
3447 }
3448
xmlreader_GetBaseUri(IXmlReader * iface,LPCWSTR * baseUri,UINT * baseUri_length)3449 static HRESULT WINAPI xmlreader_GetBaseUri(IXmlReader* iface,
3450 LPCWSTR *baseUri,
3451 UINT *baseUri_length)
3452 {
3453 FIXME("(%p %p %p): stub\n", iface, baseUri, baseUri_length);
3454 return E_NOTIMPL;
3455 }
3456
xmlreader_IsDefault(IXmlReader * iface)3457 static BOOL WINAPI xmlreader_IsDefault(IXmlReader* iface)
3458 {
3459 FIXME("(%p): stub\n", iface);
3460 return FALSE;
3461 }
3462
xmlreader_IsEmptyElement(IXmlReader * iface)3463 static BOOL WINAPI xmlreader_IsEmptyElement(IXmlReader* iface)
3464 {
3465 xmlreader *This = impl_from_IXmlReader(iface);
3466 TRACE("(%p)\n", This);
3467 /* Empty elements are not placed in stack, it's stored as a global reader flag that makes sense
3468 when current node is start tag of an element */
3469 return (reader_get_nodetype(This) == XmlNodeType_Element) ? This->is_empty_element : FALSE;
3470 }
3471
xmlreader_GetLineNumber(IXmlReader * iface,UINT * line_number)3472 static HRESULT WINAPI xmlreader_GetLineNumber(IXmlReader* iface, UINT *line_number)
3473 {
3474 xmlreader *This = impl_from_IXmlReader(iface);
3475 const struct element *element;
3476
3477 TRACE("(%p %p)\n", This, line_number);
3478
3479 if (!line_number)
3480 return E_INVALIDARG;
3481
3482 switch (reader_get_nodetype(This))
3483 {
3484 case XmlNodeType_Element:
3485 case XmlNodeType_EndElement:
3486 element = reader_get_element(This);
3487 *line_number = element->position.line_number;
3488 break;
3489 case XmlNodeType_Attribute:
3490 *line_number = This->attr->position.line_number;
3491 break;
3492 case XmlNodeType_Whitespace:
3493 case XmlNodeType_XmlDeclaration:
3494 *line_number = This->empty_element.position.line_number;
3495 break;
3496 default:
3497 *line_number = This->position.line_number;
3498 break;
3499 }
3500
3501 return This->state == XmlReadState_Closed ? S_FALSE : S_OK;
3502 }
3503
xmlreader_GetLinePosition(IXmlReader * iface,UINT * line_position)3504 static HRESULT WINAPI xmlreader_GetLinePosition(IXmlReader* iface, UINT *line_position)
3505 {
3506 xmlreader *This = impl_from_IXmlReader(iface);
3507 const struct element *element;
3508
3509 TRACE("(%p %p)\n", This, line_position);
3510
3511 if (!line_position)
3512 return E_INVALIDARG;
3513
3514 switch (reader_get_nodetype(This))
3515 {
3516 case XmlNodeType_Element:
3517 case XmlNodeType_EndElement:
3518 element = reader_get_element(This);
3519 *line_position = element->position.line_position;
3520 break;
3521 case XmlNodeType_Attribute:
3522 *line_position = This->attr->position.line_position;
3523 break;
3524 case XmlNodeType_Whitespace:
3525 case XmlNodeType_XmlDeclaration:
3526 *line_position = This->empty_element.position.line_position;
3527 break;
3528 default:
3529 *line_position = This->position.line_position;
3530 break;
3531 }
3532
3533 return This->state == XmlReadState_Closed ? S_FALSE : S_OK;
3534 }
3535
xmlreader_GetAttributeCount(IXmlReader * iface,UINT * count)3536 static HRESULT WINAPI xmlreader_GetAttributeCount(IXmlReader* iface, UINT *count)
3537 {
3538 xmlreader *This = impl_from_IXmlReader(iface);
3539
3540 TRACE("(%p)->(%p)\n", This, count);
3541
3542 if (!count) return E_INVALIDARG;
3543
3544 *count = This->attr_count;
3545 return S_OK;
3546 }
3547
xmlreader_GetDepth(IXmlReader * iface,UINT * depth)3548 static HRESULT WINAPI xmlreader_GetDepth(IXmlReader* iface, UINT *depth)
3549 {
3550 xmlreader *This = impl_from_IXmlReader(iface);
3551 TRACE("(%p)->(%p)\n", This, depth);
3552 *depth = This->depth;
3553 return S_OK;
3554 }
3555
xmlreader_IsEOF(IXmlReader * iface)3556 static BOOL WINAPI xmlreader_IsEOF(IXmlReader* iface)
3557 {
3558 xmlreader *This = impl_from_IXmlReader(iface);
3559 TRACE("(%p)\n", iface);
3560 return This->state == XmlReadState_EndOfFile;
3561 }
3562
3563 static const struct IXmlReaderVtbl xmlreader_vtbl =
3564 {
3565 xmlreader_QueryInterface,
3566 xmlreader_AddRef,
3567 xmlreader_Release,
3568 xmlreader_SetInput,
3569 xmlreader_GetProperty,
3570 xmlreader_SetProperty,
3571 xmlreader_Read,
3572 xmlreader_GetNodeType,
3573 xmlreader_MoveToFirstAttribute,
3574 xmlreader_MoveToNextAttribute,
3575 xmlreader_MoveToAttributeByName,
3576 xmlreader_MoveToElement,
3577 xmlreader_GetQualifiedName,
3578 xmlreader_GetNamespaceUri,
3579 xmlreader_GetLocalName,
3580 xmlreader_GetPrefix,
3581 xmlreader_GetValue,
3582 xmlreader_ReadValueChunk,
3583 xmlreader_GetBaseUri,
3584 xmlreader_IsDefault,
3585 xmlreader_IsEmptyElement,
3586 xmlreader_GetLineNumber,
3587 xmlreader_GetLinePosition,
3588 xmlreader_GetAttributeCount,
3589 xmlreader_GetDepth,
3590 xmlreader_IsEOF
3591 };
3592
3593 /** IXmlReaderInput **/
xmlreaderinput_QueryInterface(IXmlReaderInput * iface,REFIID riid,void ** ppvObject)3594 static HRESULT WINAPI xmlreaderinput_QueryInterface(IXmlReaderInput *iface, REFIID riid, void** ppvObject)
3595 {
3596 xmlreaderinput *This = impl_from_IXmlReaderInput(iface);
3597
3598 TRACE("(%p)->(%s %p)\n", This, debugstr_guid(riid), ppvObject);
3599
3600 if (IsEqualGUID(riid, &IID_IXmlReaderInput) ||
3601 IsEqualGUID(riid, &IID_IUnknown))
3602 {
3603 *ppvObject = iface;
3604 }
3605 else
3606 {
3607 WARN("interface %s not implemented\n", debugstr_guid(riid));
3608 *ppvObject = NULL;
3609 return E_NOINTERFACE;
3610 }
3611
3612 IUnknown_AddRef(iface);
3613
3614 return S_OK;
3615 }
3616
xmlreaderinput_AddRef(IXmlReaderInput * iface)3617 static ULONG WINAPI xmlreaderinput_AddRef(IXmlReaderInput *iface)
3618 {
3619 xmlreaderinput *This = impl_from_IXmlReaderInput(iface);
3620 ULONG ref = InterlockedIncrement(&This->ref);
3621 TRACE("(%p)->(%d)\n", This, ref);
3622 return ref;
3623 }
3624
xmlreaderinput_Release(IXmlReaderInput * iface)3625 static ULONG WINAPI xmlreaderinput_Release(IXmlReaderInput *iface)
3626 {
3627 xmlreaderinput *This = impl_from_IXmlReaderInput(iface);
3628 LONG ref = InterlockedDecrement(&This->ref);
3629
3630 TRACE("(%p)->(%d)\n", This, ref);
3631
3632 if (ref == 0)
3633 {
3634 IMalloc *imalloc = This->imalloc;
3635 if (This->input) IUnknown_Release(This->input);
3636 if (This->stream) ISequentialStream_Release(This->stream);
3637 if (This->buffer) free_input_buffer(This->buffer);
3638 readerinput_free(This, This->baseuri);
3639 readerinput_free(This, This);
3640 if (imalloc) IMalloc_Release(imalloc);
3641 }
3642
3643 return ref;
3644 }
3645
3646 static const struct IUnknownVtbl xmlreaderinputvtbl =
3647 {
3648 xmlreaderinput_QueryInterface,
3649 xmlreaderinput_AddRef,
3650 xmlreaderinput_Release
3651 };
3652
CreateXmlReader(REFIID riid,void ** obj,IMalloc * imalloc)3653 HRESULT WINAPI CreateXmlReader(REFIID riid, void **obj, IMalloc *imalloc)
3654 {
3655 xmlreader *reader;
3656 HRESULT hr;
3657 int i;
3658
3659 TRACE("(%s, %p, %p)\n", wine_dbgstr_guid(riid), obj, imalloc);
3660
3661 if (imalloc)
3662 reader = IMalloc_Alloc(imalloc, sizeof(*reader));
3663 else
3664 reader = heap_alloc(sizeof(*reader));
3665 if (!reader)
3666 return E_OUTOFMEMORY;
3667
3668 memset(reader, 0, sizeof(*reader));
3669 reader->IXmlReader_iface.lpVtbl = &xmlreader_vtbl;
3670 reader->ref = 1;
3671 reader->state = XmlReadState_Closed;
3672 reader->instate = XmlReadInState_Initial;
3673 reader->resumestate = XmlReadResumeState_Initial;
3674 reader->dtdmode = DtdProcessing_Prohibit;
3675 reader->imalloc = imalloc;
3676 if (imalloc) IMalloc_AddRef(imalloc);
3677 reader->nodetype = XmlNodeType_None;
3678 list_init(&reader->attrs);
3679 list_init(&reader->nsdef);
3680 list_init(&reader->ns);
3681 list_init(&reader->elements);
3682 reader->max_depth = 256;
3683
3684 reader->chunk_read_off = 0;
3685 for (i = 0; i < StringValue_Last; i++)
3686 reader->strvalues[i] = strval_empty;
3687
3688 hr = IXmlReader_QueryInterface(&reader->IXmlReader_iface, riid, obj);
3689 IXmlReader_Release(&reader->IXmlReader_iface);
3690
3691 TRACE("returning iface %p, hr %#x\n", *obj, hr);
3692
3693 return hr;
3694 }
3695
CreateXmlReaderInputWithEncodingName(IUnknown * stream,IMalloc * imalloc,LPCWSTR encoding,BOOL hint,LPCWSTR base_uri,IXmlReaderInput ** ppInput)3696 HRESULT WINAPI CreateXmlReaderInputWithEncodingName(IUnknown *stream,
3697 IMalloc *imalloc,
3698 LPCWSTR encoding,
3699 BOOL hint,
3700 LPCWSTR base_uri,
3701 IXmlReaderInput **ppInput)
3702 {
3703 xmlreaderinput *readerinput;
3704 HRESULT hr;
3705
3706 TRACE("%p %p %s %d %s %p\n", stream, imalloc, wine_dbgstr_w(encoding),
3707 hint, wine_dbgstr_w(base_uri), ppInput);
3708
3709 if (!stream || !ppInput) return E_INVALIDARG;
3710
3711 if (imalloc)
3712 readerinput = IMalloc_Alloc(imalloc, sizeof(*readerinput));
3713 else
3714 readerinput = heap_alloc(sizeof(*readerinput));
3715 if(!readerinput) return E_OUTOFMEMORY;
3716
3717 readerinput->IXmlReaderInput_iface.lpVtbl = &xmlreaderinputvtbl;
3718 readerinput->ref = 1;
3719 readerinput->imalloc = imalloc;
3720 readerinput->stream = NULL;
3721 if (imalloc) IMalloc_AddRef(imalloc);
3722 readerinput->encoding = parse_encoding_name(encoding, -1);
3723 readerinput->hint = hint;
3724 readerinput->baseuri = readerinput_strdupW(readerinput, base_uri);
3725 readerinput->pending = 0;
3726
3727 hr = alloc_input_buffer(readerinput);
3728 if (hr != S_OK)
3729 {
3730 readerinput_free(readerinput, readerinput->baseuri);
3731 readerinput_free(readerinput, readerinput);
3732 if (imalloc) IMalloc_Release(imalloc);
3733 return hr;
3734 }
3735 IUnknown_QueryInterface(stream, &IID_IUnknown, (void**)&readerinput->input);
3736
3737 *ppInput = &readerinput->IXmlReaderInput_iface;
3738
3739 TRACE("returning iface %p\n", *ppInput);
3740
3741 return S_OK;
3742 }
3743