xref: /reactos/dll/win32/xmllite/reader.c (revision cdf90707)
1 /*
2  * IXmlReader implementation
3  *
4  * Copyright 2010, 2012-2013, 2016-2017 Nikolay Sivov
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
19  */
20 
21 #define COBJMACROS
22 
23 #include <stdio.h>
24 #include <stdarg.h>
25 #include <assert.h>
26 #include "windef.h"
27 #include "winbase.h"
28 #include "initguid.h"
29 #include "objbase.h"
30 #include "xmllite.h"
31 #include "xmllite_private.h"
32 #ifdef __REACTOS__
33 #include <winnls.h>
34 #endif
35 
36 #include "wine/debug.h"
37 #include "wine/list.h"
38 
39 WINE_DEFAULT_DEBUG_CHANNEL(xmllite);
40 
41 /* not defined in public headers */
42 DEFINE_GUID(IID_IXmlReaderInput, 0x0b3ccc9b, 0x9214, 0x428b, 0xa2, 0xae, 0xef, 0x3a, 0xa8, 0x71, 0xaf, 0xda);
43 
/* Internal parser states.
   NOTE(review): the names suggest each one tracks the document section being
   parsed - confirm against the state machine that consumes 'instate'. */
typedef enum
{
    XmlReadInState_Initial = 0,
    XmlReadInState_XmlDecl,
    XmlReadInState_Misc_DTD,
    XmlReadInState_DTD,
    XmlReadInState_DTD_Misc,
    XmlReadInState_Element,
    XmlReadInState_Content,
    /* optional Misc at the end of a document */
    XmlReadInState_MiscEnd,
    XmlReadInState_Eof
} XmlReaderInternalState;
56 
/* Records the construct that was being parsed when an input problem
   interrupted parsing; the reader uses it to resume at the right place. */
typedef enum
{
    XmlReadResumeState_Initial = 0,
    XmlReadResumeState_PITarget,
    XmlReadResumeState_PIBody,
    XmlReadResumeState_CDATA,
    XmlReadResumeState_Comment,
    XmlReadResumeState_STag,
    XmlReadResumeState_CharData,
    XmlReadResumeState_Whitespace
} XmlReaderResumeState;
70 
/* Index of a saved input offset, used to resume from a particular input
   position. XmlReadResume_Last is the number of slots. */
typedef enum
{
    /* PITarget, name for NCName, prefix for QName */
    XmlReadResume_Name = 0,
    /* local for QName */
    XmlReadResume_Local,
    /* PI body, comment text, CDATA text, CharData text */
    XmlReadResume_Body,
    XmlReadResume_Last
} XmlReaderResume;
79 
/* Index into the per-node array of string values kept by the reader.
   StringValue_Last is the number of slots. */
typedef enum
{
    StringValue_LocalName = 0,
    StringValue_Prefix,
    StringValue_QualifiedName,
    StringValue_Value,
    StringValue_Last
} XmlReaderStringValue;
88 
89 static const WCHAR usasciiW[] = {'U','S','-','A','S','C','I','I',0};
90 static const WCHAR utf16W[] = {'U','T','F','-','1','6',0};
91 static const WCHAR utf8W[] = {'U','T','F','-','8',0};
92 
93 static const WCHAR dblquoteW[] = {'\"',0};
94 static const WCHAR quoteW[] = {'\'',0};
95 static const WCHAR ltW[] = {'<',0};
96 static const WCHAR gtW[] = {'>',0};
97 static const WCHAR commentW[] = {'<','!','-','-',0};
98 static const WCHAR piW[] = {'<','?',0};
99 
100 BOOL is_namestartchar(WCHAR ch);
101 
102 static const char *debugstr_nodetype(XmlNodeType nodetype)
103 {
104     static const char * const type_names[] =
105     {
106         "None",
107         "Element",
108         "Attribute",
109         "Text",
110         "CDATA",
111         "",
112         "",
113         "ProcessingInstruction",
114         "Comment",
115         "",
116         "DocumentType",
117         "",
118         "",
119         "Whitespace",
120         "",
121         "EndElement",
122         "",
123         "XmlDeclaration"
124     };
125 
126     if (nodetype > _XmlNodeType_Last)
127         return wine_dbg_sprintf("unknown type=%d", nodetype);
128 
129     return type_names[nodetype];
130 }
131 
132 static const char *debugstr_reader_prop(XmlReaderProperty prop)
133 {
134     static const char * const prop_names[] =
135     {
136         "MultiLanguage",
137         "ConformanceLevel",
138         "RandomAccess",
139         "XmlResolver",
140         "DtdProcessing",
141         "ReadState",
142         "MaxElementDepth",
143         "MaxEntityExpansion"
144     };
145 
146     if (prop > _XmlReaderProperty_Last)
147         return wine_dbg_sprintf("unknown property=%d", prop);
148 
149     return prop_names[prop];
150 }
151 
152 struct xml_encoding_data
153 {
154     const WCHAR *name;
155     xml_encoding enc;
156     UINT cp;
157 };
158 
159 static const struct xml_encoding_data xml_encoding_map[] = {
160     { usasciiW, XmlEncoding_USASCII, 20127 },
161     { utf16W, XmlEncoding_UTF16, 1200 },
162     { utf8W,  XmlEncoding_UTF8,  CP_UTF8 },
163 };
164 
165 const WCHAR *get_encoding_name(xml_encoding encoding)
166 {
167     return xml_encoding_map[encoding].name;
168 }
169 
170 xml_encoding get_encoding_from_codepage(UINT codepage)
171 {
172     int i;
173     for (i = 0; i < ARRAY_SIZE(xml_encoding_map); i++)
174     {
175         if (xml_encoding_map[i].cp == codepage) return xml_encoding_map[i].enc;
176     }
177     return XmlEncoding_Unknown;
178 }
179 
180 typedef struct
181 {
182     char *data;
183     UINT  cur;
184     unsigned int allocated;
185     unsigned int written;
186     BOOL prev_cr;
187 } encoded_buffer;
188 
189 typedef struct input_buffer input_buffer;
190 
191 typedef struct
192 {
193     IXmlReaderInput IXmlReaderInput_iface;
194     LONG ref;
195     /* reference passed on IXmlReaderInput creation, is kept when input is created */
196     IUnknown *input;
197     IMalloc *imalloc;
198     xml_encoding encoding;
199     BOOL hint;
200     WCHAR *baseuri;
201     /* stream reference set after SetInput() call from reader,
202        stored as sequential stream, cause currently
203        optimizations possible with IStream aren't implemented */
204     ISequentialStream *stream;
205     input_buffer *buffer;
206     unsigned int pending : 1;
207 } xmlreaderinput;
208 
209 static const struct IUnknownVtbl xmlreaderinputvtbl;
210 
211 /* Structure to hold parsed string of specific length.
212 
213    Reader stores node value as 'start' pointer, on request
214    a null-terminated version of it is allocated.
215 
216    To init a strval variable use reader_init_strval(),
217    to set strval as a reader value use reader_set_strval().
218  */
219 typedef struct
220 {
221     WCHAR *str;   /* allocated null-terminated string */
222     UINT   len;   /* length in WCHARs, altered after ReadValueChunk */
223     UINT   start; /* input position where value starts */
224 } strval;
225 
226 static WCHAR emptyW[] = {0};
227 static WCHAR xmlW[] = {'x','m','l',0};
228 static WCHAR xmlnsW[] = {'x','m','l','n','s',0};
229 static const strval strval_empty = { emptyW };
230 static const strval strval_xml = { xmlW, 3 };
231 static const strval strval_xmlns = { xmlnsW, 5 };
232 
233 struct reader_position
234 {
235     UINT line_number;
236     UINT line_position;
237 };
238 
/* Bit flags stored in struct attribute 'flags'. */
enum attribute_flags
{
    ATTRIBUTE_NS_DEFINITION         = 0x1,
    ATTRIBUTE_DEFAULT_NS_DEFINITION = 0x2,
};
244 
245 struct attribute
246 {
247     struct list entry;
248     strval prefix;
249     strval localname;
250     strval qname;
251     strval value;
252     struct reader_position position;
253     unsigned int flags;
254 };
255 
256 struct element
257 {
258     struct list entry;
259     strval prefix;
260     strval localname;
261     strval qname;
262     struct reader_position position;
263 };
264 
265 struct ns
266 {
267     struct list entry;
268     strval prefix;
269     strval uri;
270     struct element *element;
271 };
272 
273 typedef struct
274 {
275     IXmlReader IXmlReader_iface;
276     LONG ref;
277     xmlreaderinput *input;
278     IMalloc *imalloc;
279     XmlReadState state;
280     HRESULT error; /* error set on XmlReadState_Error */
281     XmlReaderInternalState instate;
282     XmlReaderResumeState resumestate;
283     XmlNodeType nodetype;
284     DtdProcessing dtdmode;
285     IXmlResolver *resolver;
286     IUnknown *mlang;
287     struct reader_position position;
288     struct list attrs; /* attributes list for current node */
289     struct attribute *attr; /* current attribute */
290     UINT attr_count;
291     struct list nsdef;
292     struct list ns;
293     struct list elements;
294     int chunk_read_off;
295     strval strvalues[StringValue_Last];
296     UINT depth;
297     UINT max_depth;
298     BOOL is_empty_element;
299     struct element empty_element; /* used for empty elements without end tag <a />,
300                                      and to keep <?xml reader position */
301     UINT resume[XmlReadResume_Last]; /* offsets used to resume reader */
302 } xmlreader;
303 
304 struct input_buffer
305 {
306     encoded_buffer utf16;
307     encoded_buffer encoded;
308     UINT code_page;
309     xmlreaderinput *input;
310 };
311 
312 static inline xmlreader *impl_from_IXmlReader(IXmlReader *iface)
313 {
314     return CONTAINING_RECORD(iface, xmlreader, IXmlReader_iface);
315 }
316 
317 static inline xmlreaderinput *impl_from_IXmlReaderInput(IXmlReaderInput *iface)
318 {
319     return CONTAINING_RECORD(iface, xmlreaderinput, IXmlReaderInput_iface);
320 }
321 
322 /* reader memory allocation functions */
323 static inline void *reader_alloc(xmlreader *reader, size_t len)
324 {
325     return m_alloc(reader->imalloc, len);
326 }
327 
328 static inline void *reader_alloc_zero(xmlreader *reader, size_t len)
329 {
330     void *ret = reader_alloc(reader, len);
331     if (ret)
332         memset(ret, 0, len);
333     return ret;
334 }
335 
336 static inline void reader_free(xmlreader *reader, void *mem)
337 {
338     m_free(reader->imalloc, mem);
339 }
340 
341 /* Just return pointer from offset, no attempt to read more. */
342 static inline WCHAR *reader_get_ptr2(const xmlreader *reader, UINT offset)
343 {
344     encoded_buffer *buffer = &reader->input->buffer->utf16;
345     return (WCHAR*)buffer->data + offset;
346 }
347 
348 static inline WCHAR *reader_get_strptr(const xmlreader *reader, const strval *v)
349 {
350     return v->str ? v->str : reader_get_ptr2(reader, v->start);
351 }
352 
353 static HRESULT reader_strvaldup(xmlreader *reader, const strval *src, strval *dest)
354 {
355     *dest = *src;
356 
357     if (src->str != strval_empty.str)
358     {
359         dest->str = reader_alloc(reader, (dest->len+1)*sizeof(WCHAR));
360         if (!dest->str) return E_OUTOFMEMORY;
361         memcpy(dest->str, reader_get_strptr(reader, src), dest->len*sizeof(WCHAR));
362         dest->str[dest->len] = 0;
363         dest->start = 0;
364     }
365 
366     return S_OK;
367 }
368 
369 /* reader input memory allocation functions */
370 static inline void *readerinput_alloc(xmlreaderinput *input, size_t len)
371 {
372     return m_alloc(input->imalloc, len);
373 }
374 
375 static inline void *readerinput_realloc(xmlreaderinput *input, void *mem, size_t len)
376 {
377     return m_realloc(input->imalloc, mem, len);
378 }
379 
380 static inline void readerinput_free(xmlreaderinput *input, void *mem)
381 {
382     m_free(input->imalloc, mem);
383 }
384 
385 static inline WCHAR *readerinput_strdupW(xmlreaderinput *input, const WCHAR *str)
386 {
387     LPWSTR ret = NULL;
388 
389     if(str) {
390         DWORD size;
391 
392         size = (lstrlenW(str)+1)*sizeof(WCHAR);
393         ret = readerinput_alloc(input, size);
394         if (ret) memcpy(ret, str, size);
395     }
396 
397     return ret;
398 }
399 
400 /* This one frees stored string value if needed */
401 static void reader_free_strvalued(xmlreader *reader, strval *v)
402 {
403     if (v->str != strval_empty.str)
404     {
405         reader_free(reader, v->str);
406         *v = strval_empty;
407     }
408 }
409 
410 static void reader_clear_attrs(xmlreader *reader)
411 {
412     struct attribute *attr, *attr2;
413     LIST_FOR_EACH_ENTRY_SAFE(attr, attr2, &reader->attrs, struct attribute, entry)
414     {
415         reader_free_strvalued(reader, &attr->localname);
416         reader_free_strvalued(reader, &attr->value);
417         reader_free(reader, attr);
418     }
419     list_init(&reader->attrs);
420     reader->attr_count = 0;
421     reader->attr = NULL;
422 }
423 
424 /* attribute data holds pointers to buffer data, so buffer shrink is not possible
425    while we are on a node with attributes */
426 static HRESULT reader_add_attr(xmlreader *reader, strval *prefix, strval *localname, strval *qname,
427     strval *value, const struct reader_position *position, unsigned int flags)
428 {
429     struct attribute *attr;
430     HRESULT hr;
431 
432     attr = reader_alloc(reader, sizeof(*attr));
433     if (!attr) return E_OUTOFMEMORY;
434 
435     hr = reader_strvaldup(reader, localname, &attr->localname);
436     if (hr == S_OK)
437     {
438         hr = reader_strvaldup(reader, value, &attr->value);
439         if (hr != S_OK)
440             reader_free_strvalued(reader, &attr->value);
441     }
442     if (hr != S_OK)
443     {
444         reader_free(reader, attr);
445         return hr;
446     }
447 
448     if (prefix)
449         attr->prefix = *prefix;
450     else
451         memset(&attr->prefix, 0, sizeof(attr->prefix));
452     attr->qname = qname ? *qname : *localname;
453     attr->position = *position;
454     attr->flags = flags;
455     list_add_tail(&reader->attrs, &attr->entry);
456     reader->attr_count++;
457 
458     return S_OK;
459 }
460 
461 /* Returns current element, doesn't check if reader is actually positioned on it. */
462 static struct element *reader_get_element(xmlreader *reader)
463 {
464     if (reader->is_empty_element)
465         return &reader->empty_element;
466 
467     return LIST_ENTRY(list_head(&reader->elements), struct element, entry);
468 }
469 
470 static inline void reader_init_strvalue(UINT start, UINT len, strval *v)
471 {
472     v->start = start;
473     v->len = len;
474     v->str = NULL;
475 }
476 
477 static inline const char* debug_strval(const xmlreader *reader, const strval *v)
478 {
479     return debugstr_wn(reader_get_strptr(reader, v), v->len);
480 }
481 
482 /* used to initialize from constant string */
483 static inline void reader_init_cstrvalue(WCHAR *str, UINT len, strval *v)
484 {
485     v->start = 0;
486     v->len = len;
487     v->str = str;
488 }
489 
490 static void reader_free_strvalue(xmlreader *reader, XmlReaderStringValue type)
491 {
492     reader_free_strvalued(reader, &reader->strvalues[type]);
493 }
494 
495 static void reader_free_strvalues(xmlreader *reader)
496 {
497     int type;
498     for (type = 0; type < StringValue_Last; type++)
499         reader_free_strvalue(reader, type);
500 }
501 
502 /* This helper should only be used to test if strings are the same,
503    it doesn't try to sort. */
504 static inline int strval_eq(const xmlreader *reader, const strval *str1, const strval *str2)
505 {
506     if (str1->len != str2->len) return 0;
507     return !memcmp(reader_get_strptr(reader, str1), reader_get_strptr(reader, str2), str1->len*sizeof(WCHAR));
508 }
509 
510 static void reader_clear_elements(xmlreader *reader)
511 {
512     struct element *elem, *elem2;
513     LIST_FOR_EACH_ENTRY_SAFE(elem, elem2, &reader->elements, struct element, entry)
514     {
515         reader_free_strvalued(reader, &elem->prefix);
516         reader_free_strvalued(reader, &elem->localname);
517         reader_free_strvalued(reader, &elem->qname);
518         reader_free(reader, elem);
519     }
520     list_init(&reader->elements);
521     reader_free_strvalued(reader, &reader->empty_element.localname);
522     reader_free_strvalued(reader, &reader->empty_element.qname);
523     reader->is_empty_element = FALSE;
524 }
525 
526 static struct ns *reader_lookup_ns(xmlreader *reader, const strval *prefix)
527 {
528     struct list *nslist = prefix ? &reader->ns : &reader->nsdef;
529     struct ns *ns;
530 
531     LIST_FOR_EACH_ENTRY_REV(ns, nslist, struct ns, entry) {
532         if (strval_eq(reader, prefix, &ns->prefix))
533             return ns;
534     }
535 
536     return NULL;
537 }
538 
539 static HRESULT reader_inc_depth(xmlreader *reader)
540 {
541     return (++reader->depth >= reader->max_depth && reader->max_depth) ? SC_E_MAXELEMENTDEPTH : S_OK;
542 }
543 
544 static void reader_dec_depth(xmlreader *reader)
545 {
546     if (reader->depth)
547         reader->depth--;
548 }
549 
550 static HRESULT reader_push_ns(xmlreader *reader, const strval *prefix, const strval *uri, BOOL def)
551 {
552     struct ns *ns;
553     HRESULT hr;
554 
555     ns = reader_alloc(reader, sizeof(*ns));
556     if (!ns) return E_OUTOFMEMORY;
557 
558     if (def)
559         memset(&ns->prefix, 0, sizeof(ns->prefix));
560     else {
561         hr = reader_strvaldup(reader, prefix, &ns->prefix);
562         if (FAILED(hr)) {
563             reader_free(reader, ns);
564             return hr;
565         }
566     }
567 
568     hr = reader_strvaldup(reader, uri, &ns->uri);
569     if (FAILED(hr)) {
570         reader_free_strvalued(reader, &ns->prefix);
571         reader_free(reader, ns);
572         return hr;
573     }
574 
575     ns->element = NULL;
576     list_add_head(def ? &reader->nsdef : &reader->ns, &ns->entry);
577     return hr;
578 }
579 
580 static void reader_free_element(xmlreader *reader, struct element *element)
581 {
582     reader_free_strvalued(reader, &element->prefix);
583     reader_free_strvalued(reader, &element->localname);
584     reader_free_strvalued(reader, &element->qname);
585     reader_free(reader, element);
586 }
587 
588 static void reader_mark_ns_nodes(xmlreader *reader, struct element *element)
589 {
590     struct ns *ns;
591 
592     LIST_FOR_EACH_ENTRY(ns, &reader->ns, struct ns, entry) {
593         if (ns->element)
594             break;
595         ns->element = element;
596     }
597 
598     LIST_FOR_EACH_ENTRY(ns, &reader->nsdef, struct ns, entry) {
599         if (ns->element)
600             break;
601         ns->element = element;
602     }
603 }
604 
605 static HRESULT reader_push_element(xmlreader *reader, strval *prefix, strval *localname,
606     strval *qname, const struct reader_position *position)
607 {
608     struct element *element;
609     HRESULT hr;
610 
611     element = reader_alloc_zero(reader, sizeof(*element));
612     if (!element)
613         return E_OUTOFMEMORY;
614 
615     if ((hr = reader_strvaldup(reader, prefix, &element->prefix)) == S_OK &&
616             (hr = reader_strvaldup(reader, localname, &element->localname)) == S_OK &&
617             (hr = reader_strvaldup(reader, qname, &element->qname)) == S_OK)
618     {
619         list_add_head(&reader->elements, &element->entry);
620         reader_mark_ns_nodes(reader, element);
621         reader->is_empty_element = FALSE;
622         element->position = *position;
623     }
624     else
625         reader_free_element(reader, element);
626 
627     return hr;
628 }
629 
630 static void reader_pop_ns_nodes(xmlreader *reader, struct element *element)
631 {
632     struct ns *ns, *ns2;
633 
634     LIST_FOR_EACH_ENTRY_SAFE_REV(ns, ns2, &reader->ns, struct ns, entry) {
635         if (ns->element != element)
636             break;
637 
638         list_remove(&ns->entry);
639         reader_free_strvalued(reader, &ns->prefix);
640         reader_free_strvalued(reader, &ns->uri);
641         reader_free(reader, ns);
642     }
643 
644     if (!list_empty(&reader->nsdef)) {
645         ns = LIST_ENTRY(list_head(&reader->nsdef), struct ns, entry);
646         if (ns->element == element) {
647             list_remove(&ns->entry);
648             reader_free_strvalued(reader, &ns->prefix);
649             reader_free_strvalued(reader, &ns->uri);
650             reader_free(reader, ns);
651         }
652     }
653 }
654 
655 static void reader_pop_element(xmlreader *reader)
656 {
657     struct element *element;
658 
659     if (list_empty(&reader->elements))
660         return;
661 
662     element = LIST_ENTRY(list_head(&reader->elements), struct element, entry);
663     list_remove(&element->entry);
664 
665     reader_pop_ns_nodes(reader, element);
666     reader_free_element(reader, element);
667 
668     /* It was a root element, the rest is expected as Misc */
669     if (list_empty(&reader->elements))
670         reader->instate = XmlReadInState_MiscEnd;
671 }
672 
673 /* Always make a copy, cause strings are supposed to be null terminated. Null pointer for 'value'
674    means node value is to be determined. */
675 static void reader_set_strvalue(xmlreader *reader, XmlReaderStringValue type, const strval *value)
676 {
677     strval *v = &reader->strvalues[type];
678 
679     reader_free_strvalue(reader, type);
680     if (!value)
681     {
682         v->str = NULL;
683         v->start = 0;
684         v->len = 0;
685         return;
686     }
687 
688     if (value->str == strval_empty.str)
689         *v = *value;
690     else
691     {
692         if (type == StringValue_Value)
693         {
694             /* defer allocation for value string */
695             v->str = NULL;
696             v->start = value->start;
697             v->len = value->len;
698         }
699         else
700         {
701             v->str = reader_alloc(reader, (value->len + 1)*sizeof(WCHAR));
702             memcpy(v->str, reader_get_strptr(reader, value), value->len*sizeof(WCHAR));
703             v->str[value->len] = 0;
704             v->len = value->len;
705         }
706     }
707 }
708 
709 static inline int is_reader_pending(xmlreader *reader)
710 {
711     return reader->input->pending;
712 }
713 
714 static HRESULT init_encoded_buffer(xmlreaderinput *input, encoded_buffer *buffer)
715 {
716     const int initial_len = 0x2000;
717     buffer->data = readerinput_alloc(input, initial_len);
718     if (!buffer->data) return E_OUTOFMEMORY;
719 
720     memset(buffer->data, 0, 4);
721     buffer->cur = 0;
722     buffer->allocated = initial_len;
723     buffer->written = 0;
724     buffer->prev_cr = FALSE;
725 
726     return S_OK;
727 }
728 
729 static void free_encoded_buffer(xmlreaderinput *input, encoded_buffer *buffer)
730 {
731     readerinput_free(input, buffer->data);
732 }
733 
734 HRESULT get_code_page(xml_encoding encoding, UINT *cp)
735 {
736     if (encoding == XmlEncoding_Unknown)
737     {
738         FIXME("unsupported encoding %d\n", encoding);
739         return E_NOTIMPL;
740     }
741 
742     *cp = xml_encoding_map[encoding].cp;
743 
744     return S_OK;
745 }
746 
747 xml_encoding parse_encoding_name(const WCHAR *name, int len)
748 {
749     int min, max, n, c;
750 
751     if (!name) return XmlEncoding_Unknown;
752 
753     min = 0;
754     max = ARRAY_SIZE(xml_encoding_map) - 1;
755 
756     while (min <= max)
757     {
758         n = (min+max)/2;
759 
760         if (len != -1)
761             c = _wcsnicmp(xml_encoding_map[n].name, name, len);
762         else
763             c = wcsicmp(xml_encoding_map[n].name, name);
764         if (!c)
765             return xml_encoding_map[n].enc;
766 
767         if (c > 0)
768             max = n-1;
769         else
770             min = n+1;
771     }
772 
773     return XmlEncoding_Unknown;
774 }
775 
776 static HRESULT alloc_input_buffer(xmlreaderinput *input)
777 {
778     input_buffer *buffer;
779     HRESULT hr;
780 
781     input->buffer = NULL;
782 
783     buffer = readerinput_alloc(input, sizeof(*buffer));
784     if (!buffer) return E_OUTOFMEMORY;
785 
786     buffer->input = input;
787     buffer->code_page = ~0; /* code page is unknown at this point */
788     hr = init_encoded_buffer(input, &buffer->utf16);
789     if (hr != S_OK) {
790         readerinput_free(input, buffer);
791         return hr;
792     }
793 
794     hr = init_encoded_buffer(input, &buffer->encoded);
795     if (hr != S_OK) {
796         free_encoded_buffer(input, &buffer->utf16);
797         readerinput_free(input, buffer);
798         return hr;
799     }
800 
801     input->buffer = buffer;
802     return S_OK;
803 }
804 
805 static void free_input_buffer(input_buffer *buffer)
806 {
807     free_encoded_buffer(buffer->input, &buffer->encoded);
808     free_encoded_buffer(buffer->input, &buffer->utf16);
809     readerinput_free(buffer->input, buffer);
810 }
811 
812 static void readerinput_release_stream(xmlreaderinput *readerinput)
813 {
814     if (readerinput->stream) {
815         ISequentialStream_Release(readerinput->stream);
816         readerinput->stream = NULL;
817     }
818 }
819 
820 /* Queries already stored interface for IStream/ISequentialStream.
821    Interface supplied on creation will be overwritten */
822 static inline HRESULT readerinput_query_for_stream(xmlreaderinput *readerinput)
823 {
824     HRESULT hr;
825 
826     readerinput_release_stream(readerinput);
827     hr = IUnknown_QueryInterface(readerinput->input, &IID_IStream, (void**)&readerinput->stream);
828     if (hr != S_OK)
829         hr = IUnknown_QueryInterface(readerinput->input, &IID_ISequentialStream, (void**)&readerinput->stream);
830 
831     return hr;
832 }
833 
834 /* reads a chunk to raw buffer */
835 static HRESULT readerinput_growraw(xmlreaderinput *readerinput)
836 {
837     encoded_buffer *buffer = &readerinput->buffer->encoded;
838     /* to make sure aligned length won't exceed allocated length */
839     ULONG len = buffer->allocated - buffer->written - 4;
840     ULONG read;
841     HRESULT hr;
842 
843     /* always try to get aligned to 4 bytes, so the only case we can get partially read characters is
844        variable width encodings like UTF-8 */
845     len = (len + 3) & ~3;
846     /* try to use allocated space or grow */
847     if (buffer->allocated - buffer->written < len)
848     {
849         buffer->allocated *= 2;
850         buffer->data = readerinput_realloc(readerinput, buffer->data, buffer->allocated);
851         len = buffer->allocated - buffer->written;
852     }
853 
854     read = 0;
855     hr = ISequentialStream_Read(readerinput->stream, buffer->data + buffer->written, len, &read);
856     TRACE("written=%d, alloc=%d, requested=%d, read=%d, ret=0x%08x\n", buffer->written, buffer->allocated, len, read, hr);
857     readerinput->pending = hr == E_PENDING;
858     if (FAILED(hr)) return hr;
859     buffer->written += read;
860 
861     return hr;
862 }
863 
864 /* grows UTF-16 buffer so it has at least 'length' WCHAR chars free on return */
865 static void readerinput_grow(xmlreaderinput *readerinput, int length)
866 {
867     encoded_buffer *buffer = &readerinput->buffer->utf16;
868 
869     length *= sizeof(WCHAR);
870     /* grow if needed, plus 4 bytes to be sure null terminator will fit in */
871     if (buffer->allocated < buffer->written + length + 4)
872     {
873         int grown_size = max(2*buffer->allocated, buffer->allocated + length);
874         buffer->data = readerinput_realloc(readerinput, buffer->data, grown_size);
875         buffer->allocated = grown_size;
876     }
877 }
878 
879 static inline BOOL readerinput_is_utf8(xmlreaderinput *readerinput)
880 {
881     static const char startA[] = {'<','?'};
882     static const char commentA[] = {'<','!'};
883     encoded_buffer *buffer = &readerinput->buffer->encoded;
884     unsigned char *ptr = (unsigned char*)buffer->data;
885 
886     return !memcmp(buffer->data, startA, sizeof(startA)) ||
887            !memcmp(buffer->data, commentA, sizeof(commentA)) ||
888            /* test start byte */
889            (ptr[0] == '<' &&
890             (
891              (ptr[1] && (ptr[1] <= 0x7f)) ||
892              (buffer->data[1] >> 5) == 0x6  || /* 2 bytes */
893              (buffer->data[1] >> 4) == 0xe  || /* 3 bytes */
894              (buffer->data[1] >> 3) == 0x1e)   /* 4 bytes */
895            );
896 }
897 
898 static HRESULT readerinput_detectencoding(xmlreaderinput *readerinput, xml_encoding *enc)
899 {
900     encoded_buffer *buffer = &readerinput->buffer->encoded;
901     static const char utf8bom[] = {0xef,0xbb,0xbf};
902     static const char utf16lebom[] = {0xff,0xfe};
903     WCHAR *ptrW;
904 
905     *enc = XmlEncoding_Unknown;
906 
907     if (buffer->written <= 3)
908     {
909         HRESULT hr = readerinput_growraw(readerinput);
910         if (FAILED(hr)) return hr;
911         if (buffer->written < 3) return MX_E_INPUTEND;
912     }
913 
914     ptrW = (WCHAR *)buffer->data;
915     /* try start symbols if we have enough data to do that, input buffer should contain
916        first chunk already */
917     if (readerinput_is_utf8(readerinput))
918         *enc = XmlEncoding_UTF8;
919     else if (*ptrW == '<')
920     {
921         ptrW++;
922         if (*ptrW == '?' || *ptrW == '!' || is_namestartchar(*ptrW))
923             *enc = XmlEncoding_UTF16;
924     }
925     /* try with BOM now */
926     else if (!memcmp(buffer->data, utf8bom, sizeof(utf8bom)))
927     {
928         buffer->cur += sizeof(utf8bom);
929         *enc = XmlEncoding_UTF8;
930     }
931     else if (!memcmp(buffer->data, utf16lebom, sizeof(utf16lebom)))
932     {
933         buffer->cur += sizeof(utf16lebom);
934         *enc = XmlEncoding_UTF16;
935     }
936 
937     return S_OK;
938 }
939 
940 static int readerinput_get_utf8_convlen(xmlreaderinput *readerinput)
941 {
942     encoded_buffer *buffer = &readerinput->buffer->encoded;
943     int len = buffer->written;
944 
945     /* complete single byte char */
946     if (!(buffer->data[len-1] & 0x80)) return len;
947 
948     /* find start byte of multibyte char */
949     while (--len && !(buffer->data[len] & 0xc0))
950         ;
951 
952     return len;
953 }
954 
955 /* Returns byte length of complete char sequence for buffer code page,
956    it's relative to current buffer position which is currently used for BOM handling
957    only. */
958 static int readerinput_get_convlen(xmlreaderinput *readerinput)
959 {
960     encoded_buffer *buffer = &readerinput->buffer->encoded;
961     int len;
962 
963     if (readerinput->buffer->code_page == CP_UTF8)
964         len = readerinput_get_utf8_convlen(readerinput);
965     else
966         len = buffer->written;
967 
968     TRACE("%d\n", len - buffer->cur);
969     return len - buffer->cur;
970 }
971 
972 /* It's possible that raw buffer has some leftovers from last conversion - some char
973    sequence that doesn't represent a full code point. Length argument should be calculated with
974    readerinput_get_convlen(), if it's -1 it will be calculated here. */
975 static void readerinput_shrinkraw(xmlreaderinput *readerinput, int len)
976 {
977     encoded_buffer *buffer = &readerinput->buffer->encoded;
978 
979     if (len == -1)
980         len = readerinput_get_convlen(readerinput);
981 
982     memmove(buffer->data, buffer->data + buffer->cur + (buffer->written - len), len);
983     /* everything below cur is lost too */
984     buffer->written -= len + buffer->cur;
985     /* after this point we don't need cur offset really,
986        it's used only to mark where actual data begins when first chunk is read */
987     buffer->cur = 0;
988 }
989 
990 static void fixup_buffer_cr(encoded_buffer *buffer, int off)
991 {
992     BOOL prev_cr = buffer->prev_cr;
993     const WCHAR *src;
994     WCHAR *dest;
995 
996     src = dest = (WCHAR*)buffer->data + off;
997     while ((const char*)src < buffer->data + buffer->written)
998     {
999         if (*src == '\r')
1000         {
1001             *dest++ = '\n';
1002             src++;
1003             prev_cr = TRUE;
1004             continue;
1005         }
1006         if(prev_cr && *src == '\n')
1007             src++;
1008         else
1009             *dest++ = *src++;
1010         prev_cr = FALSE;
1011     }
1012 
1013     buffer->written = (char*)dest - buffer->data;
1014     buffer->prev_cr = prev_cr;
1015     *dest = 0;
1016 }
1017 
1018 /* note that raw buffer content is kept */
1019 static void readerinput_switchencoding(xmlreaderinput *readerinput, xml_encoding enc)
1020 {
1021     encoded_buffer *src = &readerinput->buffer->encoded;
1022     encoded_buffer *dest = &readerinput->buffer->utf16;
1023     int len, dest_len;
1024     UINT cp = ~0u;
1025     HRESULT hr;
1026     WCHAR *ptr;
1027 
1028     hr = get_code_page(enc, &cp);
1029     if (FAILED(hr)) return;
1030 
1031     readerinput->buffer->code_page = cp;
1032     len = readerinput_get_convlen(readerinput);
1033 
1034     TRACE("switching to cp %d\n", cp);
1035 
1036     /* just copy in this case */
1037     if (enc == XmlEncoding_UTF16)
1038     {
1039         readerinput_grow(readerinput, len);
1040         memcpy(dest->data, src->data + src->cur, len);
1041         dest->written += len*sizeof(WCHAR);
1042     }
1043     else
1044     {
1045         dest_len = MultiByteToWideChar(cp, 0, src->data + src->cur, len, NULL, 0);
1046         readerinput_grow(readerinput, dest_len);
1047         ptr = (WCHAR*)dest->data;
1048         MultiByteToWideChar(cp, 0, src->data + src->cur, len, ptr, dest_len);
1049         ptr[dest_len] = 0;
1050         dest->written += dest_len*sizeof(WCHAR);
1051     }
1052 
1053     fixup_buffer_cr(dest, 0);
1054 }
1055 
1056 /* shrinks parsed data a buffer begins with */
1057 static void reader_shrink(xmlreader *reader)
1058 {
1059     encoded_buffer *buffer = &reader->input->buffer->utf16;
1060 
1061     /* avoid to move too often using threshold shrink length */
1062     if (buffer->cur*sizeof(WCHAR) > buffer->written / 2)
1063     {
1064         buffer->written -= buffer->cur*sizeof(WCHAR);
1065         memmove(buffer->data, (WCHAR*)buffer->data + buffer->cur, buffer->written);
1066         buffer->cur = 0;
1067         *(WCHAR*)&buffer->data[buffer->written] = 0;
1068     }
1069 }
1070 
1071 /* This is a normal way for reader to get new data converted from raw buffer to utf16 buffer.
1072    It won't attempt to shrink but will grow destination buffer if needed */
1073 static HRESULT reader_more(xmlreader *reader)
1074 {
1075     xmlreaderinput *readerinput = reader->input;
1076     encoded_buffer *src = &readerinput->buffer->encoded;
1077     encoded_buffer *dest = &readerinput->buffer->utf16;
1078     UINT cp = readerinput->buffer->code_page;
1079     int len, dest_len, prev_len;
1080     HRESULT hr;
1081     WCHAR *ptr;
1082 
1083     /* get some raw data from stream first */
1084     hr = readerinput_growraw(readerinput);
1085     len = readerinput_get_convlen(readerinput);
1086     prev_len = dest->written / sizeof(WCHAR);
1087 
1088     /* just copy for UTF-16 case */
1089     if (cp == 1200)
1090     {
1091         readerinput_grow(readerinput, len);
1092         memcpy(dest->data + dest->written, src->data + src->cur, len);
1093         dest->written += len*sizeof(WCHAR);
1094     }
1095     else
1096     {
1097         dest_len = MultiByteToWideChar(cp, 0, src->data + src->cur, len, NULL, 0);
1098         readerinput_grow(readerinput, dest_len);
1099         ptr = (WCHAR*)(dest->data + dest->written);
1100         MultiByteToWideChar(cp, 0, src->data + src->cur, len, ptr, dest_len);
1101         ptr[dest_len] = 0;
1102         dest->written += dest_len*sizeof(WCHAR);
1103         /* get rid of processed data */
1104         readerinput_shrinkraw(readerinput, len);
1105     }
1106 
1107     fixup_buffer_cr(dest, prev_len);
1108     return hr;
1109 }
1110 
1111 static inline UINT reader_get_cur(xmlreader *reader)
1112 {
1113     return reader->input->buffer->utf16.cur;
1114 }
1115 
1116 static inline WCHAR *reader_get_ptr(xmlreader *reader)
1117 {
1118     encoded_buffer *buffer = &reader->input->buffer->utf16;
1119     WCHAR *ptr = (WCHAR*)buffer->data + buffer->cur;
1120     if (!*ptr) reader_more(reader);
1121     return (WCHAR*)buffer->data + buffer->cur;
1122 }
1123 
1124 static int reader_cmp(xmlreader *reader, const WCHAR *str)
1125 {
1126     int i=0;
1127     const WCHAR *ptr = reader_get_ptr(reader);
1128     while (str[i])
1129     {
1130         if (!ptr[i])
1131         {
1132             reader_more(reader);
1133             ptr = reader_get_ptr(reader);
1134         }
1135         if (str[i] != ptr[i])
1136             return ptr[i] - str[i];
1137         i++;
1138     }
1139     return 0;
1140 }
1141 
1142 static void reader_update_position(xmlreader *reader, WCHAR ch)
1143 {
1144     if (ch == '\r')
1145         reader->position.line_position = 1;
1146     else if (ch == '\n')
1147     {
1148         reader->position.line_number++;
1149         reader->position.line_position = 1;
1150     }
1151     else
1152         reader->position.line_position++;
1153 }
1154 
1155 /* moves cursor n WCHARs forward */
1156 static void reader_skipn(xmlreader *reader, int n)
1157 {
1158     encoded_buffer *buffer = &reader->input->buffer->utf16;
1159     const WCHAR *ptr;
1160 
1161     while (*(ptr = reader_get_ptr(reader)) && n--)
1162     {
1163         reader_update_position(reader, *ptr);
1164         buffer->cur++;
1165     }
1166 }
1167 
/* Tests for space, tab, CR or LF, see production [3] S */
static inline BOOL is_wchar_space(WCHAR ch)
{
    switch (ch)
    {
    case ' ':
    case '\t':
    case '\r':
    case '\n':
        return TRUE;
    default:
        return FALSE;
    }
}
1172 
1173 /* [3] S ::= (#x20 | #x9 | #xD | #xA)+ */
1174 static int reader_skipspaces(xmlreader *reader)
1175 {
1176     const WCHAR *ptr = reader_get_ptr(reader);
1177     UINT start = reader_get_cur(reader);
1178 
1179     while (is_wchar_space(*ptr))
1180     {
1181         reader_skipn(reader, 1);
1182         ptr = reader_get_ptr(reader);
1183     }
1184 
1185     return reader_get_cur(reader) - start;
1186 }
1187 
1188 /* [26] VersionNum ::= '1.' [0-9]+ */
1189 static HRESULT reader_parse_versionnum(xmlreader *reader, strval *val)
1190 {
1191     static const WCHAR onedotW[] = {'1','.',0};
1192     WCHAR *ptr, *ptr2;
1193     UINT start;
1194 
1195     if (reader_cmp(reader, onedotW)) return WC_E_XMLDECL;
1196 
1197     start = reader_get_cur(reader);
1198     /* skip "1." */
1199     reader_skipn(reader, 2);
1200 
1201     ptr2 = ptr = reader_get_ptr(reader);
1202     while (*ptr >= '0' && *ptr <= '9')
1203     {
1204         reader_skipn(reader, 1);
1205         ptr = reader_get_ptr(reader);
1206     }
1207 
1208     if (ptr2 == ptr) return WC_E_DIGIT;
1209     reader_init_strvalue(start, reader_get_cur(reader)-start, val);
1210     TRACE("version=%s\n", debug_strval(reader, val));
1211     return S_OK;
1212 }
1213 
1214 /* [25] Eq ::= S? '=' S? */
1215 static HRESULT reader_parse_eq(xmlreader *reader)
1216 {
1217     static const WCHAR eqW[] = {'=',0};
1218     reader_skipspaces(reader);
1219     if (reader_cmp(reader, eqW)) return WC_E_EQUAL;
1220     /* skip '=' */
1221     reader_skipn(reader, 1);
1222     reader_skipspaces(reader);
1223     return S_OK;
1224 }
1225 
1226 /* [24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"') */
1227 static HRESULT reader_parse_versioninfo(xmlreader *reader)
1228 {
1229     static const WCHAR versionW[] = {'v','e','r','s','i','o','n',0};
1230     struct reader_position position;
1231     strval val, name;
1232     HRESULT hr;
1233 
1234     if (!reader_skipspaces(reader)) return WC_E_WHITESPACE;
1235 
1236     position = reader->position;
1237     if (reader_cmp(reader, versionW)) return WC_E_XMLDECL;
1238     reader_init_strvalue(reader_get_cur(reader), 7, &name);
1239     /* skip 'version' */
1240     reader_skipn(reader, 7);
1241 
1242     hr = reader_parse_eq(reader);
1243     if (FAILED(hr)) return hr;
1244 
1245     if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1246         return WC_E_QUOTE;
1247     /* skip "'"|'"' */
1248     reader_skipn(reader, 1);
1249 
1250     hr = reader_parse_versionnum(reader, &val);
1251     if (FAILED(hr)) return hr;
1252 
1253     if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1254         return WC_E_QUOTE;
1255 
1256     /* skip "'"|'"' */
1257     reader_skipn(reader, 1);
1258 
1259     return reader_add_attr(reader, NULL, &name, NULL, &val, &position, 0);
1260 }
1261 
/* ([A-Za-z0-9._] | '-'), that's what is allowed after first char of EncName */
static inline BOOL is_wchar_encname(WCHAR ch)
{
    if (ch >= 'a' && ch <= 'z') return TRUE;
    if (ch >= 'A' && ch <= 'Z') return TRUE;
    if (ch >= '0' && ch <= '9') return TRUE;

    return ch == '-' || ch == '.' || ch == '_';
}
1271 
1272 /* [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* */
1273 static HRESULT reader_parse_encname(xmlreader *reader, strval *val)
1274 {
1275     WCHAR *start = reader_get_ptr(reader), *ptr;
1276     xml_encoding enc;
1277     int len;
1278 
1279     if ((*start < 'A' || *start > 'Z') && (*start < 'a' || *start > 'z'))
1280         return WC_E_ENCNAME;
1281 
1282     val->start = reader_get_cur(reader);
1283 
1284     ptr = start;
1285     while (is_wchar_encname(*++ptr))
1286         ;
1287 
1288     len = ptr - start;
1289     enc = parse_encoding_name(start, len);
1290     TRACE("encoding name %s\n", debugstr_wn(start, len));
1291     val->str = start;
1292     val->len = len;
1293 
1294     if (enc == XmlEncoding_Unknown)
1295         return WC_E_ENCNAME;
1296 
1297     /* skip encoding name */
1298     reader_skipn(reader, len);
1299     return S_OK;
1300 }
1301 
1302 /* [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" ) */
1303 static HRESULT reader_parse_encdecl(xmlreader *reader)
1304 {
1305     static const WCHAR encodingW[] = {'e','n','c','o','d','i','n','g',0};
1306     struct reader_position position;
1307     strval name, val;
1308     HRESULT hr;
1309 
1310     if (!reader_skipspaces(reader)) return S_FALSE;
1311 
1312     position = reader->position;
1313     if (reader_cmp(reader, encodingW)) return S_FALSE;
1314     name.str = reader_get_ptr(reader);
1315     name.start = reader_get_cur(reader);
1316     name.len = 8;
1317     /* skip 'encoding' */
1318     reader_skipn(reader, 8);
1319 
1320     hr = reader_parse_eq(reader);
1321     if (FAILED(hr)) return hr;
1322 
1323     if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1324         return WC_E_QUOTE;
1325     /* skip "'"|'"' */
1326     reader_skipn(reader, 1);
1327 
1328     hr = reader_parse_encname(reader, &val);
1329     if (FAILED(hr)) return hr;
1330 
1331     if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1332         return WC_E_QUOTE;
1333 
1334     /* skip "'"|'"' */
1335     reader_skipn(reader, 1);
1336 
1337     return reader_add_attr(reader, NULL, &name, NULL, &val, &position, 0);
1338 }
1339 
1340 /* [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"')) */
1341 static HRESULT reader_parse_sddecl(xmlreader *reader)
1342 {
1343     static const WCHAR standaloneW[] = {'s','t','a','n','d','a','l','o','n','e',0};
1344     static const WCHAR yesW[] = {'y','e','s',0};
1345     static const WCHAR noW[] = {'n','o',0};
1346     struct reader_position position;
1347     strval name, val;
1348     UINT start;
1349     HRESULT hr;
1350 
1351     if (!reader_skipspaces(reader)) return S_FALSE;
1352 
1353     position = reader->position;
1354     if (reader_cmp(reader, standaloneW)) return S_FALSE;
1355     reader_init_strvalue(reader_get_cur(reader), 10, &name);
1356     /* skip 'standalone' */
1357     reader_skipn(reader, 10);
1358 
1359     hr = reader_parse_eq(reader);
1360     if (FAILED(hr)) return hr;
1361 
1362     if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1363         return WC_E_QUOTE;
1364     /* skip "'"|'"' */
1365     reader_skipn(reader, 1);
1366 
1367     if (reader_cmp(reader, yesW) && reader_cmp(reader, noW))
1368         return WC_E_XMLDECL;
1369 
1370     start = reader_get_cur(reader);
1371     /* skip 'yes'|'no' */
1372     reader_skipn(reader, reader_cmp(reader, yesW) ? 2 : 3);
1373     reader_init_strvalue(start, reader_get_cur(reader)-start, &val);
1374     TRACE("standalone=%s\n", debug_strval(reader, &val));
1375 
1376     if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1377         return WC_E_QUOTE;
1378     /* skip "'"|'"' */
1379     reader_skipn(reader, 1);
1380 
1381     return reader_add_attr(reader, NULL, &name, NULL, &val, &position, 0);
1382 }
1383 
1384 /* [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' */
1385 static HRESULT reader_parse_xmldecl(xmlreader *reader)
1386 {
1387     static const WCHAR xmldeclW[] = {'<','?','x','m','l',' ',0};
1388     static const WCHAR declcloseW[] = {'?','>',0};
1389     struct reader_position position;
1390     HRESULT hr;
1391 
1392     /* check if we have "<?xml " */
1393     if (reader_cmp(reader, xmldeclW))
1394         return S_FALSE;
1395 
1396     reader_skipn(reader, 2);
1397     position = reader->position;
1398     reader_skipn(reader, 3);
1399     hr = reader_parse_versioninfo(reader);
1400     if (FAILED(hr))
1401         return hr;
1402 
1403     hr = reader_parse_encdecl(reader);
1404     if (FAILED(hr))
1405         return hr;
1406 
1407     hr = reader_parse_sddecl(reader);
1408     if (FAILED(hr))
1409         return hr;
1410 
1411     reader_skipspaces(reader);
1412     if (reader_cmp(reader, declcloseW))
1413         return WC_E_XMLDECL;
1414 
1415     /* skip '?>' */
1416     reader_skipn(reader, 2);
1417 
1418     reader->nodetype = XmlNodeType_XmlDeclaration;
1419     reader->empty_element.position = position;
1420     reader_set_strvalue(reader, StringValue_LocalName, &strval_xml);
1421     reader_set_strvalue(reader, StringValue_QualifiedName, &strval_xml);
1422 
1423     return S_OK;
1424 }
1425 
1426 /* [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' */
1427 static HRESULT reader_parse_comment(xmlreader *reader)
1428 {
1429     WCHAR *ptr;
1430     UINT start;
1431 
1432     if (reader->resumestate == XmlReadResumeState_Comment)
1433     {
1434         start = reader->resume[XmlReadResume_Body];
1435         ptr = reader_get_ptr(reader);
1436     }
1437     else
1438     {
1439         /* skip '<!--' */
1440         reader_skipn(reader, 4);
1441         reader_shrink(reader);
1442         ptr = reader_get_ptr(reader);
1443         start = reader_get_cur(reader);
1444         reader->nodetype = XmlNodeType_Comment;
1445         reader->resume[XmlReadResume_Body] = start;
1446         reader->resumestate = XmlReadResumeState_Comment;
1447         reader_set_strvalue(reader, StringValue_Value, NULL);
1448     }
1449 
1450     /* will exit when there's no more data, it won't attempt to
1451        read more from stream */
1452     while (*ptr)
1453     {
1454         if (ptr[0] == '-')
1455         {
1456             if (ptr[1] == '-')
1457             {
1458                 if (ptr[2] == '>')
1459                 {
1460                     strval value;
1461 
1462                     reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
1463                     TRACE("%s\n", debug_strval(reader, &value));
1464 
1465                     /* skip rest of markup '->' */
1466                     reader_skipn(reader, 3);
1467 
1468                     reader_set_strvalue(reader, StringValue_Value, &value);
1469                     reader->resume[XmlReadResume_Body] = 0;
1470                     reader->resumestate = XmlReadResumeState_Initial;
1471                     return S_OK;
1472                 }
1473                 else
1474                     return WC_E_COMMENT;
1475             }
1476         }
1477 
1478         reader_skipn(reader, 1);
1479         ptr++;
1480     }
1481 
1482     return S_OK;
1483 }
1484 
/* [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]

   It's tested per WCHAR, so both high (0xd800-0xdbff) and low (0xdc00-0xdfff) surrogates
   are accepted to let through chars above 0xffff. Together with the two BMP ranges that
   makes a single run of 0x20-0xfffd. */
static inline BOOL is_char(WCHAR ch)
{
    switch (ch)
    {
    case '\t':
    case '\r':
    case '\n':
        return TRUE;
    default:
        return ch >= 0x20 && ch <= 0xfffd;
    }
}
1494 
/* [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]

   Note that both quote delimiters are not treated the same, apostrophe is a valid
   PubidChar while double quote is not. */
BOOL is_pubchar(WCHAR ch)
{
    return (ch == ' ') ||
           (ch >= 'a' && ch <= 'z') ||
           (ch >= 'A' && ch <= 'Z') ||
           (ch >= '0' && ch <= '9') ||
           (ch >= '\'' && ch <= ';') || /* '()*+,-./ digits :; - range starts at apostrophe (0x27) */
           (ch == '=') || (ch == '?') ||
           (ch == '@') || (ch == '!') ||
           (ch >= '#' && ch <= '%') || /* #$% */
           (ch == '_') || (ch == '\r') || (ch == '\n');
}
1508 
/* Tests if char is allowed to start a name, see production [4] NameStartChar. It's tested
   per WCHAR, so both surrogate halves are accepted to let through chars above 0xffff. */
BOOL is_namestartchar(WCHAR ch)
{
    /* below first non-ASCII range it's only letters, ':' and '_' */
    if (ch < 0xc0)
        return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') ||
               (ch == '_') || (ch == ':');

    /* [#xC0-#xD6], [#xD8-#xF6] and [#xF8-#x2FF], that is a single run without 0xd7 and 0xf7 */
    if (ch <= 0x2ff)
        return ch != 0xd7 && ch != 0xf7;

    /* [#x300-#x36F] is not valid for a first char */
    if (ch < 0x370)
        return FALSE;

    /* [#x370-#x37D] and [#x37F-#x1FFF], a single run without 0x37e */
    if (ch <= 0x1fff)
        return ch != 0x37e;

    return (ch >= 0x200c && ch <= 0x200d) ||
           (ch >= 0x2070 && ch <= 0x218f) ||
           (ch >= 0x2c00 && ch <= 0x2fef) ||
           (ch >= 0x3001 && ch <= 0xdfff) || /* [#x3001-#xD7FF] followed by high and low surrogates */
           (ch >= 0xf900 && ch <= 0xfdcf) ||
           (ch >= 0xfdf0 && ch <= 0xfffd);
}
1527 
/* [4 NS] NCName ::= Name - (Char* ':' Char*)

   Tests if char is allowed in NCName past its first char, that's any name char but ':'.
   It's tested per WCHAR, so both surrogate halves are accepted to let through chars
   above 0xffff. */
BOOL is_ncnamechar(WCHAR ch)
{
    /* below first non-ASCII range it's letters, digits, '_', '-', '.' and middle dot */
    if (ch < 0xc0)
        return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') ||
               (ch >= '0' && ch <= '9') ||
               (ch == '_') || (ch == '-') || (ch == '.') ||
               (ch == 0xb7);

    /* everything in [#xC0-#x1FFF] is valid except for these three */
    if (ch <= 0x1fff)
        return ch != 0xd7 && ch != 0xf7 && ch != 0x37e;

    return (ch >= 0x200c && ch <= 0x200d) ||
           (ch >= 0x203f && ch <= 0x2040) ||
           (ch >= 0x2070 && ch <= 0x218f) ||
           (ch >= 0x2c00 && ch <= 0x2fef) ||
           (ch >= 0x3001 && ch <= 0xdfff) || /* [#x3001-#xD7FF] followed by high and low surrogates */
           (ch >= 0xf900 && ch <= 0xfdcf) ||
           (ch >= 0xfdf0 && ch <= 0xfffd);
}
1552 
1553 BOOL is_namechar(WCHAR ch)
1554 {
1555     return (ch == ':') || is_ncnamechar(ch);
1556 }
1557 
1558 static XmlNodeType reader_get_nodetype(const xmlreader *reader)
1559 {
1560     /* When we're on attribute always return attribute type, container node type is kept.
1561        Note that container is not necessarily an element, and attribute doesn't mean it's
1562        an attribute in XML spec terms. */
1563     return reader->attr ? XmlNodeType_Attribute : reader->nodetype;
1564 }
1565 
1566 /* [4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] |
1567                             [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] |
1568                             [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
1569    [4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]
1570    [5]  Name     ::= NameStartChar (NameChar)* */
1571 static HRESULT reader_parse_name(xmlreader *reader, strval *name)
1572 {
1573     WCHAR *ptr;
1574     UINT start;
1575 
1576     if (reader->resume[XmlReadResume_Name])
1577     {
1578         start = reader->resume[XmlReadResume_Name];
1579         ptr = reader_get_ptr(reader);
1580     }
1581     else
1582     {
1583         ptr = reader_get_ptr(reader);
1584         start = reader_get_cur(reader);
1585         if (!is_namestartchar(*ptr)) return WC_E_NAMECHARACTER;
1586     }
1587 
1588     while (is_namechar(*ptr))
1589     {
1590         reader_skipn(reader, 1);
1591         ptr = reader_get_ptr(reader);
1592     }
1593 
1594     if (is_reader_pending(reader))
1595     {
1596         reader->resume[XmlReadResume_Name] = start;
1597         return E_PENDING;
1598     }
1599     else
1600         reader->resume[XmlReadResume_Name] = 0;
1601 
1602     reader_init_strvalue(start, reader_get_cur(reader)-start, name);
1603     TRACE("name %s:%d\n", debug_strval(reader, name), name->len);
1604 
1605     return S_OK;
1606 }
1607 
1608 /* [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) */
1609 static HRESULT reader_parse_pitarget(xmlreader *reader, strval *target)
1610 {
1611     static const WCHAR xmlW[] = {'x','m','l'};
1612     static const strval xmlval = { (WCHAR*)xmlW, 3 };
1613     strval name;
1614     WCHAR *ptr;
1615     HRESULT hr;
1616     UINT i;
1617 
1618     hr = reader_parse_name(reader, &name);
1619     if (FAILED(hr)) return is_reader_pending(reader) ? E_PENDING : WC_E_PI;
1620 
1621     /* now that we got name check for illegal content */
1622     if (strval_eq(reader, &name, &xmlval))
1623         return WC_E_LEADINGXML;
1624 
1625     /* PITarget can't be a qualified name */
1626     ptr = reader_get_strptr(reader, &name);
1627     for (i = 0; i < name.len; i++)
1628         if (ptr[i] == ':')
1629             return i ? NC_E_NAMECOLON : WC_E_PI;
1630 
1631     TRACE("pitarget %s:%d\n", debug_strval(reader, &name), name.len);
1632     *target = name;
1633     return S_OK;
1634 }
1635 
1636 /* [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' */
1637 static HRESULT reader_parse_pi(xmlreader *reader)
1638 {
1639     strval target;
1640     WCHAR *ptr;
1641     UINT start;
1642     HRESULT hr;
1643 
1644     switch (reader->resumestate)
1645     {
1646     case XmlReadResumeState_Initial:
1647         /* skip '<?' */
1648         reader_skipn(reader, 2);
1649         reader_shrink(reader);
1650         reader->resumestate = XmlReadResumeState_PITarget;
1651     case XmlReadResumeState_PITarget:
1652         hr = reader_parse_pitarget(reader, &target);
1653         if (FAILED(hr)) return hr;
1654         reader_set_strvalue(reader, StringValue_LocalName, &target);
1655         reader_set_strvalue(reader, StringValue_QualifiedName, &target);
1656         reader_set_strvalue(reader, StringValue_Value, &strval_empty);
1657         reader->resumestate = XmlReadResumeState_PIBody;
1658         reader->resume[XmlReadResume_Body] = reader_get_cur(reader);
1659     default:
1660         ;
1661     }
1662 
1663     start = reader->resume[XmlReadResume_Body];
1664     ptr = reader_get_ptr(reader);
1665     while (*ptr)
1666     {
1667         if (ptr[0] == '?')
1668         {
1669             if (ptr[1] == '>')
1670             {
1671                 UINT cur = reader_get_cur(reader);
1672                 strval value;
1673 
1674                 /* strip all leading whitespace chars */
1675                 while (start < cur)
1676                 {
1677                     ptr = reader_get_ptr2(reader, start);
1678                     if (!is_wchar_space(*ptr)) break;
1679                     start++;
1680                 }
1681 
1682                 reader_init_strvalue(start, cur-start, &value);
1683 
1684                 /* skip '?>' */
1685                 reader_skipn(reader, 2);
1686                 TRACE("%s\n", debug_strval(reader, &value));
1687                 reader->nodetype = XmlNodeType_ProcessingInstruction;
1688                 reader->resumestate = XmlReadResumeState_Initial;
1689                 reader->resume[XmlReadResume_Body] = 0;
1690                 reader_set_strvalue(reader, StringValue_Value, &value);
1691                 return S_OK;
1692             }
1693         }
1694 
1695         reader_skipn(reader, 1);
1696         ptr = reader_get_ptr(reader);
1697     }
1698 
1699     return S_OK;
1700 }
1701 
1702 /* This one is used to parse significant whitespace nodes, like in Misc production */
1703 static HRESULT reader_parse_whitespace(xmlreader *reader)
1704 {
1705     switch (reader->resumestate)
1706     {
1707     case XmlReadResumeState_Initial:
1708         reader_shrink(reader);
1709         reader->resumestate = XmlReadResumeState_Whitespace;
1710         reader->resume[XmlReadResume_Body] = reader_get_cur(reader);
1711         reader->nodetype = XmlNodeType_Whitespace;
1712         reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
1713         reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
1714         reader_set_strvalue(reader, StringValue_Value, &strval_empty);
1715         /* fallthrough */
1716     case XmlReadResumeState_Whitespace:
1717     {
1718         strval value;
1719         UINT start;
1720 
1721         reader_skipspaces(reader);
1722         if (is_reader_pending(reader)) return S_OK;
1723 
1724         start = reader->resume[XmlReadResume_Body];
1725         reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
1726         reader_set_strvalue(reader, StringValue_Value, &value);
1727         TRACE("%s\n", debug_strval(reader, &value));
1728         reader->resumestate = XmlReadResumeState_Initial;
1729     }
1730     default:
1731         ;
1732     }
1733 
1734     return S_OK;
1735 }
1736 
1737 /* [27] Misc ::= Comment | PI | S */
1738 static HRESULT reader_parse_misc(xmlreader *reader)
1739 {
1740     HRESULT hr = S_FALSE;
1741 
1742     if (reader->resumestate != XmlReadResumeState_Initial)
1743     {
1744         hr = reader_more(reader);
1745         if (FAILED(hr)) return hr;
1746 
1747         /* finish current node */
1748         switch (reader->resumestate)
1749         {
1750         case XmlReadResumeState_PITarget:
1751         case XmlReadResumeState_PIBody:
1752             return reader_parse_pi(reader);
1753         case XmlReadResumeState_Comment:
1754             return reader_parse_comment(reader);
1755         case XmlReadResumeState_Whitespace:
1756             return reader_parse_whitespace(reader);
1757         default:
1758             ERR("unknown resume state %d\n", reader->resumestate);
1759         }
1760     }
1761 
1762     while (1)
1763     {
1764         const WCHAR *cur = reader_get_ptr(reader);
1765 
1766         if (is_wchar_space(*cur))
1767             hr = reader_parse_whitespace(reader);
1768         else if (!reader_cmp(reader, commentW))
1769             hr = reader_parse_comment(reader);
1770         else if (!reader_cmp(reader, piW))
1771             hr = reader_parse_pi(reader);
1772         else
1773             break;
1774 
1775         if (hr != S_FALSE) return hr;
1776     }
1777 
1778     return hr;
1779 }
1780 
1781 /* [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") */
1782 static HRESULT reader_parse_sys_literal(xmlreader *reader, strval *literal)
1783 {
1784     WCHAR *cur = reader_get_ptr(reader), quote;
1785     UINT start;
1786 
1787     if (*cur != '"' && *cur != '\'') return WC_E_QUOTE;
1788 
1789     quote = *cur;
1790     reader_skipn(reader, 1);
1791 
1792     cur = reader_get_ptr(reader);
1793     start = reader_get_cur(reader);
1794     while (is_char(*cur) && *cur != quote)
1795     {
1796         reader_skipn(reader, 1);
1797         cur = reader_get_ptr(reader);
1798     }
1799     reader_init_strvalue(start, reader_get_cur(reader)-start, literal);
1800     if (*cur == quote) reader_skipn(reader, 1);
1801 
1802     TRACE("%s\n", debug_strval(reader, literal));
1803     return S_OK;
1804 }
1805 
1806 /* [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
1807    [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] */
1808 static HRESULT reader_parse_pub_literal(xmlreader *reader, strval *literal)
1809 {
1810     WCHAR *cur = reader_get_ptr(reader), quote;
1811     UINT start;
1812 
1813     if (*cur != '"' && *cur != '\'') return WC_E_QUOTE;
1814 
1815     quote = *cur;
1816     reader_skipn(reader, 1);
1817 
1818     start = reader_get_cur(reader);
1819     cur = reader_get_ptr(reader);
1820     while (is_pubchar(*cur) && *cur != quote)
1821     {
1822         reader_skipn(reader, 1);
1823         cur = reader_get_ptr(reader);
1824     }
1825     reader_init_strvalue(start, reader_get_cur(reader)-start, literal);
1826     if (*cur == quote) reader_skipn(reader, 1);
1827 
1828     TRACE("%s\n", debug_strval(reader, literal));
1829     return S_OK;
1830 }
1831 
1832 /* [75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral */
1833 static HRESULT reader_parse_externalid(xmlreader *reader)
1834 {
1835     static WCHAR systemW[] = {'S','Y','S','T','E','M',0};
1836     static WCHAR publicW[] = {'P','U','B','L','I','C',0};
1837     struct reader_position position = reader->position;
1838     strval name, sys;
1839     HRESULT hr;
1840     int cnt;
1841 
1842     if (!reader_cmp(reader, publicW)) {
1843         strval pub;
1844 
1845         /* public id */
1846         reader_skipn(reader, 6);
1847         cnt = reader_skipspaces(reader);
1848         if (!cnt) return WC_E_WHITESPACE;
1849 
1850         hr = reader_parse_pub_literal(reader, &pub);
1851         if (FAILED(hr)) return hr;
1852 
1853         reader_init_cstrvalue(publicW, lstrlenW(publicW), &name);
1854         hr = reader_add_attr(reader, NULL, &name, NULL, &pub, &position, 0);
1855         if (FAILED(hr)) return hr;
1856 
1857         cnt = reader_skipspaces(reader);
1858         if (!cnt) return S_OK;
1859 
1860         /* optional system id */
1861         hr = reader_parse_sys_literal(reader, &sys);
1862         if (FAILED(hr)) return S_OK;
1863 
1864         reader_init_cstrvalue(systemW, lstrlenW(systemW), &name);
1865         hr = reader_add_attr(reader, NULL, &name, NULL, &sys, &position, 0);
1866         if (FAILED(hr)) return hr;
1867 
1868         return S_OK;
1869     } else if (!reader_cmp(reader, systemW)) {
1870         /* system id */
1871         reader_skipn(reader, 6);
1872         cnt = reader_skipspaces(reader);
1873         if (!cnt) return WC_E_WHITESPACE;
1874 
1875         hr = reader_parse_sys_literal(reader, &sys);
1876         if (FAILED(hr)) return hr;
1877 
1878         reader_init_cstrvalue(systemW, lstrlenW(systemW), &name);
1879         return reader_add_attr(reader, NULL, &name, NULL, &sys, &position, 0);
1880     }
1881 
1882     return S_FALSE;
1883 }
1884 
1885 /* [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>' */
1886 static HRESULT reader_parse_dtd(xmlreader *reader)
1887 {
1888     static const WCHAR doctypeW[] = {'<','!','D','O','C','T','Y','P','E',0};
1889     strval name;
1890     WCHAR *cur;
1891     HRESULT hr;
1892 
1893     /* check if we have "<!DOCTYPE" */
1894     if (reader_cmp(reader, doctypeW)) return S_FALSE;
1895     reader_shrink(reader);
1896 
1897     /* DTD processing is not allowed by default */
1898     if (reader->dtdmode == DtdProcessing_Prohibit) return WC_E_DTDPROHIBITED;
1899 
1900     reader_skipn(reader, 9);
1901     if (!reader_skipspaces(reader)) return WC_E_WHITESPACE;
1902 
1903     /* name */
1904     hr = reader_parse_name(reader, &name);
1905     if (FAILED(hr)) return WC_E_DECLDOCTYPE;
1906 
1907     reader_skipspaces(reader);
1908 
1909     hr = reader_parse_externalid(reader);
1910     if (FAILED(hr)) return hr;
1911 
1912     reader_skipspaces(reader);
1913 
1914     cur = reader_get_ptr(reader);
1915     if (*cur != '>')
1916     {
1917         FIXME("internal subset parsing not implemented\n");
1918         return E_NOTIMPL;
1919     }
1920 
1921     /* skip '>' */
1922     reader_skipn(reader, 1);
1923 
1924     reader->nodetype = XmlNodeType_DocumentType;
1925     reader_set_strvalue(reader, StringValue_LocalName, &name);
1926     reader_set_strvalue(reader, StringValue_QualifiedName, &name);
1927 
1928     return S_OK;
1929 }
1930 
1931 /* [11 NS] LocalPart ::= NCName */
1932 static HRESULT reader_parse_local(xmlreader *reader, strval *local, BOOL check_for_separator)
1933 {
1934     WCHAR *ptr;
1935     UINT start;
1936 
1937     if (reader->resume[XmlReadResume_Local])
1938     {
1939         start = reader->resume[XmlReadResume_Local];
1940         ptr = reader_get_ptr(reader);
1941     }
1942     else
1943     {
1944         ptr = reader_get_ptr(reader);
1945         start = reader_get_cur(reader);
1946     }
1947 
1948     while (is_ncnamechar(*ptr))
1949     {
1950         reader_skipn(reader, 1);
1951         ptr = reader_get_ptr(reader);
1952     }
1953 
1954     if (check_for_separator && *ptr == ':')
1955         return NC_E_QNAMECOLON;
1956 
1957     if (is_reader_pending(reader))
1958     {
1959          reader->resume[XmlReadResume_Local] = start;
1960          return E_PENDING;
1961     }
1962     else
1963          reader->resume[XmlReadResume_Local] = 0;
1964 
1965     reader_init_strvalue(start, reader_get_cur(reader)-start, local);
1966 
1967     return S_OK;
1968 }
1969 
1970 /* [7 NS]  QName ::= PrefixedName | UnprefixedName
1971    [8 NS]  PrefixedName ::= Prefix ':' LocalPart
1972    [9 NS]  UnprefixedName ::= LocalPart
1973    [10 NS] Prefix ::= NCName */
1974 static HRESULT reader_parse_qname(xmlreader *reader, strval *prefix, strval *local, strval *qname)
1975 {
1976     WCHAR *ptr;
1977     UINT start;
1978     HRESULT hr;
1979 
1980     if (reader->resume[XmlReadResume_Name])
1981     {
1982         start = reader->resume[XmlReadResume_Name];
1983         ptr = reader_get_ptr(reader);
1984     }
1985     else
1986     {
1987         ptr = reader_get_ptr(reader);
1988         start = reader_get_cur(reader);
1989         reader->resume[XmlReadResume_Name] = start;
1990         if (!is_ncnamechar(*ptr)) return NC_E_QNAMECHARACTER;
1991     }
1992 
1993     if (reader->resume[XmlReadResume_Local])
1994     {
1995         hr = reader_parse_local(reader, local, FALSE);
1996         if (FAILED(hr)) return hr;
1997 
1998         reader_init_strvalue(reader->resume[XmlReadResume_Name],
1999                              local->start - reader->resume[XmlReadResume_Name] - 1,
2000                              prefix);
2001     }
2002     else
2003     {
2004         /* skip prefix part */
2005         while (is_ncnamechar(*ptr))
2006         {
2007             reader_skipn(reader, 1);
2008             ptr = reader_get_ptr(reader);
2009         }
2010 
2011         if (is_reader_pending(reader)) return E_PENDING;
2012 
2013         /* got a qualified name */
2014         if (*ptr == ':')
2015         {
2016             reader_init_strvalue(start, reader_get_cur(reader)-start, prefix);
2017 
2018             /* skip ':' */
2019             reader_skipn(reader, 1);
2020             hr = reader_parse_local(reader, local, TRUE);
2021             if (FAILED(hr)) return hr;
2022         }
2023         else
2024         {
2025             reader_init_strvalue(reader->resume[XmlReadResume_Name], reader_get_cur(reader)-reader->resume[XmlReadResume_Name], local);
2026             reader_init_strvalue(0, 0, prefix);
2027         }
2028     }
2029 
2030     if (prefix->len)
2031         TRACE("qname %s:%s\n", debug_strval(reader, prefix), debug_strval(reader, local));
2032     else
2033         TRACE("ncname %s\n", debug_strval(reader, local));
2034 
2035     reader_init_strvalue(prefix->len ? prefix->start : local->start,
2036                         /* count ':' too */
2037                         (prefix->len ? prefix->len + 1 : 0) + local->len,
2038                          qname);
2039 
2040     reader->resume[XmlReadResume_Name] = 0;
2041     reader->resume[XmlReadResume_Local] = 0;
2042 
2043     return S_OK;
2044 }
2045 
2046 static WCHAR get_predefined_entity(const xmlreader *reader, const strval *name)
2047 {
2048     static const WCHAR entltW[]   = {'l','t'};
2049     static const WCHAR entgtW[]   = {'g','t'};
2050     static const WCHAR entampW[]  = {'a','m','p'};
2051     static const WCHAR entaposW[] = {'a','p','o','s'};
2052     static const WCHAR entquotW[] = {'q','u','o','t'};
2053     static const strval lt   = { (WCHAR*)entltW,   2 };
2054     static const strval gt   = { (WCHAR*)entgtW,   2 };
2055     static const strval amp  = { (WCHAR*)entampW,  3 };
2056     static const strval apos = { (WCHAR*)entaposW, 4 };
2057     static const strval quot = { (WCHAR*)entquotW, 4 };
2058     WCHAR *str = reader_get_strptr(reader, name);
2059 
2060     switch (*str)
2061     {
2062     case 'l':
2063         if (strval_eq(reader, name, &lt)) return '<';
2064         break;
2065     case 'g':
2066         if (strval_eq(reader, name, &gt)) return '>';
2067         break;
2068     case 'a':
2069         if (strval_eq(reader, name, &amp))
2070             return '&';
2071         else if (strval_eq(reader, name, &apos))
2072             return '\'';
2073         break;
2074     case 'q':
2075         if (strval_eq(reader, name, &quot)) return '\"';
2076         break;
2077     default:
2078         ;
2079     }
2080 
2081     return 0;
2082 }
2083 
2084 /* [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
2085    [67] Reference ::= EntityRef | CharRef
2086    [68] EntityRef ::= '&' Name ';' */
2087 static HRESULT reader_parse_reference(xmlreader *reader)
2088 {
2089     encoded_buffer *buffer = &reader->input->buffer->utf16;
2090     WCHAR *start = reader_get_ptr(reader), *ptr;
2091     UINT cur = reader_get_cur(reader);
2092     WCHAR ch = 0;
2093     int len;
2094 
2095     /* skip '&' */
2096     reader_skipn(reader, 1);
2097     ptr = reader_get_ptr(reader);
2098 
2099     if (*ptr == '#')
2100     {
2101         reader_skipn(reader, 1);
2102         ptr = reader_get_ptr(reader);
2103 
2104         /* hex char or decimal */
2105         if (*ptr == 'x')
2106         {
2107             reader_skipn(reader, 1);
2108             ptr = reader_get_ptr(reader);
2109 
2110             while (*ptr != ';')
2111             {
2112                 if ((*ptr >= '0' && *ptr <= '9'))
2113                     ch = ch*16 + *ptr - '0';
2114                 else if ((*ptr >= 'a' && *ptr <= 'f'))
2115                     ch = ch*16 + *ptr - 'a' + 10;
2116                 else if ((*ptr >= 'A' && *ptr <= 'F'))
2117                     ch = ch*16 + *ptr - 'A' + 10;
2118                 else
2119                     return ch ? WC_E_SEMICOLON : WC_E_HEXDIGIT;
2120                 reader_skipn(reader, 1);
2121                 ptr = reader_get_ptr(reader);
2122             }
2123         }
2124         else
2125         {
2126             while (*ptr != ';')
2127             {
2128                 if ((*ptr >= '0' && *ptr <= '9'))
2129                 {
2130                     ch = ch*10 + *ptr - '0';
2131                     reader_skipn(reader, 1);
2132                     ptr = reader_get_ptr(reader);
2133                 }
2134                 else
2135                     return ch ? WC_E_SEMICOLON : WC_E_DIGIT;
2136             }
2137         }
2138 
2139         if (!is_char(ch)) return WC_E_XMLCHARACTER;
2140 
2141         /* normalize */
2142         if (is_wchar_space(ch)) ch = ' ';
2143 
2144         ptr = reader_get_ptr(reader);
2145         start = reader_get_ptr2(reader, cur);
2146         len = buffer->written - ((char *)ptr - buffer->data);
2147         memmove(start + 1, ptr + 1, len);
2148 
2149         buffer->written -= (reader_get_cur(reader) - cur) * sizeof(WCHAR);
2150         buffer->cur = cur + 1;
2151 
2152         *start = ch;
2153     }
2154     else
2155     {
2156         strval name;
2157         HRESULT hr;
2158 
2159         hr = reader_parse_name(reader, &name);
2160         if (FAILED(hr)) return hr;
2161 
2162         ptr = reader_get_ptr(reader);
2163         if (*ptr != ';') return WC_E_SEMICOLON;
2164 
2165         /* predefined entities resolve to a single character */
2166         ch = get_predefined_entity(reader, &name);
2167         if (ch)
2168         {
2169             len = buffer->written - ((char*)ptr - buffer->data) - sizeof(WCHAR);
2170             memmove(start+1, ptr+1, len);
2171             buffer->cur = cur + 1;
2172             buffer->written -= (ptr - start) * sizeof(WCHAR);
2173 
2174             *start = ch;
2175         }
2176         else
2177         {
2178             FIXME("undeclared entity %s\n", debug_strval(reader, &name));
2179             return WC_E_UNDECLAREDENTITY;
2180         }
2181 
2182     }
2183 
2184     return S_OK;
2185 }
2186 
2187 /* [10 NS] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'" */
2188 static HRESULT reader_parse_attvalue(xmlreader *reader, strval *value)
2189 {
2190     WCHAR *ptr, quote;
2191     UINT start;
2192 
2193     ptr = reader_get_ptr(reader);
2194 
2195     /* skip opening quote */
2196     quote = *ptr;
2197     if (quote != '\"' && quote != '\'') return WC_E_QUOTE;
2198     reader_skipn(reader, 1);
2199 
2200     ptr = reader_get_ptr(reader);
2201     start = reader_get_cur(reader);
2202     while (*ptr)
2203     {
2204         if (*ptr == '<') return WC_E_LESSTHAN;
2205 
2206         if (*ptr == quote)
2207         {
2208             reader_init_strvalue(start, reader_get_cur(reader)-start, value);
2209             /* skip closing quote */
2210             reader_skipn(reader, 1);
2211             return S_OK;
2212         }
2213 
2214         if (*ptr == '&')
2215         {
2216             HRESULT hr = reader_parse_reference(reader);
2217             if (FAILED(hr)) return hr;
2218         }
2219         else
2220         {
2221             /* replace all whitespace chars with ' ' */
2222             if (is_wchar_space(*ptr)) *ptr = ' ';
2223             reader_skipn(reader, 1);
2224         }
2225         ptr = reader_get_ptr(reader);
2226     }
2227 
2228     return WC_E_QUOTE;
2229 }
2230 
2231 /* [1  NS] NSAttName ::= PrefixedAttName | DefaultAttName
2232    [2  NS] PrefixedAttName ::= 'xmlns:' NCName
2233    [3  NS] DefaultAttName  ::= 'xmlns'
2234    [15 NS] Attribute ::= NSAttName Eq AttValue | QName Eq AttValue */
2235 static HRESULT reader_parse_attribute(xmlreader *reader)
2236 {
2237     struct reader_position position = reader->position;
2238     strval prefix, local, qname, value;
2239     enum attribute_flags flags = 0;
2240     HRESULT hr;
2241 
2242     hr = reader_parse_qname(reader, &prefix, &local, &qname);
2243     if (FAILED(hr)) return hr;
2244 
2245     if (strval_eq(reader, &prefix, &strval_xmlns))
2246         flags |= ATTRIBUTE_NS_DEFINITION;
2247 
2248     if (strval_eq(reader, &qname, &strval_xmlns))
2249         flags |= ATTRIBUTE_DEFAULT_NS_DEFINITION;
2250 
2251     hr = reader_parse_eq(reader);
2252     if (FAILED(hr)) return hr;
2253 
2254     hr = reader_parse_attvalue(reader, &value);
2255     if (FAILED(hr)) return hr;
2256 
2257     if (flags & (ATTRIBUTE_NS_DEFINITION | ATTRIBUTE_DEFAULT_NS_DEFINITION))
2258         reader_push_ns(reader, &local, &value, !!(flags & ATTRIBUTE_DEFAULT_NS_DEFINITION));
2259 
2260     TRACE("%s=%s\n", debug_strval(reader, &local), debug_strval(reader, &value));
2261     return reader_add_attr(reader, &prefix, &local, &qname, &value, &position, flags);
2262 }
2263 
2264 /* [12 NS] STag ::= '<' QName (S Attribute)* S? '>'
2265    [14 NS] EmptyElemTag ::= '<' QName (S Attribute)* S? '/>' */
2266 static HRESULT reader_parse_stag(xmlreader *reader, strval *prefix, strval *local, strval *qname)
2267 {
2268     struct reader_position position = reader->position;
2269     HRESULT hr;
2270 
2271     hr = reader_parse_qname(reader, prefix, local, qname);
2272     if (FAILED(hr)) return hr;
2273 
2274     for (;;)
2275     {
2276         static const WCHAR endW[] = {'/','>',0};
2277 
2278         reader_skipspaces(reader);
2279 
2280         /* empty element */
2281         if ((reader->is_empty_element = !reader_cmp(reader, endW)))
2282         {
2283             struct element *element = &reader->empty_element;
2284 
2285             /* skip '/>' */
2286             reader_skipn(reader, 2);
2287 
2288             reader_free_strvalued(reader, &element->qname);
2289             reader_free_strvalued(reader, &element->localname);
2290 
2291             element->prefix = *prefix;
2292             reader_strvaldup(reader, qname, &element->qname);
2293             reader_strvaldup(reader, local, &element->localname);
2294             element->position = position;
2295             reader_mark_ns_nodes(reader, element);
2296             return S_OK;
2297         }
2298 
2299         /* got a start tag */
2300         if (!reader_cmp(reader, gtW))
2301         {
2302             /* skip '>' */
2303             reader_skipn(reader, 1);
2304             return reader_push_element(reader, prefix, local, qname, &position);
2305         }
2306 
2307         hr = reader_parse_attribute(reader);
2308         if (FAILED(hr)) return hr;
2309     }
2310 
2311     return S_OK;
2312 }
2313 
2314 /* [39] element ::= EmptyElemTag | STag content ETag */
2315 static HRESULT reader_parse_element(xmlreader *reader)
2316 {
2317     HRESULT hr;
2318 
2319     switch (reader->resumestate)
2320     {
2321     case XmlReadResumeState_Initial:
2322         /* check if we are really on element */
2323         if (reader_cmp(reader, ltW)) return S_FALSE;
2324 
2325         /* skip '<' */
2326         reader_skipn(reader, 1);
2327 
2328         reader_shrink(reader);
2329         reader->resumestate = XmlReadResumeState_STag;
2330     case XmlReadResumeState_STag:
2331     {
2332         strval qname, prefix, local;
2333 
2334         /* this handles empty elements too */
2335         hr = reader_parse_stag(reader, &prefix, &local, &qname);
2336         if (FAILED(hr)) return hr;
2337 
2338         /* FIXME: need to check for defined namespace to reject invalid prefix */
2339 
2340         /* if we got empty element and stack is empty go straight to Misc */
2341         if (reader->is_empty_element && list_empty(&reader->elements))
2342             reader->instate = XmlReadInState_MiscEnd;
2343         else
2344             reader->instate = XmlReadInState_Content;
2345 
2346         reader->nodetype = XmlNodeType_Element;
2347         reader->resumestate = XmlReadResumeState_Initial;
2348         reader_set_strvalue(reader, StringValue_Prefix, &prefix);
2349         reader_set_strvalue(reader, StringValue_QualifiedName, &qname);
2350         reader_set_strvalue(reader, StringValue_Value, &strval_empty);
2351         break;
2352     }
2353     default:
2354         hr = E_FAIL;
2355     }
2356 
2357     return hr;
2358 }
2359 
2360 /* [13 NS] ETag ::= '</' QName S? '>' */
2361 static HRESULT reader_parse_endtag(xmlreader *reader)
2362 {
2363     struct reader_position position;
2364     strval prefix, local, qname;
2365     struct element *element;
2366     HRESULT hr;
2367 
2368     /* skip '</' */
2369     reader_skipn(reader, 2);
2370 
2371     position = reader->position;
2372     hr = reader_parse_qname(reader, &prefix, &local, &qname);
2373     if (FAILED(hr)) return hr;
2374 
2375     reader_skipspaces(reader);
2376 
2377     if (reader_cmp(reader, gtW)) return WC_E_GREATERTHAN;
2378 
2379     /* skip '>' */
2380     reader_skipn(reader, 1);
2381 
2382     /* Element stack should never be empty at this point, cause we shouldn't get to
2383        content parsing if it's empty. */
2384     element = LIST_ENTRY(list_head(&reader->elements), struct element, entry);
2385     if (!strval_eq(reader, &element->qname, &qname)) return WC_E_ELEMENTMATCH;
2386 
2387     /* update position stored for start tag, we won't be using it */
2388     element->position = position;
2389 
2390     reader->nodetype = XmlNodeType_EndElement;
2391     reader->is_empty_element = FALSE;
2392     reader_set_strvalue(reader, StringValue_Prefix, &prefix);
2393 
2394     return S_OK;
2395 }
2396 
2397 /* [18] CDSect ::= CDStart CData CDEnd
2398    [19] CDStart ::= '<![CDATA['
2399    [20] CData ::= (Char* - (Char* ']]>' Char*))
2400    [21] CDEnd ::= ']]>' */
2401 static HRESULT reader_parse_cdata(xmlreader *reader)
2402 {
2403     WCHAR *ptr;
2404     UINT start;
2405 
2406     if (reader->resumestate == XmlReadResumeState_CDATA)
2407     {
2408         start = reader->resume[XmlReadResume_Body];
2409         ptr = reader_get_ptr(reader);
2410     }
2411     else
2412     {
2413         /* skip markup '<![CDATA[' */
2414         reader_skipn(reader, 9);
2415         reader_shrink(reader);
2416         ptr = reader_get_ptr(reader);
2417         start = reader_get_cur(reader);
2418         reader->nodetype = XmlNodeType_CDATA;
2419         reader->resume[XmlReadResume_Body] = start;
2420         reader->resumestate = XmlReadResumeState_CDATA;
2421         reader_set_strvalue(reader, StringValue_Value, NULL);
2422     }
2423 
2424     while (*ptr)
2425     {
2426         if (*ptr == ']' && *(ptr+1) == ']' && *(ptr+2) == '>')
2427         {
2428             strval value;
2429 
2430             reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
2431 
2432             /* skip ']]>' */
2433             reader_skipn(reader, 3);
2434             TRACE("%s\n", debug_strval(reader, &value));
2435 
2436             reader_set_strvalue(reader, StringValue_Value, &value);
2437             reader->resume[XmlReadResume_Body] = 0;
2438             reader->resumestate = XmlReadResumeState_Initial;
2439             return S_OK;
2440         }
2441         else
2442         {
2443             reader_skipn(reader, 1);
2444             ptr++;
2445         }
2446     }
2447 
2448     return S_OK;
2449 }
2450 
2451 /* [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) */
2452 static HRESULT reader_parse_chardata(xmlreader *reader)
2453 {
2454     struct reader_position position;
2455     WCHAR *ptr;
2456     UINT start;
2457 
2458     if (reader->resumestate == XmlReadResumeState_CharData)
2459     {
2460         start = reader->resume[XmlReadResume_Body];
2461         ptr = reader_get_ptr(reader);
2462     }
2463     else
2464     {
2465         reader_shrink(reader);
2466         ptr = reader_get_ptr(reader);
2467         start = reader_get_cur(reader);
2468         /* There's no text */
2469         if (!*ptr || *ptr == '<') return S_OK;
2470         reader->nodetype = is_wchar_space(*ptr) ? XmlNodeType_Whitespace : XmlNodeType_Text;
2471         reader->resume[XmlReadResume_Body] = start;
2472         reader->resumestate = XmlReadResumeState_CharData;
2473         reader_set_strvalue(reader, StringValue_Value, NULL);
2474     }
2475 
2476     position = reader->position;
2477     while (*ptr)
2478     {
2479         static const WCHAR ampW[] = {'&',0};
2480 
2481         /* CDATA closing sequence ']]>' is not allowed */
2482         if (ptr[0] == ']' && ptr[1] == ']' && ptr[2] == '>')
2483             return WC_E_CDSECTEND;
2484 
2485         /* Found next markup part */
2486         if (ptr[0] == '<')
2487         {
2488             strval value;
2489 
2490             reader->empty_element.position = position;
2491             reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
2492             reader_set_strvalue(reader, StringValue_Value, &value);
2493             reader->resume[XmlReadResume_Body] = 0;
2494             reader->resumestate = XmlReadResumeState_Initial;
2495             return S_OK;
2496         }
2497 
2498         /* this covers a case when text has leading whitespace chars */
2499         if (!is_wchar_space(*ptr)) reader->nodetype = XmlNodeType_Text;
2500 
2501         if (!reader_cmp(reader, ampW))
2502             reader_parse_reference(reader);
2503         else
2504             reader_skipn(reader, 1);
2505 
2506         ptr = reader_get_ptr(reader);
2507     }
2508 
2509     return S_OK;
2510 }
2511 
2512 /* [43] content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)* */
2513 static HRESULT reader_parse_content(xmlreader *reader)
2514 {
2515     static const WCHAR cdstartW[] = {'<','!','[','C','D','A','T','A','[',0};
2516     static const WCHAR etagW[] = {'<','/',0};
2517 
2518     if (reader->resumestate != XmlReadResumeState_Initial)
2519     {
2520         switch (reader->resumestate)
2521         {
2522         case XmlReadResumeState_CDATA:
2523             return reader_parse_cdata(reader);
2524         case XmlReadResumeState_Comment:
2525             return reader_parse_comment(reader);
2526         case XmlReadResumeState_PIBody:
2527         case XmlReadResumeState_PITarget:
2528             return reader_parse_pi(reader);
2529         case XmlReadResumeState_CharData:
2530             return reader_parse_chardata(reader);
2531         default:
2532             ERR("unknown resume state %d\n", reader->resumestate);
2533         }
2534     }
2535 
2536     reader_shrink(reader);
2537 
2538     /* handle end tag here, it indicates end of content as well */
2539     if (!reader_cmp(reader, etagW))
2540         return reader_parse_endtag(reader);
2541 
2542     if (!reader_cmp(reader, commentW))
2543         return reader_parse_comment(reader);
2544 
2545     if (!reader_cmp(reader, piW))
2546         return reader_parse_pi(reader);
2547 
2548     if (!reader_cmp(reader, cdstartW))
2549         return reader_parse_cdata(reader);
2550 
2551     if (!reader_cmp(reader, ltW))
2552         return reader_parse_element(reader);
2553 
2554     /* what's left must be CharData */
2555     return reader_parse_chardata(reader);
2556 }
2557 
2558 static HRESULT reader_parse_nextnode(xmlreader *reader)
2559 {
2560     XmlNodeType nodetype = reader_get_nodetype(reader);
2561     HRESULT hr;
2562 
2563     if (!is_reader_pending(reader))
2564     {
2565         reader->chunk_read_off = 0;
2566         reader_clear_attrs(reader);
2567     }
2568 
2569     /* When moving from EndElement or empty element, pop its own namespace definitions */
2570     switch (nodetype)
2571     {
2572     case XmlNodeType_Attribute:
2573         reader_dec_depth(reader);
2574         /* fallthrough */
2575     case XmlNodeType_Element:
2576         if (reader->is_empty_element)
2577             reader_pop_ns_nodes(reader, &reader->empty_element);
2578         else if (FAILED(hr = reader_inc_depth(reader)))
2579             return hr;
2580         break;
2581     case XmlNodeType_EndElement:
2582         reader_pop_element(reader);
2583         reader_dec_depth(reader);
2584         break;
2585     default:
2586         ;
2587     }
2588 
2589     for (;;)
2590     {
2591         switch (reader->instate)
2592         {
2593         /* if it's a first call for a new input we need to detect stream encoding */
2594         case XmlReadInState_Initial:
2595             {
2596                 xml_encoding enc;
2597 
2598                 hr = readerinput_growraw(reader->input);
2599                 if (FAILED(hr)) return hr;
2600 
2601                 reader->position.line_number = 1;
2602                 reader->position.line_position = 1;
2603 
2604                 /* try to detect encoding by BOM or data and set input code page */
2605                 hr = readerinput_detectencoding(reader->input, &enc);
2606                 TRACE("detected encoding %s, 0x%08x\n", enc == XmlEncoding_Unknown ? "(unknown)" :
2607                         debugstr_w(xml_encoding_map[enc].name), hr);
2608                 if (FAILED(hr)) return hr;
2609 
2610                 /* always switch first time cause we have to put something in */
2611                 readerinput_switchencoding(reader->input, enc);
2612 
2613                 /* parse xml declaration */
2614                 hr = reader_parse_xmldecl(reader);
2615                 if (FAILED(hr)) return hr;
2616 
2617                 readerinput_shrinkraw(reader->input, -1);
2618                 reader->instate = XmlReadInState_Misc_DTD;
2619                 if (hr == S_OK) return hr;
2620             }
2621             break;
2622         case XmlReadInState_Misc_DTD:
2623             hr = reader_parse_misc(reader);
2624             if (FAILED(hr)) return hr;
2625 
2626             if (hr == S_FALSE)
2627                 reader->instate = XmlReadInState_DTD;
2628             else
2629                 return hr;
2630             break;
2631         case XmlReadInState_DTD:
2632             hr = reader_parse_dtd(reader);
2633             if (FAILED(hr)) return hr;
2634 
2635             if (hr == S_OK)
2636             {
2637                 reader->instate = XmlReadInState_DTD_Misc;
2638                 return hr;
2639             }
2640             else
2641                 reader->instate = XmlReadInState_Element;
2642             break;
2643         case XmlReadInState_DTD_Misc:
2644             hr = reader_parse_misc(reader);
2645             if (FAILED(hr)) return hr;
2646 
2647             if (hr == S_FALSE)
2648                 reader->instate = XmlReadInState_Element;
2649             else
2650                 return hr;
2651             break;
2652         case XmlReadInState_Element:
2653             return reader_parse_element(reader);
2654         case XmlReadInState_Content:
2655             return reader_parse_content(reader);
2656         case XmlReadInState_MiscEnd:
2657             hr = reader_parse_misc(reader);
2658             if (hr != S_FALSE) return hr;
2659 
2660             if (*reader_get_ptr(reader))
2661             {
2662                 WARN("found garbage in the end of XML\n");
2663                 return WC_E_SYNTAX;
2664             }
2665 
2666             reader->instate = XmlReadInState_Eof;
2667             reader->state = XmlReadState_EndOfFile;
2668             reader->nodetype = XmlNodeType_None;
2669             return hr;
2670         case XmlReadInState_Eof:
2671             return S_FALSE;
2672         default:
2673             FIXME("internal state %d not handled\n", reader->instate);
2674             return E_NOTIMPL;
2675         }
2676     }
2677 
2678     return E_NOTIMPL;
2679 }
2680 
2681 static HRESULT WINAPI xmlreader_QueryInterface(IXmlReader *iface, REFIID riid, void** ppvObject)
2682 {
2683     xmlreader *This = impl_from_IXmlReader(iface);
2684 
2685     TRACE("(%p)->(%s %p)\n", This, debugstr_guid(riid), ppvObject);
2686 
2687     if (IsEqualGUID(riid, &IID_IUnknown) ||
2688         IsEqualGUID(riid, &IID_IXmlReader))
2689     {
2690         *ppvObject = iface;
2691     }
2692     else
2693     {
2694         FIXME("interface %s not implemented\n", debugstr_guid(riid));
2695         *ppvObject = NULL;
2696         return E_NOINTERFACE;
2697     }
2698 
2699     IXmlReader_AddRef(iface);
2700 
2701     return S_OK;
2702 }
2703 
2704 static ULONG WINAPI xmlreader_AddRef(IXmlReader *iface)
2705 {
2706     xmlreader *This = impl_from_IXmlReader(iface);
2707     ULONG ref = InterlockedIncrement(&This->ref);
2708     TRACE("(%p)->(%d)\n", This, ref);
2709     return ref;
2710 }
2711 
2712 static void reader_clear_ns(xmlreader *reader)
2713 {
2714     struct ns *ns, *ns2;
2715 
2716     LIST_FOR_EACH_ENTRY_SAFE(ns, ns2, &reader->ns, struct ns, entry) {
2717         list_remove(&ns->entry);
2718         reader_free_strvalued(reader, &ns->prefix);
2719         reader_free_strvalued(reader, &ns->uri);
2720         reader_free(reader, ns);
2721     }
2722 
2723     LIST_FOR_EACH_ENTRY_SAFE(ns, ns2, &reader->nsdef, struct ns, entry) {
2724         list_remove(&ns->entry);
2725         reader_free_strvalued(reader, &ns->uri);
2726         reader_free(reader, ns);
2727     }
2728 }
2729 
2730 static void reader_reset_parser(xmlreader *reader)
2731 {
2732     reader->position.line_number = 0;
2733     reader->position.line_position = 0;
2734 
2735     reader_clear_elements(reader);
2736     reader_clear_attrs(reader);
2737     reader_clear_ns(reader);
2738     reader_free_strvalues(reader);
2739 
2740     reader->depth = 0;
2741     reader->nodetype = XmlNodeType_None;
2742     reader->resumestate = XmlReadResumeState_Initial;
2743     memset(reader->resume, 0, sizeof(reader->resume));
2744     reader->is_empty_element = FALSE;
2745 }
2746 
2747 static ULONG WINAPI xmlreader_Release(IXmlReader *iface)
2748 {
2749     xmlreader *This = impl_from_IXmlReader(iface);
2750     LONG ref = InterlockedDecrement(&This->ref);
2751 
2752     TRACE("(%p)->(%d)\n", This, ref);
2753 
2754     if (ref == 0)
2755     {
2756         IMalloc *imalloc = This->imalloc;
2757         reader_reset_parser(This);
2758         if (This->input) IUnknown_Release(&This->input->IXmlReaderInput_iface);
2759         if (This->resolver) IXmlResolver_Release(This->resolver);
2760         if (This->mlang) IUnknown_Release(This->mlang);
2761         reader_free(This, This);
2762         if (imalloc) IMalloc_Release(imalloc);
2763     }
2764 
2765     return ref;
2766 }
2767 
2768 static HRESULT WINAPI xmlreader_SetInput(IXmlReader* iface, IUnknown *input)
2769 {
2770     xmlreader *This = impl_from_IXmlReader(iface);
2771     IXmlReaderInput *readerinput;
2772     HRESULT hr;
2773 
2774     TRACE("(%p)->(%p)\n", This, input);
2775 
2776     if (This->input)
2777     {
2778         readerinput_release_stream(This->input);
2779         IUnknown_Release(&This->input->IXmlReaderInput_iface);
2780         This->input = NULL;
2781     }
2782 
2783     reader_reset_parser(This);
2784 
2785     /* just reset current input */
2786     if (!input)
2787     {
2788         This->state = XmlReadState_Initial;
2789         return S_OK;
2790     }
2791 
2792     /* now try IXmlReaderInput, ISequentialStream, IStream */
2793     hr = IUnknown_QueryInterface(input, &IID_IXmlReaderInput, (void**)&readerinput);
2794     if (hr == S_OK)
2795     {
2796         if (readerinput->lpVtbl == &xmlreaderinputvtbl)
2797             This->input = impl_from_IXmlReaderInput(readerinput);
2798         else
2799         {
2800             ERR("got external IXmlReaderInput implementation: %p, vtbl=%p\n",
2801                 readerinput, readerinput->lpVtbl);
2802             IUnknown_Release(readerinput);
2803             return E_FAIL;
2804 
2805         }
2806     }
2807 
2808     if (hr != S_OK || !readerinput)
2809     {
2810         /* create IXmlReaderInput basing on supplied interface */
2811         hr = CreateXmlReaderInputWithEncodingName(input,
2812                                          This->imalloc, NULL, FALSE, NULL, &readerinput);
2813         if (hr != S_OK) return hr;
2814         This->input = impl_from_IXmlReaderInput(readerinput);
2815     }
2816 
2817     /* set stream for supplied IXmlReaderInput */
2818     hr = readerinput_query_for_stream(This->input);
2819     if (hr == S_OK)
2820     {
2821         This->state = XmlReadState_Initial;
2822         This->instate = XmlReadInState_Initial;
2823     }
2824     return hr;
2825 }
2826 
2827 static HRESULT WINAPI xmlreader_GetProperty(IXmlReader* iface, UINT property, LONG_PTR *value)
2828 {
2829     xmlreader *This = impl_from_IXmlReader(iface);
2830 
2831     TRACE("(%p)->(%s %p)\n", This, debugstr_reader_prop(property), value);
2832 
2833     if (!value) return E_INVALIDARG;
2834 
2835     switch (property)
2836     {
2837         case XmlReaderProperty_MultiLanguage:
2838             *value = (LONG_PTR)This->mlang;
2839             if (This->mlang)
2840                 IUnknown_AddRef(This->mlang);
2841             break;
2842         case XmlReaderProperty_XmlResolver:
2843             *value = (LONG_PTR)This->resolver;
2844             if (This->resolver)
2845                 IXmlResolver_AddRef(This->resolver);
2846             break;
2847         case XmlReaderProperty_DtdProcessing:
2848             *value = This->dtdmode;
2849             break;
2850         case XmlReaderProperty_ReadState:
2851             *value = This->state;
2852             break;
2853         case XmlReaderProperty_MaxElementDepth:
2854             *value = This->max_depth;
2855             break;
2856         default:
2857             FIXME("Unimplemented property (%u)\n", property);
2858             return E_NOTIMPL;
2859     }
2860 
2861     return S_OK;
2862 }
2863 
2864 static HRESULT WINAPI xmlreader_SetProperty(IXmlReader* iface, UINT property, LONG_PTR value)
2865 {
2866     xmlreader *This = impl_from_IXmlReader(iface);
2867 
2868     TRACE("(%p)->(%s 0x%lx)\n", This, debugstr_reader_prop(property), value);
2869 
2870     switch (property)
2871     {
2872         case XmlReaderProperty_MultiLanguage:
2873             if (This->mlang)
2874                 IUnknown_Release(This->mlang);
2875             This->mlang = (IUnknown*)value;
2876             if (This->mlang)
2877                 IUnknown_AddRef(This->mlang);
2878             if (This->mlang)
2879                 FIXME("Ignoring MultiLanguage %p\n", This->mlang);
2880             break;
2881         case XmlReaderProperty_XmlResolver:
2882             if (This->resolver)
2883                 IXmlResolver_Release(This->resolver);
2884             This->resolver = (IXmlResolver*)value;
2885             if (This->resolver)
2886                 IXmlResolver_AddRef(This->resolver);
2887             break;
2888         case XmlReaderProperty_DtdProcessing:
2889             if (value < 0 || value > _DtdProcessing_Last) return E_INVALIDARG;
2890             This->dtdmode = value;
2891             break;
2892         case XmlReaderProperty_MaxElementDepth:
2893             This->max_depth = value;
2894             break;
2895         default:
2896             FIXME("Unimplemented property (%u)\n", property);
2897             return E_NOTIMPL;
2898     }
2899 
2900     return S_OK;
2901 }
2902 
2903 static HRESULT WINAPI xmlreader_Read(IXmlReader* iface, XmlNodeType *nodetype)
2904 {
2905     xmlreader *This = impl_from_IXmlReader(iface);
2906     XmlNodeType oldtype = This->nodetype;
2907     XmlNodeType type;
2908     HRESULT hr;
2909 
2910     TRACE("(%p)->(%p)\n", This, nodetype);
2911 
2912     if (!nodetype)
2913         nodetype = &type;
2914 
2915     switch (This->state)
2916     {
2917     case XmlReadState_Closed:
2918         hr = S_FALSE;
2919         break;
2920     case XmlReadState_Error:
2921         hr = This->error;
2922         break;
2923     default:
2924         hr = reader_parse_nextnode(This);
2925         if (SUCCEEDED(hr) && oldtype == XmlNodeType_None && This->nodetype != oldtype)
2926             This->state = XmlReadState_Interactive;
2927 
2928         if (FAILED(hr))
2929         {
2930             This->state = XmlReadState_Error;
2931             This->nodetype = XmlNodeType_None;
2932             This->depth = 0;
2933             This->error = hr;
2934         }
2935     }
2936 
2937     TRACE("node type %s\n", debugstr_nodetype(This->nodetype));
2938     *nodetype = This->nodetype;
2939 
2940     return hr;
2941 }
2942 
2943 static HRESULT WINAPI xmlreader_GetNodeType(IXmlReader* iface, XmlNodeType *node_type)
2944 {
2945     xmlreader *This = impl_from_IXmlReader(iface);
2946 
2947     TRACE("(%p)->(%p)\n", This, node_type);
2948 
2949     if (!node_type)
2950         return E_INVALIDARG;
2951 
2952     *node_type = reader_get_nodetype(This);
2953     return This->state == XmlReadState_Closed ? S_FALSE : S_OK;
2954 }
2955 
2956 static void reader_set_current_attribute(xmlreader *reader, struct attribute *attr)
2957 {
2958     reader->attr = attr;
2959     reader->chunk_read_off = 0;
2960     reader_set_strvalue(reader, StringValue_Prefix, &attr->prefix);
2961     reader_set_strvalue(reader, StringValue_QualifiedName, &attr->qname);
2962     reader_set_strvalue(reader, StringValue_Value, &attr->value);
2963 }
2964 
2965 static HRESULT reader_move_to_first_attribute(xmlreader *reader)
2966 {
2967     if (!reader->attr_count)
2968         return S_FALSE;
2969 
2970     if (!reader->attr)
2971         reader_inc_depth(reader);
2972 
2973     reader_set_current_attribute(reader, LIST_ENTRY(list_head(&reader->attrs), struct attribute, entry));
2974 
2975     return S_OK;
2976 }
2977 
2978 static HRESULT WINAPI xmlreader_MoveToFirstAttribute(IXmlReader* iface)
2979 {
2980     xmlreader *This = impl_from_IXmlReader(iface);
2981 
2982     TRACE("(%p)\n", This);
2983 
2984     return reader_move_to_first_attribute(This);
2985 }
2986 
2987 static HRESULT WINAPI xmlreader_MoveToNextAttribute(IXmlReader* iface)
2988 {
2989     xmlreader *This = impl_from_IXmlReader(iface);
2990     const struct list *next;
2991 
2992     TRACE("(%p)\n", This);
2993 
2994     if (!This->attr_count) return S_FALSE;
2995 
2996     if (!This->attr)
2997         return reader_move_to_first_attribute(This);
2998 
2999     next = list_next(&This->attrs, &This->attr->entry);
3000     if (next)
3001         reader_set_current_attribute(This, LIST_ENTRY(next, struct attribute, entry));
3002 
3003     return next ? S_OK : S_FALSE;
3004 }
3005 
3006 static void reader_get_attribute_ns_uri(xmlreader *reader, struct attribute *attr, const WCHAR **uri, UINT *len)
3007 {
3008     static const WCHAR xmlns_uriW[] = {'h','t','t','p',':','/','/','w','w','w','.','w','3','.','o','r','g','/',
3009             '2','0','0','0','/','x','m','l','n','s','/',0};
3010     static const WCHAR xml_uriW[] = {'h','t','t','p',':','/','/','w','w','w','.','w','3','.','o','r','g','/',
3011             'X','M','L','/','1','9','9','8','/','n','a','m','e','s','p','a','c','e',0};
3012 
3013     /* Check for reserved prefixes first */
3014     if ((strval_eq(reader, &attr->prefix, &strval_empty) && strval_eq(reader, &attr->localname, &strval_xmlns)) ||
3015             strval_eq(reader, &attr->prefix, &strval_xmlns))
3016     {
3017         *uri = xmlns_uriW;
3018         *len = ARRAY_SIZE(xmlns_uriW) - 1;
3019     }
3020     else if (strval_eq(reader, &attr->prefix, &strval_xml))
3021     {
3022         *uri = xml_uriW;
3023         *len = ARRAY_SIZE(xml_uriW) - 1;
3024     }
3025     else
3026     {
3027         *uri = NULL;
3028         *len = 0;
3029     }
3030 
3031     if (!*uri)
3032     {
3033         struct ns *ns;
3034 
3035         if ((ns = reader_lookup_ns(reader, &attr->prefix)))
3036         {
3037             *uri = ns->uri.str;
3038             *len = ns->uri.len;
3039         }
3040         else
3041         {
3042             *uri = emptyW;
3043             *len = 0;
3044         }
3045     }
3046 }
3047 
3048 static void reader_get_attribute_local_name(xmlreader *reader, struct attribute *attr, const WCHAR **name, UINT *len)
3049 {
3050     if (attr->flags & ATTRIBUTE_DEFAULT_NS_DEFINITION)
3051     {
3052         *name = xmlnsW;
3053         *len = 5;
3054     }
3055     else if (attr->flags & ATTRIBUTE_NS_DEFINITION)
3056     {
3057         const struct ns *ns = reader_lookup_ns(reader, &attr->localname);
3058         *name = ns->prefix.str;
3059         *len = ns->prefix.len;
3060     }
3061     else
3062     {
3063         *name = attr->localname.str;
3064         *len = attr->localname.len;
3065     }
3066 }
3067 
3068 static HRESULT WINAPI xmlreader_MoveToAttributeByName(IXmlReader* iface,
3069     const WCHAR *local_name, const WCHAR *namespace_uri)
3070 {
3071     xmlreader *This = impl_from_IXmlReader(iface);
3072     UINT target_name_len, target_uri_len;
3073     struct attribute *attr;
3074 
3075     TRACE("(%p)->(%s %s)\n", This, debugstr_w(local_name), debugstr_w(namespace_uri));
3076 
3077     if (!local_name)
3078         return E_INVALIDARG;
3079 
3080     if (!This->attr_count)
3081         return S_FALSE;
3082 
3083     if (!namespace_uri)
3084         namespace_uri = emptyW;
3085 
3086     target_name_len = lstrlenW(local_name);
3087     target_uri_len = lstrlenW(namespace_uri);
3088 
3089     LIST_FOR_EACH_ENTRY(attr, &This->attrs, struct attribute, entry)
3090     {
3091         UINT name_len, uri_len;
3092         const WCHAR *name, *uri;
3093 
3094         reader_get_attribute_local_name(This, attr, &name, &name_len);
3095         reader_get_attribute_ns_uri(This, attr, &uri, &uri_len);
3096 
3097         if (name_len == target_name_len && uri_len == target_uri_len &&
3098                 !wcscmp(name, local_name) && !wcscmp(uri, namespace_uri))
3099         {
3100             reader_set_current_attribute(This, attr);
3101             return S_OK;
3102         }
3103     }
3104 
3105     return S_FALSE;
3106 }
3107 
3108 static HRESULT WINAPI xmlreader_MoveToElement(IXmlReader* iface)
3109 {
3110     xmlreader *This = impl_from_IXmlReader(iface);
3111 
3112     TRACE("(%p)\n", This);
3113 
3114     if (!This->attr_count) return S_FALSE;
3115 
3116     if (This->attr)
3117         reader_dec_depth(This);
3118 
3119     This->attr = NULL;
3120 
3121     /* FIXME: support other node types with 'attributes' like DTD */
3122     if (This->is_empty_element) {
3123         reader_set_strvalue(This, StringValue_Prefix, &This->empty_element.prefix);
3124         reader_set_strvalue(This, StringValue_QualifiedName, &This->empty_element.qname);
3125     }
3126     else {
3127         struct element *element = LIST_ENTRY(list_head(&This->elements), struct element, entry);
3128         if (element) {
3129             reader_set_strvalue(This, StringValue_Prefix, &element->prefix);
3130             reader_set_strvalue(This, StringValue_QualifiedName, &element->qname);
3131         }
3132     }
3133     This->chunk_read_off = 0;
3134     reader_set_strvalue(This, StringValue_Value, &strval_empty);
3135 
3136     return S_OK;
3137 }
3138 
3139 static HRESULT WINAPI xmlreader_GetQualifiedName(IXmlReader* iface, LPCWSTR *name, UINT *len)
3140 {
3141     xmlreader *This = impl_from_IXmlReader(iface);
3142     struct attribute *attribute = This->attr;
3143     struct element *element;
3144     UINT length;
3145 
3146     TRACE("(%p)->(%p %p)\n", This, name, len);
3147 
3148     if (!len)
3149         len = &length;
3150 
3151     switch (reader_get_nodetype(This))
3152     {
3153     case XmlNodeType_Text:
3154     case XmlNodeType_CDATA:
3155     case XmlNodeType_Comment:
3156     case XmlNodeType_Whitespace:
3157         *name = emptyW;
3158         *len = 0;
3159         break;
3160     case XmlNodeType_Element:
3161     case XmlNodeType_EndElement:
3162         element = reader_get_element(This);
3163         if (element->prefix.len)
3164         {
3165             *name = element->qname.str;
3166             *len = element->qname.len;
3167         }
3168         else
3169         {
3170             *name = element->localname.str;
3171             *len = element->localname.len;
3172         }
3173         break;
3174     case XmlNodeType_Attribute:
3175         if (attribute->flags & ATTRIBUTE_DEFAULT_NS_DEFINITION)
3176         {
3177             *name = xmlnsW;
3178             *len = 5;
3179         } else if (attribute->prefix.len)
3180         {
3181             *name = This->strvalues[StringValue_QualifiedName].str;
3182             *len = This->strvalues[StringValue_QualifiedName].len;
3183         }
3184         else
3185         {
3186             *name = attribute->localname.str;
3187             *len = attribute->localname.len;
3188         }
3189         break;
3190     default:
3191         *name = This->strvalues[StringValue_QualifiedName].str;
3192         *len = This->strvalues[StringValue_QualifiedName].len;
3193         break;
3194     }
3195 
3196     return S_OK;
3197 }
3198 
3199 static struct ns *reader_lookup_nsdef(xmlreader *reader)
3200 {
3201     if (list_empty(&reader->nsdef))
3202         return NULL;
3203 
3204     return LIST_ENTRY(list_head(&reader->nsdef), struct ns, entry);
3205 }
3206 
3207 static HRESULT WINAPI xmlreader_GetNamespaceUri(IXmlReader* iface, const WCHAR **uri, UINT *len)
3208 {
3209     xmlreader *This = impl_from_IXmlReader(iface);
3210     const strval *prefix = &This->strvalues[StringValue_Prefix];
3211     XmlNodeType nodetype;
3212     struct ns *ns;
3213     UINT length;
3214 
3215     TRACE("(%p %p %p)\n", iface, uri, len);
3216 
3217     if (!len)
3218         len = &length;
3219 
3220     switch ((nodetype = reader_get_nodetype(This)))
3221     {
3222     case XmlNodeType_Attribute:
3223         reader_get_attribute_ns_uri(This, This->attr, uri, len);
3224         break;
3225     case XmlNodeType_Element:
3226     case XmlNodeType_EndElement:
3227         {
3228             ns = reader_lookup_ns(This, prefix);
3229 
3230             /* pick top default ns if any */
3231             if (!ns)
3232                 ns = reader_lookup_nsdef(This);
3233 
3234             if (ns) {
3235                 *uri = ns->uri.str;
3236                 *len = ns->uri.len;
3237             }
3238             else {
3239                 *uri = emptyW;
3240                 *len = 0;
3241             }
3242         }
3243         break;
3244     case XmlNodeType_Text:
3245     case XmlNodeType_CDATA:
3246     case XmlNodeType_ProcessingInstruction:
3247     case XmlNodeType_Comment:
3248     case XmlNodeType_Whitespace:
3249     case XmlNodeType_XmlDeclaration:
3250         *uri = emptyW;
3251         *len = 0;
3252         break;
3253     default:
3254         FIXME("Unhandled node type %d\n", nodetype);
3255         *uri = NULL;
3256         *len = 0;
3257         return E_NOTIMPL;
3258     }
3259 
3260     return S_OK;
3261 }
3262 
3263 static HRESULT WINAPI xmlreader_GetLocalName(IXmlReader* iface, LPCWSTR *name, UINT *len)
3264 {
3265     xmlreader *This = impl_from_IXmlReader(iface);
3266     struct element *element;
3267     UINT length;
3268 
3269     TRACE("(%p)->(%p %p)\n", This, name, len);
3270 
3271     if (!len)
3272         len = &length;
3273 
3274     switch (reader_get_nodetype(This))
3275     {
3276     case XmlNodeType_Text:
3277     case XmlNodeType_CDATA:
3278     case XmlNodeType_Comment:
3279     case XmlNodeType_Whitespace:
3280         *name = emptyW;
3281         *len = 0;
3282         break;
3283     case XmlNodeType_Element:
3284     case XmlNodeType_EndElement:
3285         element = reader_get_element(This);
3286         *name = element->localname.str;
3287         *len = element->localname.len;
3288         break;
3289     case XmlNodeType_Attribute:
3290         reader_get_attribute_local_name(This, This->attr, name, len);
3291         break;
3292     default:
3293         *name = This->strvalues[StringValue_LocalName].str;
3294         *len = This->strvalues[StringValue_LocalName].len;
3295         break;
3296     }
3297 
3298     return S_OK;
3299 }
3300 
3301 static HRESULT WINAPI xmlreader_GetPrefix(IXmlReader* iface, const WCHAR **ret, UINT *len)
3302 {
3303     xmlreader *This = impl_from_IXmlReader(iface);
3304     XmlNodeType nodetype;
3305     UINT length;
3306 
3307     TRACE("(%p)->(%p %p)\n", This, ret, len);
3308 
3309     if (!len)
3310         len = &length;
3311 
3312     *ret = emptyW;
3313     *len = 0;
3314 
3315     switch ((nodetype = reader_get_nodetype(This)))
3316     {
3317     case XmlNodeType_Element:
3318     case XmlNodeType_EndElement:
3319     case XmlNodeType_Attribute:
3320     {
3321         const strval *prefix = &This->strvalues[StringValue_Prefix];
3322         struct ns *ns;
3323 
3324         if (strval_eq(This, prefix, &strval_xml))
3325         {
3326             *ret = xmlW;
3327             *len = 3;
3328         }
3329         else if (strval_eq(This, prefix, &strval_xmlns))
3330         {
3331             *ret = xmlnsW;
3332             *len = 5;
3333         }
3334         else if ((ns = reader_lookup_ns(This, prefix)))
3335         {
3336             *ret = ns->prefix.str;
3337             *len = ns->prefix.len;
3338         }
3339 
3340         break;
3341     }
3342     default:
3343         ;
3344     }
3345 
3346     return S_OK;
3347 }
3348 
3349 static const strval *reader_get_value(xmlreader *reader, BOOL ensure_allocated)
3350 {
3351     strval *val;
3352 
3353     switch (reader_get_nodetype(reader))
3354     {
3355     case XmlNodeType_XmlDeclaration:
3356     case XmlNodeType_EndElement:
3357     case XmlNodeType_None:
3358         return &strval_empty;
3359     case XmlNodeType_Attribute:
3360         /* For namespace definition attributes return values from namespace list */
3361         if (reader->attr->flags & (ATTRIBUTE_NS_DEFINITION | ATTRIBUTE_DEFAULT_NS_DEFINITION))
3362         {
3363             struct ns *ns;
3364 
3365             if (!(ns = reader_lookup_ns(reader, &reader->attr->localname)))
3366                 ns = reader_lookup_nsdef(reader);
3367 
3368             return &ns->uri;
3369         }
3370         return &reader->attr->value;
3371     default:
3372         break;
3373     }
3374 
3375     val = &reader->strvalues[StringValue_Value];
3376     if (!val->str && ensure_allocated)
3377     {
3378         WCHAR *ptr = reader_alloc(reader, (val->len+1)*sizeof(WCHAR));
3379         if (!ptr) return NULL;
3380         memcpy(ptr, reader_get_strptr(reader, val), val->len*sizeof(WCHAR));
3381         ptr[val->len] = 0;
3382         val->str = ptr;
3383     }
3384 
3385     return val;
3386 }
3387 
3388 static HRESULT WINAPI xmlreader_GetValue(IXmlReader* iface, const WCHAR **value, UINT *len)
3389 {
3390     xmlreader *reader = impl_from_IXmlReader(iface);
3391     const strval *val = &reader->strvalues[StringValue_Value];
3392     UINT off;
3393 
3394     TRACE("(%p)->(%p %p)\n", reader, value, len);
3395 
3396     *value = NULL;
3397 
3398     if ((reader->nodetype == XmlNodeType_Comment && !val->str && !val->len) || is_reader_pending(reader))
3399     {
3400         XmlNodeType type;
3401         HRESULT hr;
3402 
3403         hr = IXmlReader_Read(iface, &type);
3404         if (FAILED(hr)) return hr;
3405 
3406         /* return if still pending, partially read values are not reported */
3407         if (is_reader_pending(reader)) return E_PENDING;
3408     }
3409 
3410     val = reader_get_value(reader, TRUE);
3411     if (!val)
3412         return E_OUTOFMEMORY;
3413 
3414     off = abs(reader->chunk_read_off);
3415     assert(off <= val->len);
3416     *value = val->str + off;
3417     if (len) *len = val->len - off;
3418     reader->chunk_read_off = -off;
3419     return S_OK;
3420 }
3421 
3422 static HRESULT WINAPI xmlreader_ReadValueChunk(IXmlReader* iface, WCHAR *buffer, UINT chunk_size, UINT *read)
3423 {
3424     xmlreader *reader = impl_from_IXmlReader(iface);
3425     const strval *val;
3426     UINT len = 0;
3427 
3428     TRACE("(%p)->(%p %u %p)\n", reader, buffer, chunk_size, read);
3429 
3430     val = reader_get_value(reader, FALSE);
3431 
3432     /* If value is already read by GetValue, chunk_read_off is negative and chunked reads are not possible. */
3433     if (reader->chunk_read_off >= 0)
3434     {
3435         assert(reader->chunk_read_off <= val->len);
3436         len = min(val->len - reader->chunk_read_off, chunk_size);
3437     }
3438     if (read) *read = len;
3439 
3440     if (len)
3441     {
3442         memcpy(buffer, reader_get_strptr(reader, val) + reader->chunk_read_off, len*sizeof(WCHAR));
3443         reader->chunk_read_off += len;
3444     }
3445 
3446     return len || !chunk_size ? S_OK : S_FALSE;
3447 }
3448 
3449 static HRESULT WINAPI xmlreader_GetBaseUri(IXmlReader* iface,
3450                                            LPCWSTR *baseUri,
3451                                            UINT *baseUri_length)
3452 {
3453     FIXME("(%p %p %p): stub\n", iface, baseUri, baseUri_length);
3454     return E_NOTIMPL;
3455 }
3456 
3457 static BOOL WINAPI xmlreader_IsDefault(IXmlReader* iface)
3458 {
3459     FIXME("(%p): stub\n", iface);
3460     return FALSE;
3461 }
3462 
3463 static BOOL WINAPI xmlreader_IsEmptyElement(IXmlReader* iface)
3464 {
3465     xmlreader *This = impl_from_IXmlReader(iface);
3466     TRACE("(%p)\n", This);
3467     /* Empty elements are not placed in stack, it's stored as a global reader flag that makes sense
3468        when current node is start tag of an element */
3469     return (reader_get_nodetype(This) == XmlNodeType_Element) ? This->is_empty_element : FALSE;
3470 }
3471 
3472 static HRESULT WINAPI xmlreader_GetLineNumber(IXmlReader* iface, UINT *line_number)
3473 {
3474     xmlreader *This = impl_from_IXmlReader(iface);
3475     const struct element *element;
3476 
3477     TRACE("(%p %p)\n", This, line_number);
3478 
3479     if (!line_number)
3480         return E_INVALIDARG;
3481 
3482     switch (reader_get_nodetype(This))
3483     {
3484     case XmlNodeType_Element:
3485     case XmlNodeType_EndElement:
3486         element = reader_get_element(This);
3487         *line_number = element->position.line_number;
3488         break;
3489     case XmlNodeType_Attribute:
3490         *line_number = This->attr->position.line_number;
3491         break;
3492     case XmlNodeType_Whitespace:
3493     case XmlNodeType_XmlDeclaration:
3494         *line_number = This->empty_element.position.line_number;
3495         break;
3496     default:
3497         *line_number = This->position.line_number;
3498         break;
3499     }
3500 
3501     return This->state == XmlReadState_Closed ? S_FALSE : S_OK;
3502 }
3503 
3504 static HRESULT WINAPI xmlreader_GetLinePosition(IXmlReader* iface, UINT *line_position)
3505 {
3506     xmlreader *This = impl_from_IXmlReader(iface);
3507     const struct element *element;
3508 
3509     TRACE("(%p %p)\n", This, line_position);
3510 
3511     if (!line_position)
3512         return E_INVALIDARG;
3513 
3514     switch (reader_get_nodetype(This))
3515     {
3516     case XmlNodeType_Element:
3517     case XmlNodeType_EndElement:
3518         element = reader_get_element(This);
3519         *line_position = element->position.line_position;
3520         break;
3521     case XmlNodeType_Attribute:
3522         *line_position = This->attr->position.line_position;
3523         break;
3524     case XmlNodeType_Whitespace:
3525     case XmlNodeType_XmlDeclaration:
3526         *line_position = This->empty_element.position.line_position;
3527         break;
3528     default:
3529         *line_position = This->position.line_position;
3530         break;
3531     }
3532 
3533     return This->state == XmlReadState_Closed ? S_FALSE : S_OK;
3534 }
3535 
3536 static HRESULT WINAPI xmlreader_GetAttributeCount(IXmlReader* iface, UINT *count)
3537 {
3538     xmlreader *This = impl_from_IXmlReader(iface);
3539 
3540     TRACE("(%p)->(%p)\n", This, count);
3541 
3542     if (!count) return E_INVALIDARG;
3543 
3544     *count = This->attr_count;
3545     return S_OK;
3546 }
3547 
3548 static HRESULT WINAPI xmlreader_GetDepth(IXmlReader* iface, UINT *depth)
3549 {
3550     xmlreader *This = impl_from_IXmlReader(iface);
3551     TRACE("(%p)->(%p)\n", This, depth);
3552     *depth = This->depth;
3553     return S_OK;
3554 }
3555 
3556 static BOOL WINAPI xmlreader_IsEOF(IXmlReader* iface)
3557 {
3558     xmlreader *This = impl_from_IXmlReader(iface);
3559     TRACE("(%p)\n", iface);
3560     return This->state == XmlReadState_EndOfFile;
3561 }
3562 
3563 static const struct IXmlReaderVtbl xmlreader_vtbl =
3564 {
3565     xmlreader_QueryInterface,
3566     xmlreader_AddRef,
3567     xmlreader_Release,
3568     xmlreader_SetInput,
3569     xmlreader_GetProperty,
3570     xmlreader_SetProperty,
3571     xmlreader_Read,
3572     xmlreader_GetNodeType,
3573     xmlreader_MoveToFirstAttribute,
3574     xmlreader_MoveToNextAttribute,
3575     xmlreader_MoveToAttributeByName,
3576     xmlreader_MoveToElement,
3577     xmlreader_GetQualifiedName,
3578     xmlreader_GetNamespaceUri,
3579     xmlreader_GetLocalName,
3580     xmlreader_GetPrefix,
3581     xmlreader_GetValue,
3582     xmlreader_ReadValueChunk,
3583     xmlreader_GetBaseUri,
3584     xmlreader_IsDefault,
3585     xmlreader_IsEmptyElement,
3586     xmlreader_GetLineNumber,
3587     xmlreader_GetLinePosition,
3588     xmlreader_GetAttributeCount,
3589     xmlreader_GetDepth,
3590     xmlreader_IsEOF
3591 };
3592 
3593 /** IXmlReaderInput **/
3594 static HRESULT WINAPI xmlreaderinput_QueryInterface(IXmlReaderInput *iface, REFIID riid, void** ppvObject)
3595 {
3596     xmlreaderinput *This = impl_from_IXmlReaderInput(iface);
3597 
3598     TRACE("(%p)->(%s %p)\n", This, debugstr_guid(riid), ppvObject);
3599 
3600     if (IsEqualGUID(riid, &IID_IXmlReaderInput) ||
3601         IsEqualGUID(riid, &IID_IUnknown))
3602     {
3603         *ppvObject = iface;
3604     }
3605     else
3606     {
3607         WARN("interface %s not implemented\n", debugstr_guid(riid));
3608         *ppvObject = NULL;
3609         return E_NOINTERFACE;
3610     }
3611 
3612     IUnknown_AddRef(iface);
3613 
3614     return S_OK;
3615 }
3616 
3617 static ULONG WINAPI xmlreaderinput_AddRef(IXmlReaderInput *iface)
3618 {
3619     xmlreaderinput *This = impl_from_IXmlReaderInput(iface);
3620     ULONG ref = InterlockedIncrement(&This->ref);
3621     TRACE("(%p)->(%d)\n", This, ref);
3622     return ref;
3623 }
3624 
3625 static ULONG WINAPI xmlreaderinput_Release(IXmlReaderInput *iface)
3626 {
3627     xmlreaderinput *This = impl_from_IXmlReaderInput(iface);
3628     LONG ref = InterlockedDecrement(&This->ref);
3629 
3630     TRACE("(%p)->(%d)\n", This, ref);
3631 
3632     if (ref == 0)
3633     {
3634         IMalloc *imalloc = This->imalloc;
3635         if (This->input) IUnknown_Release(This->input);
3636         if (This->stream) ISequentialStream_Release(This->stream);
3637         if (This->buffer) free_input_buffer(This->buffer);
3638         readerinput_free(This, This->baseuri);
3639         readerinput_free(This, This);
3640         if (imalloc) IMalloc_Release(imalloc);
3641     }
3642 
3643     return ref;
3644 }
3645 
3646 static const struct IUnknownVtbl xmlreaderinputvtbl =
3647 {
3648     xmlreaderinput_QueryInterface,
3649     xmlreaderinput_AddRef,
3650     xmlreaderinput_Release
3651 };
3652 
3653 HRESULT WINAPI CreateXmlReader(REFIID riid, void **obj, IMalloc *imalloc)
3654 {
3655     xmlreader *reader;
3656     HRESULT hr;
3657     int i;
3658 
3659     TRACE("(%s, %p, %p)\n", wine_dbgstr_guid(riid), obj, imalloc);
3660 
3661     if (imalloc)
3662         reader = IMalloc_Alloc(imalloc, sizeof(*reader));
3663     else
3664         reader = heap_alloc(sizeof(*reader));
3665     if (!reader)
3666         return E_OUTOFMEMORY;
3667 
3668     memset(reader, 0, sizeof(*reader));
3669     reader->IXmlReader_iface.lpVtbl = &xmlreader_vtbl;
3670     reader->ref = 1;
3671     reader->state = XmlReadState_Closed;
3672     reader->instate = XmlReadInState_Initial;
3673     reader->resumestate = XmlReadResumeState_Initial;
3674     reader->dtdmode = DtdProcessing_Prohibit;
3675     reader->imalloc = imalloc;
3676     if (imalloc) IMalloc_AddRef(imalloc);
3677     reader->nodetype = XmlNodeType_None;
3678     list_init(&reader->attrs);
3679     list_init(&reader->nsdef);
3680     list_init(&reader->ns);
3681     list_init(&reader->elements);
3682     reader->max_depth = 256;
3683 
3684     reader->chunk_read_off = 0;
3685     for (i = 0; i < StringValue_Last; i++)
3686         reader->strvalues[i] = strval_empty;
3687 
3688     hr = IXmlReader_QueryInterface(&reader->IXmlReader_iface, riid, obj);
3689     IXmlReader_Release(&reader->IXmlReader_iface);
3690 
3691     TRACE("returning iface %p, hr %#x\n", *obj, hr);
3692 
3693     return hr;
3694 }
3695 
3696 HRESULT WINAPI CreateXmlReaderInputWithEncodingName(IUnknown *stream,
3697                                                     IMalloc *imalloc,
3698                                                     LPCWSTR encoding,
3699                                                     BOOL hint,
3700                                                     LPCWSTR base_uri,
3701                                                     IXmlReaderInput **ppInput)
3702 {
3703     xmlreaderinput *readerinput;
3704     HRESULT hr;
3705 
3706     TRACE("%p %p %s %d %s %p\n", stream, imalloc, wine_dbgstr_w(encoding),
3707                                        hint, wine_dbgstr_w(base_uri), ppInput);
3708 
3709     if (!stream || !ppInput) return E_INVALIDARG;
3710 
3711     if (imalloc)
3712         readerinput = IMalloc_Alloc(imalloc, sizeof(*readerinput));
3713     else
3714         readerinput = heap_alloc(sizeof(*readerinput));
3715     if(!readerinput) return E_OUTOFMEMORY;
3716 
3717     readerinput->IXmlReaderInput_iface.lpVtbl = &xmlreaderinputvtbl;
3718     readerinput->ref = 1;
3719     readerinput->imalloc = imalloc;
3720     readerinput->stream = NULL;
3721     if (imalloc) IMalloc_AddRef(imalloc);
3722     readerinput->encoding = parse_encoding_name(encoding, -1);
3723     readerinput->hint = hint;
3724     readerinput->baseuri = readerinput_strdupW(readerinput, base_uri);
3725     readerinput->pending = 0;
3726 
3727     hr = alloc_input_buffer(readerinput);
3728     if (hr != S_OK)
3729     {
3730         readerinput_free(readerinput, readerinput->baseuri);
3731         readerinput_free(readerinput, readerinput);
3732         if (imalloc) IMalloc_Release(imalloc);
3733         return hr;
3734     }
3735     IUnknown_QueryInterface(stream, &IID_IUnknown, (void**)&readerinput->input);
3736 
3737     *ppInput = &readerinput->IXmlReaderInput_iface;
3738 
3739     TRACE("returning iface %p\n", *ppInput);
3740 
3741     return S_OK;
3742 }
3743