xref: /reactos/dll/win32/xmllite/reader.c (revision 8540ab04)
1 /*
2  * IXmlReader implementation
3  *
4  * Copyright 2010, 2012-2013, 2016-2017 Nikolay Sivov
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
19  */
20 
21 #define COBJMACROS
22 
23 #include <stdio.h>
24 #include <stdarg.h>
25 #include <assert.h>
26 #include "windef.h"
27 #include "winbase.h"
28 #include "initguid.h"
29 #include "objbase.h"
30 #include "xmllite.h"
31 #include "xmllite_private.h"
32 
33 #include "wine/debug.h"
34 #include "wine/list.h"
35 #include "wine/unicode.h"
36 
37 WINE_DEFAULT_DEBUG_CHANNEL(xmllite);
38 
39 /* not defined in public headers */
40 DEFINE_GUID(IID_IXmlReaderInput, 0x0b3ccc9b, 0x9214, 0x428b, 0xa2, 0xae, 0xef, 0x3a, 0xa8, 0x71, 0xaf, 0xda);
41 
/* Internal parser state, kept in xmlreader.instate.
   NOTE(review): the values appear to follow document order (declaration, DTD,
   root element, content, trailing Misc) - inferred from the names only. */
typedef enum
{
    XmlReadInState_Initial,
    XmlReadInState_XmlDecl,
    XmlReadInState_Misc_DTD,
    XmlReadInState_DTD,
    XmlReadInState_DTD_Misc,
    XmlReadInState_Element,
    XmlReadInState_Content,
    XmlReadInState_MiscEnd, /* optional Misc at the end of a document,
                               entered by reader_pop_element() once the element stack is empty */
    XmlReadInState_Eof
} XmlReaderInternalState;
54 
/* This state records where parsing was interrupted by an input problem.
   The reader uses it to resume parsing from that point; it is kept in
   xmlreader.resumestate. */
typedef enum
{
    XmlReadResumeState_Initial,
    XmlReadResumeState_PITarget,
    XmlReadResumeState_PIBody,
    XmlReadResumeState_CDATA,
    XmlReadResumeState_Comment,
    XmlReadResumeState_STag,
    XmlReadResumeState_CharData,
    XmlReadResumeState_Whitespace
} XmlReaderResumeState;
68 
/* Index into xmlreader.resume[], the saved input offsets used to resume
   parsing from a particular input position. */
typedef enum
{
    XmlReadResume_Name,  /* PITarget, name for NCName, prefix for QName */
    XmlReadResume_Local, /* local for QName */
    XmlReadResume_Body,  /* PI body, comment text, CDATA text, CharData text */
    XmlReadResume_Last   /* number of slots, sizes the resume[] array */
} XmlReaderResume;
77 
/* Index into xmlreader.strvalues[], the string values of the current node. */
typedef enum
{
    StringValue_LocalName,
    StringValue_Prefix,
    StringValue_QualifiedName,
    StringValue_Value,         /* allocation of this one is deferred, see reader_set_strvalue() */
    StringValue_Last           /* number of slots, sizes the strvalues[] array */
} XmlReaderStringValue;
86 
/* encoding names, referenced from xml_encoding_map */
static const WCHAR usasciiW[] = {'U','S','-','A','S','C','I','I',0};
static const WCHAR utf16W[] = {'U','T','F','-','1','6',0};
static const WCHAR utf8W[] = {'U','T','F','-','8',0};

/* markup delimiters as null-terminated wide strings */
static const WCHAR dblquoteW[] = {'\"',0};
static const WCHAR quoteW[] = {'\'',0};
static const WCHAR ltW[] = {'<',0};
static const WCHAR gtW[] = {'>',0};
static const WCHAR commentW[] = {'<','!','-','-',0};
static const WCHAR piW[] = {'<','?',0};

/* forward declaration, needed by readerinput_detectencoding() */
static BOOL is_namestartchar(WCHAR ch);
99 
100 static const char *debugstr_nodetype(XmlNodeType nodetype)
101 {
102     static const char * const type_names[] =
103     {
104         "None",
105         "Element",
106         "Attribute",
107         "Text",
108         "CDATA",
109         "",
110         "",
111         "ProcessingInstruction",
112         "Comment",
113         "",
114         "DocumentType",
115         "",
116         "",
117         "Whitespace",
118         "",
119         "EndElement",
120         "",
121         "XmlDeclaration"
122     };
123 
124     if (nodetype > _XmlNodeType_Last)
125         return wine_dbg_sprintf("unknown type=%d", nodetype);
126 
127     return type_names[nodetype];
128 }
129 
130 static const char *debugstr_reader_prop(XmlReaderProperty prop)
131 {
132     static const char * const prop_names[] =
133     {
134         "MultiLanguage",
135         "ConformanceLevel",
136         "RandomAccess",
137         "XmlResolver",
138         "DtdProcessing",
139         "ReadState",
140         "MaxElementDepth",
141         "MaxEntityExpansion"
142     };
143 
144     if (prop > _XmlReaderProperty_Last)
145         return wine_dbg_sprintf("unknown property=%d", prop);
146 
147     return prop_names[prop];
148 }
149 
/* One supported encoding: its name, internal id and Windows code page. */
struct xml_encoding_data
{
    const WCHAR *name; /* encoding name, compared case-insensitively by parse_encoding_name() */
    xml_encoding enc;  /* internal encoding id */
    UINT cp;           /* matching code page */
};
156 
/* Table of supported encodings. The order matters twice:
   - get_encoding_name() and get_code_page() index it directly with an
     xml_encoding value;
   - parse_encoding_name() runs a binary search over the names, so entries
     have to stay sorted by name (case-insensitively). */
static const struct xml_encoding_data xml_encoding_map[] = {
    { usasciiW, XmlEncoding_USASCII, 20127 },
    { utf16W, XmlEncoding_UTF16, 1200 },
    { utf8W,  XmlEncoding_UTF8,  CP_UTF8 },
};
162 
163 const WCHAR *get_encoding_name(xml_encoding encoding)
164 {
165     return xml_encoding_map[encoding].name;
166 }
167 
168 xml_encoding get_encoding_from_codepage(UINT codepage)
169 {
170     int i;
171     for (i = 0; i < ARRAY_SIZE(xml_encoding_map); i++)
172     {
173         if (xml_encoding_map[i].cp == codepage) return xml_encoding_map[i].enc;
174     }
175     return XmlEncoding_Unknown;
176 }
177 
/* Growable data buffer. Used both for raw stream bytes and for the decoded
   UTF-16 text, see struct input_buffer. */
typedef struct
{
    char *data;             /* buffer memory */
    UINT  cur;              /* current position: a byte offset for the raw buffer,
                               a WCHAR offset for the UTF-16 buffer */
    unsigned int allocated; /* size of 'data', in bytes */
    unsigned int written;   /* amount of valid data in 'data', in bytes */
    BOOL prev_cr;           /* last character seen by fixup_buffer_cr() was '\r' */
} encoded_buffer;
186 
187 typedef struct input_buffer input_buffer;
188 
/* IXmlReaderInput implementation object; impl_from_IXmlReaderInput() recovers
   it from the interface pointer. */
typedef struct
{
    IXmlReaderInput IXmlReaderInput_iface;
    LONG ref;
    /* reference passed on IXmlReaderInput creation, is kept when input is created */
    IUnknown *input;
    IMalloc *imalloc; /* allocator used by the readerinput_alloc() family */
    xml_encoding encoding;
    BOOL hint;
    WCHAR *baseuri;
    /* stream reference set after SetInput() call from reader,
       stored as a sequential stream, because the optimizations
       possible with IStream aren't implemented yet */
    ISequentialStream *stream;
    input_buffer *buffer; /* created by alloc_input_buffer() */
    unsigned int pending : 1; /* last stream read returned E_PENDING, see readerinput_growraw() */
} xmlreaderinput;
206 
207 static const struct IUnknownVtbl xmlreaderinputvtbl;
208 
/* Structure to hold a parsed string of specific length.

   The string is either referenced by its 'start' offset in the decoded input
   buffer ('str' is NULL then), or held as a null-terminated string in 'str';
   reader_get_strptr() resolves both forms to a pointer.

   To init a strval variable use reader_init_strvalue(),
   to set a strval as a reader value use reader_set_strvalue().
 */
typedef struct
{
    WCHAR *str;   /* allocated null-terminated string */
    UINT   len;   /* length in WCHARs, altered after ReadValueChunk */
    UINT   start; /* input position where value starts */
} strval;
223 
/* Shared constant values. The initializers are positional (str, len), so they
   depend on the field order of strval. */
static WCHAR emptyW[] = {0};
static WCHAR xmlW[] = {'x','m','l',0};
static WCHAR xmlnsW[] = {'x','m','l','n','s',0};
/* 'str' of this one doubles as a marker: values pointing at it are never
   duplicated or freed, see reader_strvaldup() and reader_free_strvalued() */
static const strval strval_empty = { emptyW };
static const strval strval_xml = { xmlW, 3 };
static const strval strval_xmlns = { xmlnsW, 5 };
230 
/* Line/column pair describing a position in the input. */
struct reader_position
{
    UINT line_number;
    UINT line_position;
};
236 
/* Bit flags stored in attribute.flags. */
enum attribute_flags
{
    ATTRIBUTE_NS_DEFINITION = 0x1,
    ATTRIBUTE_DEFAULT_NS_DEFINITION = 0x2,
};
242 
/* Attribute of the current node, linked into xmlreader.attrs by reader_add_attr(). */
struct attribute
{
    struct list entry;
    strval prefix;    /* copied by value, not duplicated */
    strval localname; /* duplicated copy, released by reader_clear_attrs() */
    strval qname;     /* copied by value, not duplicated */
    strval value;     /* duplicated copy, released by reader_clear_attrs() */
    struct reader_position position;
    unsigned int flags; /* as passed to reader_add_attr(), presumably enum attribute_flags bits */
};
253 
/* Open element, linked into xmlreader.elements by reader_push_element();
   all three names are duplicated copies there. */
struct element
{
    struct list entry;
    strval prefix;
    strval localname;
    strval qname;
    struct reader_position position;
};
262 
/* Namespace definition, linked into xmlreader.ns or xmlreader.nsdef by
   reader_push_ns(). */
struct ns
{
    struct list entry;
    strval prefix; /* zeroed for entries on the nsdef list */
    strval uri;
    struct element *element; /* NULL until reader_mark_ns_nodes() assigns an element */
};
270 
/* IXmlReader implementation object; impl_from_IXmlReader() recovers it from
   the interface pointer. */
typedef struct
{
    IXmlReader IXmlReader_iface;
    LONG ref;
    xmlreaderinput *input;
    IMalloc *imalloc; /* allocator used by reader_alloc()/reader_free() */
    XmlReadState state;
    HRESULT error; /* error set on XmlReadState_Error */
    XmlReaderInternalState instate;
    XmlReaderResumeState resumestate;
    XmlNodeType nodetype;
    DtdProcessing dtdmode;
    IXmlResolver *resolver;
    IUnknown *mlang;
    struct reader_position position;
    struct list attrs; /* attributes list for current node */
    struct attribute *attr; /* current attribute */
    UINT attr_count; /* number of entries on 'attrs' */
    struct list nsdef; /* struct ns entries pushed with 'def' set, see reader_push_ns() */
    struct list ns; /* struct ns entries pushed with a prefix */
    struct list elements; /* struct element entries, most recently pushed one at the head */
    int chunk_read_off;
    strval strvalues[StringValue_Last]; /* indexed by XmlReaderStringValue */
    UINT depth;
    UINT max_depth; /* 0 disables the check in reader_inc_depth() */
    BOOL is_empty_element;
    struct element empty_element; /* used for empty elements without end tag <a />,
                                     and to keep <?xml reader position */
    UINT resume[XmlReadResume_Last]; /* offsets used to resume reader */
} xmlreader;
301 
/* Pair of buffers backing a reader input. */
struct input_buffer
{
    encoded_buffer utf16;   /* decoded text, the buffer reader_get_ptr2() points into */
    encoded_buffer encoded; /* raw bytes read from the stream by readerinput_growraw() */
    UINT code_page;         /* code page of the raw data, ~0 until it is known */
    xmlreaderinput *input;  /* owning input, supplies the allocator */
};
309 
310 static inline xmlreader *impl_from_IXmlReader(IXmlReader *iface)
311 {
312     return CONTAINING_RECORD(iface, xmlreader, IXmlReader_iface);
313 }
314 
315 static inline xmlreaderinput *impl_from_IXmlReaderInput(IXmlReaderInput *iface)
316 {
317     return CONTAINING_RECORD(iface, xmlreaderinput, IXmlReaderInput_iface);
318 }
319 
320 /* reader memory allocation functions */
321 static inline void *reader_alloc(xmlreader *reader, size_t len)
322 {
323     return m_alloc(reader->imalloc, len);
324 }
325 
326 static inline void *reader_alloc_zero(xmlreader *reader, size_t len)
327 {
328     void *ret = reader_alloc(reader, len);
329     if (ret)
330         memset(ret, 0, len);
331     return ret;
332 }
333 
334 static inline void reader_free(xmlreader *reader, void *mem)
335 {
336     m_free(reader->imalloc, mem);
337 }
338 
339 /* Just return pointer from offset, no attempt to read more. */
340 static inline WCHAR *reader_get_ptr2(const xmlreader *reader, UINT offset)
341 {
342     encoded_buffer *buffer = &reader->input->buffer->utf16;
343     return (WCHAR*)buffer->data + offset;
344 }
345 
346 static inline WCHAR *reader_get_strptr(const xmlreader *reader, const strval *v)
347 {
348     return v->str ? v->str : reader_get_ptr2(reader, v->start);
349 }
350 
351 static HRESULT reader_strvaldup(xmlreader *reader, const strval *src, strval *dest)
352 {
353     *dest = *src;
354 
355     if (src->str != strval_empty.str)
356     {
357         dest->str = reader_alloc(reader, (dest->len+1)*sizeof(WCHAR));
358         if (!dest->str) return E_OUTOFMEMORY;
359         memcpy(dest->str, reader_get_strptr(reader, src), dest->len*sizeof(WCHAR));
360         dest->str[dest->len] = 0;
361         dest->start = 0;
362     }
363 
364     return S_OK;
365 }
366 
367 /* reader input memory allocation functions */
368 static inline void *readerinput_alloc(xmlreaderinput *input, size_t len)
369 {
370     return m_alloc(input->imalloc, len);
371 }
372 
373 static inline void *readerinput_realloc(xmlreaderinput *input, void *mem, size_t len)
374 {
375     return m_realloc(input->imalloc, mem, len);
376 }
377 
378 static inline void readerinput_free(xmlreaderinput *input, void *mem)
379 {
380     m_free(input->imalloc, mem);
381 }
382 
383 static inline WCHAR *readerinput_strdupW(xmlreaderinput *input, const WCHAR *str)
384 {
385     LPWSTR ret = NULL;
386 
387     if(str) {
388         DWORD size;
389 
390         size = (strlenW(str)+1)*sizeof(WCHAR);
391         ret = readerinput_alloc(input, size);
392         if (ret) memcpy(ret, str, size);
393     }
394 
395     return ret;
396 }
397 
398 /* This one frees stored string value if needed */
399 static void reader_free_strvalued(xmlreader *reader, strval *v)
400 {
401     if (v->str != strval_empty.str)
402     {
403         reader_free(reader, v->str);
404         *v = strval_empty;
405     }
406 }
407 
408 static void reader_clear_attrs(xmlreader *reader)
409 {
410     struct attribute *attr, *attr2;
411     LIST_FOR_EACH_ENTRY_SAFE(attr, attr2, &reader->attrs, struct attribute, entry)
412     {
413         reader_free_strvalued(reader, &attr->localname);
414         reader_free_strvalued(reader, &attr->value);
415         reader_free(reader, attr);
416     }
417     list_init(&reader->attrs);
418     reader->attr_count = 0;
419     reader->attr = NULL;
420 }
421 
422 /* attribute data holds pointers to buffer data, so buffer shrink is not possible
423    while we are on a node with attributes */
424 static HRESULT reader_add_attr(xmlreader *reader, strval *prefix, strval *localname, strval *qname,
425     strval *value, const struct reader_position *position, unsigned int flags)
426 {
427     struct attribute *attr;
428     HRESULT hr;
429 
430     attr = reader_alloc(reader, sizeof(*attr));
431     if (!attr) return E_OUTOFMEMORY;
432 
433     hr = reader_strvaldup(reader, localname, &attr->localname);
434     if (hr == S_OK)
435     {
436         hr = reader_strvaldup(reader, value, &attr->value);
437         if (hr != S_OK)
438             reader_free_strvalued(reader, &attr->value);
439     }
440     if (hr != S_OK)
441     {
442         reader_free(reader, attr);
443         return hr;
444     }
445 
446     if (prefix)
447         attr->prefix = *prefix;
448     else
449         memset(&attr->prefix, 0, sizeof(attr->prefix));
450     attr->qname = qname ? *qname : *localname;
451     attr->position = *position;
452     attr->flags = flags;
453     list_add_tail(&reader->attrs, &attr->entry);
454     reader->attr_count++;
455 
456     return S_OK;
457 }
458 
459 /* Returns current element, doesn't check if reader is actually positioned on it. */
460 static struct element *reader_get_element(xmlreader *reader)
461 {
462     if (reader->is_empty_element)
463         return &reader->empty_element;
464 
465     return LIST_ENTRY(list_head(&reader->elements), struct element, entry);
466 }
467 
468 static inline void reader_init_strvalue(UINT start, UINT len, strval *v)
469 {
470     v->start = start;
471     v->len = len;
472     v->str = NULL;
473 }
474 
475 static inline const char* debug_strval(const xmlreader *reader, const strval *v)
476 {
477     return debugstr_wn(reader_get_strptr(reader, v), v->len);
478 }
479 
480 /* used to initialize from constant string */
481 static inline void reader_init_cstrvalue(WCHAR *str, UINT len, strval *v)
482 {
483     v->start = 0;
484     v->len = len;
485     v->str = str;
486 }
487 
488 static void reader_free_strvalue(xmlreader *reader, XmlReaderStringValue type)
489 {
490     reader_free_strvalued(reader, &reader->strvalues[type]);
491 }
492 
493 static void reader_free_strvalues(xmlreader *reader)
494 {
495     int type;
496     for (type = 0; type < StringValue_Last; type++)
497         reader_free_strvalue(reader, type);
498 }
499 
500 /* This helper should only be used to test if strings are the same,
501    it doesn't try to sort. */
502 static inline int strval_eq(const xmlreader *reader, const strval *str1, const strval *str2)
503 {
504     if (str1->len != str2->len) return 0;
505     return !memcmp(reader_get_strptr(reader, str1), reader_get_strptr(reader, str2), str1->len*sizeof(WCHAR));
506 }
507 
508 static void reader_clear_elements(xmlreader *reader)
509 {
510     struct element *elem, *elem2;
511     LIST_FOR_EACH_ENTRY_SAFE(elem, elem2, &reader->elements, struct element, entry)
512     {
513         reader_free_strvalued(reader, &elem->prefix);
514         reader_free_strvalued(reader, &elem->localname);
515         reader_free_strvalued(reader, &elem->qname);
516         reader_free(reader, elem);
517     }
518     list_init(&reader->elements);
519     reader_free_strvalued(reader, &reader->empty_element.localname);
520     reader_free_strvalued(reader, &reader->empty_element.qname);
521     reader->is_empty_element = FALSE;
522 }
523 
524 static struct ns *reader_lookup_ns(xmlreader *reader, const strval *prefix)
525 {
526     struct list *nslist = prefix ? &reader->ns : &reader->nsdef;
527     struct ns *ns;
528 
529     LIST_FOR_EACH_ENTRY_REV(ns, nslist, struct ns, entry) {
530         if (strval_eq(reader, prefix, &ns->prefix))
531             return ns;
532     }
533 
534     return NULL;
535 }
536 
537 static HRESULT reader_inc_depth(xmlreader *reader)
538 {
539     return (++reader->depth >= reader->max_depth && reader->max_depth) ? SC_E_MAXELEMENTDEPTH : S_OK;
540 }
541 
542 static void reader_dec_depth(xmlreader *reader)
543 {
544     if (reader->depth)
545         reader->depth--;
546 }
547 
548 static HRESULT reader_push_ns(xmlreader *reader, const strval *prefix, const strval *uri, BOOL def)
549 {
550     struct ns *ns;
551     HRESULT hr;
552 
553     ns = reader_alloc(reader, sizeof(*ns));
554     if (!ns) return E_OUTOFMEMORY;
555 
556     if (def)
557         memset(&ns->prefix, 0, sizeof(ns->prefix));
558     else {
559         hr = reader_strvaldup(reader, prefix, &ns->prefix);
560         if (FAILED(hr)) {
561             reader_free(reader, ns);
562             return hr;
563         }
564     }
565 
566     hr = reader_strvaldup(reader, uri, &ns->uri);
567     if (FAILED(hr)) {
568         reader_free_strvalued(reader, &ns->prefix);
569         reader_free(reader, ns);
570         return hr;
571     }
572 
573     ns->element = NULL;
574     list_add_head(def ? &reader->nsdef : &reader->ns, &ns->entry);
575     return hr;
576 }
577 
578 static void reader_free_element(xmlreader *reader, struct element *element)
579 {
580     reader_free_strvalued(reader, &element->prefix);
581     reader_free_strvalued(reader, &element->localname);
582     reader_free_strvalued(reader, &element->qname);
583     reader_free(reader, element);
584 }
585 
586 static void reader_mark_ns_nodes(xmlreader *reader, struct element *element)
587 {
588     struct ns *ns;
589 
590     LIST_FOR_EACH_ENTRY(ns, &reader->ns, struct ns, entry) {
591         if (ns->element)
592             break;
593         ns->element = element;
594     }
595 
596     LIST_FOR_EACH_ENTRY(ns, &reader->nsdef, struct ns, entry) {
597         if (ns->element)
598             break;
599         ns->element = element;
600     }
601 }
602 
603 static HRESULT reader_push_element(xmlreader *reader, strval *prefix, strval *localname,
604     strval *qname, const struct reader_position *position)
605 {
606     struct element *element;
607     HRESULT hr;
608 
609     element = reader_alloc_zero(reader, sizeof(*element));
610     if (!element)
611         return E_OUTOFMEMORY;
612 
613     if ((hr = reader_strvaldup(reader, prefix, &element->prefix)) == S_OK &&
614             (hr = reader_strvaldup(reader, localname, &element->localname)) == S_OK &&
615             (hr = reader_strvaldup(reader, qname, &element->qname)) == S_OK)
616     {
617         list_add_head(&reader->elements, &element->entry);
618         reader_mark_ns_nodes(reader, element);
619         reader->is_empty_element = FALSE;
620         element->position = *position;
621     }
622     else
623         reader_free_element(reader, element);
624 
625     return hr;
626 }
627 
628 static void reader_pop_ns_nodes(xmlreader *reader, struct element *element)
629 {
630     struct ns *ns, *ns2;
631 
632     LIST_FOR_EACH_ENTRY_SAFE_REV(ns, ns2, &reader->ns, struct ns, entry) {
633         if (ns->element != element)
634             break;
635 
636         list_remove(&ns->entry);
637         reader_free_strvalued(reader, &ns->prefix);
638         reader_free_strvalued(reader, &ns->uri);
639         reader_free(reader, ns);
640     }
641 
642     if (!list_empty(&reader->nsdef)) {
643         ns = LIST_ENTRY(list_head(&reader->nsdef), struct ns, entry);
644         if (ns->element == element) {
645             list_remove(&ns->entry);
646             reader_free_strvalued(reader, &ns->prefix);
647             reader_free_strvalued(reader, &ns->uri);
648             reader_free(reader, ns);
649         }
650     }
651 }
652 
653 static void reader_pop_element(xmlreader *reader)
654 {
655     struct element *element;
656 
657     if (list_empty(&reader->elements))
658         return;
659 
660     element = LIST_ENTRY(list_head(&reader->elements), struct element, entry);
661     list_remove(&element->entry);
662 
663     reader_pop_ns_nodes(reader, element);
664     reader_free_element(reader, element);
665 
666     /* It was a root element, the rest is expected as Misc */
667     if (list_empty(&reader->elements))
668         reader->instate = XmlReadInState_MiscEnd;
669 }
670 
671 /* Always make a copy, cause strings are supposed to be null terminated. Null pointer for 'value'
672    means node value is to be determined. */
673 static void reader_set_strvalue(xmlreader *reader, XmlReaderStringValue type, const strval *value)
674 {
675     strval *v = &reader->strvalues[type];
676 
677     reader_free_strvalue(reader, type);
678     if (!value)
679     {
680         v->str = NULL;
681         v->start = 0;
682         v->len = 0;
683         return;
684     }
685 
686     if (value->str == strval_empty.str)
687         *v = *value;
688     else
689     {
690         if (type == StringValue_Value)
691         {
692             /* defer allocation for value string */
693             v->str = NULL;
694             v->start = value->start;
695             v->len = value->len;
696         }
697         else
698         {
699             v->str = reader_alloc(reader, (value->len + 1)*sizeof(WCHAR));
700             memcpy(v->str, reader_get_strptr(reader, value), value->len*sizeof(WCHAR));
701             v->str[value->len] = 0;
702             v->len = value->len;
703         }
704     }
705 }
706 
707 static inline int is_reader_pending(xmlreader *reader)
708 {
709     return reader->input->pending;
710 }
711 
712 static HRESULT init_encoded_buffer(xmlreaderinput *input, encoded_buffer *buffer)
713 {
714     const int initial_len = 0x2000;
715     buffer->data = readerinput_alloc(input, initial_len);
716     if (!buffer->data) return E_OUTOFMEMORY;
717 
718     memset(buffer->data, 0, 4);
719     buffer->cur = 0;
720     buffer->allocated = initial_len;
721     buffer->written = 0;
722     buffer->prev_cr = FALSE;
723 
724     return S_OK;
725 }
726 
727 static void free_encoded_buffer(xmlreaderinput *input, encoded_buffer *buffer)
728 {
729     readerinput_free(input, buffer->data);
730 }
731 
732 HRESULT get_code_page(xml_encoding encoding, UINT *cp)
733 {
734     if (encoding == XmlEncoding_Unknown)
735     {
736         FIXME("unsupported encoding %d\n", encoding);
737         return E_NOTIMPL;
738     }
739 
740     *cp = xml_encoding_map[encoding].cp;
741 
742     return S_OK;
743 }
744 
745 xml_encoding parse_encoding_name(const WCHAR *name, int len)
746 {
747     int min, max, n, c;
748 
749     if (!name) return XmlEncoding_Unknown;
750 
751     min = 0;
752     max = ARRAY_SIZE(xml_encoding_map) - 1;
753 
754     while (min <= max)
755     {
756         n = (min+max)/2;
757 
758         if (len != -1)
759             c = strncmpiW(xml_encoding_map[n].name, name, len);
760         else
761             c = strcmpiW(xml_encoding_map[n].name, name);
762         if (!c)
763             return xml_encoding_map[n].enc;
764 
765         if (c > 0)
766             max = n-1;
767         else
768             min = n+1;
769     }
770 
771     return XmlEncoding_Unknown;
772 }
773 
774 static HRESULT alloc_input_buffer(xmlreaderinput *input)
775 {
776     input_buffer *buffer;
777     HRESULT hr;
778 
779     input->buffer = NULL;
780 
781     buffer = readerinput_alloc(input, sizeof(*buffer));
782     if (!buffer) return E_OUTOFMEMORY;
783 
784     buffer->input = input;
785     buffer->code_page = ~0; /* code page is unknown at this point */
786     hr = init_encoded_buffer(input, &buffer->utf16);
787     if (hr != S_OK) {
788         readerinput_free(input, buffer);
789         return hr;
790     }
791 
792     hr = init_encoded_buffer(input, &buffer->encoded);
793     if (hr != S_OK) {
794         free_encoded_buffer(input, &buffer->utf16);
795         readerinput_free(input, buffer);
796         return hr;
797     }
798 
799     input->buffer = buffer;
800     return S_OK;
801 }
802 
803 static void free_input_buffer(input_buffer *buffer)
804 {
805     free_encoded_buffer(buffer->input, &buffer->encoded);
806     free_encoded_buffer(buffer->input, &buffer->utf16);
807     readerinput_free(buffer->input, buffer);
808 }
809 
810 static void readerinput_release_stream(xmlreaderinput *readerinput)
811 {
812     if (readerinput->stream) {
813         ISequentialStream_Release(readerinput->stream);
814         readerinput->stream = NULL;
815     }
816 }
817 
818 /* Queries already stored interface for IStream/ISequentialStream.
819    Interface supplied on creation will be overwritten */
820 static inline HRESULT readerinput_query_for_stream(xmlreaderinput *readerinput)
821 {
822     HRESULT hr;
823 
824     readerinput_release_stream(readerinput);
825     hr = IUnknown_QueryInterface(readerinput->input, &IID_IStream, (void**)&readerinput->stream);
826     if (hr != S_OK)
827         hr = IUnknown_QueryInterface(readerinput->input, &IID_ISequentialStream, (void**)&readerinput->stream);
828 
829     return hr;
830 }
831 
832 /* reads a chunk to raw buffer */
833 static HRESULT readerinput_growraw(xmlreaderinput *readerinput)
834 {
835     encoded_buffer *buffer = &readerinput->buffer->encoded;
836     /* to make sure aligned length won't exceed allocated length */
837     ULONG len = buffer->allocated - buffer->written - 4;
838     ULONG read;
839     HRESULT hr;
840 
841     /* always try to get aligned to 4 bytes, so the only case we can get partially read characters is
842        variable width encodings like UTF-8 */
843     len = (len + 3) & ~3;
844     /* try to use allocated space or grow */
845     if (buffer->allocated - buffer->written < len)
846     {
847         buffer->allocated *= 2;
848         buffer->data = readerinput_realloc(readerinput, buffer->data, buffer->allocated);
849         len = buffer->allocated - buffer->written;
850     }
851 
852     read = 0;
853     hr = ISequentialStream_Read(readerinput->stream, buffer->data + buffer->written, len, &read);
854     TRACE("written=%d, alloc=%d, requested=%d, read=%d, ret=0x%08x\n", buffer->written, buffer->allocated, len, read, hr);
855     readerinput->pending = hr == E_PENDING;
856     if (FAILED(hr)) return hr;
857     buffer->written += read;
858 
859     return hr;
860 }
861 
862 /* grows UTF-16 buffer so it has at least 'length' WCHAR chars free on return */
863 static void readerinput_grow(xmlreaderinput *readerinput, int length)
864 {
865     encoded_buffer *buffer = &readerinput->buffer->utf16;
866 
867     length *= sizeof(WCHAR);
868     /* grow if needed, plus 4 bytes to be sure null terminator will fit in */
869     if (buffer->allocated < buffer->written + length + 4)
870     {
871         int grown_size = max(2*buffer->allocated, buffer->allocated + length);
872         buffer->data = readerinput_realloc(readerinput, buffer->data, grown_size);
873         buffer->allocated = grown_size;
874     }
875 }
876 
877 static inline BOOL readerinput_is_utf8(xmlreaderinput *readerinput)
878 {
879     static const char startA[] = {'<','?'};
880     static const char commentA[] = {'<','!'};
881     encoded_buffer *buffer = &readerinput->buffer->encoded;
882     unsigned char *ptr = (unsigned char*)buffer->data;
883 
884     return !memcmp(buffer->data, startA, sizeof(startA)) ||
885            !memcmp(buffer->data, commentA, sizeof(commentA)) ||
886            /* test start byte */
887            (ptr[0] == '<' &&
888             (
889              (ptr[1] && (ptr[1] <= 0x7f)) ||
890              (buffer->data[1] >> 5) == 0x6  || /* 2 bytes */
891              (buffer->data[1] >> 4) == 0xe  || /* 3 bytes */
892              (buffer->data[1] >> 3) == 0x1e)   /* 4 bytes */
893            );
894 }
895 
896 static HRESULT readerinput_detectencoding(xmlreaderinput *readerinput, xml_encoding *enc)
897 {
898     encoded_buffer *buffer = &readerinput->buffer->encoded;
899     static const char utf8bom[] = {0xef,0xbb,0xbf};
900     static const char utf16lebom[] = {0xff,0xfe};
901     WCHAR *ptrW;
902 
903     *enc = XmlEncoding_Unknown;
904 
905     if (buffer->written <= 3)
906     {
907         HRESULT hr = readerinput_growraw(readerinput);
908         if (FAILED(hr)) return hr;
909         if (buffer->written < 3) return MX_E_INPUTEND;
910     }
911 
912     ptrW = (WCHAR *)buffer->data;
913     /* try start symbols if we have enough data to do that, input buffer should contain
914        first chunk already */
915     if (readerinput_is_utf8(readerinput))
916         *enc = XmlEncoding_UTF8;
917     else if (*ptrW == '<')
918     {
919         ptrW++;
920         if (*ptrW == '?' || *ptrW == '!' || is_namestartchar(*ptrW))
921             *enc = XmlEncoding_UTF16;
922     }
923     /* try with BOM now */
924     else if (!memcmp(buffer->data, utf8bom, sizeof(utf8bom)))
925     {
926         buffer->cur += sizeof(utf8bom);
927         *enc = XmlEncoding_UTF8;
928     }
929     else if (!memcmp(buffer->data, utf16lebom, sizeof(utf16lebom)))
930     {
931         buffer->cur += sizeof(utf16lebom);
932         *enc = XmlEncoding_UTF16;
933     }
934 
935     return S_OK;
936 }
937 
938 static int readerinput_get_utf8_convlen(xmlreaderinput *readerinput)
939 {
940     encoded_buffer *buffer = &readerinput->buffer->encoded;
941     int len = buffer->written;
942 
943     /* complete single byte char */
944     if (!(buffer->data[len-1] & 0x80)) return len;
945 
946     /* find start byte of multibyte char */
947     while (--len && !(buffer->data[len] & 0xc0))
948         ;
949 
950     return len;
951 }
952 
953 /* Returns byte length of complete char sequence for buffer code page,
954    it's relative to current buffer position which is currently used for BOM handling
955    only. */
956 static int readerinput_get_convlen(xmlreaderinput *readerinput)
957 {
958     encoded_buffer *buffer = &readerinput->buffer->encoded;
959     int len;
960 
961     if (readerinput->buffer->code_page == CP_UTF8)
962         len = readerinput_get_utf8_convlen(readerinput);
963     else
964         len = buffer->written;
965 
966     TRACE("%d\n", len - buffer->cur);
967     return len - buffer->cur;
968 }
969 
970 /* It's possible that raw buffer has some leftovers from last conversion - some char
971    sequence that doesn't represent a full code point. Length argument should be calculated with
972    readerinput_get_convlen(), if it's -1 it will be calculated here. */
973 static void readerinput_shrinkraw(xmlreaderinput *readerinput, int len)
974 {
975     encoded_buffer *buffer = &readerinput->buffer->encoded;
976 
977     if (len == -1)
978         len = readerinput_get_convlen(readerinput);
979 
980     memmove(buffer->data, buffer->data + buffer->cur + (buffer->written - len), len);
981     /* everything below cur is lost too */
982     buffer->written -= len + buffer->cur;
983     /* after this point we don't need cur offset really,
984        it's used only to mark where actual data begins when first chunk is read */
985     buffer->cur = 0;
986 }
987 
988 static void fixup_buffer_cr(encoded_buffer *buffer, int off)
989 {
990     BOOL prev_cr = buffer->prev_cr;
991     const WCHAR *src;
992     WCHAR *dest;
993 
994     src = dest = (WCHAR*)buffer->data + off;
995     while ((const char*)src < buffer->data + buffer->written)
996     {
997         if (*src == '\r')
998         {
999             *dest++ = '\n';
1000             src++;
1001             prev_cr = TRUE;
1002             continue;
1003         }
1004         if(prev_cr && *src == '\n')
1005             src++;
1006         else
1007             *dest++ = *src++;
1008         prev_cr = FALSE;
1009     }
1010 
1011     buffer->written = (char*)dest - buffer->data;
1012     buffer->prev_cr = prev_cr;
1013     *dest = 0;
1014 }
1015 
1016 /* note that raw buffer content is kept */
1017 static void readerinput_switchencoding(xmlreaderinput *readerinput, xml_encoding enc)
1018 {
1019     encoded_buffer *src = &readerinput->buffer->encoded;
1020     encoded_buffer *dest = &readerinput->buffer->utf16;
1021     int len, dest_len;
1022     HRESULT hr;
1023     WCHAR *ptr;
1024     UINT cp;
1025 
1026     hr = get_code_page(enc, &cp);
1027     if (FAILED(hr)) return;
1028 
1029     readerinput->buffer->code_page = cp;
1030     len = readerinput_get_convlen(readerinput);
1031 
1032     TRACE("switching to cp %d\n", cp);
1033 
1034     /* just copy in this case */
1035     if (enc == XmlEncoding_UTF16)
1036     {
1037         readerinput_grow(readerinput, len);
1038         memcpy(dest->data, src->data + src->cur, len);
1039         dest->written += len*sizeof(WCHAR);
1040     }
1041     else
1042     {
1043         dest_len = MultiByteToWideChar(cp, 0, src->data + src->cur, len, NULL, 0);
1044         readerinput_grow(readerinput, dest_len);
1045         ptr = (WCHAR*)dest->data;
1046         MultiByteToWideChar(cp, 0, src->data + src->cur, len, ptr, dest_len);
1047         ptr[dest_len] = 0;
1048         dest->written += dest_len*sizeof(WCHAR);
1049     }
1050 
1051     fixup_buffer_cr(dest, 0);
1052 }
1053 
1054 /* shrinks parsed data a buffer begins with */
1055 static void reader_shrink(xmlreader *reader)
1056 {
1057     encoded_buffer *buffer = &reader->input->buffer->utf16;
1058 
1059     /* avoid to move too often using threshold shrink length */
1060     if (buffer->cur*sizeof(WCHAR) > buffer->written / 2)
1061     {
1062         buffer->written -= buffer->cur*sizeof(WCHAR);
1063         memmove(buffer->data, (WCHAR*)buffer->data + buffer->cur, buffer->written);
1064         buffer->cur = 0;
1065         *(WCHAR*)&buffer->data[buffer->written] = 0;
1066     }
1067 }
1068 
1069 /* This is a normal way for reader to get new data converted from raw buffer to utf16 buffer.
1070    It won't attempt to shrink but will grow destination buffer if needed */
1071 static HRESULT reader_more(xmlreader *reader)
1072 {
1073     xmlreaderinput *readerinput = reader->input;
1074     encoded_buffer *src = &readerinput->buffer->encoded;
1075     encoded_buffer *dest = &readerinput->buffer->utf16;
1076     UINT cp = readerinput->buffer->code_page;
1077     int len, dest_len, prev_len;
1078     HRESULT hr;
1079     WCHAR *ptr;
1080 
1081     /* get some raw data from stream first */
1082     hr = readerinput_growraw(readerinput);
1083     len = readerinput_get_convlen(readerinput);
1084     prev_len = dest->written / sizeof(WCHAR);
1085 
1086     /* just copy for UTF-16 case */
1087     if (cp == 1200)
1088     {
1089         readerinput_grow(readerinput, len);
1090         memcpy(dest->data + dest->written, src->data + src->cur, len);
1091         dest->written += len*sizeof(WCHAR);
1092     }
1093     else
1094     {
1095         dest_len = MultiByteToWideChar(cp, 0, src->data + src->cur, len, NULL, 0);
1096         readerinput_grow(readerinput, dest_len);
1097         ptr = (WCHAR*)(dest->data + dest->written);
1098         MultiByteToWideChar(cp, 0, src->data + src->cur, len, ptr, dest_len);
1099         ptr[dest_len] = 0;
1100         dest->written += dest_len*sizeof(WCHAR);
1101         /* get rid of processed data */
1102         readerinput_shrinkraw(readerinput, len);
1103     }
1104 
1105     fixup_buffer_cr(dest, prev_len);
1106     return hr;
1107 }
1108 
1109 static inline UINT reader_get_cur(xmlreader *reader)
1110 {
1111     return reader->input->buffer->utf16.cur;
1112 }
1113 
1114 static inline WCHAR *reader_get_ptr(xmlreader *reader)
1115 {
1116     encoded_buffer *buffer = &reader->input->buffer->utf16;
1117     WCHAR *ptr = (WCHAR*)buffer->data + buffer->cur;
1118     if (!*ptr) reader_more(reader);
1119     return (WCHAR*)buffer->data + buffer->cur;
1120 }
1121 
1122 static int reader_cmp(xmlreader *reader, const WCHAR *str)
1123 {
1124     int i=0;
1125     const WCHAR *ptr = reader_get_ptr(reader);
1126     while (str[i])
1127     {
1128         if (!ptr[i])
1129         {
1130             reader_more(reader);
1131             ptr = reader_get_ptr(reader);
1132         }
1133         if (str[i] != ptr[i])
1134             return ptr[i] - str[i];
1135         i++;
1136     }
1137     return 0;
1138 }
1139 
1140 static void reader_update_position(xmlreader *reader, WCHAR ch)
1141 {
1142     if (ch == '\r')
1143         reader->position.line_position = 1;
1144     else if (ch == '\n')
1145     {
1146         reader->position.line_number++;
1147         reader->position.line_position = 1;
1148     }
1149     else
1150         reader->position.line_position++;
1151 }
1152 
1153 /* moves cursor n WCHARs forward */
1154 static void reader_skipn(xmlreader *reader, int n)
1155 {
1156     encoded_buffer *buffer = &reader->input->buffer->utf16;
1157     const WCHAR *ptr;
1158 
1159     while (*(ptr = reader_get_ptr(reader)) && n--)
1160     {
1161         reader_update_position(reader, *ptr);
1162         buffer->cur++;
1163     }
1164 }
1165 
1166 static inline BOOL is_wchar_space(WCHAR ch)
1167 {
1168     return ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n';
1169 }
1170 
1171 /* [3] S ::= (#x20 | #x9 | #xD | #xA)+ */
1172 static int reader_skipspaces(xmlreader *reader)
1173 {
1174     const WCHAR *ptr = reader_get_ptr(reader);
1175     UINT start = reader_get_cur(reader);
1176 
1177     while (is_wchar_space(*ptr))
1178     {
1179         reader_skipn(reader, 1);
1180         ptr = reader_get_ptr(reader);
1181     }
1182 
1183     return reader_get_cur(reader) - start;
1184 }
1185 
1186 /* [26] VersionNum ::= '1.' [0-9]+ */
1187 static HRESULT reader_parse_versionnum(xmlreader *reader, strval *val)
1188 {
1189     static const WCHAR onedotW[] = {'1','.',0};
1190     WCHAR *ptr, *ptr2;
1191     UINT start;
1192 
1193     if (reader_cmp(reader, onedotW)) return WC_E_XMLDECL;
1194 
1195     start = reader_get_cur(reader);
1196     /* skip "1." */
1197     reader_skipn(reader, 2);
1198 
1199     ptr2 = ptr = reader_get_ptr(reader);
1200     while (*ptr >= '0' && *ptr <= '9')
1201     {
1202         reader_skipn(reader, 1);
1203         ptr = reader_get_ptr(reader);
1204     }
1205 
1206     if (ptr2 == ptr) return WC_E_DIGIT;
1207     reader_init_strvalue(start, reader_get_cur(reader)-start, val);
1208     TRACE("version=%s\n", debug_strval(reader, val));
1209     return S_OK;
1210 }
1211 
1212 /* [25] Eq ::= S? '=' S? */
1213 static HRESULT reader_parse_eq(xmlreader *reader)
1214 {
1215     static const WCHAR eqW[] = {'=',0};
1216     reader_skipspaces(reader);
1217     if (reader_cmp(reader, eqW)) return WC_E_EQUAL;
1218     /* skip '=' */
1219     reader_skipn(reader, 1);
1220     reader_skipspaces(reader);
1221     return S_OK;
1222 }
1223 
1224 /* [24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"') */
1225 static HRESULT reader_parse_versioninfo(xmlreader *reader)
1226 {
1227     static const WCHAR versionW[] = {'v','e','r','s','i','o','n',0};
1228     struct reader_position position;
1229     strval val, name;
1230     HRESULT hr;
1231 
1232     if (!reader_skipspaces(reader)) return WC_E_WHITESPACE;
1233 
1234     position = reader->position;
1235     if (reader_cmp(reader, versionW)) return WC_E_XMLDECL;
1236     reader_init_strvalue(reader_get_cur(reader), 7, &name);
1237     /* skip 'version' */
1238     reader_skipn(reader, 7);
1239 
1240     hr = reader_parse_eq(reader);
1241     if (FAILED(hr)) return hr;
1242 
1243     if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1244         return WC_E_QUOTE;
1245     /* skip "'"|'"' */
1246     reader_skipn(reader, 1);
1247 
1248     hr = reader_parse_versionnum(reader, &val);
1249     if (FAILED(hr)) return hr;
1250 
1251     if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1252         return WC_E_QUOTE;
1253 
1254     /* skip "'"|'"' */
1255     reader_skipn(reader, 1);
1256 
1257     return reader_add_attr(reader, NULL, &name, NULL, &val, &position, 0);
1258 }
1259 
1260 /* ([A-Za-z0-9._] | '-') */
1261 static inline BOOL is_wchar_encname(WCHAR ch)
1262 {
1263     return ((ch >= 'A' && ch <= 'Z') ||
1264             (ch >= 'a' && ch <= 'z') ||
1265             (ch >= '0' && ch <= '9') ||
1266             (ch == '.') || (ch == '_') ||
1267             (ch == '-'));
1268 }
1269 
1270 /* [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* */
1271 static HRESULT reader_parse_encname(xmlreader *reader, strval *val)
1272 {
1273     WCHAR *start = reader_get_ptr(reader), *ptr;
1274     xml_encoding enc;
1275     int len;
1276 
1277     if ((*start < 'A' || *start > 'Z') && (*start < 'a' || *start > 'z'))
1278         return WC_E_ENCNAME;
1279 
1280     val->start = reader_get_cur(reader);
1281 
1282     ptr = start;
1283     while (is_wchar_encname(*++ptr))
1284         ;
1285 
1286     len = ptr - start;
1287     enc = parse_encoding_name(start, len);
1288     TRACE("encoding name %s\n", debugstr_wn(start, len));
1289     val->str = start;
1290     val->len = len;
1291 
1292     if (enc == XmlEncoding_Unknown)
1293         return WC_E_ENCNAME;
1294 
1295     /* skip encoding name */
1296     reader_skipn(reader, len);
1297     return S_OK;
1298 }
1299 
1300 /* [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" ) */
1301 static HRESULT reader_parse_encdecl(xmlreader *reader)
1302 {
1303     static const WCHAR encodingW[] = {'e','n','c','o','d','i','n','g',0};
1304     struct reader_position position;
1305     strval name, val;
1306     HRESULT hr;
1307 
1308     if (!reader_skipspaces(reader)) return S_FALSE;
1309 
1310     position = reader->position;
1311     if (reader_cmp(reader, encodingW)) return S_FALSE;
1312     name.str = reader_get_ptr(reader);
1313     name.start = reader_get_cur(reader);
1314     name.len = 8;
1315     /* skip 'encoding' */
1316     reader_skipn(reader, 8);
1317 
1318     hr = reader_parse_eq(reader);
1319     if (FAILED(hr)) return hr;
1320 
1321     if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1322         return WC_E_QUOTE;
1323     /* skip "'"|'"' */
1324     reader_skipn(reader, 1);
1325 
1326     hr = reader_parse_encname(reader, &val);
1327     if (FAILED(hr)) return hr;
1328 
1329     if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1330         return WC_E_QUOTE;
1331 
1332     /* skip "'"|'"' */
1333     reader_skipn(reader, 1);
1334 
1335     return reader_add_attr(reader, NULL, &name, NULL, &val, &position, 0);
1336 }
1337 
1338 /* [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"')) */
1339 static HRESULT reader_parse_sddecl(xmlreader *reader)
1340 {
1341     static const WCHAR standaloneW[] = {'s','t','a','n','d','a','l','o','n','e',0};
1342     static const WCHAR yesW[] = {'y','e','s',0};
1343     static const WCHAR noW[] = {'n','o',0};
1344     struct reader_position position;
1345     strval name, val;
1346     UINT start;
1347     HRESULT hr;
1348 
1349     if (!reader_skipspaces(reader)) return S_FALSE;
1350 
1351     position = reader->position;
1352     if (reader_cmp(reader, standaloneW)) return S_FALSE;
1353     reader_init_strvalue(reader_get_cur(reader), 10, &name);
1354     /* skip 'standalone' */
1355     reader_skipn(reader, 10);
1356 
1357     hr = reader_parse_eq(reader);
1358     if (FAILED(hr)) return hr;
1359 
1360     if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1361         return WC_E_QUOTE;
1362     /* skip "'"|'"' */
1363     reader_skipn(reader, 1);
1364 
1365     if (reader_cmp(reader, yesW) && reader_cmp(reader, noW))
1366         return WC_E_XMLDECL;
1367 
1368     start = reader_get_cur(reader);
1369     /* skip 'yes'|'no' */
1370     reader_skipn(reader, reader_cmp(reader, yesW) ? 2 : 3);
1371     reader_init_strvalue(start, reader_get_cur(reader)-start, &val);
1372     TRACE("standalone=%s\n", debug_strval(reader, &val));
1373 
1374     if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1375         return WC_E_QUOTE;
1376     /* skip "'"|'"' */
1377     reader_skipn(reader, 1);
1378 
1379     return reader_add_attr(reader, NULL, &name, NULL, &val, &position, 0);
1380 }
1381 
1382 /* [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' */
1383 static HRESULT reader_parse_xmldecl(xmlreader *reader)
1384 {
1385     static const WCHAR xmldeclW[] = {'<','?','x','m','l',' ',0};
1386     static const WCHAR declcloseW[] = {'?','>',0};
1387     struct reader_position position;
1388     HRESULT hr;
1389 
1390     /* check if we have "<?xml " */
1391     if (reader_cmp(reader, xmldeclW))
1392         return S_FALSE;
1393 
1394     reader_skipn(reader, 2);
1395     position = reader->position;
1396     reader_skipn(reader, 3);
1397     hr = reader_parse_versioninfo(reader);
1398     if (FAILED(hr))
1399         return hr;
1400 
1401     hr = reader_parse_encdecl(reader);
1402     if (FAILED(hr))
1403         return hr;
1404 
1405     hr = reader_parse_sddecl(reader);
1406     if (FAILED(hr))
1407         return hr;
1408 
1409     reader_skipspaces(reader);
1410     if (reader_cmp(reader, declcloseW))
1411         return WC_E_XMLDECL;
1412 
1413     /* skip '?>' */
1414     reader_skipn(reader, 2);
1415 
1416     reader->nodetype = XmlNodeType_XmlDeclaration;
1417     reader->empty_element.position = position;
1418     reader_set_strvalue(reader, StringValue_LocalName, &strval_xml);
1419     reader_set_strvalue(reader, StringValue_QualifiedName, &strval_xml);
1420 
1421     return S_OK;
1422 }
1423 
1424 /* [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' */
1425 static HRESULT reader_parse_comment(xmlreader *reader)
1426 {
1427     WCHAR *ptr;
1428     UINT start;
1429 
1430     if (reader->resumestate == XmlReadResumeState_Comment)
1431     {
1432         start = reader->resume[XmlReadResume_Body];
1433         ptr = reader_get_ptr(reader);
1434     }
1435     else
1436     {
1437         /* skip '<!--' */
1438         reader_skipn(reader, 4);
1439         reader_shrink(reader);
1440         ptr = reader_get_ptr(reader);
1441         start = reader_get_cur(reader);
1442         reader->nodetype = XmlNodeType_Comment;
1443         reader->resume[XmlReadResume_Body] = start;
1444         reader->resumestate = XmlReadResumeState_Comment;
1445         reader_set_strvalue(reader, StringValue_Value, NULL);
1446     }
1447 
1448     /* will exit when there's no more data, it won't attempt to
1449        read more from stream */
1450     while (*ptr)
1451     {
1452         if (ptr[0] == '-')
1453         {
1454             if (ptr[1] == '-')
1455             {
1456                 if (ptr[2] == '>')
1457                 {
1458                     strval value;
1459 
1460                     reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
1461                     TRACE("%s\n", debug_strval(reader, &value));
1462 
1463                     /* skip rest of markup '->' */
1464                     reader_skipn(reader, 3);
1465 
1466                     reader_set_strvalue(reader, StringValue_Value, &value);
1467                     reader->resume[XmlReadResume_Body] = 0;
1468                     reader->resumestate = XmlReadResumeState_Initial;
1469                     return S_OK;
1470                 }
1471                 else
1472                     return WC_E_COMMENT;
1473             }
1474         }
1475 
1476         reader_skipn(reader, 1);
1477         ptr++;
1478     }
1479 
1480     return S_OK;
1481 }
1482 
1483 /* [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] */
1484 static inline BOOL is_char(WCHAR ch)
1485 {
1486     return (ch == '\t') || (ch == '\r') || (ch == '\n') ||
1487            (ch >= 0x20 && ch <= 0xd7ff) ||
1488            (ch >= 0xd800 && ch <= 0xdbff) || /* high surrogate */
1489            (ch >= 0xdc00 && ch <= 0xdfff) || /* low surrogate */
1490            (ch >= 0xe000 && ch <= 0xfffd);
1491 }
1492 
1493 /* [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] */
1494 static inline BOOL is_pubchar(WCHAR ch)
1495 {
1496     return (ch == ' ') ||
1497            (ch >= 'a' && ch <= 'z') ||
1498            (ch >= 'A' && ch <= 'Z') ||
1499            (ch >= '0' && ch <= '9') ||
1500            (ch >= '-' && ch <= ';') || /* '()*+,-./:; */
1501            (ch == '=') || (ch == '?') ||
1502            (ch == '@') || (ch == '!') ||
1503            (ch >= '#' && ch <= '%') || /* #$% */
1504            (ch == '_') || (ch == '\r') || (ch == '\n');
1505 }
1506 
1507 static inline BOOL is_namestartchar(WCHAR ch)
1508 {
1509     return (ch == ':') || (ch >= 'A' && ch <= 'Z') ||
1510            (ch == '_') || (ch >= 'a' && ch <= 'z') ||
1511            (ch >= 0xc0   && ch <= 0xd6)   ||
1512            (ch >= 0xd8   && ch <= 0xf6)   ||
1513            (ch >= 0xf8   && ch <= 0x2ff)  ||
1514            (ch >= 0x370  && ch <= 0x37d)  ||
1515            (ch >= 0x37f  && ch <= 0x1fff) ||
1516            (ch >= 0x200c && ch <= 0x200d) ||
1517            (ch >= 0x2070 && ch <= 0x218f) ||
1518            (ch >= 0x2c00 && ch <= 0x2fef) ||
1519            (ch >= 0x3001 && ch <= 0xd7ff) ||
1520            (ch >= 0xd800 && ch <= 0xdbff) || /* high surrogate */
1521            (ch >= 0xdc00 && ch <= 0xdfff) || /* low surrogate */
1522            (ch >= 0xf900 && ch <= 0xfdcf) ||
1523            (ch >= 0xfdf0 && ch <= 0xfffd);
1524 }
1525 
1526 /* [4 NS] NCName ::= Name - (Char* ':' Char*) */
1527 static inline BOOL is_ncnamechar(WCHAR ch)
1528 {
1529     return (ch >= 'A' && ch <= 'Z') ||
1530            (ch == '_') || (ch >= 'a' && ch <= 'z') ||
1531            (ch == '-') || (ch == '.') ||
1532            (ch >= '0'    && ch <= '9')    ||
1533            (ch == 0xb7)                   ||
1534            (ch >= 0xc0   && ch <= 0xd6)   ||
1535            (ch >= 0xd8   && ch <= 0xf6)   ||
1536            (ch >= 0xf8   && ch <= 0x2ff)  ||
1537            (ch >= 0x300  && ch <= 0x36f)  ||
1538            (ch >= 0x370  && ch <= 0x37d)  ||
1539            (ch >= 0x37f  && ch <= 0x1fff) ||
1540            (ch >= 0x200c && ch <= 0x200d) ||
1541            (ch >= 0x203f && ch <= 0x2040) ||
1542            (ch >= 0x2070 && ch <= 0x218f) ||
1543            (ch >= 0x2c00 && ch <= 0x2fef) ||
1544            (ch >= 0x3001 && ch <= 0xd7ff) ||
1545            (ch >= 0xd800 && ch <= 0xdbff) || /* high surrogate */
1546            (ch >= 0xdc00 && ch <= 0xdfff) || /* low surrogate */
1547            (ch >= 0xf900 && ch <= 0xfdcf) ||
1548            (ch >= 0xfdf0 && ch <= 0xfffd);
1549 }
1550 
1551 static inline BOOL is_namechar(WCHAR ch)
1552 {
1553     return (ch == ':') || is_ncnamechar(ch);
1554 }
1555 
1556 static XmlNodeType reader_get_nodetype(const xmlreader *reader)
1557 {
1558     /* When we're on attribute always return attribute type, container node type is kept.
1559        Note that container is not necessarily an element, and attribute doesn't mean it's
1560        an attribute in XML spec terms. */
1561     return reader->attr ? XmlNodeType_Attribute : reader->nodetype;
1562 }
1563 
1564 /* [4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] |
1565                             [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] |
1566                             [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
1567    [4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]
1568    [5]  Name     ::= NameStartChar (NameChar)* */
1569 static HRESULT reader_parse_name(xmlreader *reader, strval *name)
1570 {
1571     WCHAR *ptr;
1572     UINT start;
1573 
1574     if (reader->resume[XmlReadResume_Name])
1575     {
1576         start = reader->resume[XmlReadResume_Name];
1577         ptr = reader_get_ptr(reader);
1578     }
1579     else
1580     {
1581         ptr = reader_get_ptr(reader);
1582         start = reader_get_cur(reader);
1583         if (!is_namestartchar(*ptr)) return WC_E_NAMECHARACTER;
1584     }
1585 
1586     while (is_namechar(*ptr))
1587     {
1588         reader_skipn(reader, 1);
1589         ptr = reader_get_ptr(reader);
1590     }
1591 
1592     if (is_reader_pending(reader))
1593     {
1594         reader->resume[XmlReadResume_Name] = start;
1595         return E_PENDING;
1596     }
1597     else
1598         reader->resume[XmlReadResume_Name] = 0;
1599 
1600     reader_init_strvalue(start, reader_get_cur(reader)-start, name);
1601     TRACE("name %s:%d\n", debug_strval(reader, name), name->len);
1602 
1603     return S_OK;
1604 }
1605 
1606 /* [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) */
1607 static HRESULT reader_parse_pitarget(xmlreader *reader, strval *target)
1608 {
1609     static const WCHAR xmlW[] = {'x','m','l'};
1610     static const strval xmlval = { (WCHAR*)xmlW, 3 };
1611     strval name;
1612     WCHAR *ptr;
1613     HRESULT hr;
1614     UINT i;
1615 
1616     hr = reader_parse_name(reader, &name);
1617     if (FAILED(hr)) return is_reader_pending(reader) ? E_PENDING : WC_E_PI;
1618 
1619     /* now that we got name check for illegal content */
1620     if (strval_eq(reader, &name, &xmlval))
1621         return WC_E_LEADINGXML;
1622 
1623     /* PITarget can't be a qualified name */
1624     ptr = reader_get_strptr(reader, &name);
1625     for (i = 0; i < name.len; i++)
1626         if (ptr[i] == ':')
1627             return i ? NC_E_NAMECOLON : WC_E_PI;
1628 
1629     TRACE("pitarget %s:%d\n", debug_strval(reader, &name), name.len);
1630     *target = name;
1631     return S_OK;
1632 }
1633 
1634 /* [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' */
1635 static HRESULT reader_parse_pi(xmlreader *reader)
1636 {
1637     strval target;
1638     WCHAR *ptr;
1639     UINT start;
1640     HRESULT hr;
1641 
1642     switch (reader->resumestate)
1643     {
1644     case XmlReadResumeState_Initial:
1645         /* skip '<?' */
1646         reader_skipn(reader, 2);
1647         reader_shrink(reader);
1648         reader->resumestate = XmlReadResumeState_PITarget;
1649     case XmlReadResumeState_PITarget:
1650         hr = reader_parse_pitarget(reader, &target);
1651         if (FAILED(hr)) return hr;
1652         reader_set_strvalue(reader, StringValue_LocalName, &target);
1653         reader_set_strvalue(reader, StringValue_QualifiedName, &target);
1654         reader_set_strvalue(reader, StringValue_Value, &strval_empty);
1655         reader->resumestate = XmlReadResumeState_PIBody;
1656         reader->resume[XmlReadResume_Body] = reader_get_cur(reader);
1657     default:
1658         ;
1659     }
1660 
1661     start = reader->resume[XmlReadResume_Body];
1662     ptr = reader_get_ptr(reader);
1663     while (*ptr)
1664     {
1665         if (ptr[0] == '?')
1666         {
1667             if (ptr[1] == '>')
1668             {
1669                 UINT cur = reader_get_cur(reader);
1670                 strval value;
1671 
1672                 /* strip all leading whitespace chars */
1673                 while (start < cur)
1674                 {
1675                     ptr = reader_get_ptr2(reader, start);
1676                     if (!is_wchar_space(*ptr)) break;
1677                     start++;
1678                 }
1679 
1680                 reader_init_strvalue(start, cur-start, &value);
1681 
1682                 /* skip '?>' */
1683                 reader_skipn(reader, 2);
1684                 TRACE("%s\n", debug_strval(reader, &value));
1685                 reader->nodetype = XmlNodeType_ProcessingInstruction;
1686                 reader->resumestate = XmlReadResumeState_Initial;
1687                 reader->resume[XmlReadResume_Body] = 0;
1688                 reader_set_strvalue(reader, StringValue_Value, &value);
1689                 return S_OK;
1690             }
1691         }
1692 
1693         reader_skipn(reader, 1);
1694         ptr = reader_get_ptr(reader);
1695     }
1696 
1697     return S_OK;
1698 }
1699 
1700 /* This one is used to parse significant whitespace nodes, like in Misc production */
1701 static HRESULT reader_parse_whitespace(xmlreader *reader)
1702 {
1703     switch (reader->resumestate)
1704     {
1705     case XmlReadResumeState_Initial:
1706         reader_shrink(reader);
1707         reader->resumestate = XmlReadResumeState_Whitespace;
1708         reader->resume[XmlReadResume_Body] = reader_get_cur(reader);
1709         reader->nodetype = XmlNodeType_Whitespace;
1710         reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
1711         reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
1712         reader_set_strvalue(reader, StringValue_Value, &strval_empty);
1713         /* fallthrough */
1714     case XmlReadResumeState_Whitespace:
1715     {
1716         strval value;
1717         UINT start;
1718 
1719         reader_skipspaces(reader);
1720         if (is_reader_pending(reader)) return S_OK;
1721 
1722         start = reader->resume[XmlReadResume_Body];
1723         reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
1724         reader_set_strvalue(reader, StringValue_Value, &value);
1725         TRACE("%s\n", debug_strval(reader, &value));
1726         reader->resumestate = XmlReadResumeState_Initial;
1727     }
1728     default:
1729         ;
1730     }
1731 
1732     return S_OK;
1733 }
1734 
1735 /* [27] Misc ::= Comment | PI | S */
1736 static HRESULT reader_parse_misc(xmlreader *reader)
1737 {
1738     HRESULT hr = S_FALSE;
1739 
1740     if (reader->resumestate != XmlReadResumeState_Initial)
1741     {
1742         hr = reader_more(reader);
1743         if (FAILED(hr)) return hr;
1744 
1745         /* finish current node */
1746         switch (reader->resumestate)
1747         {
1748         case XmlReadResumeState_PITarget:
1749         case XmlReadResumeState_PIBody:
1750             return reader_parse_pi(reader);
1751         case XmlReadResumeState_Comment:
1752             return reader_parse_comment(reader);
1753         case XmlReadResumeState_Whitespace:
1754             return reader_parse_whitespace(reader);
1755         default:
1756             ERR("unknown resume state %d\n", reader->resumestate);
1757         }
1758     }
1759 
1760     while (1)
1761     {
1762         const WCHAR *cur = reader_get_ptr(reader);
1763 
1764         if (is_wchar_space(*cur))
1765             hr = reader_parse_whitespace(reader);
1766         else if (!reader_cmp(reader, commentW))
1767             hr = reader_parse_comment(reader);
1768         else if (!reader_cmp(reader, piW))
1769             hr = reader_parse_pi(reader);
1770         else
1771             break;
1772 
1773         if (hr != S_FALSE) return hr;
1774     }
1775 
1776     return hr;
1777 }
1778 
1779 /* [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") */
1780 static HRESULT reader_parse_sys_literal(xmlreader *reader, strval *literal)
1781 {
1782     WCHAR *cur = reader_get_ptr(reader), quote;
1783     UINT start;
1784 
1785     if (*cur != '"' && *cur != '\'') return WC_E_QUOTE;
1786 
1787     quote = *cur;
1788     reader_skipn(reader, 1);
1789 
1790     cur = reader_get_ptr(reader);
1791     start = reader_get_cur(reader);
1792     while (is_char(*cur) && *cur != quote)
1793     {
1794         reader_skipn(reader, 1);
1795         cur = reader_get_ptr(reader);
1796     }
1797     reader_init_strvalue(start, reader_get_cur(reader)-start, literal);
1798     if (*cur == quote) reader_skipn(reader, 1);
1799 
1800     TRACE("%s\n", debug_strval(reader, literal));
1801     return S_OK;
1802 }
1803 
1804 /* [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
1805    [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] */
1806 static HRESULT reader_parse_pub_literal(xmlreader *reader, strval *literal)
1807 {
1808     WCHAR *cur = reader_get_ptr(reader), quote;
1809     UINT start;
1810 
1811     if (*cur != '"' && *cur != '\'') return WC_E_QUOTE;
1812 
1813     quote = *cur;
1814     reader_skipn(reader, 1);
1815 
1816     start = reader_get_cur(reader);
1817     cur = reader_get_ptr(reader);
1818     while (is_pubchar(*cur) && *cur != quote)
1819     {
1820         reader_skipn(reader, 1);
1821         cur = reader_get_ptr(reader);
1822     }
1823     reader_init_strvalue(start, reader_get_cur(reader)-start, literal);
1824     if (*cur == quote) reader_skipn(reader, 1);
1825 
1826     TRACE("%s\n", debug_strval(reader, literal));
1827     return S_OK;
1828 }
1829 
1830 /* [75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral */
1831 static HRESULT reader_parse_externalid(xmlreader *reader)
1832 {
1833     static WCHAR systemW[] = {'S','Y','S','T','E','M',0};
1834     static WCHAR publicW[] = {'P','U','B','L','I','C',0};
1835     struct reader_position position = reader->position;
1836     strval name, sys;
1837     HRESULT hr;
1838     int cnt;
1839 
1840     if (!reader_cmp(reader, publicW)) {
1841         strval pub;
1842 
1843         /* public id */
1844         reader_skipn(reader, 6);
1845         cnt = reader_skipspaces(reader);
1846         if (!cnt) return WC_E_WHITESPACE;
1847 
1848         hr = reader_parse_pub_literal(reader, &pub);
1849         if (FAILED(hr)) return hr;
1850 
1851         reader_init_cstrvalue(publicW, strlenW(publicW), &name);
1852         hr = reader_add_attr(reader, NULL, &name, NULL, &pub, &position, 0);
1853         if (FAILED(hr)) return hr;
1854 
1855         cnt = reader_skipspaces(reader);
1856         if (!cnt) return S_OK;
1857 
1858         /* optional system id */
1859         hr = reader_parse_sys_literal(reader, &sys);
1860         if (FAILED(hr)) return S_OK;
1861 
1862         reader_init_cstrvalue(systemW, strlenW(systemW), &name);
1863         hr = reader_add_attr(reader, NULL, &name, NULL, &sys, &position, 0);
1864         if (FAILED(hr)) return hr;
1865 
1866         return S_OK;
1867     } else if (!reader_cmp(reader, systemW)) {
1868         /* system id */
1869         reader_skipn(reader, 6);
1870         cnt = reader_skipspaces(reader);
1871         if (!cnt) return WC_E_WHITESPACE;
1872 
1873         hr = reader_parse_sys_literal(reader, &sys);
1874         if (FAILED(hr)) return hr;
1875 
1876         reader_init_cstrvalue(systemW, strlenW(systemW), &name);
1877         return reader_add_attr(reader, NULL, &name, NULL, &sys, &position, 0);
1878     }
1879 
1880     return S_FALSE;
1881 }
1882 
1883 /* [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>' */
1884 static HRESULT reader_parse_dtd(xmlreader *reader)
1885 {
1886     static const WCHAR doctypeW[] = {'<','!','D','O','C','T','Y','P','E',0};
1887     strval name;
1888     WCHAR *cur;
1889     HRESULT hr;
1890 
1891     /* check if we have "<!DOCTYPE" */
1892     if (reader_cmp(reader, doctypeW)) return S_FALSE;
1893     reader_shrink(reader);
1894 
1895     /* DTD processing is not allowed by default */
1896     if (reader->dtdmode == DtdProcessing_Prohibit) return WC_E_DTDPROHIBITED;
1897 
1898     reader_skipn(reader, 9);
1899     if (!reader_skipspaces(reader)) return WC_E_WHITESPACE;
1900 
1901     /* name */
1902     hr = reader_parse_name(reader, &name);
1903     if (FAILED(hr)) return WC_E_DECLDOCTYPE;
1904 
1905     reader_skipspaces(reader);
1906 
1907     hr = reader_parse_externalid(reader);
1908     if (FAILED(hr)) return hr;
1909 
1910     reader_skipspaces(reader);
1911 
1912     cur = reader_get_ptr(reader);
1913     if (*cur != '>')
1914     {
1915         FIXME("internal subset parsing not implemented\n");
1916         return E_NOTIMPL;
1917     }
1918 
1919     /* skip '>' */
1920     reader_skipn(reader, 1);
1921 
1922     reader->nodetype = XmlNodeType_DocumentType;
1923     reader_set_strvalue(reader, StringValue_LocalName, &name);
1924     reader_set_strvalue(reader, StringValue_QualifiedName, &name);
1925 
1926     return S_OK;
1927 }
1928 
1929 /* [11 NS] LocalPart ::= NCName */
1930 static HRESULT reader_parse_local(xmlreader *reader, strval *local, BOOL check_for_separator)
1931 {
1932     WCHAR *ptr;
1933     UINT start;
1934 
1935     if (reader->resume[XmlReadResume_Local])
1936     {
1937         start = reader->resume[XmlReadResume_Local];
1938         ptr = reader_get_ptr(reader);
1939     }
1940     else
1941     {
1942         ptr = reader_get_ptr(reader);
1943         start = reader_get_cur(reader);
1944     }
1945 
1946     while (is_ncnamechar(*ptr))
1947     {
1948         reader_skipn(reader, 1);
1949         ptr = reader_get_ptr(reader);
1950     }
1951 
1952     if (check_for_separator && *ptr == ':')
1953         return NC_E_QNAMECOLON;
1954 
1955     if (is_reader_pending(reader))
1956     {
1957          reader->resume[XmlReadResume_Local] = start;
1958          return E_PENDING;
1959     }
1960     else
1961          reader->resume[XmlReadResume_Local] = 0;
1962 
1963     reader_init_strvalue(start, reader_get_cur(reader)-start, local);
1964 
1965     return S_OK;
1966 }
1967 
1968 /* [7 NS]  QName ::= PrefixedName | UnprefixedName
1969    [8 NS]  PrefixedName ::= Prefix ':' LocalPart
1970    [9 NS]  UnprefixedName ::= LocalPart
1971    [10 NS] Prefix ::= NCName */
1972 static HRESULT reader_parse_qname(xmlreader *reader, strval *prefix, strval *local, strval *qname)
1973 {
1974     WCHAR *ptr;
1975     UINT start;
1976     HRESULT hr;
1977 
1978     if (reader->resume[XmlReadResume_Name])
1979     {
1980         start = reader->resume[XmlReadResume_Name];
1981         ptr = reader_get_ptr(reader);
1982     }
1983     else
1984     {
1985         ptr = reader_get_ptr(reader);
1986         start = reader_get_cur(reader);
1987         reader->resume[XmlReadResume_Name] = start;
1988         if (!is_ncnamechar(*ptr)) return NC_E_QNAMECHARACTER;
1989     }
1990 
1991     if (reader->resume[XmlReadResume_Local])
1992     {
1993         hr = reader_parse_local(reader, local, FALSE);
1994         if (FAILED(hr)) return hr;
1995 
1996         reader_init_strvalue(reader->resume[XmlReadResume_Name],
1997                              local->start - reader->resume[XmlReadResume_Name] - 1,
1998                              prefix);
1999     }
2000     else
2001     {
2002         /* skip prefix part */
2003         while (is_ncnamechar(*ptr))
2004         {
2005             reader_skipn(reader, 1);
2006             ptr = reader_get_ptr(reader);
2007         }
2008 
2009         if (is_reader_pending(reader)) return E_PENDING;
2010 
2011         /* got a qualified name */
2012         if (*ptr == ':')
2013         {
2014             reader_init_strvalue(start, reader_get_cur(reader)-start, prefix);
2015 
2016             /* skip ':' */
2017             reader_skipn(reader, 1);
2018             hr = reader_parse_local(reader, local, TRUE);
2019             if (FAILED(hr)) return hr;
2020         }
2021         else
2022         {
2023             reader_init_strvalue(reader->resume[XmlReadResume_Name], reader_get_cur(reader)-reader->resume[XmlReadResume_Name], local);
2024             reader_init_strvalue(0, 0, prefix);
2025         }
2026     }
2027 
2028     if (prefix->len)
2029         TRACE("qname %s:%s\n", debug_strval(reader, prefix), debug_strval(reader, local));
2030     else
2031         TRACE("ncname %s\n", debug_strval(reader, local));
2032 
2033     reader_init_strvalue(prefix->len ? prefix->start : local->start,
2034                         /* count ':' too */
2035                         (prefix->len ? prefix->len + 1 : 0) + local->len,
2036                          qname);
2037 
2038     reader->resume[XmlReadResume_Name] = 0;
2039     reader->resume[XmlReadResume_Local] = 0;
2040 
2041     return S_OK;
2042 }
2043 
2044 static WCHAR get_predefined_entity(const xmlreader *reader, const strval *name)
2045 {
2046     static const WCHAR entltW[]   = {'l','t'};
2047     static const WCHAR entgtW[]   = {'g','t'};
2048     static const WCHAR entampW[]  = {'a','m','p'};
2049     static const WCHAR entaposW[] = {'a','p','o','s'};
2050     static const WCHAR entquotW[] = {'q','u','o','t'};
2051     static const strval lt   = { (WCHAR*)entltW,   2 };
2052     static const strval gt   = { (WCHAR*)entgtW,   2 };
2053     static const strval amp  = { (WCHAR*)entampW,  3 };
2054     static const strval apos = { (WCHAR*)entaposW, 4 };
2055     static const strval quot = { (WCHAR*)entquotW, 4 };
2056     WCHAR *str = reader_get_strptr(reader, name);
2057 
2058     switch (*str)
2059     {
2060     case 'l':
2061         if (strval_eq(reader, name, &lt)) return '<';
2062         break;
2063     case 'g':
2064         if (strval_eq(reader, name, &gt)) return '>';
2065         break;
2066     case 'a':
2067         if (strval_eq(reader, name, &amp))
2068             return '&';
2069         else if (strval_eq(reader, name, &apos))
2070             return '\'';
2071         break;
2072     case 'q':
2073         if (strval_eq(reader, name, &quot)) return '\"';
2074         break;
2075     default:
2076         ;
2077     }
2078 
2079     return 0;
2080 }
2081 
2082 /* [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
2083    [67] Reference ::= EntityRef | CharRef
2084    [68] EntityRef ::= '&' Name ';' */
2085 static HRESULT reader_parse_reference(xmlreader *reader)
2086 {
2087     encoded_buffer *buffer = &reader->input->buffer->utf16;
2088     WCHAR *start = reader_get_ptr(reader), *ptr;
2089     UINT cur = reader_get_cur(reader);
2090     WCHAR ch = 0;
2091     int len;
2092 
2093     /* skip '&' */
2094     reader_skipn(reader, 1);
2095     ptr = reader_get_ptr(reader);
2096 
2097     if (*ptr == '#')
2098     {
2099         reader_skipn(reader, 1);
2100         ptr = reader_get_ptr(reader);
2101 
2102         /* hex char or decimal */
2103         if (*ptr == 'x')
2104         {
2105             reader_skipn(reader, 1);
2106             ptr = reader_get_ptr(reader);
2107 
2108             while (*ptr != ';')
2109             {
2110                 if ((*ptr >= '0' && *ptr <= '9'))
2111                     ch = ch*16 + *ptr - '0';
2112                 else if ((*ptr >= 'a' && *ptr <= 'f'))
2113                     ch = ch*16 + *ptr - 'a' + 10;
2114                 else if ((*ptr >= 'A' && *ptr <= 'F'))
2115                     ch = ch*16 + *ptr - 'A' + 10;
2116                 else
2117                     return ch ? WC_E_SEMICOLON : WC_E_HEXDIGIT;
2118                 reader_skipn(reader, 1);
2119                 ptr = reader_get_ptr(reader);
2120             }
2121         }
2122         else
2123         {
2124             while (*ptr != ';')
2125             {
2126                 if ((*ptr >= '0' && *ptr <= '9'))
2127                 {
2128                     ch = ch*10 + *ptr - '0';
2129                     reader_skipn(reader, 1);
2130                     ptr = reader_get_ptr(reader);
2131                 }
2132                 else
2133                     return ch ? WC_E_SEMICOLON : WC_E_DIGIT;
2134             }
2135         }
2136 
2137         if (!is_char(ch)) return WC_E_XMLCHARACTER;
2138 
2139         /* normalize */
2140         if (is_wchar_space(ch)) ch = ' ';
2141 
2142         ptr = reader_get_ptr(reader);
2143         start = reader_get_ptr2(reader, cur);
2144         len = buffer->written - ((char *)ptr - buffer->data);
2145         memmove(start + 1, ptr + 1, len);
2146 
2147         buffer->written -= (reader_get_cur(reader) - cur) * sizeof(WCHAR);
2148         buffer->cur = cur + 1;
2149 
2150         *start = ch;
2151     }
2152     else
2153     {
2154         strval name;
2155         HRESULT hr;
2156 
2157         hr = reader_parse_name(reader, &name);
2158         if (FAILED(hr)) return hr;
2159 
2160         ptr = reader_get_ptr(reader);
2161         if (*ptr != ';') return WC_E_SEMICOLON;
2162 
2163         /* predefined entities resolve to a single character */
2164         ch = get_predefined_entity(reader, &name);
2165         if (ch)
2166         {
2167             len = buffer->written - ((char*)ptr - buffer->data) - sizeof(WCHAR);
2168             memmove(start+1, ptr+1, len);
2169             buffer->cur = cur + 1;
2170             buffer->written -= (ptr - start) * sizeof(WCHAR);
2171 
2172             *start = ch;
2173         }
2174         else
2175         {
2176             FIXME("undeclared entity %s\n", debug_strval(reader, &name));
2177             return WC_E_UNDECLAREDENTITY;
2178         }
2179 
2180     }
2181 
2182     return S_OK;
2183 }
2184 
2185 /* [10 NS] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'" */
2186 static HRESULT reader_parse_attvalue(xmlreader *reader, strval *value)
2187 {
2188     WCHAR *ptr, quote;
2189     UINT start;
2190 
2191     ptr = reader_get_ptr(reader);
2192 
2193     /* skip opening quote */
2194     quote = *ptr;
2195     if (quote != '\"' && quote != '\'') return WC_E_QUOTE;
2196     reader_skipn(reader, 1);
2197 
2198     ptr = reader_get_ptr(reader);
2199     start = reader_get_cur(reader);
2200     while (*ptr)
2201     {
2202         if (*ptr == '<') return WC_E_LESSTHAN;
2203 
2204         if (*ptr == quote)
2205         {
2206             reader_init_strvalue(start, reader_get_cur(reader)-start, value);
2207             /* skip closing quote */
2208             reader_skipn(reader, 1);
2209             return S_OK;
2210         }
2211 
2212         if (*ptr == '&')
2213         {
2214             HRESULT hr = reader_parse_reference(reader);
2215             if (FAILED(hr)) return hr;
2216         }
2217         else
2218         {
2219             /* replace all whitespace chars with ' ' */
2220             if (is_wchar_space(*ptr)) *ptr = ' ';
2221             reader_skipn(reader, 1);
2222         }
2223         ptr = reader_get_ptr(reader);
2224     }
2225 
2226     return WC_E_QUOTE;
2227 }
2228 
2229 /* [1  NS] NSAttName ::= PrefixedAttName | DefaultAttName
2230    [2  NS] PrefixedAttName ::= 'xmlns:' NCName
2231    [3  NS] DefaultAttName  ::= 'xmlns'
2232    [15 NS] Attribute ::= NSAttName Eq AttValue | QName Eq AttValue */
2233 static HRESULT reader_parse_attribute(xmlreader *reader)
2234 {
2235     struct reader_position position = reader->position;
2236     strval prefix, local, qname, value;
2237     enum attribute_flags flags = 0;
2238     HRESULT hr;
2239 
2240     hr = reader_parse_qname(reader, &prefix, &local, &qname);
2241     if (FAILED(hr)) return hr;
2242 
2243     if (strval_eq(reader, &prefix, &strval_xmlns))
2244         flags |= ATTRIBUTE_NS_DEFINITION;
2245 
2246     if (strval_eq(reader, &qname, &strval_xmlns))
2247         flags |= ATTRIBUTE_DEFAULT_NS_DEFINITION;
2248 
2249     hr = reader_parse_eq(reader);
2250     if (FAILED(hr)) return hr;
2251 
2252     hr = reader_parse_attvalue(reader, &value);
2253     if (FAILED(hr)) return hr;
2254 
2255     if (flags & (ATTRIBUTE_NS_DEFINITION | ATTRIBUTE_DEFAULT_NS_DEFINITION))
2256         reader_push_ns(reader, &local, &value, !!(flags & ATTRIBUTE_DEFAULT_NS_DEFINITION));
2257 
2258     TRACE("%s=%s\n", debug_strval(reader, &local), debug_strval(reader, &value));
2259     return reader_add_attr(reader, &prefix, &local, &qname, &value, &position, flags);
2260 }
2261 
2262 /* [12 NS] STag ::= '<' QName (S Attribute)* S? '>'
2263    [14 NS] EmptyElemTag ::= '<' QName (S Attribute)* S? '/>' */
2264 static HRESULT reader_parse_stag(xmlreader *reader, strval *prefix, strval *local, strval *qname)
2265 {
2266     struct reader_position position = reader->position;
2267     HRESULT hr;
2268 
2269     hr = reader_parse_qname(reader, prefix, local, qname);
2270     if (FAILED(hr)) return hr;
2271 
2272     for (;;)
2273     {
2274         static const WCHAR endW[] = {'/','>',0};
2275 
2276         reader_skipspaces(reader);
2277 
2278         /* empty element */
2279         if ((reader->is_empty_element = !reader_cmp(reader, endW)))
2280         {
2281             struct element *element = &reader->empty_element;
2282 
2283             /* skip '/>' */
2284             reader_skipn(reader, 2);
2285 
2286             reader_free_strvalued(reader, &element->qname);
2287             reader_free_strvalued(reader, &element->localname);
2288 
2289             element->prefix = *prefix;
2290             reader_strvaldup(reader, qname, &element->qname);
2291             reader_strvaldup(reader, local, &element->localname);
2292             element->position = position;
2293             reader_mark_ns_nodes(reader, element);
2294             return S_OK;
2295         }
2296 
2297         /* got a start tag */
2298         if (!reader_cmp(reader, gtW))
2299         {
2300             /* skip '>' */
2301             reader_skipn(reader, 1);
2302             return reader_push_element(reader, prefix, local, qname, &position);
2303         }
2304 
2305         hr = reader_parse_attribute(reader);
2306         if (FAILED(hr)) return hr;
2307     }
2308 
2309     return S_OK;
2310 }
2311 
2312 /* [39] element ::= EmptyElemTag | STag content ETag */
2313 static HRESULT reader_parse_element(xmlreader *reader)
2314 {
2315     HRESULT hr;
2316 
2317     switch (reader->resumestate)
2318     {
2319     case XmlReadResumeState_Initial:
2320         /* check if we are really on element */
2321         if (reader_cmp(reader, ltW)) return S_FALSE;
2322 
2323         /* skip '<' */
2324         reader_skipn(reader, 1);
2325 
2326         reader_shrink(reader);
2327         reader->resumestate = XmlReadResumeState_STag;
2328     case XmlReadResumeState_STag:
2329     {
2330         strval qname, prefix, local;
2331 
2332         /* this handles empty elements too */
2333         hr = reader_parse_stag(reader, &prefix, &local, &qname);
2334         if (FAILED(hr)) return hr;
2335 
2336         /* FIXME: need to check for defined namespace to reject invalid prefix */
2337 
2338         /* if we got empty element and stack is empty go straight to Misc */
2339         if (reader->is_empty_element && list_empty(&reader->elements))
2340             reader->instate = XmlReadInState_MiscEnd;
2341         else
2342             reader->instate = XmlReadInState_Content;
2343 
2344         reader->nodetype = XmlNodeType_Element;
2345         reader->resumestate = XmlReadResumeState_Initial;
2346         reader_set_strvalue(reader, StringValue_Prefix, &prefix);
2347         reader_set_strvalue(reader, StringValue_QualifiedName, &qname);
2348         reader_set_strvalue(reader, StringValue_Value, &strval_empty);
2349         break;
2350     }
2351     default:
2352         hr = E_FAIL;
2353     }
2354 
2355     return hr;
2356 }
2357 
2358 /* [13 NS] ETag ::= '</' QName S? '>' */
2359 static HRESULT reader_parse_endtag(xmlreader *reader)
2360 {
2361     struct reader_position position;
2362     strval prefix, local, qname;
2363     struct element *element;
2364     HRESULT hr;
2365 
2366     /* skip '</' */
2367     reader_skipn(reader, 2);
2368 
2369     position = reader->position;
2370     hr = reader_parse_qname(reader, &prefix, &local, &qname);
2371     if (FAILED(hr)) return hr;
2372 
2373     reader_skipspaces(reader);
2374 
2375     if (reader_cmp(reader, gtW)) return WC_E_GREATERTHAN;
2376 
2377     /* skip '>' */
2378     reader_skipn(reader, 1);
2379 
2380     /* Element stack should never be empty at this point, cause we shouldn't get to
2381        content parsing if it's empty. */
2382     element = LIST_ENTRY(list_head(&reader->elements), struct element, entry);
2383     if (!strval_eq(reader, &element->qname, &qname)) return WC_E_ELEMENTMATCH;
2384 
2385     /* update position stored for start tag, we won't be using it */
2386     element->position = position;
2387 
2388     reader->nodetype = XmlNodeType_EndElement;
2389     reader->is_empty_element = FALSE;
2390     reader_set_strvalue(reader, StringValue_Prefix, &prefix);
2391 
2392     return S_OK;
2393 }
2394 
2395 /* [18] CDSect ::= CDStart CData CDEnd
2396    [19] CDStart ::= '<![CDATA['
2397    [20] CData ::= (Char* - (Char* ']]>' Char*))
2398    [21] CDEnd ::= ']]>' */
2399 static HRESULT reader_parse_cdata(xmlreader *reader)
2400 {
2401     WCHAR *ptr;
2402     UINT start;
2403 
2404     if (reader->resumestate == XmlReadResumeState_CDATA)
2405     {
2406         start = reader->resume[XmlReadResume_Body];
2407         ptr = reader_get_ptr(reader);
2408     }
2409     else
2410     {
2411         /* skip markup '<![CDATA[' */
2412         reader_skipn(reader, 9);
2413         reader_shrink(reader);
2414         ptr = reader_get_ptr(reader);
2415         start = reader_get_cur(reader);
2416         reader->nodetype = XmlNodeType_CDATA;
2417         reader->resume[XmlReadResume_Body] = start;
2418         reader->resumestate = XmlReadResumeState_CDATA;
2419         reader_set_strvalue(reader, StringValue_Value, NULL);
2420     }
2421 
2422     while (*ptr)
2423     {
2424         if (*ptr == ']' && *(ptr+1) == ']' && *(ptr+2) == '>')
2425         {
2426             strval value;
2427 
2428             reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
2429 
2430             /* skip ']]>' */
2431             reader_skipn(reader, 3);
2432             TRACE("%s\n", debug_strval(reader, &value));
2433 
2434             reader_set_strvalue(reader, StringValue_Value, &value);
2435             reader->resume[XmlReadResume_Body] = 0;
2436             reader->resumestate = XmlReadResumeState_Initial;
2437             return S_OK;
2438         }
2439         else
2440         {
2441             reader_skipn(reader, 1);
2442             ptr++;
2443         }
2444     }
2445 
2446     return S_OK;
2447 }
2448 
2449 /* [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) */
2450 static HRESULT reader_parse_chardata(xmlreader *reader)
2451 {
2452     struct reader_position position;
2453     WCHAR *ptr;
2454     UINT start;
2455 
2456     if (reader->resumestate == XmlReadResumeState_CharData)
2457     {
2458         start = reader->resume[XmlReadResume_Body];
2459         ptr = reader_get_ptr(reader);
2460     }
2461     else
2462     {
2463         reader_shrink(reader);
2464         ptr = reader_get_ptr(reader);
2465         start = reader_get_cur(reader);
2466         /* There's no text */
2467         if (!*ptr || *ptr == '<') return S_OK;
2468         reader->nodetype = is_wchar_space(*ptr) ? XmlNodeType_Whitespace : XmlNodeType_Text;
2469         reader->resume[XmlReadResume_Body] = start;
2470         reader->resumestate = XmlReadResumeState_CharData;
2471         reader_set_strvalue(reader, StringValue_Value, NULL);
2472     }
2473 
2474     position = reader->position;
2475     while (*ptr)
2476     {
2477         static const WCHAR ampW[] = {'&',0};
2478 
2479         /* CDATA closing sequence ']]>' is not allowed */
2480         if (ptr[0] == ']' && ptr[1] == ']' && ptr[2] == '>')
2481             return WC_E_CDSECTEND;
2482 
2483         /* Found next markup part */
2484         if (ptr[0] == '<')
2485         {
2486             strval value;
2487 
2488             reader->empty_element.position = position;
2489             reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
2490             reader_set_strvalue(reader, StringValue_Value, &value);
2491             reader->resume[XmlReadResume_Body] = 0;
2492             reader->resumestate = XmlReadResumeState_Initial;
2493             return S_OK;
2494         }
2495 
2496         /* this covers a case when text has leading whitespace chars */
2497         if (!is_wchar_space(*ptr)) reader->nodetype = XmlNodeType_Text;
2498 
2499         if (!reader_cmp(reader, ampW))
2500             reader_parse_reference(reader);
2501         else
2502             reader_skipn(reader, 1);
2503 
2504         ptr = reader_get_ptr(reader);
2505     }
2506 
2507     return S_OK;
2508 }
2509 
2510 /* [43] content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)* */
2511 static HRESULT reader_parse_content(xmlreader *reader)
2512 {
2513     static const WCHAR cdstartW[] = {'<','!','[','C','D','A','T','A','[',0};
2514     static const WCHAR etagW[] = {'<','/',0};
2515 
2516     if (reader->resumestate != XmlReadResumeState_Initial)
2517     {
2518         switch (reader->resumestate)
2519         {
2520         case XmlReadResumeState_CDATA:
2521             return reader_parse_cdata(reader);
2522         case XmlReadResumeState_Comment:
2523             return reader_parse_comment(reader);
2524         case XmlReadResumeState_PIBody:
2525         case XmlReadResumeState_PITarget:
2526             return reader_parse_pi(reader);
2527         case XmlReadResumeState_CharData:
2528             return reader_parse_chardata(reader);
2529         default:
2530             ERR("unknown resume state %d\n", reader->resumestate);
2531         }
2532     }
2533 
2534     reader_shrink(reader);
2535 
2536     /* handle end tag here, it indicates end of content as well */
2537     if (!reader_cmp(reader, etagW))
2538         return reader_parse_endtag(reader);
2539 
2540     if (!reader_cmp(reader, commentW))
2541         return reader_parse_comment(reader);
2542 
2543     if (!reader_cmp(reader, piW))
2544         return reader_parse_pi(reader);
2545 
2546     if (!reader_cmp(reader, cdstartW))
2547         return reader_parse_cdata(reader);
2548 
2549     if (!reader_cmp(reader, ltW))
2550         return reader_parse_element(reader);
2551 
2552     /* what's left must be CharData */
2553     return reader_parse_chardata(reader);
2554 }
2555 
2556 static HRESULT reader_parse_nextnode(xmlreader *reader)
2557 {
2558     XmlNodeType nodetype = reader_get_nodetype(reader);
2559     HRESULT hr;
2560 
2561     if (!is_reader_pending(reader))
2562     {
2563         reader->chunk_read_off = 0;
2564         reader_clear_attrs(reader);
2565     }
2566 
2567     /* When moving from EndElement or empty element, pop its own namespace definitions */
2568     switch (nodetype)
2569     {
2570     case XmlNodeType_Attribute:
2571         reader_dec_depth(reader);
2572         /* fallthrough */
2573     case XmlNodeType_Element:
2574         if (reader->is_empty_element)
2575             reader_pop_ns_nodes(reader, &reader->empty_element);
2576         else if (FAILED(hr = reader_inc_depth(reader)))
2577             return hr;
2578         break;
2579     case XmlNodeType_EndElement:
2580         reader_pop_element(reader);
2581         reader_dec_depth(reader);
2582         break;
2583     default:
2584         ;
2585     }
2586 
2587     for (;;)
2588     {
2589         switch (reader->instate)
2590         {
2591         /* if it's a first call for a new input we need to detect stream encoding */
2592         case XmlReadInState_Initial:
2593             {
2594                 xml_encoding enc;
2595 
2596                 hr = readerinput_growraw(reader->input);
2597                 if (FAILED(hr)) return hr;
2598 
2599                 reader->position.line_number = 1;
2600                 reader->position.line_position = 1;
2601 
2602                 /* try to detect encoding by BOM or data and set input code page */
2603                 hr = readerinput_detectencoding(reader->input, &enc);
2604                 TRACE("detected encoding %s, 0x%08x\n", enc == XmlEncoding_Unknown ? "(unknown)" :
2605                         debugstr_w(xml_encoding_map[enc].name), hr);
2606                 if (FAILED(hr)) return hr;
2607 
2608                 /* always switch first time cause we have to put something in */
2609                 readerinput_switchencoding(reader->input, enc);
2610 
2611                 /* parse xml declaration */
2612                 hr = reader_parse_xmldecl(reader);
2613                 if (FAILED(hr)) return hr;
2614 
2615                 readerinput_shrinkraw(reader->input, -1);
2616                 reader->instate = XmlReadInState_Misc_DTD;
2617                 if (hr == S_OK) return hr;
2618             }
2619             break;
2620         case XmlReadInState_Misc_DTD:
2621             hr = reader_parse_misc(reader);
2622             if (FAILED(hr)) return hr;
2623 
2624             if (hr == S_FALSE)
2625                 reader->instate = XmlReadInState_DTD;
2626             else
2627                 return hr;
2628             break;
2629         case XmlReadInState_DTD:
2630             hr = reader_parse_dtd(reader);
2631             if (FAILED(hr)) return hr;
2632 
2633             if (hr == S_OK)
2634             {
2635                 reader->instate = XmlReadInState_DTD_Misc;
2636                 return hr;
2637             }
2638             else
2639                 reader->instate = XmlReadInState_Element;
2640             break;
2641         case XmlReadInState_DTD_Misc:
2642             hr = reader_parse_misc(reader);
2643             if (FAILED(hr)) return hr;
2644 
2645             if (hr == S_FALSE)
2646                 reader->instate = XmlReadInState_Element;
2647             else
2648                 return hr;
2649             break;
2650         case XmlReadInState_Element:
2651             return reader_parse_element(reader);
2652         case XmlReadInState_Content:
2653             return reader_parse_content(reader);
2654         case XmlReadInState_MiscEnd:
2655             hr = reader_parse_misc(reader);
2656             if (hr != S_FALSE) return hr;
2657 
2658             if (*reader_get_ptr(reader))
2659             {
2660                 WARN("found garbage in the end of XML\n");
2661                 return WC_E_SYNTAX;
2662             }
2663 
2664             reader->instate = XmlReadInState_Eof;
2665             reader->state = XmlReadState_EndOfFile;
2666             reader->nodetype = XmlNodeType_None;
2667             return hr;
2668         case XmlReadInState_Eof:
2669             return S_FALSE;
2670         default:
2671             FIXME("internal state %d not handled\n", reader->instate);
2672             return E_NOTIMPL;
2673         }
2674     }
2675 
2676     return E_NOTIMPL;
2677 }
2678 
2679 static HRESULT WINAPI xmlreader_QueryInterface(IXmlReader *iface, REFIID riid, void** ppvObject)
2680 {
2681     xmlreader *This = impl_from_IXmlReader(iface);
2682 
2683     TRACE("(%p)->(%s %p)\n", This, debugstr_guid(riid), ppvObject);
2684 
2685     if (IsEqualGUID(riid, &IID_IUnknown) ||
2686         IsEqualGUID(riid, &IID_IXmlReader))
2687     {
2688         *ppvObject = iface;
2689     }
2690     else
2691     {
2692         FIXME("interface %s not implemented\n", debugstr_guid(riid));
2693         *ppvObject = NULL;
2694         return E_NOINTERFACE;
2695     }
2696 
2697     IXmlReader_AddRef(iface);
2698 
2699     return S_OK;
2700 }
2701 
2702 static ULONG WINAPI xmlreader_AddRef(IXmlReader *iface)
2703 {
2704     xmlreader *This = impl_from_IXmlReader(iface);
2705     ULONG ref = InterlockedIncrement(&This->ref);
2706     TRACE("(%p)->(%d)\n", This, ref);
2707     return ref;
2708 }
2709 
2710 static void reader_clear_ns(xmlreader *reader)
2711 {
2712     struct ns *ns, *ns2;
2713 
2714     LIST_FOR_EACH_ENTRY_SAFE(ns, ns2, &reader->ns, struct ns, entry) {
2715         list_remove(&ns->entry);
2716         reader_free_strvalued(reader, &ns->prefix);
2717         reader_free_strvalued(reader, &ns->uri);
2718         reader_free(reader, ns);
2719     }
2720 
2721     LIST_FOR_EACH_ENTRY_SAFE(ns, ns2, &reader->nsdef, struct ns, entry) {
2722         list_remove(&ns->entry);
2723         reader_free_strvalued(reader, &ns->uri);
2724         reader_free(reader, ns);
2725     }
2726 }
2727 
2728 static void reader_reset_parser(xmlreader *reader)
2729 {
2730     reader->position.line_number = 0;
2731     reader->position.line_position = 0;
2732 
2733     reader_clear_elements(reader);
2734     reader_clear_attrs(reader);
2735     reader_clear_ns(reader);
2736     reader_free_strvalues(reader);
2737 
2738     reader->depth = 0;
2739     reader->nodetype = XmlNodeType_None;
2740     reader->resumestate = XmlReadResumeState_Initial;
2741     memset(reader->resume, 0, sizeof(reader->resume));
2742     reader->is_empty_element = FALSE;
2743 }
2744 
2745 static ULONG WINAPI xmlreader_Release(IXmlReader *iface)
2746 {
2747     xmlreader *This = impl_from_IXmlReader(iface);
2748     LONG ref = InterlockedDecrement(&This->ref);
2749 
2750     TRACE("(%p)->(%d)\n", This, ref);
2751 
2752     if (ref == 0)
2753     {
2754         IMalloc *imalloc = This->imalloc;
2755         reader_reset_parser(This);
2756         if (This->input) IUnknown_Release(&This->input->IXmlReaderInput_iface);
2757         if (This->resolver) IXmlResolver_Release(This->resolver);
2758         if (This->mlang) IUnknown_Release(This->mlang);
2759         reader_free(This, This);
2760         if (imalloc) IMalloc_Release(imalloc);
2761     }
2762 
2763     return ref;
2764 }
2765 
2766 static HRESULT WINAPI xmlreader_SetInput(IXmlReader* iface, IUnknown *input)
2767 {
2768     xmlreader *This = impl_from_IXmlReader(iface);
2769     IXmlReaderInput *readerinput;
2770     HRESULT hr;
2771 
2772     TRACE("(%p)->(%p)\n", This, input);
2773 
2774     if (This->input)
2775     {
2776         readerinput_release_stream(This->input);
2777         IUnknown_Release(&This->input->IXmlReaderInput_iface);
2778         This->input = NULL;
2779     }
2780 
2781     reader_reset_parser(This);
2782 
2783     /* just reset current input */
2784     if (!input)
2785     {
2786         This->state = XmlReadState_Initial;
2787         return S_OK;
2788     }
2789 
2790     /* now try IXmlReaderInput, ISequentialStream, IStream */
2791     hr = IUnknown_QueryInterface(input, &IID_IXmlReaderInput, (void**)&readerinput);
2792     if (hr == S_OK)
2793     {
2794         if (readerinput->lpVtbl == &xmlreaderinputvtbl)
2795             This->input = impl_from_IXmlReaderInput(readerinput);
2796         else
2797         {
2798             ERR("got external IXmlReaderInput implementation: %p, vtbl=%p\n",
2799                 readerinput, readerinput->lpVtbl);
2800             IUnknown_Release(readerinput);
2801             return E_FAIL;
2802 
2803         }
2804     }
2805 
2806     if (hr != S_OK || !readerinput)
2807     {
2808         /* create IXmlReaderInput basing on supplied interface */
2809         hr = CreateXmlReaderInputWithEncodingName(input,
2810                                          This->imalloc, NULL, FALSE, NULL, &readerinput);
2811         if (hr != S_OK) return hr;
2812         This->input = impl_from_IXmlReaderInput(readerinput);
2813     }
2814 
2815     /* set stream for supplied IXmlReaderInput */
2816     hr = readerinput_query_for_stream(This->input);
2817     if (hr == S_OK)
2818     {
2819         This->state = XmlReadState_Initial;
2820         This->instate = XmlReadInState_Initial;
2821     }
2822     return hr;
2823 }
2824 
2825 static HRESULT WINAPI xmlreader_GetProperty(IXmlReader* iface, UINT property, LONG_PTR *value)
2826 {
2827     xmlreader *This = impl_from_IXmlReader(iface);
2828 
2829     TRACE("(%p)->(%s %p)\n", This, debugstr_reader_prop(property), value);
2830 
2831     if (!value) return E_INVALIDARG;
2832 
2833     switch (property)
2834     {
2835         case XmlReaderProperty_MultiLanguage:
2836             *value = (LONG_PTR)This->mlang;
2837             if (This->mlang)
2838                 IUnknown_AddRef(This->mlang);
2839             break;
2840         case XmlReaderProperty_XmlResolver:
2841             *value = (LONG_PTR)This->resolver;
2842             if (This->resolver)
2843                 IXmlResolver_AddRef(This->resolver);
2844             break;
2845         case XmlReaderProperty_DtdProcessing:
2846             *value = This->dtdmode;
2847             break;
2848         case XmlReaderProperty_ReadState:
2849             *value = This->state;
2850             break;
2851         case XmlReaderProperty_MaxElementDepth:
2852             *value = This->max_depth;
2853             break;
2854         default:
2855             FIXME("Unimplemented property (%u)\n", property);
2856             return E_NOTIMPL;
2857     }
2858 
2859     return S_OK;
2860 }
2861 
2862 static HRESULT WINAPI xmlreader_SetProperty(IXmlReader* iface, UINT property, LONG_PTR value)
2863 {
2864     xmlreader *This = impl_from_IXmlReader(iface);
2865 
2866     TRACE("(%p)->(%s 0x%lx)\n", This, debugstr_reader_prop(property), value);
2867 
2868     switch (property)
2869     {
2870         case XmlReaderProperty_MultiLanguage:
2871             if (This->mlang)
2872                 IUnknown_Release(This->mlang);
2873             This->mlang = (IUnknown*)value;
2874             if (This->mlang)
2875                 IUnknown_AddRef(This->mlang);
2876             if (This->mlang)
2877                 FIXME("Ignoring MultiLanguage %p\n", This->mlang);
2878             break;
2879         case XmlReaderProperty_XmlResolver:
2880             if (This->resolver)
2881                 IXmlResolver_Release(This->resolver);
2882             This->resolver = (IXmlResolver*)value;
2883             if (This->resolver)
2884                 IXmlResolver_AddRef(This->resolver);
2885             break;
2886         case XmlReaderProperty_DtdProcessing:
2887             if (value < 0 || value > _DtdProcessing_Last) return E_INVALIDARG;
2888             This->dtdmode = value;
2889             break;
2890         case XmlReaderProperty_MaxElementDepth:
2891             This->max_depth = value;
2892             break;
2893         default:
2894             FIXME("Unimplemented property (%u)\n", property);
2895             return E_NOTIMPL;
2896     }
2897 
2898     return S_OK;
2899 }
2900 
2901 static HRESULT WINAPI xmlreader_Read(IXmlReader* iface, XmlNodeType *nodetype)
2902 {
2903     xmlreader *This = impl_from_IXmlReader(iface);
2904     XmlNodeType oldtype = This->nodetype;
2905     XmlNodeType type;
2906     HRESULT hr;
2907 
2908     TRACE("(%p)->(%p)\n", This, nodetype);
2909 
2910     if (!nodetype)
2911         nodetype = &type;
2912 
2913     switch (This->state)
2914     {
2915     case XmlReadState_Closed:
2916         hr = S_FALSE;
2917         break;
2918     case XmlReadState_Error:
2919         hr = This->error;
2920         break;
2921     default:
2922         hr = reader_parse_nextnode(This);
2923         if (SUCCEEDED(hr) && oldtype == XmlNodeType_None && This->nodetype != oldtype)
2924             This->state = XmlReadState_Interactive;
2925 
2926         if (FAILED(hr))
2927         {
2928             This->state = XmlReadState_Error;
2929             This->nodetype = XmlNodeType_None;
2930             This->depth = 0;
2931             This->error = hr;
2932         }
2933     }
2934 
2935     TRACE("node type %s\n", debugstr_nodetype(This->nodetype));
2936     *nodetype = This->nodetype;
2937 
2938     return hr;
2939 }
2940 
2941 static HRESULT WINAPI xmlreader_GetNodeType(IXmlReader* iface, XmlNodeType *node_type)
2942 {
2943     xmlreader *This = impl_from_IXmlReader(iface);
2944 
2945     TRACE("(%p)->(%p)\n", This, node_type);
2946 
2947     if (!node_type)
2948         return E_INVALIDARG;
2949 
2950     *node_type = reader_get_nodetype(This);
2951     return This->state == XmlReadState_Closed ? S_FALSE : S_OK;
2952 }
2953 
2954 static void reader_set_current_attribute(xmlreader *reader, struct attribute *attr)
2955 {
2956     reader->attr = attr;
2957     reader->chunk_read_off = 0;
2958     reader_set_strvalue(reader, StringValue_Prefix, &attr->prefix);
2959     reader_set_strvalue(reader, StringValue_QualifiedName, &attr->qname);
2960     reader_set_strvalue(reader, StringValue_Value, &attr->value);
2961 }
2962 
2963 static HRESULT reader_move_to_first_attribute(xmlreader *reader)
2964 {
2965     if (!reader->attr_count)
2966         return S_FALSE;
2967 
2968     if (!reader->attr)
2969         reader_inc_depth(reader);
2970 
2971     reader_set_current_attribute(reader, LIST_ENTRY(list_head(&reader->attrs), struct attribute, entry));
2972 
2973     return S_OK;
2974 }
2975 
2976 static HRESULT WINAPI xmlreader_MoveToFirstAttribute(IXmlReader* iface)
2977 {
2978     xmlreader *This = impl_from_IXmlReader(iface);
2979 
2980     TRACE("(%p)\n", This);
2981 
2982     return reader_move_to_first_attribute(This);
2983 }
2984 
2985 static HRESULT WINAPI xmlreader_MoveToNextAttribute(IXmlReader* iface)
2986 {
2987     xmlreader *This = impl_from_IXmlReader(iface);
2988     const struct list *next;
2989 
2990     TRACE("(%p)\n", This);
2991 
2992     if (!This->attr_count) return S_FALSE;
2993 
2994     if (!This->attr)
2995         return reader_move_to_first_attribute(This);
2996 
2997     next = list_next(&This->attrs, &This->attr->entry);
2998     if (next)
2999         reader_set_current_attribute(This, LIST_ENTRY(next, struct attribute, entry));
3000 
3001     return next ? S_OK : S_FALSE;
3002 }
3003 
3004 static void reader_get_attribute_ns_uri(xmlreader *reader, struct attribute *attr, const WCHAR **uri, UINT *len)
3005 {
3006     static const WCHAR xmlns_uriW[] = {'h','t','t','p',':','/','/','w','w','w','.','w','3','.','o','r','g','/',
3007             '2','0','0','0','/','x','m','l','n','s','/',0};
3008     static const WCHAR xml_uriW[] = {'h','t','t','p',':','/','/','w','w','w','.','w','3','.','o','r','g','/',
3009             'X','M','L','/','1','9','9','8','/','n','a','m','e','s','p','a','c','e',0};
3010 
3011     /* Check for reserved prefixes first */
3012     if ((strval_eq(reader, &attr->prefix, &strval_empty) && strval_eq(reader, &attr->localname, &strval_xmlns)) ||
3013             strval_eq(reader, &attr->prefix, &strval_xmlns))
3014     {
3015         *uri = xmlns_uriW;
3016         *len = ARRAY_SIZE(xmlns_uriW) - 1;
3017     }
3018     else if (strval_eq(reader, &attr->prefix, &strval_xml))
3019     {
3020         *uri = xml_uriW;
3021         *len = ARRAY_SIZE(xml_uriW) - 1;
3022     }
3023     else
3024     {
3025         *uri = NULL;
3026         *len = 0;
3027     }
3028 
3029     if (!*uri)
3030     {
3031         struct ns *ns;
3032 
3033         if ((ns = reader_lookup_ns(reader, &attr->prefix)))
3034         {
3035             *uri = ns->uri.str;
3036             *len = ns->uri.len;
3037         }
3038         else
3039         {
3040             *uri = emptyW;
3041             *len = 0;
3042         }
3043     }
3044 }
3045 
3046 static void reader_get_attribute_local_name(xmlreader *reader, struct attribute *attr, const WCHAR **name, UINT *len)
3047 {
3048     if (attr->flags & ATTRIBUTE_DEFAULT_NS_DEFINITION)
3049     {
3050         *name = xmlnsW;
3051         *len = 5;
3052     }
3053     else if (attr->flags & ATTRIBUTE_NS_DEFINITION)
3054     {
3055         const struct ns *ns = reader_lookup_ns(reader, &attr->localname);
3056         *name = ns->prefix.str;
3057         *len = ns->prefix.len;
3058     }
3059     else
3060     {
3061         *name = attr->localname.str;
3062         *len = attr->localname.len;
3063     }
3064 }
3065 
3066 static HRESULT WINAPI xmlreader_MoveToAttributeByName(IXmlReader* iface,
3067     const WCHAR *local_name, const WCHAR *namespace_uri)
3068 {
3069     xmlreader *This = impl_from_IXmlReader(iface);
3070     UINT target_name_len, target_uri_len;
3071     struct attribute *attr;
3072 
3073     TRACE("(%p)->(%s %s)\n", This, debugstr_w(local_name), debugstr_w(namespace_uri));
3074 
3075     if (!local_name)
3076         return E_INVALIDARG;
3077 
3078     if (!This->attr_count)
3079         return S_FALSE;
3080 
3081     if (!namespace_uri)
3082         namespace_uri = emptyW;
3083 
3084     target_name_len = strlenW(local_name);
3085     target_uri_len = strlenW(namespace_uri);
3086 
3087     LIST_FOR_EACH_ENTRY(attr, &This->attrs, struct attribute, entry)
3088     {
3089         UINT name_len, uri_len;
3090         const WCHAR *name, *uri;
3091 
3092         reader_get_attribute_local_name(This, attr, &name, &name_len);
3093         reader_get_attribute_ns_uri(This, attr, &uri, &uri_len);
3094 
3095         if (name_len == target_name_len && uri_len == target_uri_len &&
3096                 !strcmpW(name, local_name) && !strcmpW(uri, namespace_uri))
3097         {
3098             reader_set_current_attribute(This, attr);
3099             return S_OK;
3100         }
3101     }
3102 
3103     return S_FALSE;
3104 }
3105 
3106 static HRESULT WINAPI xmlreader_MoveToElement(IXmlReader* iface)
3107 {
3108     xmlreader *This = impl_from_IXmlReader(iface);
3109 
3110     TRACE("(%p)\n", This);
3111 
3112     if (!This->attr_count) return S_FALSE;
3113 
3114     if (This->attr)
3115         reader_dec_depth(This);
3116 
3117     This->attr = NULL;
3118 
3119     /* FIXME: support other node types with 'attributes' like DTD */
3120     if (This->is_empty_element) {
3121         reader_set_strvalue(This, StringValue_Prefix, &This->empty_element.prefix);
3122         reader_set_strvalue(This, StringValue_QualifiedName, &This->empty_element.qname);
3123     }
3124     else {
3125         struct element *element = LIST_ENTRY(list_head(&This->elements), struct element, entry);
3126         if (element) {
3127             reader_set_strvalue(This, StringValue_Prefix, &element->prefix);
3128             reader_set_strvalue(This, StringValue_QualifiedName, &element->qname);
3129         }
3130     }
3131     This->chunk_read_off = 0;
3132     reader_set_strvalue(This, StringValue_Value, &strval_empty);
3133 
3134     return S_OK;
3135 }
3136 
3137 static HRESULT WINAPI xmlreader_GetQualifiedName(IXmlReader* iface, LPCWSTR *name, UINT *len)
3138 {
3139     xmlreader *This = impl_from_IXmlReader(iface);
3140     struct attribute *attribute = This->attr;
3141     struct element *element;
3142     UINT length;
3143 
3144     TRACE("(%p)->(%p %p)\n", This, name, len);
3145 
3146     if (!len)
3147         len = &length;
3148 
3149     switch (reader_get_nodetype(This))
3150     {
3151     case XmlNodeType_Text:
3152     case XmlNodeType_CDATA:
3153     case XmlNodeType_Comment:
3154     case XmlNodeType_Whitespace:
3155         *name = emptyW;
3156         *len = 0;
3157         break;
3158     case XmlNodeType_Element:
3159     case XmlNodeType_EndElement:
3160         element = reader_get_element(This);
3161         if (element->prefix.len)
3162         {
3163             *name = element->qname.str;
3164             *len = element->qname.len;
3165         }
3166         else
3167         {
3168             *name = element->localname.str;
3169             *len = element->localname.len;
3170         }
3171         break;
3172     case XmlNodeType_Attribute:
3173         if (attribute->flags & ATTRIBUTE_DEFAULT_NS_DEFINITION)
3174         {
3175             *name = xmlnsW;
3176             *len = 5;
3177         } else if (attribute->prefix.len)
3178         {
3179             *name = This->strvalues[StringValue_QualifiedName].str;
3180             *len = This->strvalues[StringValue_QualifiedName].len;
3181         }
3182         else
3183         {
3184             *name = attribute->localname.str;
3185             *len = attribute->localname.len;
3186         }
3187         break;
3188     default:
3189         *name = This->strvalues[StringValue_QualifiedName].str;
3190         *len = This->strvalues[StringValue_QualifiedName].len;
3191         break;
3192     }
3193 
3194     return S_OK;
3195 }
3196 
3197 static struct ns *reader_lookup_nsdef(xmlreader *reader)
3198 {
3199     if (list_empty(&reader->nsdef))
3200         return NULL;
3201 
3202     return LIST_ENTRY(list_head(&reader->nsdef), struct ns, entry);
3203 }
3204 
3205 static HRESULT WINAPI xmlreader_GetNamespaceUri(IXmlReader* iface, const WCHAR **uri, UINT *len)
3206 {
3207     xmlreader *This = impl_from_IXmlReader(iface);
3208     const strval *prefix = &This->strvalues[StringValue_Prefix];
3209     XmlNodeType nodetype;
3210     struct ns *ns;
3211     UINT length;
3212 
3213     TRACE("(%p %p %p)\n", iface, uri, len);
3214 
3215     if (!len)
3216         len = &length;
3217 
3218     switch ((nodetype = reader_get_nodetype(This)))
3219     {
3220     case XmlNodeType_Attribute:
3221         reader_get_attribute_ns_uri(This, This->attr, uri, len);
3222         break;
3223     case XmlNodeType_Element:
3224     case XmlNodeType_EndElement:
3225         {
3226             ns = reader_lookup_ns(This, prefix);
3227 
3228             /* pick top default ns if any */
3229             if (!ns)
3230                 ns = reader_lookup_nsdef(This);
3231 
3232             if (ns) {
3233                 *uri = ns->uri.str;
3234                 *len = ns->uri.len;
3235             }
3236             else {
3237                 *uri = emptyW;
3238                 *len = 0;
3239             }
3240         }
3241         break;
3242     case XmlNodeType_Text:
3243     case XmlNodeType_CDATA:
3244     case XmlNodeType_ProcessingInstruction:
3245     case XmlNodeType_Comment:
3246     case XmlNodeType_Whitespace:
3247     case XmlNodeType_XmlDeclaration:
3248         *uri = emptyW;
3249         *len = 0;
3250         break;
3251     default:
3252         FIXME("Unhandled node type %d\n", nodetype);
3253         *uri = NULL;
3254         *len = 0;
3255         return E_NOTIMPL;
3256     }
3257 
3258     return S_OK;
3259 }
3260 
3261 static HRESULT WINAPI xmlreader_GetLocalName(IXmlReader* iface, LPCWSTR *name, UINT *len)
3262 {
3263     xmlreader *This = impl_from_IXmlReader(iface);
3264     struct element *element;
3265     UINT length;
3266 
3267     TRACE("(%p)->(%p %p)\n", This, name, len);
3268 
3269     if (!len)
3270         len = &length;
3271 
3272     switch (reader_get_nodetype(This))
3273     {
3274     case XmlNodeType_Text:
3275     case XmlNodeType_CDATA:
3276     case XmlNodeType_Comment:
3277     case XmlNodeType_Whitespace:
3278         *name = emptyW;
3279         *len = 0;
3280         break;
3281     case XmlNodeType_Element:
3282     case XmlNodeType_EndElement:
3283         element = reader_get_element(This);
3284         *name = element->localname.str;
3285         *len = element->localname.len;
3286         break;
3287     case XmlNodeType_Attribute:
3288         reader_get_attribute_local_name(This, This->attr, name, len);
3289         break;
3290     default:
3291         *name = This->strvalues[StringValue_LocalName].str;
3292         *len = This->strvalues[StringValue_LocalName].len;
3293         break;
3294     }
3295 
3296     return S_OK;
3297 }
3298 
3299 static HRESULT WINAPI xmlreader_GetPrefix(IXmlReader* iface, const WCHAR **ret, UINT *len)
3300 {
3301     xmlreader *This = impl_from_IXmlReader(iface);
3302     XmlNodeType nodetype;
3303     UINT length;
3304 
3305     TRACE("(%p)->(%p %p)\n", This, ret, len);
3306 
3307     if (!len)
3308         len = &length;
3309 
3310     *ret = emptyW;
3311     *len = 0;
3312 
3313     switch ((nodetype = reader_get_nodetype(This)))
3314     {
3315     case XmlNodeType_Element:
3316     case XmlNodeType_EndElement:
3317     case XmlNodeType_Attribute:
3318     {
3319         const strval *prefix = &This->strvalues[StringValue_Prefix];
3320         struct ns *ns;
3321 
3322         if (strval_eq(This, prefix, &strval_xml))
3323         {
3324             *ret = xmlW;
3325             *len = 3;
3326         }
3327         else if (strval_eq(This, prefix, &strval_xmlns))
3328         {
3329             *ret = xmlnsW;
3330             *len = 5;
3331         }
3332         else if ((ns = reader_lookup_ns(This, prefix)))
3333         {
3334             *ret = ns->prefix.str;
3335             *len = ns->prefix.len;
3336         }
3337 
3338         break;
3339     }
3340     default:
3341         ;
3342     }
3343 
3344     return S_OK;
3345 }
3346 
3347 static const strval *reader_get_value(xmlreader *reader, BOOL ensure_allocated)
3348 {
3349     strval *val;
3350 
3351     switch (reader_get_nodetype(reader))
3352     {
3353     case XmlNodeType_XmlDeclaration:
3354     case XmlNodeType_EndElement:
3355     case XmlNodeType_None:
3356         return &strval_empty;
3357     case XmlNodeType_Attribute:
3358         /* For namespace definition attributes return values from namespace list */
3359         if (reader->attr->flags & (ATTRIBUTE_NS_DEFINITION | ATTRIBUTE_DEFAULT_NS_DEFINITION))
3360         {
3361             struct ns *ns;
3362 
3363             if (!(ns = reader_lookup_ns(reader, &reader->attr->localname)))
3364                 ns = reader_lookup_nsdef(reader);
3365 
3366             return &ns->uri;
3367         }
3368         return &reader->attr->value;
3369     default:
3370         break;
3371     }
3372 
3373     val = &reader->strvalues[StringValue_Value];
3374     if (!val->str && ensure_allocated)
3375     {
3376         WCHAR *ptr = reader_alloc(reader, (val->len+1)*sizeof(WCHAR));
3377         if (!ptr) return NULL;
3378         memcpy(ptr, reader_get_strptr(reader, val), val->len*sizeof(WCHAR));
3379         ptr[val->len] = 0;
3380         val->str = ptr;
3381     }
3382 
3383     return val;
3384 }
3385 
3386 static HRESULT WINAPI xmlreader_GetValue(IXmlReader* iface, const WCHAR **value, UINT *len)
3387 {
3388     xmlreader *reader = impl_from_IXmlReader(iface);
3389     const strval *val = &reader->strvalues[StringValue_Value];
3390     UINT off;
3391 
3392     TRACE("(%p)->(%p %p)\n", reader, value, len);
3393 
3394     *value = NULL;
3395 
3396     if ((reader->nodetype == XmlNodeType_Comment && !val->str && !val->len) || is_reader_pending(reader))
3397     {
3398         XmlNodeType type;
3399         HRESULT hr;
3400 
3401         hr = IXmlReader_Read(iface, &type);
3402         if (FAILED(hr)) return hr;
3403 
3404         /* return if still pending, partially read values are not reported */
3405         if (is_reader_pending(reader)) return E_PENDING;
3406     }
3407 
3408     val = reader_get_value(reader, TRUE);
3409     if (!val)
3410         return E_OUTOFMEMORY;
3411 
3412     off = abs(reader->chunk_read_off);
3413     assert(off <= val->len);
3414     *value = val->str + off;
3415     if (len) *len = val->len - off;
3416     reader->chunk_read_off = -off;
3417     return S_OK;
3418 }
3419 
3420 static HRESULT WINAPI xmlreader_ReadValueChunk(IXmlReader* iface, WCHAR *buffer, UINT chunk_size, UINT *read)
3421 {
3422     xmlreader *reader = impl_from_IXmlReader(iface);
3423     const strval *val;
3424     UINT len = 0;
3425 
3426     TRACE("(%p)->(%p %u %p)\n", reader, buffer, chunk_size, read);
3427 
3428     val = reader_get_value(reader, FALSE);
3429 
3430     /* If value is already read by GetValue, chunk_read_off is negative and chunked reads are not possible. */
3431     if (reader->chunk_read_off >= 0)
3432     {
3433         assert(reader->chunk_read_off <= val->len);
3434         len = min(val->len - reader->chunk_read_off, chunk_size);
3435     }
3436     if (read) *read = len;
3437 
3438     if (len)
3439     {
3440         memcpy(buffer, reader_get_strptr(reader, val) + reader->chunk_read_off, len*sizeof(WCHAR));
3441         reader->chunk_read_off += len;
3442     }
3443 
3444     return len || !chunk_size ? S_OK : S_FALSE;
3445 }
3446 
3447 static HRESULT WINAPI xmlreader_GetBaseUri(IXmlReader* iface,
3448                                            LPCWSTR *baseUri,
3449                                            UINT *baseUri_length)
3450 {
3451     FIXME("(%p %p %p): stub\n", iface, baseUri, baseUri_length);
3452     return E_NOTIMPL;
3453 }
3454 
3455 static BOOL WINAPI xmlreader_IsDefault(IXmlReader* iface)
3456 {
3457     FIXME("(%p): stub\n", iface);
3458     return FALSE;
3459 }
3460 
3461 static BOOL WINAPI xmlreader_IsEmptyElement(IXmlReader* iface)
3462 {
3463     xmlreader *This = impl_from_IXmlReader(iface);
3464     TRACE("(%p)\n", This);
3465     /* Empty elements are not placed in stack, it's stored as a global reader flag that makes sense
3466        when current node is start tag of an element */
3467     return (reader_get_nodetype(This) == XmlNodeType_Element) ? This->is_empty_element : FALSE;
3468 }
3469 
3470 static HRESULT WINAPI xmlreader_GetLineNumber(IXmlReader* iface, UINT *line_number)
3471 {
3472     xmlreader *This = impl_from_IXmlReader(iface);
3473     const struct element *element;
3474 
3475     TRACE("(%p %p)\n", This, line_number);
3476 
3477     if (!line_number)
3478         return E_INVALIDARG;
3479 
3480     switch (reader_get_nodetype(This))
3481     {
3482     case XmlNodeType_Element:
3483     case XmlNodeType_EndElement:
3484         element = reader_get_element(This);
3485         *line_number = element->position.line_number;
3486         break;
3487     case XmlNodeType_Attribute:
3488         *line_number = This->attr->position.line_number;
3489         break;
3490     case XmlNodeType_Whitespace:
3491     case XmlNodeType_XmlDeclaration:
3492         *line_number = This->empty_element.position.line_number;
3493         break;
3494     default:
3495         *line_number = This->position.line_number;
3496         break;
3497     }
3498 
3499     return This->state == XmlReadState_Closed ? S_FALSE : S_OK;
3500 }
3501 
3502 static HRESULT WINAPI xmlreader_GetLinePosition(IXmlReader* iface, UINT *line_position)
3503 {
3504     xmlreader *This = impl_from_IXmlReader(iface);
3505     const struct element *element;
3506 
3507     TRACE("(%p %p)\n", This, line_position);
3508 
3509     if (!line_position)
3510         return E_INVALIDARG;
3511 
3512     switch (reader_get_nodetype(This))
3513     {
3514     case XmlNodeType_Element:
3515     case XmlNodeType_EndElement:
3516         element = reader_get_element(This);
3517         *line_position = element->position.line_position;
3518         break;
3519     case XmlNodeType_Attribute:
3520         *line_position = This->attr->position.line_position;
3521         break;
3522     case XmlNodeType_Whitespace:
3523     case XmlNodeType_XmlDeclaration:
3524         *line_position = This->empty_element.position.line_position;
3525         break;
3526     default:
3527         *line_position = This->position.line_position;
3528         break;
3529     }
3530 
3531     return This->state == XmlReadState_Closed ? S_FALSE : S_OK;
3532 }
3533 
3534 static HRESULT WINAPI xmlreader_GetAttributeCount(IXmlReader* iface, UINT *count)
3535 {
3536     xmlreader *This = impl_from_IXmlReader(iface);
3537 
3538     TRACE("(%p)->(%p)\n", This, count);
3539 
3540     if (!count) return E_INVALIDARG;
3541 
3542     *count = This->attr_count;
3543     return S_OK;
3544 }
3545 
3546 static HRESULT WINAPI xmlreader_GetDepth(IXmlReader* iface, UINT *depth)
3547 {
3548     xmlreader *This = impl_from_IXmlReader(iface);
3549     TRACE("(%p)->(%p)\n", This, depth);
3550     *depth = This->depth;
3551     return S_OK;
3552 }
3553 
3554 static BOOL WINAPI xmlreader_IsEOF(IXmlReader* iface)
3555 {
3556     xmlreader *This = impl_from_IXmlReader(iface);
3557     TRACE("(%p)\n", iface);
3558     return This->state == XmlReadState_EndOfFile;
3559 }
3560 
/* Method table installed into every reader created by CreateXmlReader().
   The initializer is positional, so the entries must stay in the order in which
   struct IXmlReaderVtbl declares its members. */
static const struct IXmlReaderVtbl xmlreader_vtbl =
{
    /* lifetime and interface discovery */
    xmlreader_QueryInterface,
    xmlreader_AddRef,
    xmlreader_Release,
    /* input and configuration */
    xmlreader_SetInput,
    xmlreader_GetProperty,
    xmlreader_SetProperty,
    /* node navigation */
    xmlreader_Read,
    xmlreader_GetNodeType,
    xmlreader_MoveToFirstAttribute,
    xmlreader_MoveToNextAttribute,
    xmlreader_MoveToAttributeByName,
    xmlreader_MoveToElement,
    /* names and values of the current node */
    xmlreader_GetQualifiedName,
    xmlreader_GetNamespaceUri,
    xmlreader_GetLocalName,
    xmlreader_GetPrefix,
    xmlreader_GetValue,
    xmlreader_ReadValueChunk,
    xmlreader_GetBaseUri,
    xmlreader_IsDefault,
    xmlreader_IsEmptyElement,
    /* position and state queries */
    xmlreader_GetLineNumber,
    xmlreader_GetLinePosition,
    xmlreader_GetAttributeCount,
    xmlreader_GetDepth,
    xmlreader_IsEOF
};
3590 
3591 /** IXmlReaderInput **/
3592 static HRESULT WINAPI xmlreaderinput_QueryInterface(IXmlReaderInput *iface, REFIID riid, void** ppvObject)
3593 {
3594     xmlreaderinput *This = impl_from_IXmlReaderInput(iface);
3595 
3596     TRACE("(%p)->(%s %p)\n", This, debugstr_guid(riid), ppvObject);
3597 
3598     if (IsEqualGUID(riid, &IID_IXmlReaderInput) ||
3599         IsEqualGUID(riid, &IID_IUnknown))
3600     {
3601         *ppvObject = iface;
3602     }
3603     else
3604     {
3605         WARN("interface %s not implemented\n", debugstr_guid(riid));
3606         *ppvObject = NULL;
3607         return E_NOINTERFACE;
3608     }
3609 
3610     IUnknown_AddRef(iface);
3611 
3612     return S_OK;
3613 }
3614 
3615 static ULONG WINAPI xmlreaderinput_AddRef(IXmlReaderInput *iface)
3616 {
3617     xmlreaderinput *This = impl_from_IXmlReaderInput(iface);
3618     ULONG ref = InterlockedIncrement(&This->ref);
3619     TRACE("(%p)->(%d)\n", This, ref);
3620     return ref;
3621 }
3622 
3623 static ULONG WINAPI xmlreaderinput_Release(IXmlReaderInput *iface)
3624 {
3625     xmlreaderinput *This = impl_from_IXmlReaderInput(iface);
3626     LONG ref = InterlockedDecrement(&This->ref);
3627 
3628     TRACE("(%p)->(%d)\n", This, ref);
3629 
3630     if (ref == 0)
3631     {
3632         IMalloc *imalloc = This->imalloc;
3633         if (This->input) IUnknown_Release(This->input);
3634         if (This->stream) ISequentialStream_Release(This->stream);
3635         if (This->buffer) free_input_buffer(This->buffer);
3636         readerinput_free(This, This->baseuri);
3637         readerinput_free(This, This);
3638         if (imalloc) IMalloc_Release(imalloc);
3639     }
3640 
3641     return ref;
3642 }
3643 
/* Method table of the built-in reader input object. It only provides the three
   IUnknown methods; CreateXmlReaderInputWithEncodingName() installs it into the
   objects it creates. The initializer is positional. */
static const struct IUnknownVtbl xmlreaderinputvtbl =
{
    xmlreaderinput_QueryInterface,
    xmlreaderinput_AddRef,
    xmlreaderinput_Release
};
3650 
3651 HRESULT WINAPI CreateXmlReader(REFIID riid, void **obj, IMalloc *imalloc)
3652 {
3653     xmlreader *reader;
3654     HRESULT hr;
3655     int i;
3656 
3657     TRACE("(%s, %p, %p)\n", wine_dbgstr_guid(riid), obj, imalloc);
3658 
3659     if (imalloc)
3660         reader = IMalloc_Alloc(imalloc, sizeof(*reader));
3661     else
3662         reader = heap_alloc(sizeof(*reader));
3663     if (!reader)
3664         return E_OUTOFMEMORY;
3665 
3666     memset(reader, 0, sizeof(*reader));
3667     reader->IXmlReader_iface.lpVtbl = &xmlreader_vtbl;
3668     reader->ref = 1;
3669     reader->state = XmlReadState_Closed;
3670     reader->instate = XmlReadInState_Initial;
3671     reader->resumestate = XmlReadResumeState_Initial;
3672     reader->dtdmode = DtdProcessing_Prohibit;
3673     reader->imalloc = imalloc;
3674     if (imalloc) IMalloc_AddRef(imalloc);
3675     reader->nodetype = XmlNodeType_None;
3676     list_init(&reader->attrs);
3677     list_init(&reader->nsdef);
3678     list_init(&reader->ns);
3679     list_init(&reader->elements);
3680     reader->max_depth = 256;
3681 
3682     reader->chunk_read_off = 0;
3683     for (i = 0; i < StringValue_Last; i++)
3684         reader->strvalues[i] = strval_empty;
3685 
3686     hr = IXmlReader_QueryInterface(&reader->IXmlReader_iface, riid, obj);
3687     IXmlReader_Release(&reader->IXmlReader_iface);
3688 
3689     TRACE("returning iface %p, hr %#x\n", *obj, hr);
3690 
3691     return hr;
3692 }
3693 
3694 HRESULT WINAPI CreateXmlReaderInputWithEncodingName(IUnknown *stream,
3695                                                     IMalloc *imalloc,
3696                                                     LPCWSTR encoding,
3697                                                     BOOL hint,
3698                                                     LPCWSTR base_uri,
3699                                                     IXmlReaderInput **ppInput)
3700 {
3701     xmlreaderinput *readerinput;
3702     HRESULT hr;
3703 
3704     TRACE("%p %p %s %d %s %p\n", stream, imalloc, wine_dbgstr_w(encoding),
3705                                        hint, wine_dbgstr_w(base_uri), ppInput);
3706 
3707     if (!stream || !ppInput) return E_INVALIDARG;
3708 
3709     if (imalloc)
3710         readerinput = IMalloc_Alloc(imalloc, sizeof(*readerinput));
3711     else
3712         readerinput = heap_alloc(sizeof(*readerinput));
3713     if(!readerinput) return E_OUTOFMEMORY;
3714 
3715     readerinput->IXmlReaderInput_iface.lpVtbl = &xmlreaderinputvtbl;
3716     readerinput->ref = 1;
3717     readerinput->imalloc = imalloc;
3718     readerinput->stream = NULL;
3719     if (imalloc) IMalloc_AddRef(imalloc);
3720     readerinput->encoding = parse_encoding_name(encoding, -1);
3721     readerinput->hint = hint;
3722     readerinput->baseuri = readerinput_strdupW(readerinput, base_uri);
3723     readerinput->pending = 0;
3724 
3725     hr = alloc_input_buffer(readerinput);
3726     if (hr != S_OK)
3727     {
3728         readerinput_free(readerinput, readerinput->baseuri);
3729         readerinput_free(readerinput, readerinput);
3730         if (imalloc) IMalloc_Release(imalloc);
3731         return hr;
3732     }
3733     IUnknown_QueryInterface(stream, &IID_IUnknown, (void**)&readerinput->input);
3734 
3735     *ppInput = &readerinput->IXmlReaderInput_iface;
3736 
3737     TRACE("returning iface %p\n", *ppInput);
3738 
3739     return S_OK;
3740 }
3741