1 /* 2 * IXmlReader implementation 3 * 4 * Copyright 2010, 2012-2013, 2016-2017 Nikolay Sivov 5 * 6 * This library is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Lesser General Public 8 * License as published by the Free Software Foundation; either 9 * version 2.1 of the License, or (at your option) any later version. 10 * 11 * This library is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public 17 * License along with this library; if not, write to the Free Software 18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA 19 */ 20 21 #define COBJMACROS 22 23 #include <stdio.h> 24 #include <stdarg.h> 25 #include <assert.h> 26 #include "windef.h" 27 #include "winbase.h" 28 #include "initguid.h" 29 #include "objbase.h" 30 #include "xmllite.h" 31 #include "xmllite_private.h" 32 33 #include "wine/debug.h" 34 #include "wine/list.h" 35 #include "wine/unicode.h" 36 37 WINE_DEFAULT_DEBUG_CHANNEL(xmllite); 38 39 /* not defined in public headers */ 40 DEFINE_GUID(IID_IXmlReaderInput, 0x0b3ccc9b, 0x9214, 0x428b, 0xa2, 0xae, 0xef, 0x3a, 0xa8, 0x71, 0xaf, 0xda); 41 42 typedef enum 43 { 44 XmlReadInState_Initial, 45 XmlReadInState_XmlDecl, 46 XmlReadInState_Misc_DTD, 47 XmlReadInState_DTD, 48 XmlReadInState_DTD_Misc, 49 XmlReadInState_Element, 50 XmlReadInState_Content, 51 XmlReadInState_MiscEnd, /* optional Misc at the end of a document */ 52 XmlReadInState_Eof 53 } XmlReaderInternalState; 54 55 /* This state denotes where parsing was interrupted by input problem. 56 Reader resumes parsing using this information. 
*/ 57 typedef enum 58 { 59 XmlReadResumeState_Initial, 60 XmlReadResumeState_PITarget, 61 XmlReadResumeState_PIBody, 62 XmlReadResumeState_CDATA, 63 XmlReadResumeState_Comment, 64 XmlReadResumeState_STag, 65 XmlReadResumeState_CharData, 66 XmlReadResumeState_Whitespace 67 } XmlReaderResumeState; 68 69 /* saved pointer index to resume from particular input position */ 70 typedef enum 71 { 72 XmlReadResume_Name, /* PITarget, name for NCName, prefix for QName */ 73 XmlReadResume_Local, /* local for QName */ 74 XmlReadResume_Body, /* PI body, comment text, CDATA text, CharData text */ 75 XmlReadResume_Last 76 } XmlReaderResume; 77 78 typedef enum 79 { 80 StringValue_LocalName, 81 StringValue_Prefix, 82 StringValue_QualifiedName, 83 StringValue_Value, 84 StringValue_Last 85 } XmlReaderStringValue; 86 87 static const WCHAR usasciiW[] = {'U','S','-','A','S','C','I','I',0}; 88 static const WCHAR utf16W[] = {'U','T','F','-','1','6',0}; 89 static const WCHAR utf8W[] = {'U','T','F','-','8',0}; 90 91 static const WCHAR dblquoteW[] = {'\"',0}; 92 static const WCHAR quoteW[] = {'\'',0}; 93 static const WCHAR ltW[] = {'<',0}; 94 static const WCHAR gtW[] = {'>',0}; 95 static const WCHAR commentW[] = {'<','!','-','-',0}; 96 static const WCHAR piW[] = {'<','?',0}; 97 98 static BOOL is_namestartchar(WCHAR ch); 99 100 static const char *debugstr_nodetype(XmlNodeType nodetype) 101 { 102 static const char * const type_names[] = 103 { 104 "None", 105 "Element", 106 "Attribute", 107 "Text", 108 "CDATA", 109 "", 110 "", 111 "ProcessingInstruction", 112 "Comment", 113 "", 114 "DocumentType", 115 "", 116 "", 117 "Whitespace", 118 "", 119 "EndElement", 120 "", 121 "XmlDeclaration" 122 }; 123 124 if (nodetype > _XmlNodeType_Last) 125 return wine_dbg_sprintf("unknown type=%d", nodetype); 126 127 return type_names[nodetype]; 128 } 129 130 static const char *debugstr_reader_prop(XmlReaderProperty prop) 131 { 132 static const char * const prop_names[] = 133 { 134 "MultiLanguage", 135 
"ConformanceLevel", 136 "RandomAccess", 137 "XmlResolver", 138 "DtdProcessing", 139 "ReadState", 140 "MaxElementDepth", 141 "MaxEntityExpansion" 142 }; 143 144 if (prop > _XmlReaderProperty_Last) 145 return wine_dbg_sprintf("unknown property=%d", prop); 146 147 return prop_names[prop]; 148 } 149 150 struct xml_encoding_data 151 { 152 const WCHAR *name; 153 xml_encoding enc; 154 UINT cp; 155 }; 156 157 static const struct xml_encoding_data xml_encoding_map[] = { 158 { usasciiW, XmlEncoding_USASCII, 20127 }, 159 { utf16W, XmlEncoding_UTF16, 1200 }, 160 { utf8W, XmlEncoding_UTF8, CP_UTF8 }, 161 }; 162 163 const WCHAR *get_encoding_name(xml_encoding encoding) 164 { 165 return xml_encoding_map[encoding].name; 166 } 167 168 xml_encoding get_encoding_from_codepage(UINT codepage) 169 { 170 int i; 171 for (i = 0; i < ARRAY_SIZE(xml_encoding_map); i++) 172 { 173 if (xml_encoding_map[i].cp == codepage) return xml_encoding_map[i].enc; 174 } 175 return XmlEncoding_Unknown; 176 } 177 178 typedef struct 179 { 180 char *data; 181 UINT cur; 182 unsigned int allocated; 183 unsigned int written; 184 BOOL prev_cr; 185 } encoded_buffer; 186 187 typedef struct input_buffer input_buffer; 188 189 typedef struct 190 { 191 IXmlReaderInput IXmlReaderInput_iface; 192 LONG ref; 193 /* reference passed on IXmlReaderInput creation, is kept when input is created */ 194 IUnknown *input; 195 IMalloc *imalloc; 196 xml_encoding encoding; 197 BOOL hint; 198 WCHAR *baseuri; 199 /* stream reference set after SetInput() call from reader, 200 stored as sequential stream, cause currently 201 optimizations possible with IStream aren't implemented */ 202 ISequentialStream *stream; 203 input_buffer *buffer; 204 unsigned int pending : 1; 205 } xmlreaderinput; 206 207 static const struct IUnknownVtbl xmlreaderinputvtbl; 208 209 /* Structure to hold parsed string of specific length. 210 211 Reader stores node value as 'start' pointer, on request 212 a null-terminated version of it is allocated. 
213 214 To init a strval variable use reader_init_strval(), 215 to set strval as a reader value use reader_set_strval(). 216 */ 217 typedef struct 218 { 219 WCHAR *str; /* allocated null-terminated string */ 220 UINT len; /* length in WCHARs, altered after ReadValueChunk */ 221 UINT start; /* input position where value starts */ 222 } strval; 223 224 static WCHAR emptyW[] = {0}; 225 static WCHAR xmlW[] = {'x','m','l',0}; 226 static WCHAR xmlnsW[] = {'x','m','l','n','s',0}; 227 static const strval strval_empty = { emptyW }; 228 static const strval strval_xml = { xmlW, 3 }; 229 static const strval strval_xmlns = { xmlnsW, 5 }; 230 231 struct reader_position 232 { 233 UINT line_number; 234 UINT line_position; 235 }; 236 237 enum attribute_flags 238 { 239 ATTRIBUTE_NS_DEFINITION = 0x1, 240 ATTRIBUTE_DEFAULT_NS_DEFINITION = 0x2, 241 }; 242 243 struct attribute 244 { 245 struct list entry; 246 strval prefix; 247 strval localname; 248 strval qname; 249 strval value; 250 struct reader_position position; 251 unsigned int flags; 252 }; 253 254 struct element 255 { 256 struct list entry; 257 strval prefix; 258 strval localname; 259 strval qname; 260 struct reader_position position; 261 }; 262 263 struct ns 264 { 265 struct list entry; 266 strval prefix; 267 strval uri; 268 struct element *element; 269 }; 270 271 typedef struct 272 { 273 IXmlReader IXmlReader_iface; 274 LONG ref; 275 xmlreaderinput *input; 276 IMalloc *imalloc; 277 XmlReadState state; 278 HRESULT error; /* error set on XmlReadState_Error */ 279 XmlReaderInternalState instate; 280 XmlReaderResumeState resumestate; 281 XmlNodeType nodetype; 282 DtdProcessing dtdmode; 283 IXmlResolver *resolver; 284 IUnknown *mlang; 285 struct reader_position position; 286 struct list attrs; /* attributes list for current node */ 287 struct attribute *attr; /* current attribute */ 288 UINT attr_count; 289 struct list nsdef; 290 struct list ns; 291 struct list elements; 292 int chunk_read_off; 293 strval 
strvalues[StringValue_Last]; 294 UINT depth; 295 UINT max_depth; 296 BOOL is_empty_element; 297 struct element empty_element; /* used for empty elements without end tag <a />, 298 and to keep <?xml reader position */ 299 UINT resume[XmlReadResume_Last]; /* offsets used to resume reader */ 300 } xmlreader; 301 302 struct input_buffer 303 { 304 encoded_buffer utf16; 305 encoded_buffer encoded; 306 UINT code_page; 307 xmlreaderinput *input; 308 }; 309 310 static inline xmlreader *impl_from_IXmlReader(IXmlReader *iface) 311 { 312 return CONTAINING_RECORD(iface, xmlreader, IXmlReader_iface); 313 } 314 315 static inline xmlreaderinput *impl_from_IXmlReaderInput(IXmlReaderInput *iface) 316 { 317 return CONTAINING_RECORD(iface, xmlreaderinput, IXmlReaderInput_iface); 318 } 319 320 /* reader memory allocation functions */ 321 static inline void *reader_alloc(xmlreader *reader, size_t len) 322 { 323 return m_alloc(reader->imalloc, len); 324 } 325 326 static inline void *reader_alloc_zero(xmlreader *reader, size_t len) 327 { 328 void *ret = reader_alloc(reader, len); 329 if (ret) 330 memset(ret, 0, len); 331 return ret; 332 } 333 334 static inline void reader_free(xmlreader *reader, void *mem) 335 { 336 m_free(reader->imalloc, mem); 337 } 338 339 /* Just return pointer from offset, no attempt to read more. */ 340 static inline WCHAR *reader_get_ptr2(const xmlreader *reader, UINT offset) 341 { 342 encoded_buffer *buffer = &reader->input->buffer->utf16; 343 return (WCHAR*)buffer->data + offset; 344 } 345 346 static inline WCHAR *reader_get_strptr(const xmlreader *reader, const strval *v) 347 { 348 return v->str ? 
v->str : reader_get_ptr2(reader, v->start); 349 } 350 351 static HRESULT reader_strvaldup(xmlreader *reader, const strval *src, strval *dest) 352 { 353 *dest = *src; 354 355 if (src->str != strval_empty.str) 356 { 357 dest->str = reader_alloc(reader, (dest->len+1)*sizeof(WCHAR)); 358 if (!dest->str) return E_OUTOFMEMORY; 359 memcpy(dest->str, reader_get_strptr(reader, src), dest->len*sizeof(WCHAR)); 360 dest->str[dest->len] = 0; 361 dest->start = 0; 362 } 363 364 return S_OK; 365 } 366 367 /* reader input memory allocation functions */ 368 static inline void *readerinput_alloc(xmlreaderinput *input, size_t len) 369 { 370 return m_alloc(input->imalloc, len); 371 } 372 373 static inline void *readerinput_realloc(xmlreaderinput *input, void *mem, size_t len) 374 { 375 return m_realloc(input->imalloc, mem, len); 376 } 377 378 static inline void readerinput_free(xmlreaderinput *input, void *mem) 379 { 380 m_free(input->imalloc, mem); 381 } 382 383 static inline WCHAR *readerinput_strdupW(xmlreaderinput *input, const WCHAR *str) 384 { 385 LPWSTR ret = NULL; 386 387 if(str) { 388 DWORD size; 389 390 size = (strlenW(str)+1)*sizeof(WCHAR); 391 ret = readerinput_alloc(input, size); 392 if (ret) memcpy(ret, str, size); 393 } 394 395 return ret; 396 } 397 398 /* This one frees stored string value if needed */ 399 static void reader_free_strvalued(xmlreader *reader, strval *v) 400 { 401 if (v->str != strval_empty.str) 402 { 403 reader_free(reader, v->str); 404 *v = strval_empty; 405 } 406 } 407 408 static void reader_clear_attrs(xmlreader *reader) 409 { 410 struct attribute *attr, *attr2; 411 LIST_FOR_EACH_ENTRY_SAFE(attr, attr2, &reader->attrs, struct attribute, entry) 412 { 413 reader_free_strvalued(reader, &attr->localname); 414 reader_free_strvalued(reader, &attr->value); 415 reader_free(reader, attr); 416 } 417 list_init(&reader->attrs); 418 reader->attr_count = 0; 419 reader->attr = NULL; 420 } 421 422 /* attribute data holds pointers to buffer data, so buffer shrink is 
not possible 423 while we are on a node with attributes */ 424 static HRESULT reader_add_attr(xmlreader *reader, strval *prefix, strval *localname, strval *qname, 425 strval *value, const struct reader_position *position, unsigned int flags) 426 { 427 struct attribute *attr; 428 HRESULT hr; 429 430 attr = reader_alloc(reader, sizeof(*attr)); 431 if (!attr) return E_OUTOFMEMORY; 432 433 hr = reader_strvaldup(reader, localname, &attr->localname); 434 if (hr == S_OK) 435 { 436 hr = reader_strvaldup(reader, value, &attr->value); 437 if (hr != S_OK) 438 reader_free_strvalued(reader, &attr->value); 439 } 440 if (hr != S_OK) 441 { 442 reader_free(reader, attr); 443 return hr; 444 } 445 446 if (prefix) 447 attr->prefix = *prefix; 448 else 449 memset(&attr->prefix, 0, sizeof(attr->prefix)); 450 attr->qname = qname ? *qname : *localname; 451 attr->position = *position; 452 attr->flags = flags; 453 list_add_tail(&reader->attrs, &attr->entry); 454 reader->attr_count++; 455 456 return S_OK; 457 } 458 459 /* Returns current element, doesn't check if reader is actually positioned on it. 
*/ 460 static struct element *reader_get_element(xmlreader *reader) 461 { 462 if (reader->is_empty_element) 463 return &reader->empty_element; 464 465 return LIST_ENTRY(list_head(&reader->elements), struct element, entry); 466 } 467 468 static inline void reader_init_strvalue(UINT start, UINT len, strval *v) 469 { 470 v->start = start; 471 v->len = len; 472 v->str = NULL; 473 } 474 475 static inline const char* debug_strval(const xmlreader *reader, const strval *v) 476 { 477 return debugstr_wn(reader_get_strptr(reader, v), v->len); 478 } 479 480 /* used to initialize from constant string */ 481 static inline void reader_init_cstrvalue(WCHAR *str, UINT len, strval *v) 482 { 483 v->start = 0; 484 v->len = len; 485 v->str = str; 486 } 487 488 static void reader_free_strvalue(xmlreader *reader, XmlReaderStringValue type) 489 { 490 reader_free_strvalued(reader, &reader->strvalues[type]); 491 } 492 493 static void reader_free_strvalues(xmlreader *reader) 494 { 495 int type; 496 for (type = 0; type < StringValue_Last; type++) 497 reader_free_strvalue(reader, type); 498 } 499 500 /* This helper should only be used to test if strings are the same, 501 it doesn't try to sort. 
*/ 502 static inline int strval_eq(const xmlreader *reader, const strval *str1, const strval *str2) 503 { 504 if (str1->len != str2->len) return 0; 505 return !memcmp(reader_get_strptr(reader, str1), reader_get_strptr(reader, str2), str1->len*sizeof(WCHAR)); 506 } 507 508 static void reader_clear_elements(xmlreader *reader) 509 { 510 struct element *elem, *elem2; 511 LIST_FOR_EACH_ENTRY_SAFE(elem, elem2, &reader->elements, struct element, entry) 512 { 513 reader_free_strvalued(reader, &elem->prefix); 514 reader_free_strvalued(reader, &elem->localname); 515 reader_free_strvalued(reader, &elem->qname); 516 reader_free(reader, elem); 517 } 518 list_init(&reader->elements); 519 reader_free_strvalued(reader, &reader->empty_element.localname); 520 reader_free_strvalued(reader, &reader->empty_element.qname); 521 reader->is_empty_element = FALSE; 522 } 523 524 static struct ns *reader_lookup_ns(xmlreader *reader, const strval *prefix) 525 { 526 struct list *nslist = prefix ? &reader->ns : &reader->nsdef; 527 struct ns *ns; 528 529 LIST_FOR_EACH_ENTRY_REV(ns, nslist, struct ns, entry) { 530 if (strval_eq(reader, prefix, &ns->prefix)) 531 return ns; 532 } 533 534 return NULL; 535 } 536 537 static HRESULT reader_inc_depth(xmlreader *reader) 538 { 539 return (++reader->depth >= reader->max_depth && reader->max_depth) ? 
SC_E_MAXELEMENTDEPTH : S_OK; 540 } 541 542 static void reader_dec_depth(xmlreader *reader) 543 { 544 if (reader->depth) 545 reader->depth--; 546 } 547 548 static HRESULT reader_push_ns(xmlreader *reader, const strval *prefix, const strval *uri, BOOL def) 549 { 550 struct ns *ns; 551 HRESULT hr; 552 553 ns = reader_alloc(reader, sizeof(*ns)); 554 if (!ns) return E_OUTOFMEMORY; 555 556 if (def) 557 memset(&ns->prefix, 0, sizeof(ns->prefix)); 558 else { 559 hr = reader_strvaldup(reader, prefix, &ns->prefix); 560 if (FAILED(hr)) { 561 reader_free(reader, ns); 562 return hr; 563 } 564 } 565 566 hr = reader_strvaldup(reader, uri, &ns->uri); 567 if (FAILED(hr)) { 568 reader_free_strvalued(reader, &ns->prefix); 569 reader_free(reader, ns); 570 return hr; 571 } 572 573 ns->element = NULL; 574 list_add_head(def ? &reader->nsdef : &reader->ns, &ns->entry); 575 return hr; 576 } 577 578 static void reader_free_element(xmlreader *reader, struct element *element) 579 { 580 reader_free_strvalued(reader, &element->prefix); 581 reader_free_strvalued(reader, &element->localname); 582 reader_free_strvalued(reader, &element->qname); 583 reader_free(reader, element); 584 } 585 586 static void reader_mark_ns_nodes(xmlreader *reader, struct element *element) 587 { 588 struct ns *ns; 589 590 LIST_FOR_EACH_ENTRY(ns, &reader->ns, struct ns, entry) { 591 if (ns->element) 592 break; 593 ns->element = element; 594 } 595 596 LIST_FOR_EACH_ENTRY(ns, &reader->nsdef, struct ns, entry) { 597 if (ns->element) 598 break; 599 ns->element = element; 600 } 601 } 602 603 static HRESULT reader_push_element(xmlreader *reader, strval *prefix, strval *localname, 604 strval *qname, const struct reader_position *position) 605 { 606 struct element *element; 607 HRESULT hr; 608 609 element = reader_alloc_zero(reader, sizeof(*element)); 610 if (!element) 611 return E_OUTOFMEMORY; 612 613 if ((hr = reader_strvaldup(reader, prefix, &element->prefix)) == S_OK && 614 (hr = reader_strvaldup(reader, localname, 
&element->localname)) == S_OK && 615 (hr = reader_strvaldup(reader, qname, &element->qname)) == S_OK) 616 { 617 list_add_head(&reader->elements, &element->entry); 618 reader_mark_ns_nodes(reader, element); 619 reader->is_empty_element = FALSE; 620 element->position = *position; 621 } 622 else 623 reader_free_element(reader, element); 624 625 return hr; 626 } 627 628 static void reader_pop_ns_nodes(xmlreader *reader, struct element *element) 629 { 630 struct ns *ns, *ns2; 631 632 LIST_FOR_EACH_ENTRY_SAFE_REV(ns, ns2, &reader->ns, struct ns, entry) { 633 if (ns->element != element) 634 break; 635 636 list_remove(&ns->entry); 637 reader_free_strvalued(reader, &ns->prefix); 638 reader_free_strvalued(reader, &ns->uri); 639 reader_free(reader, ns); 640 } 641 642 if (!list_empty(&reader->nsdef)) { 643 ns = LIST_ENTRY(list_head(&reader->nsdef), struct ns, entry); 644 if (ns->element == element) { 645 list_remove(&ns->entry); 646 reader_free_strvalued(reader, &ns->prefix); 647 reader_free_strvalued(reader, &ns->uri); 648 reader_free(reader, ns); 649 } 650 } 651 } 652 653 static void reader_pop_element(xmlreader *reader) 654 { 655 struct element *element; 656 657 if (list_empty(&reader->elements)) 658 return; 659 660 element = LIST_ENTRY(list_head(&reader->elements), struct element, entry); 661 list_remove(&element->entry); 662 663 reader_pop_ns_nodes(reader, element); 664 reader_free_element(reader, element); 665 666 /* It was a root element, the rest is expected as Misc */ 667 if (list_empty(&reader->elements)) 668 reader->instate = XmlReadInState_MiscEnd; 669 } 670 671 /* Always make a copy, cause strings are supposed to be null terminated. Null pointer for 'value' 672 means node value is to be determined. 
*/ 673 static void reader_set_strvalue(xmlreader *reader, XmlReaderStringValue type, const strval *value) 674 { 675 strval *v = &reader->strvalues[type]; 676 677 reader_free_strvalue(reader, type); 678 if (!value) 679 { 680 v->str = NULL; 681 v->start = 0; 682 v->len = 0; 683 return; 684 } 685 686 if (value->str == strval_empty.str) 687 *v = *value; 688 else 689 { 690 if (type == StringValue_Value) 691 { 692 /* defer allocation for value string */ 693 v->str = NULL; 694 v->start = value->start; 695 v->len = value->len; 696 } 697 else 698 { 699 v->str = reader_alloc(reader, (value->len + 1)*sizeof(WCHAR)); 700 memcpy(v->str, reader_get_strptr(reader, value), value->len*sizeof(WCHAR)); 701 v->str[value->len] = 0; 702 v->len = value->len; 703 } 704 } 705 } 706 707 static inline int is_reader_pending(xmlreader *reader) 708 { 709 return reader->input->pending; 710 } 711 712 static HRESULT init_encoded_buffer(xmlreaderinput *input, encoded_buffer *buffer) 713 { 714 const int initial_len = 0x2000; 715 buffer->data = readerinput_alloc(input, initial_len); 716 if (!buffer->data) return E_OUTOFMEMORY; 717 718 memset(buffer->data, 0, 4); 719 buffer->cur = 0; 720 buffer->allocated = initial_len; 721 buffer->written = 0; 722 buffer->prev_cr = FALSE; 723 724 return S_OK; 725 } 726 727 static void free_encoded_buffer(xmlreaderinput *input, encoded_buffer *buffer) 728 { 729 readerinput_free(input, buffer->data); 730 } 731 732 HRESULT get_code_page(xml_encoding encoding, UINT *cp) 733 { 734 if (encoding == XmlEncoding_Unknown) 735 { 736 FIXME("unsupported encoding %d\n", encoding); 737 return E_NOTIMPL; 738 } 739 740 *cp = xml_encoding_map[encoding].cp; 741 742 return S_OK; 743 } 744 745 xml_encoding parse_encoding_name(const WCHAR *name, int len) 746 { 747 int min, max, n, c; 748 749 if (!name) return XmlEncoding_Unknown; 750 751 min = 0; 752 max = ARRAY_SIZE(xml_encoding_map) - 1; 753 754 while (min <= max) 755 { 756 n = (min+max)/2; 757 758 if (len != -1) 759 c = 
strncmpiW(xml_encoding_map[n].name, name, len); 760 else 761 c = strcmpiW(xml_encoding_map[n].name, name); 762 if (!c) 763 return xml_encoding_map[n].enc; 764 765 if (c > 0) 766 max = n-1; 767 else 768 min = n+1; 769 } 770 771 return XmlEncoding_Unknown; 772 } 773 774 static HRESULT alloc_input_buffer(xmlreaderinput *input) 775 { 776 input_buffer *buffer; 777 HRESULT hr; 778 779 input->buffer = NULL; 780 781 buffer = readerinput_alloc(input, sizeof(*buffer)); 782 if (!buffer) return E_OUTOFMEMORY; 783 784 buffer->input = input; 785 buffer->code_page = ~0; /* code page is unknown at this point */ 786 hr = init_encoded_buffer(input, &buffer->utf16); 787 if (hr != S_OK) { 788 readerinput_free(input, buffer); 789 return hr; 790 } 791 792 hr = init_encoded_buffer(input, &buffer->encoded); 793 if (hr != S_OK) { 794 free_encoded_buffer(input, &buffer->utf16); 795 readerinput_free(input, buffer); 796 return hr; 797 } 798 799 input->buffer = buffer; 800 return S_OK; 801 } 802 803 static void free_input_buffer(input_buffer *buffer) 804 { 805 free_encoded_buffer(buffer->input, &buffer->encoded); 806 free_encoded_buffer(buffer->input, &buffer->utf16); 807 readerinput_free(buffer->input, buffer); 808 } 809 810 static void readerinput_release_stream(xmlreaderinput *readerinput) 811 { 812 if (readerinput->stream) { 813 ISequentialStream_Release(readerinput->stream); 814 readerinput->stream = NULL; 815 } 816 } 817 818 /* Queries already stored interface for IStream/ISequentialStream. 
819 Interface supplied on creation will be overwritten */ 820 static inline HRESULT readerinput_query_for_stream(xmlreaderinput *readerinput) 821 { 822 HRESULT hr; 823 824 readerinput_release_stream(readerinput); 825 hr = IUnknown_QueryInterface(readerinput->input, &IID_IStream, (void**)&readerinput->stream); 826 if (hr != S_OK) 827 hr = IUnknown_QueryInterface(readerinput->input, &IID_ISequentialStream, (void**)&readerinput->stream); 828 829 return hr; 830 } 831 832 /* reads a chunk to raw buffer */ 833 static HRESULT readerinput_growraw(xmlreaderinput *readerinput) 834 { 835 encoded_buffer *buffer = &readerinput->buffer->encoded; 836 /* to make sure aligned length won't exceed allocated length */ 837 ULONG len = buffer->allocated - buffer->written - 4; 838 ULONG read; 839 HRESULT hr; 840 841 /* always try to get aligned to 4 bytes, so the only case we can get partially read characters is 842 variable width encodings like UTF-8 */ 843 len = (len + 3) & ~3; 844 /* try to use allocated space or grow */ 845 if (buffer->allocated - buffer->written < len) 846 { 847 buffer->allocated *= 2; 848 buffer->data = readerinput_realloc(readerinput, buffer->data, buffer->allocated); 849 len = buffer->allocated - buffer->written; 850 } 851 852 read = 0; 853 hr = ISequentialStream_Read(readerinput->stream, buffer->data + buffer->written, len, &read); 854 TRACE("written=%d, alloc=%d, requested=%d, read=%d, ret=0x%08x\n", buffer->written, buffer->allocated, len, read, hr); 855 readerinput->pending = hr == E_PENDING; 856 if (FAILED(hr)) return hr; 857 buffer->written += read; 858 859 return hr; 860 } 861 862 /* grows UTF-16 buffer so it has at least 'length' WCHAR chars free on return */ 863 static void readerinput_grow(xmlreaderinput *readerinput, int length) 864 { 865 encoded_buffer *buffer = &readerinput->buffer->utf16; 866 867 length *= sizeof(WCHAR); 868 /* grow if needed, plus 4 bytes to be sure null terminator will fit in */ 869 if (buffer->allocated < buffer->written + length 
+ 4) 870 { 871 int grown_size = max(2*buffer->allocated, buffer->allocated + length); 872 buffer->data = readerinput_realloc(readerinput, buffer->data, grown_size); 873 buffer->allocated = grown_size; 874 } 875 } 876 877 static inline BOOL readerinput_is_utf8(xmlreaderinput *readerinput) 878 { 879 static const char startA[] = {'<','?'}; 880 static const char commentA[] = {'<','!'}; 881 encoded_buffer *buffer = &readerinput->buffer->encoded; 882 unsigned char *ptr = (unsigned char*)buffer->data; 883 884 return !memcmp(buffer->data, startA, sizeof(startA)) || 885 !memcmp(buffer->data, commentA, sizeof(commentA)) || 886 /* test start byte */ 887 (ptr[0] == '<' && 888 ( 889 (ptr[1] && (ptr[1] <= 0x7f)) || 890 (buffer->data[1] >> 5) == 0x6 || /* 2 bytes */ 891 (buffer->data[1] >> 4) == 0xe || /* 3 bytes */ 892 (buffer->data[1] >> 3) == 0x1e) /* 4 bytes */ 893 ); 894 } 895 896 static HRESULT readerinput_detectencoding(xmlreaderinput *readerinput, xml_encoding *enc) 897 { 898 encoded_buffer *buffer = &readerinput->buffer->encoded; 899 static const char utf8bom[] = {0xef,0xbb,0xbf}; 900 static const char utf16lebom[] = {0xff,0xfe}; 901 WCHAR *ptrW; 902 903 *enc = XmlEncoding_Unknown; 904 905 if (buffer->written <= 3) 906 { 907 HRESULT hr = readerinput_growraw(readerinput); 908 if (FAILED(hr)) return hr; 909 if (buffer->written < 3) return MX_E_INPUTEND; 910 } 911 912 ptrW = (WCHAR *)buffer->data; 913 /* try start symbols if we have enough data to do that, input buffer should contain 914 first chunk already */ 915 if (readerinput_is_utf8(readerinput)) 916 *enc = XmlEncoding_UTF8; 917 else if (*ptrW == '<') 918 { 919 ptrW++; 920 if (*ptrW == '?' || *ptrW == '!' 
|| is_namestartchar(*ptrW)) 921 *enc = XmlEncoding_UTF16; 922 } 923 /* try with BOM now */ 924 else if (!memcmp(buffer->data, utf8bom, sizeof(utf8bom))) 925 { 926 buffer->cur += sizeof(utf8bom); 927 *enc = XmlEncoding_UTF8; 928 } 929 else if (!memcmp(buffer->data, utf16lebom, sizeof(utf16lebom))) 930 { 931 buffer->cur += sizeof(utf16lebom); 932 *enc = XmlEncoding_UTF16; 933 } 934 935 return S_OK; 936 } 937 938 static int readerinput_get_utf8_convlen(xmlreaderinput *readerinput) 939 { 940 encoded_buffer *buffer = &readerinput->buffer->encoded; 941 int len = buffer->written; 942 943 /* complete single byte char */ 944 if (!(buffer->data[len-1] & 0x80)) return len; 945 946 /* find start byte of multibyte char */ 947 while (--len && !(buffer->data[len] & 0xc0)) 948 ; 949 950 return len; 951 } 952 953 /* Returns byte length of complete char sequence for buffer code page, 954 it's relative to current buffer position which is currently used for BOM handling 955 only. */ 956 static int readerinput_get_convlen(xmlreaderinput *readerinput) 957 { 958 encoded_buffer *buffer = &readerinput->buffer->encoded; 959 int len; 960 961 if (readerinput->buffer->code_page == CP_UTF8) 962 len = readerinput_get_utf8_convlen(readerinput); 963 else 964 len = buffer->written; 965 966 TRACE("%d\n", len - buffer->cur); 967 return len - buffer->cur; 968 } 969 970 /* It's possible that raw buffer has some leftovers from last conversion - some char 971 sequence that doesn't represent a full code point. Length argument should be calculated with 972 readerinput_get_convlen(), if it's -1 it will be calculated here. 
*/ 973 static void readerinput_shrinkraw(xmlreaderinput *readerinput, int len) 974 { 975 encoded_buffer *buffer = &readerinput->buffer->encoded; 976 977 if (len == -1) 978 len = readerinput_get_convlen(readerinput); 979 980 memmove(buffer->data, buffer->data + buffer->cur + (buffer->written - len), len); 981 /* everything below cur is lost too */ 982 buffer->written -= len + buffer->cur; 983 /* after this point we don't need cur offset really, 984 it's used only to mark where actual data begins when first chunk is read */ 985 buffer->cur = 0; 986 } 987 988 static void fixup_buffer_cr(encoded_buffer *buffer, int off) 989 { 990 BOOL prev_cr = buffer->prev_cr; 991 const WCHAR *src; 992 WCHAR *dest; 993 994 src = dest = (WCHAR*)buffer->data + off; 995 while ((const char*)src < buffer->data + buffer->written) 996 { 997 if (*src == '\r') 998 { 999 *dest++ = '\n'; 1000 src++; 1001 prev_cr = TRUE; 1002 continue; 1003 } 1004 if(prev_cr && *src == '\n') 1005 src++; 1006 else 1007 *dest++ = *src++; 1008 prev_cr = FALSE; 1009 } 1010 1011 buffer->written = (char*)dest - buffer->data; 1012 buffer->prev_cr = prev_cr; 1013 *dest = 0; 1014 } 1015 1016 /* note that raw buffer content is kept */ 1017 static void readerinput_switchencoding(xmlreaderinput *readerinput, xml_encoding enc) 1018 { 1019 encoded_buffer *src = &readerinput->buffer->encoded; 1020 encoded_buffer *dest = &readerinput->buffer->utf16; 1021 int len, dest_len; 1022 HRESULT hr; 1023 WCHAR *ptr; 1024 UINT cp; 1025 1026 hr = get_code_page(enc, &cp); 1027 if (FAILED(hr)) return; 1028 1029 readerinput->buffer->code_page = cp; 1030 len = readerinput_get_convlen(readerinput); 1031 1032 TRACE("switching to cp %d\n", cp); 1033 1034 /* just copy in this case */ 1035 if (enc == XmlEncoding_UTF16) 1036 { 1037 readerinput_grow(readerinput, len); 1038 memcpy(dest->data, src->data + src->cur, len); 1039 dest->written += len*sizeof(WCHAR); 1040 } 1041 else 1042 { 1043 dest_len = MultiByteToWideChar(cp, 0, src->data + src->cur, len, 
NULL, 0); 1044 readerinput_grow(readerinput, dest_len); 1045 ptr = (WCHAR*)dest->data; 1046 MultiByteToWideChar(cp, 0, src->data + src->cur, len, ptr, dest_len); 1047 ptr[dest_len] = 0; 1048 dest->written += dest_len*sizeof(WCHAR); 1049 } 1050 1051 fixup_buffer_cr(dest, 0); 1052 } 1053 1054 /* shrinks parsed data a buffer begins with */ 1055 static void reader_shrink(xmlreader *reader) 1056 { 1057 encoded_buffer *buffer = &reader->input->buffer->utf16; 1058 1059 /* avoid to move too often using threshold shrink length */ 1060 if (buffer->cur*sizeof(WCHAR) > buffer->written / 2) 1061 { 1062 buffer->written -= buffer->cur*sizeof(WCHAR); 1063 memmove(buffer->data, (WCHAR*)buffer->data + buffer->cur, buffer->written); 1064 buffer->cur = 0; 1065 *(WCHAR*)&buffer->data[buffer->written] = 0; 1066 } 1067 } 1068 1069 /* This is a normal way for reader to get new data converted from raw buffer to utf16 buffer. 1070 It won't attempt to shrink but will grow destination buffer if needed */ 1071 static HRESULT reader_more(xmlreader *reader) 1072 { 1073 xmlreaderinput *readerinput = reader->input; 1074 encoded_buffer *src = &readerinput->buffer->encoded; 1075 encoded_buffer *dest = &readerinput->buffer->utf16; 1076 UINT cp = readerinput->buffer->code_page; 1077 int len, dest_len, prev_len; 1078 HRESULT hr; 1079 WCHAR *ptr; 1080 1081 /* get some raw data from stream first */ 1082 hr = readerinput_growraw(readerinput); 1083 len = readerinput_get_convlen(readerinput); 1084 prev_len = dest->written / sizeof(WCHAR); 1085 1086 /* just copy for UTF-16 case */ 1087 if (cp == 1200) 1088 { 1089 readerinput_grow(readerinput, len); 1090 memcpy(dest->data + dest->written, src->data + src->cur, len); 1091 dest->written += len*sizeof(WCHAR); 1092 } 1093 else 1094 { 1095 dest_len = MultiByteToWideChar(cp, 0, src->data + src->cur, len, NULL, 0); 1096 readerinput_grow(readerinput, dest_len); 1097 ptr = (WCHAR*)(dest->data + dest->written); 1098 MultiByteToWideChar(cp, 0, src->data + src->cur, len, 
ptr, dest_len); 1099 ptr[dest_len] = 0; 1100 dest->written += dest_len*sizeof(WCHAR); 1101 /* get rid of processed data */ 1102 readerinput_shrinkraw(readerinput, len); 1103 } 1104 1105 fixup_buffer_cr(dest, prev_len); 1106 return hr; 1107 } 1108 1109 static inline UINT reader_get_cur(xmlreader *reader) 1110 { 1111 return reader->input->buffer->utf16.cur; 1112 } 1113 1114 static inline WCHAR *reader_get_ptr(xmlreader *reader) 1115 { 1116 encoded_buffer *buffer = &reader->input->buffer->utf16; 1117 WCHAR *ptr = (WCHAR*)buffer->data + buffer->cur; 1118 if (!*ptr) reader_more(reader); 1119 return (WCHAR*)buffer->data + buffer->cur; 1120 } 1121 1122 static int reader_cmp(xmlreader *reader, const WCHAR *str) 1123 { 1124 int i=0; 1125 const WCHAR *ptr = reader_get_ptr(reader); 1126 while (str[i]) 1127 { 1128 if (!ptr[i]) 1129 { 1130 reader_more(reader); 1131 ptr = reader_get_ptr(reader); 1132 } 1133 if (str[i] != ptr[i]) 1134 return ptr[i] - str[i]; 1135 i++; 1136 } 1137 return 0; 1138 } 1139 1140 static void reader_update_position(xmlreader *reader, WCHAR ch) 1141 { 1142 if (ch == '\r') 1143 reader->position.line_position = 1; 1144 else if (ch == '\n') 1145 { 1146 reader->position.line_number++; 1147 reader->position.line_position = 1; 1148 } 1149 else 1150 reader->position.line_position++; 1151 } 1152 1153 /* moves cursor n WCHARs forward */ 1154 static void reader_skipn(xmlreader *reader, int n) 1155 { 1156 encoded_buffer *buffer = &reader->input->buffer->utf16; 1157 const WCHAR *ptr; 1158 1159 while (*(ptr = reader_get_ptr(reader)) && n--) 1160 { 1161 reader_update_position(reader, *ptr); 1162 buffer->cur++; 1163 } 1164 } 1165 1166 static inline BOOL is_wchar_space(WCHAR ch) 1167 { 1168 return ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n'; 1169 } 1170 1171 /* [3] S ::= (#x20 | #x9 | #xD | #xA)+ */ 1172 static int reader_skipspaces(xmlreader *reader) 1173 { 1174 const WCHAR *ptr = reader_get_ptr(reader); 1175 UINT start = reader_get_cur(reader); 1176 1177 while 
(is_wchar_space(*ptr)) 1178 { 1179 reader_skipn(reader, 1); 1180 ptr = reader_get_ptr(reader); 1181 } 1182 1183 return reader_get_cur(reader) - start; 1184 } 1185 1186 /* [26] VersionNum ::= '1.' [0-9]+ */ 1187 static HRESULT reader_parse_versionnum(xmlreader *reader, strval *val) 1188 { 1189 static const WCHAR onedotW[] = {'1','.',0}; 1190 WCHAR *ptr, *ptr2; 1191 UINT start; 1192 1193 if (reader_cmp(reader, onedotW)) return WC_E_XMLDECL; 1194 1195 start = reader_get_cur(reader); 1196 /* skip "1." */ 1197 reader_skipn(reader, 2); 1198 1199 ptr2 = ptr = reader_get_ptr(reader); 1200 while (*ptr >= '0' && *ptr <= '9') 1201 { 1202 reader_skipn(reader, 1); 1203 ptr = reader_get_ptr(reader); 1204 } 1205 1206 if (ptr2 == ptr) return WC_E_DIGIT; 1207 reader_init_strvalue(start, reader_get_cur(reader)-start, val); 1208 TRACE("version=%s\n", debug_strval(reader, val)); 1209 return S_OK; 1210 } 1211 1212 /* [25] Eq ::= S? '=' S? */ 1213 static HRESULT reader_parse_eq(xmlreader *reader) 1214 { 1215 static const WCHAR eqW[] = {'=',0}; 1216 reader_skipspaces(reader); 1217 if (reader_cmp(reader, eqW)) return WC_E_EQUAL; 1218 /* skip '=' */ 1219 reader_skipn(reader, 1); 1220 reader_skipspaces(reader); 1221 return S_OK; 1222 } 1223 1224 /* [24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"') */ 1225 static HRESULT reader_parse_versioninfo(xmlreader *reader) 1226 { 1227 static const WCHAR versionW[] = {'v','e','r','s','i','o','n',0}; 1228 struct reader_position position; 1229 strval val, name; 1230 HRESULT hr; 1231 1232 if (!reader_skipspaces(reader)) return WC_E_WHITESPACE; 1233 1234 position = reader->position; 1235 if (reader_cmp(reader, versionW)) return WC_E_XMLDECL; 1236 reader_init_strvalue(reader_get_cur(reader), 7, &name); 1237 /* skip 'version' */ 1238 reader_skipn(reader, 7); 1239 1240 hr = reader_parse_eq(reader); 1241 if (FAILED(hr)) return hr; 1242 1243 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW)) 1244 return WC_E_QUOTE; 
1245 /* skip "'"|'"' */ 1246 reader_skipn(reader, 1); 1247 1248 hr = reader_parse_versionnum(reader, &val); 1249 if (FAILED(hr)) return hr; 1250 1251 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW)) 1252 return WC_E_QUOTE; 1253 1254 /* skip "'"|'"' */ 1255 reader_skipn(reader, 1); 1256 1257 return reader_add_attr(reader, NULL, &name, NULL, &val, &position, 0); 1258 } 1259 1260 /* ([A-Za-z0-9._] | '-') */ 1261 static inline BOOL is_wchar_encname(WCHAR ch) 1262 { 1263 return ((ch >= 'A' && ch <= 'Z') || 1264 (ch >= 'a' && ch <= 'z') || 1265 (ch >= '0' && ch <= '9') || 1266 (ch == '.') || (ch == '_') || 1267 (ch == '-')); 1268 } 1269 1270 /* [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* */ 1271 static HRESULT reader_parse_encname(xmlreader *reader, strval *val) 1272 { 1273 WCHAR *start = reader_get_ptr(reader), *ptr; 1274 xml_encoding enc; 1275 int len; 1276 1277 if ((*start < 'A' || *start > 'Z') && (*start < 'a' || *start > 'z')) 1278 return WC_E_ENCNAME; 1279 1280 val->start = reader_get_cur(reader); 1281 1282 ptr = start; 1283 while (is_wchar_encname(*++ptr)) 1284 ; 1285 1286 len = ptr - start; 1287 enc = parse_encoding_name(start, len); 1288 TRACE("encoding name %s\n", debugstr_wn(start, len)); 1289 val->str = start; 1290 val->len = len; 1291 1292 if (enc == XmlEncoding_Unknown) 1293 return WC_E_ENCNAME; 1294 1295 /* skip encoding name */ 1296 reader_skipn(reader, len); 1297 return S_OK; 1298 } 1299 1300 /* [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" ) */ 1301 static HRESULT reader_parse_encdecl(xmlreader *reader) 1302 { 1303 static const WCHAR encodingW[] = {'e','n','c','o','d','i','n','g',0}; 1304 struct reader_position position; 1305 strval name, val; 1306 HRESULT hr; 1307 1308 if (!reader_skipspaces(reader)) return S_FALSE; 1309 1310 position = reader->position; 1311 if (reader_cmp(reader, encodingW)) return S_FALSE; 1312 name.str = reader_get_ptr(reader); 1313 name.start = reader_get_cur(reader); 1314 name.len 
= 8; 1315 /* skip 'encoding' */ 1316 reader_skipn(reader, 8); 1317 1318 hr = reader_parse_eq(reader); 1319 if (FAILED(hr)) return hr; 1320 1321 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW)) 1322 return WC_E_QUOTE; 1323 /* skip "'"|'"' */ 1324 reader_skipn(reader, 1); 1325 1326 hr = reader_parse_encname(reader, &val); 1327 if (FAILED(hr)) return hr; 1328 1329 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW)) 1330 return WC_E_QUOTE; 1331 1332 /* skip "'"|'"' */ 1333 reader_skipn(reader, 1); 1334 1335 return reader_add_attr(reader, NULL, &name, NULL, &val, &position, 0); 1336 } 1337 1338 /* [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"')) */ 1339 static HRESULT reader_parse_sddecl(xmlreader *reader) 1340 { 1341 static const WCHAR standaloneW[] = {'s','t','a','n','d','a','l','o','n','e',0}; 1342 static const WCHAR yesW[] = {'y','e','s',0}; 1343 static const WCHAR noW[] = {'n','o',0}; 1344 struct reader_position position; 1345 strval name, val; 1346 UINT start; 1347 HRESULT hr; 1348 1349 if (!reader_skipspaces(reader)) return S_FALSE; 1350 1351 position = reader->position; 1352 if (reader_cmp(reader, standaloneW)) return S_FALSE; 1353 reader_init_strvalue(reader_get_cur(reader), 10, &name); 1354 /* skip 'standalone' */ 1355 reader_skipn(reader, 10); 1356 1357 hr = reader_parse_eq(reader); 1358 if (FAILED(hr)) return hr; 1359 1360 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW)) 1361 return WC_E_QUOTE; 1362 /* skip "'"|'"' */ 1363 reader_skipn(reader, 1); 1364 1365 if (reader_cmp(reader, yesW) && reader_cmp(reader, noW)) 1366 return WC_E_XMLDECL; 1367 1368 start = reader_get_cur(reader); 1369 /* skip 'yes'|'no' */ 1370 reader_skipn(reader, reader_cmp(reader, yesW) ? 
2 : 3); 1371 reader_init_strvalue(start, reader_get_cur(reader)-start, &val); 1372 TRACE("standalone=%s\n", debug_strval(reader, &val)); 1373 1374 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW)) 1375 return WC_E_QUOTE; 1376 /* skip "'"|'"' */ 1377 reader_skipn(reader, 1); 1378 1379 return reader_add_attr(reader, NULL, &name, NULL, &val, &position, 0); 1380 } 1381 1382 /* [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' */ 1383 static HRESULT reader_parse_xmldecl(xmlreader *reader) 1384 { 1385 static const WCHAR xmldeclW[] = {'<','?','x','m','l',' ',0}; 1386 static const WCHAR declcloseW[] = {'?','>',0}; 1387 struct reader_position position; 1388 HRESULT hr; 1389 1390 /* check if we have "<?xml " */ 1391 if (reader_cmp(reader, xmldeclW)) 1392 return S_FALSE; 1393 1394 reader_skipn(reader, 2); 1395 position = reader->position; 1396 reader_skipn(reader, 3); 1397 hr = reader_parse_versioninfo(reader); 1398 if (FAILED(hr)) 1399 return hr; 1400 1401 hr = reader_parse_encdecl(reader); 1402 if (FAILED(hr)) 1403 return hr; 1404 1405 hr = reader_parse_sddecl(reader); 1406 if (FAILED(hr)) 1407 return hr; 1408 1409 reader_skipspaces(reader); 1410 if (reader_cmp(reader, declcloseW)) 1411 return WC_E_XMLDECL; 1412 1413 /* skip '?>' */ 1414 reader_skipn(reader, 2); 1415 1416 reader->nodetype = XmlNodeType_XmlDeclaration; 1417 reader->empty_element.position = position; 1418 reader_set_strvalue(reader, StringValue_LocalName, &strval_xml); 1419 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_xml); 1420 1421 return S_OK; 1422 } 1423 1424 /* [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' */ 1425 static HRESULT reader_parse_comment(xmlreader *reader) 1426 { 1427 WCHAR *ptr; 1428 UINT start; 1429 1430 if (reader->resumestate == XmlReadResumeState_Comment) 1431 { 1432 start = reader->resume[XmlReadResume_Body]; 1433 ptr = reader_get_ptr(reader); 1434 } 1435 else 1436 { 1437 /* skip '<!--' */ 1438 reader_skipn(reader, 
4); 1439 reader_shrink(reader); 1440 ptr = reader_get_ptr(reader); 1441 start = reader_get_cur(reader); 1442 reader->nodetype = XmlNodeType_Comment; 1443 reader->resume[XmlReadResume_Body] = start; 1444 reader->resumestate = XmlReadResumeState_Comment; 1445 reader_set_strvalue(reader, StringValue_Value, NULL); 1446 } 1447 1448 /* will exit when there's no more data, it won't attempt to 1449 read more from stream */ 1450 while (*ptr) 1451 { 1452 if (ptr[0] == '-') 1453 { 1454 if (ptr[1] == '-') 1455 { 1456 if (ptr[2] == '>') 1457 { 1458 strval value; 1459 1460 reader_init_strvalue(start, reader_get_cur(reader)-start, &value); 1461 TRACE("%s\n", debug_strval(reader, &value)); 1462 1463 /* skip rest of markup '->' */ 1464 reader_skipn(reader, 3); 1465 1466 reader_set_strvalue(reader, StringValue_Value, &value); 1467 reader->resume[XmlReadResume_Body] = 0; 1468 reader->resumestate = XmlReadResumeState_Initial; 1469 return S_OK; 1470 } 1471 else 1472 return WC_E_COMMENT; 1473 } 1474 } 1475 1476 reader_skipn(reader, 1); 1477 ptr++; 1478 } 1479 1480 return S_OK; 1481 } 1482 1483 /* [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] */ 1484 static inline BOOL is_char(WCHAR ch) 1485 { 1486 return (ch == '\t') || (ch == '\r') || (ch == '\n') || 1487 (ch >= 0x20 && ch <= 0xd7ff) || 1488 (ch >= 0xd800 && ch <= 0xdbff) || /* high surrogate */ 1489 (ch >= 0xdc00 && ch <= 0xdfff) || /* low surrogate */ 1490 (ch >= 0xe000 && ch <= 0xfffd); 1491 } 1492 1493 /* [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] */ 1494 static inline BOOL is_pubchar(WCHAR ch) 1495 { 1496 return (ch == ' ') || 1497 (ch >= 'a' && ch <= 'z') || 1498 (ch >= 'A' && ch <= 'Z') || 1499 (ch >= '0' && ch <= '9') || 1500 (ch >= '-' && ch <= ';') || /* '()*+,-./:; */ 1501 (ch == '=') || (ch == '?') || 1502 (ch == '@') || (ch == '!') || 1503 (ch >= '#' && ch <= '%') || /* #$% */ 1504 (ch == '_') || (ch == '\r') || (ch == '\n'); 1505 } 1506 1507 static 
inline BOOL is_namestartchar(WCHAR ch) 1508 { 1509 return (ch == ':') || (ch >= 'A' && ch <= 'Z') || 1510 (ch == '_') || (ch >= 'a' && ch <= 'z') || 1511 (ch >= 0xc0 && ch <= 0xd6) || 1512 (ch >= 0xd8 && ch <= 0xf6) || 1513 (ch >= 0xf8 && ch <= 0x2ff) || 1514 (ch >= 0x370 && ch <= 0x37d) || 1515 (ch >= 0x37f && ch <= 0x1fff) || 1516 (ch >= 0x200c && ch <= 0x200d) || 1517 (ch >= 0x2070 && ch <= 0x218f) || 1518 (ch >= 0x2c00 && ch <= 0x2fef) || 1519 (ch >= 0x3001 && ch <= 0xd7ff) || 1520 (ch >= 0xd800 && ch <= 0xdbff) || /* high surrogate */ 1521 (ch >= 0xdc00 && ch <= 0xdfff) || /* low surrogate */ 1522 (ch >= 0xf900 && ch <= 0xfdcf) || 1523 (ch >= 0xfdf0 && ch <= 0xfffd); 1524 } 1525 1526 /* [4 NS] NCName ::= Name - (Char* ':' Char*) */ 1527 static inline BOOL is_ncnamechar(WCHAR ch) 1528 { 1529 return (ch >= 'A' && ch <= 'Z') || 1530 (ch == '_') || (ch >= 'a' && ch <= 'z') || 1531 (ch == '-') || (ch == '.') || 1532 (ch >= '0' && ch <= '9') || 1533 (ch == 0xb7) || 1534 (ch >= 0xc0 && ch <= 0xd6) || 1535 (ch >= 0xd8 && ch <= 0xf6) || 1536 (ch >= 0xf8 && ch <= 0x2ff) || 1537 (ch >= 0x300 && ch <= 0x36f) || 1538 (ch >= 0x370 && ch <= 0x37d) || 1539 (ch >= 0x37f && ch <= 0x1fff) || 1540 (ch >= 0x200c && ch <= 0x200d) || 1541 (ch >= 0x203f && ch <= 0x2040) || 1542 (ch >= 0x2070 && ch <= 0x218f) || 1543 (ch >= 0x2c00 && ch <= 0x2fef) || 1544 (ch >= 0x3001 && ch <= 0xd7ff) || 1545 (ch >= 0xd800 && ch <= 0xdbff) || /* high surrogate */ 1546 (ch >= 0xdc00 && ch <= 0xdfff) || /* low surrogate */ 1547 (ch >= 0xf900 && ch <= 0xfdcf) || 1548 (ch >= 0xfdf0 && ch <= 0xfffd); 1549 } 1550 1551 static inline BOOL is_namechar(WCHAR ch) 1552 { 1553 return (ch == ':') || is_ncnamechar(ch); 1554 } 1555 1556 static XmlNodeType reader_get_nodetype(const xmlreader *reader) 1557 { 1558 /* When we're on attribute always return attribute type, container node type is kept. 
1559 Note that container is not necessarily an element, and attribute doesn't mean it's 1560 an attribute in XML spec terms. */ 1561 return reader->attr ? XmlNodeType_Attribute : reader->nodetype; 1562 } 1563 1564 /* [4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] | 1565 [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] | 1566 [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF] 1567 [4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040] 1568 [5] Name ::= NameStartChar (NameChar)* */ 1569 static HRESULT reader_parse_name(xmlreader *reader, strval *name) 1570 { 1571 WCHAR *ptr; 1572 UINT start; 1573 1574 if (reader->resume[XmlReadResume_Name]) 1575 { 1576 start = reader->resume[XmlReadResume_Name]; 1577 ptr = reader_get_ptr(reader); 1578 } 1579 else 1580 { 1581 ptr = reader_get_ptr(reader); 1582 start = reader_get_cur(reader); 1583 if (!is_namestartchar(*ptr)) return WC_E_NAMECHARACTER; 1584 } 1585 1586 while (is_namechar(*ptr)) 1587 { 1588 reader_skipn(reader, 1); 1589 ptr = reader_get_ptr(reader); 1590 } 1591 1592 if (is_reader_pending(reader)) 1593 { 1594 reader->resume[XmlReadResume_Name] = start; 1595 return E_PENDING; 1596 } 1597 else 1598 reader->resume[XmlReadResume_Name] = 0; 1599 1600 reader_init_strvalue(start, reader_get_cur(reader)-start, name); 1601 TRACE("name %s:%d\n", debug_strval(reader, name), name->len); 1602 1603 return S_OK; 1604 } 1605 1606 /* [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) */ 1607 static HRESULT reader_parse_pitarget(xmlreader *reader, strval *target) 1608 { 1609 static const WCHAR xmlW[] = {'x','m','l'}; 1610 static const strval xmlval = { (WCHAR*)xmlW, 3 }; 1611 strval name; 1612 WCHAR *ptr; 1613 HRESULT hr; 1614 UINT i; 1615 1616 hr = reader_parse_name(reader, &name); 1617 if (FAILED(hr)) return is_reader_pending(reader) ? 
E_PENDING : WC_E_PI; 1618 1619 /* now that we got name check for illegal content */ 1620 if (strval_eq(reader, &name, &xmlval)) 1621 return WC_E_LEADINGXML; 1622 1623 /* PITarget can't be a qualified name */ 1624 ptr = reader_get_strptr(reader, &name); 1625 for (i = 0; i < name.len; i++) 1626 if (ptr[i] == ':') 1627 return i ? NC_E_NAMECOLON : WC_E_PI; 1628 1629 TRACE("pitarget %s:%d\n", debug_strval(reader, &name), name.len); 1630 *target = name; 1631 return S_OK; 1632 } 1633 1634 /* [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' */ 1635 static HRESULT reader_parse_pi(xmlreader *reader) 1636 { 1637 strval target; 1638 WCHAR *ptr; 1639 UINT start; 1640 HRESULT hr; 1641 1642 switch (reader->resumestate) 1643 { 1644 case XmlReadResumeState_Initial: 1645 /* skip '<?' */ 1646 reader_skipn(reader, 2); 1647 reader_shrink(reader); 1648 reader->resumestate = XmlReadResumeState_PITarget; 1649 case XmlReadResumeState_PITarget: 1650 hr = reader_parse_pitarget(reader, &target); 1651 if (FAILED(hr)) return hr; 1652 reader_set_strvalue(reader, StringValue_LocalName, &target); 1653 reader_set_strvalue(reader, StringValue_QualifiedName, &target); 1654 reader_set_strvalue(reader, StringValue_Value, &strval_empty); 1655 reader->resumestate = XmlReadResumeState_PIBody; 1656 reader->resume[XmlReadResume_Body] = reader_get_cur(reader); 1657 default: 1658 ; 1659 } 1660 1661 start = reader->resume[XmlReadResume_Body]; 1662 ptr = reader_get_ptr(reader); 1663 while (*ptr) 1664 { 1665 if (ptr[0] == '?') 1666 { 1667 if (ptr[1] == '>') 1668 { 1669 UINT cur = reader_get_cur(reader); 1670 strval value; 1671 1672 /* strip all leading whitespace chars */ 1673 while (start < cur) 1674 { 1675 ptr = reader_get_ptr2(reader, start); 1676 if (!is_wchar_space(*ptr)) break; 1677 start++; 1678 } 1679 1680 reader_init_strvalue(start, cur-start, &value); 1681 1682 /* skip '?>' */ 1683 reader_skipn(reader, 2); 1684 TRACE("%s\n", debug_strval(reader, &value)); 1685 reader->nodetype = 
XmlNodeType_ProcessingInstruction; 1686 reader->resumestate = XmlReadResumeState_Initial; 1687 reader->resume[XmlReadResume_Body] = 0; 1688 reader_set_strvalue(reader, StringValue_Value, &value); 1689 return S_OK; 1690 } 1691 } 1692 1693 reader_skipn(reader, 1); 1694 ptr = reader_get_ptr(reader); 1695 } 1696 1697 return S_OK; 1698 } 1699 1700 /* This one is used to parse significant whitespace nodes, like in Misc production */ 1701 static HRESULT reader_parse_whitespace(xmlreader *reader) 1702 { 1703 switch (reader->resumestate) 1704 { 1705 case XmlReadResumeState_Initial: 1706 reader_shrink(reader); 1707 reader->resumestate = XmlReadResumeState_Whitespace; 1708 reader->resume[XmlReadResume_Body] = reader_get_cur(reader); 1709 reader->nodetype = XmlNodeType_Whitespace; 1710 reader_set_strvalue(reader, StringValue_LocalName, &strval_empty); 1711 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty); 1712 reader_set_strvalue(reader, StringValue_Value, &strval_empty); 1713 /* fallthrough */ 1714 case XmlReadResumeState_Whitespace: 1715 { 1716 strval value; 1717 UINT start; 1718 1719 reader_skipspaces(reader); 1720 if (is_reader_pending(reader)) return S_OK; 1721 1722 start = reader->resume[XmlReadResume_Body]; 1723 reader_init_strvalue(start, reader_get_cur(reader)-start, &value); 1724 reader_set_strvalue(reader, StringValue_Value, &value); 1725 TRACE("%s\n", debug_strval(reader, &value)); 1726 reader->resumestate = XmlReadResumeState_Initial; 1727 } 1728 default: 1729 ; 1730 } 1731 1732 return S_OK; 1733 } 1734 1735 /* [27] Misc ::= Comment | PI | S */ 1736 static HRESULT reader_parse_misc(xmlreader *reader) 1737 { 1738 HRESULT hr = S_FALSE; 1739 1740 if (reader->resumestate != XmlReadResumeState_Initial) 1741 { 1742 hr = reader_more(reader); 1743 if (FAILED(hr)) return hr; 1744 1745 /* finish current node */ 1746 switch (reader->resumestate) 1747 { 1748 case XmlReadResumeState_PITarget: 1749 case XmlReadResumeState_PIBody: 1750 return 
reader_parse_pi(reader); 1751 case XmlReadResumeState_Comment: 1752 return reader_parse_comment(reader); 1753 case XmlReadResumeState_Whitespace: 1754 return reader_parse_whitespace(reader); 1755 default: 1756 ERR("unknown resume state %d\n", reader->resumestate); 1757 } 1758 } 1759 1760 while (1) 1761 { 1762 const WCHAR *cur = reader_get_ptr(reader); 1763 1764 if (is_wchar_space(*cur)) 1765 hr = reader_parse_whitespace(reader); 1766 else if (!reader_cmp(reader, commentW)) 1767 hr = reader_parse_comment(reader); 1768 else if (!reader_cmp(reader, piW)) 1769 hr = reader_parse_pi(reader); 1770 else 1771 break; 1772 1773 if (hr != S_FALSE) return hr; 1774 } 1775 1776 return hr; 1777 } 1778 1779 /* [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") */ 1780 static HRESULT reader_parse_sys_literal(xmlreader *reader, strval *literal) 1781 { 1782 WCHAR *cur = reader_get_ptr(reader), quote; 1783 UINT start; 1784 1785 if (*cur != '"' && *cur != '\'') return WC_E_QUOTE; 1786 1787 quote = *cur; 1788 reader_skipn(reader, 1); 1789 1790 cur = reader_get_ptr(reader); 1791 start = reader_get_cur(reader); 1792 while (is_char(*cur) && *cur != quote) 1793 { 1794 reader_skipn(reader, 1); 1795 cur = reader_get_ptr(reader); 1796 } 1797 reader_init_strvalue(start, reader_get_cur(reader)-start, literal); 1798 if (*cur == quote) reader_skipn(reader, 1); 1799 1800 TRACE("%s\n", debug_strval(reader, literal)); 1801 return S_OK; 1802 } 1803 1804 /* [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'" 1805 [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] */ 1806 static HRESULT reader_parse_pub_literal(xmlreader *reader, strval *literal) 1807 { 1808 WCHAR *cur = reader_get_ptr(reader), quote; 1809 UINT start; 1810 1811 if (*cur != '"' && *cur != '\'') return WC_E_QUOTE; 1812 1813 quote = *cur; 1814 reader_skipn(reader, 1); 1815 1816 start = reader_get_cur(reader); 1817 cur = reader_get_ptr(reader); 1818 while (is_pubchar(*cur) && *cur != quote) 1819 
{ 1820 reader_skipn(reader, 1); 1821 cur = reader_get_ptr(reader); 1822 } 1823 reader_init_strvalue(start, reader_get_cur(reader)-start, literal); 1824 if (*cur == quote) reader_skipn(reader, 1); 1825 1826 TRACE("%s\n", debug_strval(reader, literal)); 1827 return S_OK; 1828 } 1829 1830 /* [75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral */ 1831 static HRESULT reader_parse_externalid(xmlreader *reader) 1832 { 1833 static WCHAR systemW[] = {'S','Y','S','T','E','M',0}; 1834 static WCHAR publicW[] = {'P','U','B','L','I','C',0}; 1835 struct reader_position position = reader->position; 1836 strval name, sys; 1837 HRESULT hr; 1838 int cnt; 1839 1840 if (!reader_cmp(reader, publicW)) { 1841 strval pub; 1842 1843 /* public id */ 1844 reader_skipn(reader, 6); 1845 cnt = reader_skipspaces(reader); 1846 if (!cnt) return WC_E_WHITESPACE; 1847 1848 hr = reader_parse_pub_literal(reader, &pub); 1849 if (FAILED(hr)) return hr; 1850 1851 reader_init_cstrvalue(publicW, strlenW(publicW), &name); 1852 hr = reader_add_attr(reader, NULL, &name, NULL, &pub, &position, 0); 1853 if (FAILED(hr)) return hr; 1854 1855 cnt = reader_skipspaces(reader); 1856 if (!cnt) return S_OK; 1857 1858 /* optional system id */ 1859 hr = reader_parse_sys_literal(reader, &sys); 1860 if (FAILED(hr)) return S_OK; 1861 1862 reader_init_cstrvalue(systemW, strlenW(systemW), &name); 1863 hr = reader_add_attr(reader, NULL, &name, NULL, &sys, &position, 0); 1864 if (FAILED(hr)) return hr; 1865 1866 return S_OK; 1867 } else if (!reader_cmp(reader, systemW)) { 1868 /* system id */ 1869 reader_skipn(reader, 6); 1870 cnt = reader_skipspaces(reader); 1871 if (!cnt) return WC_E_WHITESPACE; 1872 1873 hr = reader_parse_sys_literal(reader, &sys); 1874 if (FAILED(hr)) return hr; 1875 1876 reader_init_cstrvalue(systemW, strlenW(systemW), &name); 1877 return reader_add_attr(reader, NULL, &name, NULL, &sys, &position, 0); 1878 } 1879 1880 return S_FALSE; 1881 } 1882 1883 /* [28] doctypedecl 
::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>' */ 1884 static HRESULT reader_parse_dtd(xmlreader *reader) 1885 { 1886 static const WCHAR doctypeW[] = {'<','!','D','O','C','T','Y','P','E',0}; 1887 strval name; 1888 WCHAR *cur; 1889 HRESULT hr; 1890 1891 /* check if we have "<!DOCTYPE" */ 1892 if (reader_cmp(reader, doctypeW)) return S_FALSE; 1893 reader_shrink(reader); 1894 1895 /* DTD processing is not allowed by default */ 1896 if (reader->dtdmode == DtdProcessing_Prohibit) return WC_E_DTDPROHIBITED; 1897 1898 reader_skipn(reader, 9); 1899 if (!reader_skipspaces(reader)) return WC_E_WHITESPACE; 1900 1901 /* name */ 1902 hr = reader_parse_name(reader, &name); 1903 if (FAILED(hr)) return WC_E_DECLDOCTYPE; 1904 1905 reader_skipspaces(reader); 1906 1907 hr = reader_parse_externalid(reader); 1908 if (FAILED(hr)) return hr; 1909 1910 reader_skipspaces(reader); 1911 1912 cur = reader_get_ptr(reader); 1913 if (*cur != '>') 1914 { 1915 FIXME("internal subset parsing not implemented\n"); 1916 return E_NOTIMPL; 1917 } 1918 1919 /* skip '>' */ 1920 reader_skipn(reader, 1); 1921 1922 reader->nodetype = XmlNodeType_DocumentType; 1923 reader_set_strvalue(reader, StringValue_LocalName, &name); 1924 reader_set_strvalue(reader, StringValue_QualifiedName, &name); 1925 1926 return S_OK; 1927 } 1928 1929 /* [11 NS] LocalPart ::= NCName */ 1930 static HRESULT reader_parse_local(xmlreader *reader, strval *local, BOOL check_for_separator) 1931 { 1932 WCHAR *ptr; 1933 UINT start; 1934 1935 if (reader->resume[XmlReadResume_Local]) 1936 { 1937 start = reader->resume[XmlReadResume_Local]; 1938 ptr = reader_get_ptr(reader); 1939 } 1940 else 1941 { 1942 ptr = reader_get_ptr(reader); 1943 start = reader_get_cur(reader); 1944 } 1945 1946 while (is_ncnamechar(*ptr)) 1947 { 1948 reader_skipn(reader, 1); 1949 ptr = reader_get_ptr(reader); 1950 } 1951 1952 if (check_for_separator && *ptr == ':') 1953 return NC_E_QNAMECOLON; 1954 1955 if (is_reader_pending(reader)) 1956 { 1957 
reader->resume[XmlReadResume_Local] = start; 1958 return E_PENDING; 1959 } 1960 else 1961 reader->resume[XmlReadResume_Local] = 0; 1962 1963 reader_init_strvalue(start, reader_get_cur(reader)-start, local); 1964 1965 return S_OK; 1966 } 1967 1968 /* [7 NS] QName ::= PrefixedName | UnprefixedName 1969 [8 NS] PrefixedName ::= Prefix ':' LocalPart 1970 [9 NS] UnprefixedName ::= LocalPart 1971 [10 NS] Prefix ::= NCName */ 1972 static HRESULT reader_parse_qname(xmlreader *reader, strval *prefix, strval *local, strval *qname) 1973 { 1974 WCHAR *ptr; 1975 UINT start; 1976 HRESULT hr; 1977 1978 if (reader->resume[XmlReadResume_Name]) 1979 { 1980 start = reader->resume[XmlReadResume_Name]; 1981 ptr = reader_get_ptr(reader); 1982 } 1983 else 1984 { 1985 ptr = reader_get_ptr(reader); 1986 start = reader_get_cur(reader); 1987 reader->resume[XmlReadResume_Name] = start; 1988 if (!is_ncnamechar(*ptr)) return NC_E_QNAMECHARACTER; 1989 } 1990 1991 if (reader->resume[XmlReadResume_Local]) 1992 { 1993 hr = reader_parse_local(reader, local, FALSE); 1994 if (FAILED(hr)) return hr; 1995 1996 reader_init_strvalue(reader->resume[XmlReadResume_Name], 1997 local->start - reader->resume[XmlReadResume_Name] - 1, 1998 prefix); 1999 } 2000 else 2001 { 2002 /* skip prefix part */ 2003 while (is_ncnamechar(*ptr)) 2004 { 2005 reader_skipn(reader, 1); 2006 ptr = reader_get_ptr(reader); 2007 } 2008 2009 if (is_reader_pending(reader)) return E_PENDING; 2010 2011 /* got a qualified name */ 2012 if (*ptr == ':') 2013 { 2014 reader_init_strvalue(start, reader_get_cur(reader)-start, prefix); 2015 2016 /* skip ':' */ 2017 reader_skipn(reader, 1); 2018 hr = reader_parse_local(reader, local, TRUE); 2019 if (FAILED(hr)) return hr; 2020 } 2021 else 2022 { 2023 reader_init_strvalue(reader->resume[XmlReadResume_Name], reader_get_cur(reader)-reader->resume[XmlReadResume_Name], local); 2024 reader_init_strvalue(0, 0, prefix); 2025 } 2026 } 2027 2028 if (prefix->len) 2029 TRACE("qname %s:%s\n", 
debug_strval(reader, prefix), debug_strval(reader, local)); 2030 else 2031 TRACE("ncname %s\n", debug_strval(reader, local)); 2032 2033 reader_init_strvalue(prefix->len ? prefix->start : local->start, 2034 /* count ':' too */ 2035 (prefix->len ? prefix->len + 1 : 0) + local->len, 2036 qname); 2037 2038 reader->resume[XmlReadResume_Name] = 0; 2039 reader->resume[XmlReadResume_Local] = 0; 2040 2041 return S_OK; 2042 } 2043 2044 static WCHAR get_predefined_entity(const xmlreader *reader, const strval *name) 2045 { 2046 static const WCHAR entltW[] = {'l','t'}; 2047 static const WCHAR entgtW[] = {'g','t'}; 2048 static const WCHAR entampW[] = {'a','m','p'}; 2049 static const WCHAR entaposW[] = {'a','p','o','s'}; 2050 static const WCHAR entquotW[] = {'q','u','o','t'}; 2051 static const strval lt = { (WCHAR*)entltW, 2 }; 2052 static const strval gt = { (WCHAR*)entgtW, 2 }; 2053 static const strval amp = { (WCHAR*)entampW, 3 }; 2054 static const strval apos = { (WCHAR*)entaposW, 4 }; 2055 static const strval quot = { (WCHAR*)entquotW, 4 }; 2056 WCHAR *str = reader_get_strptr(reader, name); 2057 2058 switch (*str) 2059 { 2060 case 'l': 2061 if (strval_eq(reader, name, <)) return '<'; 2062 break; 2063 case 'g': 2064 if (strval_eq(reader, name, >)) return '>'; 2065 break; 2066 case 'a': 2067 if (strval_eq(reader, name, &)) 2068 return '&'; 2069 else if (strval_eq(reader, name, &apos)) 2070 return '\''; 2071 break; 2072 case 'q': 2073 if (strval_eq(reader, name, ")) return '\"'; 2074 break; 2075 default: 2076 ; 2077 } 2078 2079 return 0; 2080 } 2081 2082 /* [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';' 2083 [67] Reference ::= EntityRef | CharRef 2084 [68] EntityRef ::= '&' Name ';' */ 2085 static HRESULT reader_parse_reference(xmlreader *reader) 2086 { 2087 encoded_buffer *buffer = &reader->input->buffer->utf16; 2088 WCHAR *start = reader_get_ptr(reader), *ptr; 2089 UINT cur = reader_get_cur(reader); 2090 WCHAR ch = 0; 2091 int len; 2092 2093 /* skip '&' */ 2094 
reader_skipn(reader, 1); 2095 ptr = reader_get_ptr(reader); 2096 2097 if (*ptr == '#') 2098 { 2099 reader_skipn(reader, 1); 2100 ptr = reader_get_ptr(reader); 2101 2102 /* hex char or decimal */ 2103 if (*ptr == 'x') 2104 { 2105 reader_skipn(reader, 1); 2106 ptr = reader_get_ptr(reader); 2107 2108 while (*ptr != ';') 2109 { 2110 if ((*ptr >= '0' && *ptr <= '9')) 2111 ch = ch*16 + *ptr - '0'; 2112 else if ((*ptr >= 'a' && *ptr <= 'f')) 2113 ch = ch*16 + *ptr - 'a' + 10; 2114 else if ((*ptr >= 'A' && *ptr <= 'F')) 2115 ch = ch*16 + *ptr - 'A' + 10; 2116 else 2117 return ch ? WC_E_SEMICOLON : WC_E_HEXDIGIT; 2118 reader_skipn(reader, 1); 2119 ptr = reader_get_ptr(reader); 2120 } 2121 } 2122 else 2123 { 2124 while (*ptr != ';') 2125 { 2126 if ((*ptr >= '0' && *ptr <= '9')) 2127 { 2128 ch = ch*10 + *ptr - '0'; 2129 reader_skipn(reader, 1); 2130 ptr = reader_get_ptr(reader); 2131 } 2132 else 2133 return ch ? WC_E_SEMICOLON : WC_E_DIGIT; 2134 } 2135 } 2136 2137 if (!is_char(ch)) return WC_E_XMLCHARACTER; 2138 2139 /* normalize */ 2140 if (is_wchar_space(ch)) ch = ' '; 2141 2142 ptr = reader_get_ptr(reader); 2143 start = reader_get_ptr2(reader, cur); 2144 len = buffer->written - ((char *)ptr - buffer->data); 2145 memmove(start + 1, ptr + 1, len); 2146 2147 buffer->written -= (reader_get_cur(reader) - cur) * sizeof(WCHAR); 2148 buffer->cur = cur + 1; 2149 2150 *start = ch; 2151 } 2152 else 2153 { 2154 strval name; 2155 HRESULT hr; 2156 2157 hr = reader_parse_name(reader, &name); 2158 if (FAILED(hr)) return hr; 2159 2160 ptr = reader_get_ptr(reader); 2161 if (*ptr != ';') return WC_E_SEMICOLON; 2162 2163 /* predefined entities resolve to a single character */ 2164 ch = get_predefined_entity(reader, &name); 2165 if (ch) 2166 { 2167 len = buffer->written - ((char*)ptr - buffer->data) - sizeof(WCHAR); 2168 memmove(start+1, ptr+1, len); 2169 buffer->cur = cur + 1; 2170 buffer->written -= (ptr - start) * sizeof(WCHAR); 2171 2172 *start = ch; 2173 } 2174 else 2175 { 2176 
FIXME("undeclared entity %s\n", debug_strval(reader, &name)); 2177 return WC_E_UNDECLAREDENTITY; 2178 } 2179 2180 } 2181 2182 return S_OK; 2183 } 2184 2185 /* [10 NS] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'" */ 2186 static HRESULT reader_parse_attvalue(xmlreader *reader, strval *value) 2187 { 2188 WCHAR *ptr, quote; 2189 UINT start; 2190 2191 ptr = reader_get_ptr(reader); 2192 2193 /* skip opening quote */ 2194 quote = *ptr; 2195 if (quote != '\"' && quote != '\'') return WC_E_QUOTE; 2196 reader_skipn(reader, 1); 2197 2198 ptr = reader_get_ptr(reader); 2199 start = reader_get_cur(reader); 2200 while (*ptr) 2201 { 2202 if (*ptr == '<') return WC_E_LESSTHAN; 2203 2204 if (*ptr == quote) 2205 { 2206 reader_init_strvalue(start, reader_get_cur(reader)-start, value); 2207 /* skip closing quote */ 2208 reader_skipn(reader, 1); 2209 return S_OK; 2210 } 2211 2212 if (*ptr == '&') 2213 { 2214 HRESULT hr = reader_parse_reference(reader); 2215 if (FAILED(hr)) return hr; 2216 } 2217 else 2218 { 2219 /* replace all whitespace chars with ' ' */ 2220 if (is_wchar_space(*ptr)) *ptr = ' '; 2221 reader_skipn(reader, 1); 2222 } 2223 ptr = reader_get_ptr(reader); 2224 } 2225 2226 return WC_E_QUOTE; 2227 } 2228 2229 /* [1 NS] NSAttName ::= PrefixedAttName | DefaultAttName 2230 [2 NS] PrefixedAttName ::= 'xmlns:' NCName 2231 [3 NS] DefaultAttName ::= 'xmlns' 2232 [15 NS] Attribute ::= NSAttName Eq AttValue | QName Eq AttValue */ 2233 static HRESULT reader_parse_attribute(xmlreader *reader) 2234 { 2235 struct reader_position position = reader->position; 2236 strval prefix, local, qname, value; 2237 enum attribute_flags flags = 0; 2238 HRESULT hr; 2239 2240 hr = reader_parse_qname(reader, &prefix, &local, &qname); 2241 if (FAILED(hr)) return hr; 2242 2243 if (strval_eq(reader, &prefix, &strval_xmlns)) 2244 flags |= ATTRIBUTE_NS_DEFINITION; 2245 2246 if (strval_eq(reader, &qname, &strval_xmlns)) 2247 flags |= ATTRIBUTE_DEFAULT_NS_DEFINITION; 2248 2249 hr = 
reader_parse_eq(reader); 2250 if (FAILED(hr)) return hr; 2251 2252 hr = reader_parse_attvalue(reader, &value); 2253 if (FAILED(hr)) return hr; 2254 2255 if (flags & (ATTRIBUTE_NS_DEFINITION | ATTRIBUTE_DEFAULT_NS_DEFINITION)) 2256 reader_push_ns(reader, &local, &value, !!(flags & ATTRIBUTE_DEFAULT_NS_DEFINITION)); 2257 2258 TRACE("%s=%s\n", debug_strval(reader, &local), debug_strval(reader, &value)); 2259 return reader_add_attr(reader, &prefix, &local, &qname, &value, &position, flags); 2260 } 2261 2262 /* [12 NS] STag ::= '<' QName (S Attribute)* S? '>' 2263 [14 NS] EmptyElemTag ::= '<' QName (S Attribute)* S? '/>' */ 2264 static HRESULT reader_parse_stag(xmlreader *reader, strval *prefix, strval *local, strval *qname) 2265 { 2266 struct reader_position position = reader->position; 2267 HRESULT hr; 2268 2269 hr = reader_parse_qname(reader, prefix, local, qname); 2270 if (FAILED(hr)) return hr; 2271 2272 for (;;) 2273 { 2274 static const WCHAR endW[] = {'/','>',0}; 2275 2276 reader_skipspaces(reader); 2277 2278 /* empty element */ 2279 if ((reader->is_empty_element = !reader_cmp(reader, endW))) 2280 { 2281 struct element *element = &reader->empty_element; 2282 2283 /* skip '/>' */ 2284 reader_skipn(reader, 2); 2285 2286 reader_free_strvalued(reader, &element->qname); 2287 reader_free_strvalued(reader, &element->localname); 2288 2289 element->prefix = *prefix; 2290 reader_strvaldup(reader, qname, &element->qname); 2291 reader_strvaldup(reader, local, &element->localname); 2292 element->position = position; 2293 reader_mark_ns_nodes(reader, element); 2294 return S_OK; 2295 } 2296 2297 /* got a start tag */ 2298 if (!reader_cmp(reader, gtW)) 2299 { 2300 /* skip '>' */ 2301 reader_skipn(reader, 1); 2302 return reader_push_element(reader, prefix, local, qname, &position); 2303 } 2304 2305 hr = reader_parse_attribute(reader); 2306 if (FAILED(hr)) return hr; 2307 } 2308 2309 return S_OK; 2310 } 2311 2312 /* [39] element ::= EmptyElemTag | STag content ETag */ 2313 static 
HRESULT reader_parse_element(xmlreader *reader)
{
    /* Parses the start of an element (start tag or empty-element tag).
       Returns S_FALSE when called in the initial resume state and input is not
       at '<', E_FAIL for resume states other than Initial/STag, otherwise the
       failure from reader_parse_stag() or S_OK. */
    HRESULT hr;

    switch (reader->resumestate)
    {
    case XmlReadResumeState_Initial:
        /* check if we are really on element */
        if (reader_cmp(reader, ltW)) return S_FALSE;

        /* skip '<' */
        reader_skipn(reader, 1);

        reader_shrink(reader);
        reader->resumestate = XmlReadResumeState_STag;
        /* fallthrough */
    case XmlReadResumeState_STag:
        {
            strval qname, prefix, local;

            /* this handles empty elements too */
            hr = reader_parse_stag(reader, &prefix, &local, &qname);
            if (FAILED(hr)) return hr;

            /* FIXME: need to check for defined namespace to reject invalid prefix */

            /* if we got empty element and stack is empty go straight to Misc */
            if (reader->is_empty_element && list_empty(&reader->elements))
                reader->instate = XmlReadInState_MiscEnd;
            else
                reader->instate = XmlReadInState_Content;

            reader->nodetype = XmlNodeType_Element;
            reader->resumestate = XmlReadResumeState_Initial;
            reader_set_strvalue(reader, StringValue_Prefix, &prefix);
            reader_set_strvalue(reader, StringValue_QualifiedName, &qname);
            reader_set_strvalue(reader, StringValue_Value, &strval_empty);
            break;
        }
    default:
        hr = E_FAIL;
    }

    return hr;
}

/* [13 NS] ETag ::= '</' QName S? '>'

   Parses an end tag; the first two characters of input are skipped as '</'.
   Returns WC_E_GREATERTHAN when the name is not followed by '>', and
   WC_E_ELEMENTMATCH when the name differs from the qualified name of the
   element at the head of the element list. On success the node type becomes
   XmlNodeType_EndElement. */
static HRESULT reader_parse_endtag(xmlreader *reader)
{
    struct reader_position position;
    strval prefix, local, qname;
    struct element *element;
    HRESULT hr;

    /* skip '</' */
    reader_skipn(reader, 2);

    position = reader->position;
    hr = reader_parse_qname(reader, &prefix, &local, &qname);
    if (FAILED(hr)) return hr;

    reader_skipspaces(reader);

    if (reader_cmp(reader, gtW)) return WC_E_GREATERTHAN;

    /* skip '>' */
    reader_skipn(reader, 1);

    /* Element stack should never be empty at this point, because we shouldn't get to
       content parsing if it's empty. */
    element = LIST_ENTRY(list_head(&reader->elements), struct element, entry);
    if (!strval_eq(reader, &element->qname, &qname)) return WC_E_ELEMENTMATCH;

    /* update position stored for start tag, we won't be using it */
    element->position = position;

    reader->nodetype = XmlNodeType_EndElement;
    reader->is_empty_element = FALSE;
    reader_set_strvalue(reader, StringValue_Prefix, &prefix);

    return S_OK;
}

/* [18] CDSect  ::= CDStart CData CDEnd
   [19] CDStart ::= '<![CDATA['
   [20] CData   ::= (Char* - (Char* ']]>' Char*))
   [21] CDEnd   ::= ']]>'

   Resumable: the offset where the section text starts is kept in
   reader->resume[XmlReadResume_Body]. When the available data ends before
   ']]>' is seen, S_OK is returned with resumestate still set to
   XmlReadResumeState_CDATA and no value set. */
static HRESULT reader_parse_cdata(xmlreader *reader)
{
    WCHAR *ptr;
    UINT start;

    if (reader->resumestate == XmlReadResumeState_CDATA)
    {
        /* continue a section that was started by an earlier call */
        start = reader->resume[XmlReadResume_Body];
        ptr = reader_get_ptr(reader);
    }
    else
    {
        /* skip markup '<![CDATA[' */
        reader_skipn(reader, 9);
        reader_shrink(reader);
        ptr = reader_get_ptr(reader);
        start = reader_get_cur(reader);
        reader->nodetype = XmlNodeType_CDATA;
        reader->resume[XmlReadResume_Body] = start;
        reader->resumestate = XmlReadResumeState_CDATA;
        reader_set_strvalue(reader, StringValue_Value, NULL);
    }

    while (*ptr)
    {
        if (*ptr == ']' && *(ptr+1) == ']' && *(ptr+2) == '>')
        {
            strval value;

            /* value covers the text between the opening markup and ']]>' */
            reader_init_strvalue(start, reader_get_cur(reader)-start, &value);

            /* skip ']]>' */
            reader_skipn(reader, 3);
            TRACE("%s\n", debug_strval(reader, &value));

            reader_set_strvalue(reader, StringValue_Value, &value);
            reader->resume[XmlReadResume_Body] = 0;
            reader->resumestate = XmlReadResumeState_Initial;
            return S_OK;
        }
        else
        {
            reader_skipn(reader, 1);
            ptr++;
        }
    }

    return S_OK;
}

/* [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)

   Resumable in the same way as CDATA parsing, using
   XmlReadResumeState_CharData. The node starts out as Whitespace or Text
   depending on its first character and is switched to Text as soon as a
   non-space character is met. Returns WC_E_CDSECTEND if ']]>' is found in
   the text. */
static HRESULT reader_parse_chardata(xmlreader *reader)
{
    struct reader_position position;
    WCHAR *ptr;
    UINT start;

    if (reader->resumestate == XmlReadResumeState_CharData)
    {
        start = reader->resume[XmlReadResume_Body];
        ptr = reader_get_ptr(reader);
    }
    else
    {
        reader_shrink(reader);
        ptr = reader_get_ptr(reader);
        start = reader_get_cur(reader);
        /* There's no text */
        if (!*ptr || *ptr == '<') return S_OK;
        reader->nodetype = is_wchar_space(*ptr) ? XmlNodeType_Whitespace : XmlNodeType_Text;
        reader->resume[XmlReadResume_Body] = start;
        reader->resumestate = XmlReadResumeState_CharData;
        reader_set_strvalue(reader, StringValue_Value, NULL);
    }

    position = reader->position;
    while (*ptr)
    {
        static const WCHAR ampW[] = {'&',0};

        /* CDATA closing sequence ']]>' is not allowed */
        if (ptr[0] == ']' && ptr[1] == ']' && ptr[2] == '>')
            return WC_E_CDSECTEND;

        /* Found next markup part */
        if (ptr[0] == '<')
        {
            strval value;

            /* the position of this text node is kept in the empty_element slot */
            reader->empty_element.position = position;
            reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
            reader_set_strvalue(reader, StringValue_Value, &value);
            reader->resume[XmlReadResume_Body] = 0;
            reader->resumestate = XmlReadResumeState_Initial;
            return S_OK;
        }

        /* this covers a case when text has leading whitespace chars */
        if (!is_wchar_space(*ptr)) reader->nodetype = XmlNodeType_Text;

        /* NOTE(review): the result of reader_parse_reference() is ignored */
        if (!reader_cmp(reader, ampW))
            reader_parse_reference(reader);
        else
            reader_skipn(reader, 1);

        /* re-read the pointer, the calls above advance the input position */
        ptr = reader_get_ptr(reader);
    }

    return S_OK;
}

/* [43] content ::= CharData?
((element | Reference | CDSect | PI | Comment) CharData?)*

   Parses one piece of element content. If a previous call left a resume
   state set, parsing continues in the matching parser; otherwise the parser
   is selected by the markup found at the current input position. */
static HRESULT reader_parse_content(xmlreader *reader)
{
    static const WCHAR cdstartW[] = {'<','!','[','C','D','A','T','A','[',0};
    static const WCHAR etagW[] = {'<','/',0};

    if (reader->resumestate != XmlReadResumeState_Initial)
    {
        switch (reader->resumestate)
        {
        case XmlReadResumeState_CDATA:
            return reader_parse_cdata(reader);
        case XmlReadResumeState_Comment:
            return reader_parse_comment(reader);
        case XmlReadResumeState_PIBody:
        case XmlReadResumeState_PITarget:
            return reader_parse_pi(reader);
        case XmlReadResumeState_CharData:
            return reader_parse_chardata(reader);
        default:
            /* logged only; parsing goes on as if there were nothing to resume */
            ERR("unknown resume state %d\n", reader->resumestate);
        }
    }

    reader_shrink(reader);

    /* handle end tag here, it indicates end of content as well */
    if (!reader_cmp(reader, etagW))
        return reader_parse_endtag(reader);

    if (!reader_cmp(reader, commentW))
        return reader_parse_comment(reader);

    if (!reader_cmp(reader, piW))
        return reader_parse_pi(reader);

    if (!reader_cmp(reader, cdstartW))
        return reader_parse_cdata(reader);

    /* a plain '<' is tested last, the more specific '<' prefixes are handled above */
    if (!reader_cmp(reader, ltW))
        return reader_parse_element(reader);

    /* what's left must be CharData */
    return reader_parse_chardata(reader);
}

/* Advances the reader to the next node.
   First the depth and namespace bookkeeping for the node being left is
   updated, then the parser selected by reader->instate is run; states that
   produce no node move on to the next state within the same call.
   Returns S_FALSE once the end of input has been reached. */
static HRESULT reader_parse_nextnode(xmlreader *reader)
{
    XmlNodeType nodetype = reader_get_nodetype(reader);
    HRESULT hr;

    if (!is_reader_pending(reader))
    {
        reader->chunk_read_off = 0;
        reader_clear_attrs(reader);
    }

    /* When moving from EndElement or empty element, pop its own namespace definitions */
    switch (nodetype)
    {
    case XmlNodeType_Attribute:
        reader_dec_depth(reader);
        /* fallthrough */
    case XmlNodeType_Element:
        if (reader->is_empty_element)
            reader_pop_ns_nodes(reader, &reader->empty_element);
        else if (FAILED(hr = reader_inc_depth(reader)))
            return hr;
        break;
    case XmlNodeType_EndElement:
        reader_pop_element(reader);
        reader_dec_depth(reader);
        break;
    default:
        ;
    }

    for (;;)
    {
        switch (reader->instate)
        {
        /* if it's the first call for a new input we need to detect stream encoding */
        case XmlReadInState_Initial:
            {
                xml_encoding enc;

                hr = readerinput_growraw(reader->input);
                if (FAILED(hr)) return hr;

                reader->position.line_number = 1;
                reader->position.line_position = 1;

                /* try to detect encoding by BOM or data and set input code page */
                hr = readerinput_detectencoding(reader->input, &enc);
                TRACE("detected encoding %s, 0x%08x\n", enc == XmlEncoding_Unknown ? "(unknown)" :
                        debugstr_w(xml_encoding_map[enc].name), hr);
                if (FAILED(hr)) return hr;

                /* always switch first time because we have to put something in */
                readerinput_switchencoding(reader->input, enc);

                /* parse xml declaration */
                hr = reader_parse_xmldecl(reader);
                if (FAILED(hr)) return hr;

                readerinput_shrinkraw(reader->input, -1);
                reader->instate = XmlReadInState_Misc_DTD;
                /* S_OK is reported to the caller right away, other success codes continue */
                if (hr == S_OK) return hr;
            }
            break;
        case XmlReadInState_Misc_DTD:
            hr = reader_parse_misc(reader);
            if (FAILED(hr)) return hr;

            if (hr == S_FALSE)
                reader->instate = XmlReadInState_DTD;
            else
                return hr;
            break;
        case XmlReadInState_DTD:
            hr = reader_parse_dtd(reader);
            if (FAILED(hr)) return hr;

            if (hr == S_OK)
            {
                reader->instate = XmlReadInState_DTD_Misc;
                return hr;
            }
            else
                reader->instate = XmlReadInState_Element;
            break;
        case XmlReadInState_DTD_Misc:
            hr = reader_parse_misc(reader);
            if (FAILED(hr)) return hr;

            if (hr == S_FALSE)
                reader->instate = XmlReadInState_Element;
            else
                return hr;
            break;
        case XmlReadInState_Element:
            return reader_parse_element(reader);
        case XmlReadInState_Content:
            return reader_parse_content(reader);
        case XmlReadInState_MiscEnd:
            hr = reader_parse_misc(reader);
            if (hr != S_FALSE) return hr;

            /* anything left after the trailing Misc is an error */
            if (*reader_get_ptr(reader))
            {
                WARN("found garbage in the end of XML\n");
                return WC_E_SYNTAX;
            }

            reader->instate = XmlReadInState_Eof;
            reader->state = XmlReadState_EndOfFile;
            reader->nodetype = XmlNodeType_None;
            /* hr is S_FALSE here */
            return hr;
        case XmlReadInState_Eof:
            return S_FALSE;
        default:
            FIXME("internal state %d not handled\n", reader->instate);
            return E_NOTIMPL;
        }
    }

    /* not reached, the loop above is left only through return */
    return E_NOTIMPL;
}

/* IUnknown::QueryInterface; only IUnknown and IXmlReader are supported. */
static HRESULT WINAPI xmlreader_QueryInterface(IXmlReader *iface, REFIID riid, void** ppvObject)
{
    xmlreader *This = impl_from_IXmlReader(iface);

    TRACE("(%p)->(%s %p)\n", This, debugstr_guid(riid), ppvObject);

    if (IsEqualGUID(riid, &IID_IUnknown) ||
        IsEqualGUID(riid, &IID_IXmlReader))
    {
        *ppvObject = iface;
    }
    else
    {
        FIXME("interface %s not implemented\n", debugstr_guid(riid));
        *ppvObject = NULL;
        return E_NOINTERFACE;
    }

    IXmlReader_AddRef(iface);

    return S_OK;
}

/* IUnknown::AddRef; returns the incremented reference count. */
static ULONG WINAPI xmlreader_AddRef(IXmlReader *iface)
{
    xmlreader *This = impl_from_IXmlReader(iface);
    ULONG ref = InterlockedIncrement(&This->ref);
    TRACE("(%p)->(%d)\n", This, ref);
    return ref;
}

/* Empties both namespace lists of the reader, freeing every entry. */
static void reader_clear_ns(xmlreader *reader)
{
    struct ns *ns, *ns2;

    LIST_FOR_EACH_ENTRY_SAFE(ns, ns2, &reader->ns, struct ns, entry) {
        list_remove(&ns->entry);
        reader_free_strvalued(reader, &ns->prefix);
        reader_free_strvalued(reader, &ns->uri);
        reader_free(reader, ns);
    }

    /* only the uri is freed for entries of the nsdef list */
    LIST_FOR_EACH_ENTRY_SAFE(ns, ns2, &reader->nsdef, struct ns, entry) {
        list_remove(&ns->entry);
        reader_free_strvalued(reader, &ns->uri);
        reader_free(reader, ns);
    }
}

/* Drops all per-document parser data (elements, attributes, namespaces,
   string values) and resets position, depth, node type and resume
   information. reader->state and reader->instate are not touched here. */
static void reader_reset_parser(xmlreader *reader)
{
    reader->position.line_number = 0;
    reader->position.line_position = 0;

    reader_clear_elements(reader);
    reader_clear_attrs(reader);
    reader_clear_ns(reader);
    reader_free_strvalues(reader);

    reader->depth = 0;
    reader->nodetype = XmlNodeType_None;
    reader->resumestate = XmlReadResumeState_Initial;
    memset(reader->resume, 0, sizeof(reader->resume));
    reader->is_empty_element = FALSE;
}

/* IUnknown::Release; destroys the reader when the count drops to zero. */
static ULONG WINAPI xmlreader_Release(IXmlReader *iface)
{
    xmlreader *This = impl_from_IXmlReader(iface);
    LONG ref = InterlockedDecrement(&This->ref);

    TRACE("(%p)->(%d)\n", This, ref);

    if (ref == 0)
    {
        /* saved first, it has to be released after the reader itself is freed */
        IMalloc *imalloc = This->imalloc;
        reader_reset_parser(This);
        if (This->input) IUnknown_Release(&This->input->IXmlReaderInput_iface);
        if (This->resolver) IXmlResolver_Release(This->resolver);
        if (This->mlang) IUnknown_Release(This->mlang);
        reader_free(This, This);
        if (imalloc) IMalloc_Release(imalloc);
    }

    return ref;
}

/* IXmlReader::SetInput.
   Releases the current input, resets the parser, and attaches the new input.
   A NULL input only resets the reader. An IXmlReaderInput that is not this
   module's own implementation is rejected with E_FAIL; any other object is
   wrapped in a newly created IXmlReaderInput. */
static HRESULT WINAPI xmlreader_SetInput(IXmlReader* iface, IUnknown *input)
{
    xmlreader *This = impl_from_IXmlReader(iface);
    IXmlReaderInput *readerinput;
    HRESULT hr;

    TRACE("(%p)->(%p)\n", This, input);

    if (This->input)
    {
        readerinput_release_stream(This->input);
        IUnknown_Release(&This->input->IXmlReaderInput_iface);
        This->input = NULL;
    }

    reader_reset_parser(This);

    /* just reset current input */
    if (!input)
    {
        This->state = XmlReadState_Initial;
        return S_OK;
    }

    /* now try IXmlReaderInput, ISequentialStream, IStream */
    hr = IUnknown_QueryInterface(input, &IID_IXmlReaderInput, (void**)&readerinput);
    if (hr == S_OK)
    {
        /* the vtable pointer tells our own implementation from a foreign one */
        if (readerinput->lpVtbl == &xmlreaderinputvtbl)
            This->input = impl_from_IXmlReaderInput(readerinput);
        else
        {
            ERR("got external IXmlReaderInput implementation: %p, vtbl=%p\n",
                readerinput, readerinput->lpVtbl);
            IUnknown_Release(readerinput);
            return E_FAIL;

        }
    }

    if (hr != S_OK || !readerinput)
    {
        /* create IXmlReaderInput based on the supplied interface */
        hr = CreateXmlReaderInputWithEncodingName(input,
                                         This->imalloc, NULL, FALSE, NULL, &readerinput);
        if (hr != S_OK) return hr;
        This->input = impl_from_IXmlReaderInput(readerinput);
    }

    /* set stream for supplied IXmlReaderInput */
    hr = readerinput_query_for_stream(This->input);
    if (hr == S_OK)
    {
        This->state = XmlReadState_Initial;
        This->instate = XmlReadInState_Initial;
    }
    return hr;
}

/* IXmlReader::GetProperty.
   Interface-valued properties are returned with an added reference.
   Returns E_INVALIDARG for a NULL value pointer and E_NOTIMPL for
   properties that are not handled. */
static HRESULT WINAPI xmlreader_GetProperty(IXmlReader* iface, UINT property, LONG_PTR *value)
{
    xmlreader *This = impl_from_IXmlReader(iface);

    TRACE("(%p)->(%s %p)\n", This, debugstr_reader_prop(property), value);

    if (!value) return E_INVALIDARG;

    switch (property)
    {
        case XmlReaderProperty_MultiLanguage:
            *value = (LONG_PTR)This->mlang;
            if (This->mlang)
                IUnknown_AddRef(This->mlang);
            break;
        case XmlReaderProperty_XmlResolver:
            *value = (LONG_PTR)This->resolver;
            if (This->resolver)
                IXmlResolver_AddRef(This->resolver);
            break;
        case XmlReaderProperty_DtdProcessing:
            *value = This->dtdmode;
            break;
        case XmlReaderProperty_ReadState:
            *value = This->state;
            break;
        case XmlReaderProperty_MaxElementDepth:
            *value = This->max_depth;
            break;
        default:
            FIXME("Unimplemented property (%u)\n", property);
            return E_NOTIMPL;
    }

    return S_OK;
}

static HRESULT WINAPI xmlreader_SetProperty(IXmlReader* iface, UINT
property, LONG_PTR value) 2863 { 2864 xmlreader *This = impl_from_IXmlReader(iface); 2865 2866 TRACE("(%p)->(%s 0x%lx)\n", This, debugstr_reader_prop(property), value); 2867 2868 switch (property) 2869 { 2870 case XmlReaderProperty_MultiLanguage: 2871 if (This->mlang) 2872 IUnknown_Release(This->mlang); 2873 This->mlang = (IUnknown*)value; 2874 if (This->mlang) 2875 IUnknown_AddRef(This->mlang); 2876 if (This->mlang) 2877 FIXME("Ignoring MultiLanguage %p\n", This->mlang); 2878 break; 2879 case XmlReaderProperty_XmlResolver: 2880 if (This->resolver) 2881 IXmlResolver_Release(This->resolver); 2882 This->resolver = (IXmlResolver*)value; 2883 if (This->resolver) 2884 IXmlResolver_AddRef(This->resolver); 2885 break; 2886 case XmlReaderProperty_DtdProcessing: 2887 if (value < 0 || value > _DtdProcessing_Last) return E_INVALIDARG; 2888 This->dtdmode = value; 2889 break; 2890 case XmlReaderProperty_MaxElementDepth: 2891 This->max_depth = value; 2892 break; 2893 default: 2894 FIXME("Unimplemented property (%u)\n", property); 2895 return E_NOTIMPL; 2896 } 2897 2898 return S_OK; 2899 } 2900 2901 static HRESULT WINAPI xmlreader_Read(IXmlReader* iface, XmlNodeType *nodetype) 2902 { 2903 xmlreader *This = impl_from_IXmlReader(iface); 2904 XmlNodeType oldtype = This->nodetype; 2905 XmlNodeType type; 2906 HRESULT hr; 2907 2908 TRACE("(%p)->(%p)\n", This, nodetype); 2909 2910 if (!nodetype) 2911 nodetype = &type; 2912 2913 switch (This->state) 2914 { 2915 case XmlReadState_Closed: 2916 hr = S_FALSE; 2917 break; 2918 case XmlReadState_Error: 2919 hr = This->error; 2920 break; 2921 default: 2922 hr = reader_parse_nextnode(This); 2923 if (SUCCEEDED(hr) && oldtype == XmlNodeType_None && This->nodetype != oldtype) 2924 This->state = XmlReadState_Interactive; 2925 2926 if (FAILED(hr)) 2927 { 2928 This->state = XmlReadState_Error; 2929 This->nodetype = XmlNodeType_None; 2930 This->depth = 0; 2931 This->error = hr; 2932 } 2933 } 2934 2935 TRACE("node type %s\n", 
debugstr_nodetype(This->nodetype)); 2936 *nodetype = This->nodetype; 2937 2938 return hr; 2939 } 2940 2941 static HRESULT WINAPI xmlreader_GetNodeType(IXmlReader* iface, XmlNodeType *node_type) 2942 { 2943 xmlreader *This = impl_from_IXmlReader(iface); 2944 2945 TRACE("(%p)->(%p)\n", This, node_type); 2946 2947 if (!node_type) 2948 return E_INVALIDARG; 2949 2950 *node_type = reader_get_nodetype(This); 2951 return This->state == XmlReadState_Closed ? S_FALSE : S_OK; 2952 } 2953 2954 static void reader_set_current_attribute(xmlreader *reader, struct attribute *attr) 2955 { 2956 reader->attr = attr; 2957 reader->chunk_read_off = 0; 2958 reader_set_strvalue(reader, StringValue_Prefix, &attr->prefix); 2959 reader_set_strvalue(reader, StringValue_QualifiedName, &attr->qname); 2960 reader_set_strvalue(reader, StringValue_Value, &attr->value); 2961 } 2962 2963 static HRESULT reader_move_to_first_attribute(xmlreader *reader) 2964 { 2965 if (!reader->attr_count) 2966 return S_FALSE; 2967 2968 if (!reader->attr) 2969 reader_inc_depth(reader); 2970 2971 reader_set_current_attribute(reader, LIST_ENTRY(list_head(&reader->attrs), struct attribute, entry)); 2972 2973 return S_OK; 2974 } 2975 2976 static HRESULT WINAPI xmlreader_MoveToFirstAttribute(IXmlReader* iface) 2977 { 2978 xmlreader *This = impl_from_IXmlReader(iface); 2979 2980 TRACE("(%p)\n", This); 2981 2982 return reader_move_to_first_attribute(This); 2983 } 2984 2985 static HRESULT WINAPI xmlreader_MoveToNextAttribute(IXmlReader* iface) 2986 { 2987 xmlreader *This = impl_from_IXmlReader(iface); 2988 const struct list *next; 2989 2990 TRACE("(%p)\n", This); 2991 2992 if (!This->attr_count) return S_FALSE; 2993 2994 if (!This->attr) 2995 return reader_move_to_first_attribute(This); 2996 2997 next = list_next(&This->attrs, &This->attr->entry); 2998 if (next) 2999 reader_set_current_attribute(This, LIST_ENTRY(next, struct attribute, entry)); 3000 3001 return next ? 
S_OK : S_FALSE; 3002 } 3003 3004 static void reader_get_attribute_ns_uri(xmlreader *reader, struct attribute *attr, const WCHAR **uri, UINT *len) 3005 { 3006 static const WCHAR xmlns_uriW[] = {'h','t','t','p',':','/','/','w','w','w','.','w','3','.','o','r','g','/', 3007 '2','0','0','0','/','x','m','l','n','s','/',0}; 3008 static const WCHAR xml_uriW[] = {'h','t','t','p',':','/','/','w','w','w','.','w','3','.','o','r','g','/', 3009 'X','M','L','/','1','9','9','8','/','n','a','m','e','s','p','a','c','e',0}; 3010 3011 /* Check for reserved prefixes first */ 3012 if ((strval_eq(reader, &attr->prefix, &strval_empty) && strval_eq(reader, &attr->localname, &strval_xmlns)) || 3013 strval_eq(reader, &attr->prefix, &strval_xmlns)) 3014 { 3015 *uri = xmlns_uriW; 3016 *len = ARRAY_SIZE(xmlns_uriW) - 1; 3017 } 3018 else if (strval_eq(reader, &attr->prefix, &strval_xml)) 3019 { 3020 *uri = xml_uriW; 3021 *len = ARRAY_SIZE(xml_uriW) - 1; 3022 } 3023 else 3024 { 3025 *uri = NULL; 3026 *len = 0; 3027 } 3028 3029 if (!*uri) 3030 { 3031 struct ns *ns; 3032 3033 if ((ns = reader_lookup_ns(reader, &attr->prefix))) 3034 { 3035 *uri = ns->uri.str; 3036 *len = ns->uri.len; 3037 } 3038 else 3039 { 3040 *uri = emptyW; 3041 *len = 0; 3042 } 3043 } 3044 } 3045 3046 static void reader_get_attribute_local_name(xmlreader *reader, struct attribute *attr, const WCHAR **name, UINT *len) 3047 { 3048 if (attr->flags & ATTRIBUTE_DEFAULT_NS_DEFINITION) 3049 { 3050 *name = xmlnsW; 3051 *len = 5; 3052 } 3053 else if (attr->flags & ATTRIBUTE_NS_DEFINITION) 3054 { 3055 const struct ns *ns = reader_lookup_ns(reader, &attr->localname); 3056 *name = ns->prefix.str; 3057 *len = ns->prefix.len; 3058 } 3059 else 3060 { 3061 *name = attr->localname.str; 3062 *len = attr->localname.len; 3063 } 3064 } 3065 3066 static HRESULT WINAPI xmlreader_MoveToAttributeByName(IXmlReader* iface, 3067 const WCHAR *local_name, const WCHAR *namespace_uri) 3068 { 3069 xmlreader *This = impl_from_IXmlReader(iface); 3070 UINT 
target_name_len, target_uri_len; 3071 struct attribute *attr; 3072 3073 TRACE("(%p)->(%s %s)\n", This, debugstr_w(local_name), debugstr_w(namespace_uri)); 3074 3075 if (!local_name) 3076 return E_INVALIDARG; 3077 3078 if (!This->attr_count) 3079 return S_FALSE; 3080 3081 if (!namespace_uri) 3082 namespace_uri = emptyW; 3083 3084 target_name_len = strlenW(local_name); 3085 target_uri_len = strlenW(namespace_uri); 3086 3087 LIST_FOR_EACH_ENTRY(attr, &This->attrs, struct attribute, entry) 3088 { 3089 UINT name_len, uri_len; 3090 const WCHAR *name, *uri; 3091 3092 reader_get_attribute_local_name(This, attr, &name, &name_len); 3093 reader_get_attribute_ns_uri(This, attr, &uri, &uri_len); 3094 3095 if (name_len == target_name_len && uri_len == target_uri_len && 3096 !strcmpW(name, local_name) && !strcmpW(uri, namespace_uri)) 3097 { 3098 reader_set_current_attribute(This, attr); 3099 return S_OK; 3100 } 3101 } 3102 3103 return S_FALSE; 3104 } 3105 3106 static HRESULT WINAPI xmlreader_MoveToElement(IXmlReader* iface) 3107 { 3108 xmlreader *This = impl_from_IXmlReader(iface); 3109 3110 TRACE("(%p)\n", This); 3111 3112 if (!This->attr_count) return S_FALSE; 3113 3114 if (This->attr) 3115 reader_dec_depth(This); 3116 3117 This->attr = NULL; 3118 3119 /* FIXME: support other node types with 'attributes' like DTD */ 3120 if (This->is_empty_element) { 3121 reader_set_strvalue(This, StringValue_Prefix, &This->empty_element.prefix); 3122 reader_set_strvalue(This, StringValue_QualifiedName, &This->empty_element.qname); 3123 } 3124 else { 3125 struct element *element = LIST_ENTRY(list_head(&This->elements), struct element, entry); 3126 if (element) { 3127 reader_set_strvalue(This, StringValue_Prefix, &element->prefix); 3128 reader_set_strvalue(This, StringValue_QualifiedName, &element->qname); 3129 } 3130 } 3131 This->chunk_read_off = 0; 3132 reader_set_strvalue(This, StringValue_Value, &strval_empty); 3133 3134 return S_OK; 3135 } 3136 3137 static HRESULT WINAPI 
xmlreader_GetQualifiedName(IXmlReader* iface, LPCWSTR *name, UINT *len) 3138 { 3139 xmlreader *This = impl_from_IXmlReader(iface); 3140 struct attribute *attribute = This->attr; 3141 struct element *element; 3142 UINT length; 3143 3144 TRACE("(%p)->(%p %p)\n", This, name, len); 3145 3146 if (!len) 3147 len = &length; 3148 3149 switch (reader_get_nodetype(This)) 3150 { 3151 case XmlNodeType_Text: 3152 case XmlNodeType_CDATA: 3153 case XmlNodeType_Comment: 3154 case XmlNodeType_Whitespace: 3155 *name = emptyW; 3156 *len = 0; 3157 break; 3158 case XmlNodeType_Element: 3159 case XmlNodeType_EndElement: 3160 element = reader_get_element(This); 3161 if (element->prefix.len) 3162 { 3163 *name = element->qname.str; 3164 *len = element->qname.len; 3165 } 3166 else 3167 { 3168 *name = element->localname.str; 3169 *len = element->localname.len; 3170 } 3171 break; 3172 case XmlNodeType_Attribute: 3173 if (attribute->flags & ATTRIBUTE_DEFAULT_NS_DEFINITION) 3174 { 3175 *name = xmlnsW; 3176 *len = 5; 3177 } else if (attribute->prefix.len) 3178 { 3179 *name = This->strvalues[StringValue_QualifiedName].str; 3180 *len = This->strvalues[StringValue_QualifiedName].len; 3181 } 3182 else 3183 { 3184 *name = attribute->localname.str; 3185 *len = attribute->localname.len; 3186 } 3187 break; 3188 default: 3189 *name = This->strvalues[StringValue_QualifiedName].str; 3190 *len = This->strvalues[StringValue_QualifiedName].len; 3191 break; 3192 } 3193 3194 return S_OK; 3195 } 3196 3197 static struct ns *reader_lookup_nsdef(xmlreader *reader) 3198 { 3199 if (list_empty(&reader->nsdef)) 3200 return NULL; 3201 3202 return LIST_ENTRY(list_head(&reader->nsdef), struct ns, entry); 3203 } 3204 3205 static HRESULT WINAPI xmlreader_GetNamespaceUri(IXmlReader* iface, const WCHAR **uri, UINT *len) 3206 { 3207 xmlreader *This = impl_from_IXmlReader(iface); 3208 const strval *prefix = &This->strvalues[StringValue_Prefix]; 3209 XmlNodeType nodetype; 3210 struct ns *ns; 3211 UINT length; 3212 3213 
TRACE("(%p %p %p)\n", iface, uri, len); 3214 3215 if (!len) 3216 len = &length; 3217 3218 switch ((nodetype = reader_get_nodetype(This))) 3219 { 3220 case XmlNodeType_Attribute: 3221 reader_get_attribute_ns_uri(This, This->attr, uri, len); 3222 break; 3223 case XmlNodeType_Element: 3224 case XmlNodeType_EndElement: 3225 { 3226 ns = reader_lookup_ns(This, prefix); 3227 3228 /* pick top default ns if any */ 3229 if (!ns) 3230 ns = reader_lookup_nsdef(This); 3231 3232 if (ns) { 3233 *uri = ns->uri.str; 3234 *len = ns->uri.len; 3235 } 3236 else { 3237 *uri = emptyW; 3238 *len = 0; 3239 } 3240 } 3241 break; 3242 case XmlNodeType_Text: 3243 case XmlNodeType_CDATA: 3244 case XmlNodeType_ProcessingInstruction: 3245 case XmlNodeType_Comment: 3246 case XmlNodeType_Whitespace: 3247 case XmlNodeType_XmlDeclaration: 3248 *uri = emptyW; 3249 *len = 0; 3250 break; 3251 default: 3252 FIXME("Unhandled node type %d\n", nodetype); 3253 *uri = NULL; 3254 *len = 0; 3255 return E_NOTIMPL; 3256 } 3257 3258 return S_OK; 3259 } 3260 3261 static HRESULT WINAPI xmlreader_GetLocalName(IXmlReader* iface, LPCWSTR *name, UINT *len) 3262 { 3263 xmlreader *This = impl_from_IXmlReader(iface); 3264 struct element *element; 3265 UINT length; 3266 3267 TRACE("(%p)->(%p %p)\n", This, name, len); 3268 3269 if (!len) 3270 len = &length; 3271 3272 switch (reader_get_nodetype(This)) 3273 { 3274 case XmlNodeType_Text: 3275 case XmlNodeType_CDATA: 3276 case XmlNodeType_Comment: 3277 case XmlNodeType_Whitespace: 3278 *name = emptyW; 3279 *len = 0; 3280 break; 3281 case XmlNodeType_Element: 3282 case XmlNodeType_EndElement: 3283 element = reader_get_element(This); 3284 *name = element->localname.str; 3285 *len = element->localname.len; 3286 break; 3287 case XmlNodeType_Attribute: 3288 reader_get_attribute_local_name(This, This->attr, name, len); 3289 break; 3290 default: 3291 *name = This->strvalues[StringValue_LocalName].str; 3292 *len = This->strvalues[StringValue_LocalName].len; 3293 break; 3294 } 3295 3296 
return S_OK; 3297 } 3298 3299 static HRESULT WINAPI xmlreader_GetPrefix(IXmlReader* iface, const WCHAR **ret, UINT *len) 3300 { 3301 xmlreader *This = impl_from_IXmlReader(iface); 3302 XmlNodeType nodetype; 3303 UINT length; 3304 3305 TRACE("(%p)->(%p %p)\n", This, ret, len); 3306 3307 if (!len) 3308 len = &length; 3309 3310 *ret = emptyW; 3311 *len = 0; 3312 3313 switch ((nodetype = reader_get_nodetype(This))) 3314 { 3315 case XmlNodeType_Element: 3316 case XmlNodeType_EndElement: 3317 case XmlNodeType_Attribute: 3318 { 3319 const strval *prefix = &This->strvalues[StringValue_Prefix]; 3320 struct ns *ns; 3321 3322 if (strval_eq(This, prefix, &strval_xml)) 3323 { 3324 *ret = xmlW; 3325 *len = 3; 3326 } 3327 else if (strval_eq(This, prefix, &strval_xmlns)) 3328 { 3329 *ret = xmlnsW; 3330 *len = 5; 3331 } 3332 else if ((ns = reader_lookup_ns(This, prefix))) 3333 { 3334 *ret = ns->prefix.str; 3335 *len = ns->prefix.len; 3336 } 3337 3338 break; 3339 } 3340 default: 3341 ; 3342 } 3343 3344 return S_OK; 3345 } 3346 3347 static const strval *reader_get_value(xmlreader *reader, BOOL ensure_allocated) 3348 { 3349 strval *val; 3350 3351 switch (reader_get_nodetype(reader)) 3352 { 3353 case XmlNodeType_XmlDeclaration: 3354 case XmlNodeType_EndElement: 3355 case XmlNodeType_None: 3356 return &strval_empty; 3357 case XmlNodeType_Attribute: 3358 /* For namespace definition attributes return values from namespace list */ 3359 if (reader->attr->flags & (ATTRIBUTE_NS_DEFINITION | ATTRIBUTE_DEFAULT_NS_DEFINITION)) 3360 { 3361 struct ns *ns; 3362 3363 if (!(ns = reader_lookup_ns(reader, &reader->attr->localname))) 3364 ns = reader_lookup_nsdef(reader); 3365 3366 return &ns->uri; 3367 } 3368 return &reader->attr->value; 3369 default: 3370 break; 3371 } 3372 3373 val = &reader->strvalues[StringValue_Value]; 3374 if (!val->str && ensure_allocated) 3375 { 3376 WCHAR *ptr = reader_alloc(reader, (val->len+1)*sizeof(WCHAR)); 3377 if (!ptr) return NULL; 3378 memcpy(ptr, 
reader_get_strptr(reader, val), val->len*sizeof(WCHAR)); 3379 ptr[val->len] = 0; 3380 val->str = ptr; 3381 } 3382 3383 return val; 3384 } 3385 3386 static HRESULT WINAPI xmlreader_GetValue(IXmlReader* iface, const WCHAR **value, UINT *len) 3387 { 3388 xmlreader *reader = impl_from_IXmlReader(iface); 3389 const strval *val = &reader->strvalues[StringValue_Value]; 3390 UINT off; 3391 3392 TRACE("(%p)->(%p %p)\n", reader, value, len); 3393 3394 *value = NULL; 3395 3396 if ((reader->nodetype == XmlNodeType_Comment && !val->str && !val->len) || is_reader_pending(reader)) 3397 { 3398 XmlNodeType type; 3399 HRESULT hr; 3400 3401 hr = IXmlReader_Read(iface, &type); 3402 if (FAILED(hr)) return hr; 3403 3404 /* return if still pending, partially read values are not reported */ 3405 if (is_reader_pending(reader)) return E_PENDING; 3406 } 3407 3408 val = reader_get_value(reader, TRUE); 3409 if (!val) 3410 return E_OUTOFMEMORY; 3411 3412 off = abs(reader->chunk_read_off); 3413 assert(off <= val->len); 3414 *value = val->str + off; 3415 if (len) *len = val->len - off; 3416 reader->chunk_read_off = -off; 3417 return S_OK; 3418 } 3419 3420 static HRESULT WINAPI xmlreader_ReadValueChunk(IXmlReader* iface, WCHAR *buffer, UINT chunk_size, UINT *read) 3421 { 3422 xmlreader *reader = impl_from_IXmlReader(iface); 3423 const strval *val; 3424 UINT len = 0; 3425 3426 TRACE("(%p)->(%p %u %p)\n", reader, buffer, chunk_size, read); 3427 3428 val = reader_get_value(reader, FALSE); 3429 3430 /* If value is already read by GetValue, chunk_read_off is negative and chunked reads are not possible. */ 3431 if (reader->chunk_read_off >= 0) 3432 { 3433 assert(reader->chunk_read_off <= val->len); 3434 len = min(val->len - reader->chunk_read_off, chunk_size); 3435 } 3436 if (read) *read = len; 3437 3438 if (len) 3439 { 3440 memcpy(buffer, reader_get_strptr(reader, val) + reader->chunk_read_off, len*sizeof(WCHAR)); 3441 reader->chunk_read_off += len; 3442 } 3443 3444 return len || !chunk_size ? 
S_OK : S_FALSE; 3445 } 3446 3447 static HRESULT WINAPI xmlreader_GetBaseUri(IXmlReader* iface, 3448 LPCWSTR *baseUri, 3449 UINT *baseUri_length) 3450 { 3451 FIXME("(%p %p %p): stub\n", iface, baseUri, baseUri_length); 3452 return E_NOTIMPL; 3453 } 3454 3455 static BOOL WINAPI xmlreader_IsDefault(IXmlReader* iface) 3456 { 3457 FIXME("(%p): stub\n", iface); 3458 return FALSE; 3459 } 3460 3461 static BOOL WINAPI xmlreader_IsEmptyElement(IXmlReader* iface) 3462 { 3463 xmlreader *This = impl_from_IXmlReader(iface); 3464 TRACE("(%p)\n", This); 3465 /* Empty elements are not placed in stack, it's stored as a global reader flag that makes sense 3466 when current node is start tag of an element */ 3467 return (reader_get_nodetype(This) == XmlNodeType_Element) ? This->is_empty_element : FALSE; 3468 } 3469 3470 static HRESULT WINAPI xmlreader_GetLineNumber(IXmlReader* iface, UINT *line_number) 3471 { 3472 xmlreader *This = impl_from_IXmlReader(iface); 3473 const struct element *element; 3474 3475 TRACE("(%p %p)\n", This, line_number); 3476 3477 if (!line_number) 3478 return E_INVALIDARG; 3479 3480 switch (reader_get_nodetype(This)) 3481 { 3482 case XmlNodeType_Element: 3483 case XmlNodeType_EndElement: 3484 element = reader_get_element(This); 3485 *line_number = element->position.line_number; 3486 break; 3487 case XmlNodeType_Attribute: 3488 *line_number = This->attr->position.line_number; 3489 break; 3490 case XmlNodeType_Whitespace: 3491 case XmlNodeType_XmlDeclaration: 3492 *line_number = This->empty_element.position.line_number; 3493 break; 3494 default: 3495 *line_number = This->position.line_number; 3496 break; 3497 } 3498 3499 return This->state == XmlReadState_Closed ? 
S_FALSE : S_OK; 3500 } 3501 3502 static HRESULT WINAPI xmlreader_GetLinePosition(IXmlReader* iface, UINT *line_position) 3503 { 3504 xmlreader *This = impl_from_IXmlReader(iface); 3505 const struct element *element; 3506 3507 TRACE("(%p %p)\n", This, line_position); 3508 3509 if (!line_position) 3510 return E_INVALIDARG; 3511 3512 switch (reader_get_nodetype(This)) 3513 { 3514 case XmlNodeType_Element: 3515 case XmlNodeType_EndElement: 3516 element = reader_get_element(This); 3517 *line_position = element->position.line_position; 3518 break; 3519 case XmlNodeType_Attribute: 3520 *line_position = This->attr->position.line_position; 3521 break; 3522 case XmlNodeType_Whitespace: 3523 case XmlNodeType_XmlDeclaration: 3524 *line_position = This->empty_element.position.line_position; 3525 break; 3526 default: 3527 *line_position = This->position.line_position; 3528 break; 3529 } 3530 3531 return This->state == XmlReadState_Closed ? S_FALSE : S_OK; 3532 } 3533 3534 static HRESULT WINAPI xmlreader_GetAttributeCount(IXmlReader* iface, UINT *count) 3535 { 3536 xmlreader *This = impl_from_IXmlReader(iface); 3537 3538 TRACE("(%p)->(%p)\n", This, count); 3539 3540 if (!count) return E_INVALIDARG; 3541 3542 *count = This->attr_count; 3543 return S_OK; 3544 } 3545 3546 static HRESULT WINAPI xmlreader_GetDepth(IXmlReader* iface, UINT *depth) 3547 { 3548 xmlreader *This = impl_from_IXmlReader(iface); 3549 TRACE("(%p)->(%p)\n", This, depth); 3550 *depth = This->depth; 3551 return S_OK; 3552 } 3553 3554 static BOOL WINAPI xmlreader_IsEOF(IXmlReader* iface) 3555 { 3556 xmlreader *This = impl_from_IXmlReader(iface); 3557 TRACE("(%p)\n", iface); 3558 return This->state == XmlReadState_EndOfFile; 3559 } 3560 3561 static const struct IXmlReaderVtbl xmlreader_vtbl = 3562 { 3563 xmlreader_QueryInterface, 3564 xmlreader_AddRef, 3565 xmlreader_Release, 3566 xmlreader_SetInput, 3567 xmlreader_GetProperty, 3568 xmlreader_SetProperty, 3569 xmlreader_Read, 3570 xmlreader_GetNodeType, 3571 
xmlreader_MoveToFirstAttribute, 3572 xmlreader_MoveToNextAttribute, 3573 xmlreader_MoveToAttributeByName, 3574 xmlreader_MoveToElement, 3575 xmlreader_GetQualifiedName, 3576 xmlreader_GetNamespaceUri, 3577 xmlreader_GetLocalName, 3578 xmlreader_GetPrefix, 3579 xmlreader_GetValue, 3580 xmlreader_ReadValueChunk, 3581 xmlreader_GetBaseUri, 3582 xmlreader_IsDefault, 3583 xmlreader_IsEmptyElement, 3584 xmlreader_GetLineNumber, 3585 xmlreader_GetLinePosition, 3586 xmlreader_GetAttributeCount, 3587 xmlreader_GetDepth, 3588 xmlreader_IsEOF 3589 }; 3590 3591 /** IXmlReaderInput **/ 3592 static HRESULT WINAPI xmlreaderinput_QueryInterface(IXmlReaderInput *iface, REFIID riid, void** ppvObject) 3593 { 3594 xmlreaderinput *This = impl_from_IXmlReaderInput(iface); 3595 3596 TRACE("(%p)->(%s %p)\n", This, debugstr_guid(riid), ppvObject); 3597 3598 if (IsEqualGUID(riid, &IID_IXmlReaderInput) || 3599 IsEqualGUID(riid, &IID_IUnknown)) 3600 { 3601 *ppvObject = iface; 3602 } 3603 else 3604 { 3605 WARN("interface %s not implemented\n", debugstr_guid(riid)); 3606 *ppvObject = NULL; 3607 return E_NOINTERFACE; 3608 } 3609 3610 IUnknown_AddRef(iface); 3611 3612 return S_OK; 3613 } 3614 3615 static ULONG WINAPI xmlreaderinput_AddRef(IXmlReaderInput *iface) 3616 { 3617 xmlreaderinput *This = impl_from_IXmlReaderInput(iface); 3618 ULONG ref = InterlockedIncrement(&This->ref); 3619 TRACE("(%p)->(%d)\n", This, ref); 3620 return ref; 3621 } 3622 3623 static ULONG WINAPI xmlreaderinput_Release(IXmlReaderInput *iface) 3624 { 3625 xmlreaderinput *This = impl_from_IXmlReaderInput(iface); 3626 LONG ref = InterlockedDecrement(&This->ref); 3627 3628 TRACE("(%p)->(%d)\n", This, ref); 3629 3630 if (ref == 0) 3631 { 3632 IMalloc *imalloc = This->imalloc; 3633 if (This->input) IUnknown_Release(This->input); 3634 if (This->stream) ISequentialStream_Release(This->stream); 3635 if (This->buffer) free_input_buffer(This->buffer); 3636 readerinput_free(This, This->baseuri); 3637 readerinput_free(This, This); 
3638 if (imalloc) IMalloc_Release(imalloc); 3639 } 3640 3641 return ref; 3642 } 3643 3644 static const struct IUnknownVtbl xmlreaderinputvtbl = 3645 { 3646 xmlreaderinput_QueryInterface, 3647 xmlreaderinput_AddRef, 3648 xmlreaderinput_Release 3649 }; 3650 3651 HRESULT WINAPI CreateXmlReader(REFIID riid, void **obj, IMalloc *imalloc) 3652 { 3653 xmlreader *reader; 3654 HRESULT hr; 3655 int i; 3656 3657 TRACE("(%s, %p, %p)\n", wine_dbgstr_guid(riid), obj, imalloc); 3658 3659 if (imalloc) 3660 reader = IMalloc_Alloc(imalloc, sizeof(*reader)); 3661 else 3662 reader = heap_alloc(sizeof(*reader)); 3663 if (!reader) 3664 return E_OUTOFMEMORY; 3665 3666 memset(reader, 0, sizeof(*reader)); 3667 reader->IXmlReader_iface.lpVtbl = &xmlreader_vtbl; 3668 reader->ref = 1; 3669 reader->state = XmlReadState_Closed; 3670 reader->instate = XmlReadInState_Initial; 3671 reader->resumestate = XmlReadResumeState_Initial; 3672 reader->dtdmode = DtdProcessing_Prohibit; 3673 reader->imalloc = imalloc; 3674 if (imalloc) IMalloc_AddRef(imalloc); 3675 reader->nodetype = XmlNodeType_None; 3676 list_init(&reader->attrs); 3677 list_init(&reader->nsdef); 3678 list_init(&reader->ns); 3679 list_init(&reader->elements); 3680 reader->max_depth = 256; 3681 3682 reader->chunk_read_off = 0; 3683 for (i = 0; i < StringValue_Last; i++) 3684 reader->strvalues[i] = strval_empty; 3685 3686 hr = IXmlReader_QueryInterface(&reader->IXmlReader_iface, riid, obj); 3687 IXmlReader_Release(&reader->IXmlReader_iface); 3688 3689 TRACE("returning iface %p, hr %#x\n", *obj, hr); 3690 3691 return hr; 3692 } 3693 3694 HRESULT WINAPI CreateXmlReaderInputWithEncodingName(IUnknown *stream, 3695 IMalloc *imalloc, 3696 LPCWSTR encoding, 3697 BOOL hint, 3698 LPCWSTR base_uri, 3699 IXmlReaderInput **ppInput) 3700 { 3701 xmlreaderinput *readerinput; 3702 HRESULT hr; 3703 3704 TRACE("%p %p %s %d %s %p\n", stream, imalloc, wine_dbgstr_w(encoding), 3705 hint, wine_dbgstr_w(base_uri), ppInput); 3706 3707 if (!stream || !ppInput) 
return E_INVALIDARG; 3708 3709 if (imalloc) 3710 readerinput = IMalloc_Alloc(imalloc, sizeof(*readerinput)); 3711 else 3712 readerinput = heap_alloc(sizeof(*readerinput)); 3713 if(!readerinput) return E_OUTOFMEMORY; 3714 3715 readerinput->IXmlReaderInput_iface.lpVtbl = &xmlreaderinputvtbl; 3716 readerinput->ref = 1; 3717 readerinput->imalloc = imalloc; 3718 readerinput->stream = NULL; 3719 if (imalloc) IMalloc_AddRef(imalloc); 3720 readerinput->encoding = parse_encoding_name(encoding, -1); 3721 readerinput->hint = hint; 3722 readerinput->baseuri = readerinput_strdupW(readerinput, base_uri); 3723 readerinput->pending = 0; 3724 3725 hr = alloc_input_buffer(readerinput); 3726 if (hr != S_OK) 3727 { 3728 readerinput_free(readerinput, readerinput->baseuri); 3729 readerinput_free(readerinput, readerinput); 3730 if (imalloc) IMalloc_Release(imalloc); 3731 return hr; 3732 } 3733 IUnknown_QueryInterface(stream, &IID_IUnknown, (void**)&readerinput->input); 3734 3735 *ppInput = &readerinput->IXmlReaderInput_iface; 3736 3737 TRACE("returning iface %p\n", *ppInput); 3738 3739 return S_OK; 3740 } 3741