1 /* 2 * IXmlReader implementation 3 * 4 * Copyright 2010, 2012-2013, 2016-2017 Nikolay Sivov 5 * 6 * This library is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Lesser General Public 8 * License as published by the Free Software Foundation; either 9 * version 2.1 of the License, or (at your option) any later version. 10 * 11 * This library is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public 17 * License along with this library; if not, write to the Free Software 18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA 19 */ 20 21 #define COBJMACROS 22 23 #include <stdio.h> 24 #include <stdarg.h> 25 #include <assert.h> 26 #include "windef.h" 27 #include "winbase.h" 28 #include "initguid.h" 29 #include "objbase.h" 30 #include "xmllite.h" 31 #include "xmllite_private.h" 32 #ifdef __REACTOS__ 33 #include <winnls.h> 34 #endif 35 36 #include "wine/debug.h" 37 #include "wine/list.h" 38 39 WINE_DEFAULT_DEBUG_CHANNEL(xmllite); 40 41 /* not defined in public headers */ 42 DEFINE_GUID(IID_IXmlReaderInput, 0x0b3ccc9b, 0x9214, 0x428b, 0xa2, 0xae, 0xef, 0x3a, 0xa8, 0x71, 0xaf, 0xda); 43 44 typedef enum 45 { 46 XmlReadInState_Initial, 47 XmlReadInState_XmlDecl, 48 XmlReadInState_Misc_DTD, 49 XmlReadInState_DTD, 50 XmlReadInState_DTD_Misc, 51 XmlReadInState_Element, 52 XmlReadInState_Content, 53 XmlReadInState_MiscEnd, /* optional Misc at the end of a document */ 54 XmlReadInState_Eof 55 } XmlReaderInternalState; 56 57 /* This state denotes where parsing was interrupted by input problem. 58 Reader resumes parsing using this information. 
*/ 59 typedef enum 60 { 61 XmlReadResumeState_Initial, 62 XmlReadResumeState_PITarget, 63 XmlReadResumeState_PIBody, 64 XmlReadResumeState_CDATA, 65 XmlReadResumeState_Comment, 66 XmlReadResumeState_STag, 67 XmlReadResumeState_CharData, 68 XmlReadResumeState_Whitespace 69 } XmlReaderResumeState; 70 71 /* saved pointer index to resume from particular input position */ 72 typedef enum 73 { 74 XmlReadResume_Name, /* PITarget, name for NCName, prefix for QName */ 75 XmlReadResume_Local, /* local for QName */ 76 XmlReadResume_Body, /* PI body, comment text, CDATA text, CharData text */ 77 XmlReadResume_Last 78 } XmlReaderResume; 79 80 typedef enum 81 { 82 StringValue_LocalName, 83 StringValue_Prefix, 84 StringValue_QualifiedName, 85 StringValue_Value, 86 StringValue_Last 87 } XmlReaderStringValue; 88 89 static const WCHAR usasciiW[] = {'U','S','-','A','S','C','I','I',0}; 90 static const WCHAR utf16W[] = {'U','T','F','-','1','6',0}; 91 static const WCHAR utf8W[] = {'U','T','F','-','8',0}; 92 93 static const WCHAR dblquoteW[] = {'\"',0}; 94 static const WCHAR quoteW[] = {'\'',0}; 95 static const WCHAR ltW[] = {'<',0}; 96 static const WCHAR gtW[] = {'>',0}; 97 static const WCHAR commentW[] = {'<','!','-','-',0}; 98 static const WCHAR piW[] = {'<','?',0}; 99 100 BOOL is_namestartchar(WCHAR ch); 101 102 static const char *debugstr_nodetype(XmlNodeType nodetype) 103 { 104 static const char * const type_names[] = 105 { 106 "None", 107 "Element", 108 "Attribute", 109 "Text", 110 "CDATA", 111 "", 112 "", 113 "ProcessingInstruction", 114 "Comment", 115 "", 116 "DocumentType", 117 "", 118 "", 119 "Whitespace", 120 "", 121 "EndElement", 122 "", 123 "XmlDeclaration" 124 }; 125 126 if (nodetype > _XmlNodeType_Last) 127 return wine_dbg_sprintf("unknown type=%d", nodetype); 128 129 return type_names[nodetype]; 130 } 131 132 static const char *debugstr_reader_prop(XmlReaderProperty prop) 133 { 134 static const char * const prop_names[] = 135 { 136 "MultiLanguage", 137 
"ConformanceLevel", 138 "RandomAccess", 139 "XmlResolver", 140 "DtdProcessing", 141 "ReadState", 142 "MaxElementDepth", 143 "MaxEntityExpansion" 144 }; 145 146 if (prop > _XmlReaderProperty_Last) 147 return wine_dbg_sprintf("unknown property=%d", prop); 148 149 return prop_names[prop]; 150 } 151 152 struct xml_encoding_data 153 { 154 const WCHAR *name; 155 xml_encoding enc; 156 UINT cp; 157 }; 158 159 static const struct xml_encoding_data xml_encoding_map[] = { 160 { usasciiW, XmlEncoding_USASCII, 20127 }, 161 { utf16W, XmlEncoding_UTF16, 1200 }, 162 { utf8W, XmlEncoding_UTF8, CP_UTF8 }, 163 }; 164 165 const WCHAR *get_encoding_name(xml_encoding encoding) 166 { 167 return xml_encoding_map[encoding].name; 168 } 169 170 xml_encoding get_encoding_from_codepage(UINT codepage) 171 { 172 int i; 173 for (i = 0; i < ARRAY_SIZE(xml_encoding_map); i++) 174 { 175 if (xml_encoding_map[i].cp == codepage) return xml_encoding_map[i].enc; 176 } 177 return XmlEncoding_Unknown; 178 } 179 180 typedef struct 181 { 182 char *data; 183 UINT cur; 184 unsigned int allocated; 185 unsigned int written; 186 BOOL prev_cr; 187 } encoded_buffer; 188 189 typedef struct input_buffer input_buffer; 190 191 typedef struct 192 { 193 IXmlReaderInput IXmlReaderInput_iface; 194 LONG ref; 195 /* reference passed on IXmlReaderInput creation, is kept when input is created */ 196 IUnknown *input; 197 IMalloc *imalloc; 198 xml_encoding encoding; 199 BOOL hint; 200 WCHAR *baseuri; 201 /* stream reference set after SetInput() call from reader, 202 stored as sequential stream, cause currently 203 optimizations possible with IStream aren't implemented */ 204 ISequentialStream *stream; 205 input_buffer *buffer; 206 unsigned int pending : 1; 207 } xmlreaderinput; 208 209 static const struct IUnknownVtbl xmlreaderinputvtbl; 210 211 /* Structure to hold parsed string of specific length. 212 213 Reader stores node value as 'start' pointer, on request 214 a null-terminated version of it is allocated. 
215 216 To init a strval variable use reader_init_strval(), 217 to set strval as a reader value use reader_set_strval(). 218 */ 219 typedef struct 220 { 221 WCHAR *str; /* allocated null-terminated string */ 222 UINT len; /* length in WCHARs, altered after ReadValueChunk */ 223 UINT start; /* input position where value starts */ 224 } strval; 225 226 static WCHAR emptyW[] = {0}; 227 static WCHAR xmlW[] = {'x','m','l',0}; 228 static WCHAR xmlnsW[] = {'x','m','l','n','s',0}; 229 static const strval strval_empty = { emptyW }; 230 static const strval strval_xml = { xmlW, 3 }; 231 static const strval strval_xmlns = { xmlnsW, 5 }; 232 233 struct reader_position 234 { 235 UINT line_number; 236 UINT line_position; 237 }; 238 239 enum attribute_flags 240 { 241 ATTRIBUTE_NS_DEFINITION = 0x1, 242 ATTRIBUTE_DEFAULT_NS_DEFINITION = 0x2, 243 }; 244 245 struct attribute 246 { 247 struct list entry; 248 strval prefix; 249 strval localname; 250 strval qname; 251 strval value; 252 struct reader_position position; 253 unsigned int flags; 254 }; 255 256 struct element 257 { 258 struct list entry; 259 strval prefix; 260 strval localname; 261 strval qname; 262 struct reader_position position; 263 }; 264 265 struct ns 266 { 267 struct list entry; 268 strval prefix; 269 strval uri; 270 struct element *element; 271 }; 272 273 typedef struct 274 { 275 IXmlReader IXmlReader_iface; 276 LONG ref; 277 xmlreaderinput *input; 278 IMalloc *imalloc; 279 XmlReadState state; 280 HRESULT error; /* error set on XmlReadState_Error */ 281 XmlReaderInternalState instate; 282 XmlReaderResumeState resumestate; 283 XmlNodeType nodetype; 284 DtdProcessing dtdmode; 285 IXmlResolver *resolver; 286 IUnknown *mlang; 287 struct reader_position position; 288 struct list attrs; /* attributes list for current node */ 289 struct attribute *attr; /* current attribute */ 290 UINT attr_count; 291 struct list nsdef; 292 struct list ns; 293 struct list elements; 294 int chunk_read_off; 295 strval 
strvalues[StringValue_Last]; 296 UINT depth; 297 UINT max_depth; 298 BOOL is_empty_element; 299 struct element empty_element; /* used for empty elements without end tag <a />, 300 and to keep <?xml reader position */ 301 UINT resume[XmlReadResume_Last]; /* offsets used to resume reader */ 302 } xmlreader; 303 304 struct input_buffer 305 { 306 encoded_buffer utf16; 307 encoded_buffer encoded; 308 UINT code_page; 309 xmlreaderinput *input; 310 }; 311 312 static inline xmlreader *impl_from_IXmlReader(IXmlReader *iface) 313 { 314 return CONTAINING_RECORD(iface, xmlreader, IXmlReader_iface); 315 } 316 317 static inline xmlreaderinput *impl_from_IXmlReaderInput(IXmlReaderInput *iface) 318 { 319 return CONTAINING_RECORD(iface, xmlreaderinput, IXmlReaderInput_iface); 320 } 321 322 /* reader memory allocation functions */ 323 static inline void *reader_alloc(xmlreader *reader, size_t len) 324 { 325 return m_alloc(reader->imalloc, len); 326 } 327 328 static inline void *reader_alloc_zero(xmlreader *reader, size_t len) 329 { 330 void *ret = reader_alloc(reader, len); 331 if (ret) 332 memset(ret, 0, len); 333 return ret; 334 } 335 336 static inline void reader_free(xmlreader *reader, void *mem) 337 { 338 m_free(reader->imalloc, mem); 339 } 340 341 /* Just return pointer from offset, no attempt to read more. */ 342 static inline WCHAR *reader_get_ptr2(const xmlreader *reader, UINT offset) 343 { 344 encoded_buffer *buffer = &reader->input->buffer->utf16; 345 return (WCHAR*)buffer->data + offset; 346 } 347 348 static inline WCHAR *reader_get_strptr(const xmlreader *reader, const strval *v) 349 { 350 return v->str ? 
v->str : reader_get_ptr2(reader, v->start); 351 } 352 353 static HRESULT reader_strvaldup(xmlreader *reader, const strval *src, strval *dest) 354 { 355 *dest = *src; 356 357 if (src->str != strval_empty.str) 358 { 359 dest->str = reader_alloc(reader, (dest->len+1)*sizeof(WCHAR)); 360 if (!dest->str) return E_OUTOFMEMORY; 361 memcpy(dest->str, reader_get_strptr(reader, src), dest->len*sizeof(WCHAR)); 362 dest->str[dest->len] = 0; 363 dest->start = 0; 364 } 365 366 return S_OK; 367 } 368 369 /* reader input memory allocation functions */ 370 static inline void *readerinput_alloc(xmlreaderinput *input, size_t len) 371 { 372 return m_alloc(input->imalloc, len); 373 } 374 375 static inline void *readerinput_realloc(xmlreaderinput *input, void *mem, size_t len) 376 { 377 return m_realloc(input->imalloc, mem, len); 378 } 379 380 static inline void readerinput_free(xmlreaderinput *input, void *mem) 381 { 382 m_free(input->imalloc, mem); 383 } 384 385 static inline WCHAR *readerinput_strdupW(xmlreaderinput *input, const WCHAR *str) 386 { 387 LPWSTR ret = NULL; 388 389 if(str) { 390 DWORD size; 391 392 size = (lstrlenW(str)+1)*sizeof(WCHAR); 393 ret = readerinput_alloc(input, size); 394 if (ret) memcpy(ret, str, size); 395 } 396 397 return ret; 398 } 399 400 /* This one frees stored string value if needed */ 401 static void reader_free_strvalued(xmlreader *reader, strval *v) 402 { 403 if (v->str != strval_empty.str) 404 { 405 reader_free(reader, v->str); 406 *v = strval_empty; 407 } 408 } 409 410 static void reader_clear_attrs(xmlreader *reader) 411 { 412 struct attribute *attr, *attr2; 413 LIST_FOR_EACH_ENTRY_SAFE(attr, attr2, &reader->attrs, struct attribute, entry) 414 { 415 reader_free_strvalued(reader, &attr->localname); 416 reader_free_strvalued(reader, &attr->value); 417 reader_free(reader, attr); 418 } 419 list_init(&reader->attrs); 420 reader->attr_count = 0; 421 reader->attr = NULL; 422 } 423 424 /* attribute data holds pointers to buffer data, so buffer shrink is 
not possible 425 while we are on a node with attributes */ 426 static HRESULT reader_add_attr(xmlreader *reader, strval *prefix, strval *localname, strval *qname, 427 strval *value, const struct reader_position *position, unsigned int flags) 428 { 429 struct attribute *attr; 430 HRESULT hr; 431 432 attr = reader_alloc(reader, sizeof(*attr)); 433 if (!attr) return E_OUTOFMEMORY; 434 435 hr = reader_strvaldup(reader, localname, &attr->localname); 436 if (hr == S_OK) 437 { 438 hr = reader_strvaldup(reader, value, &attr->value); 439 if (hr != S_OK) 440 reader_free_strvalued(reader, &attr->value); 441 } 442 if (hr != S_OK) 443 { 444 reader_free(reader, attr); 445 return hr; 446 } 447 448 if (prefix) 449 attr->prefix = *prefix; 450 else 451 memset(&attr->prefix, 0, sizeof(attr->prefix)); 452 attr->qname = qname ? *qname : *localname; 453 attr->position = *position; 454 attr->flags = flags; 455 list_add_tail(&reader->attrs, &attr->entry); 456 reader->attr_count++; 457 458 return S_OK; 459 } 460 461 /* Returns current element, doesn't check if reader is actually positioned on it. 
*/ 462 static struct element *reader_get_element(xmlreader *reader) 463 { 464 if (reader->is_empty_element) 465 return &reader->empty_element; 466 467 return LIST_ENTRY(list_head(&reader->elements), struct element, entry); 468 } 469 470 static inline void reader_init_strvalue(UINT start, UINT len, strval *v) 471 { 472 v->start = start; 473 v->len = len; 474 v->str = NULL; 475 } 476 477 static inline const char* debug_strval(const xmlreader *reader, const strval *v) 478 { 479 return debugstr_wn(reader_get_strptr(reader, v), v->len); 480 } 481 482 /* used to initialize from constant string */ 483 static inline void reader_init_cstrvalue(WCHAR *str, UINT len, strval *v) 484 { 485 v->start = 0; 486 v->len = len; 487 v->str = str; 488 } 489 490 static void reader_free_strvalue(xmlreader *reader, XmlReaderStringValue type) 491 { 492 reader_free_strvalued(reader, &reader->strvalues[type]); 493 } 494 495 static void reader_free_strvalues(xmlreader *reader) 496 { 497 int type; 498 for (type = 0; type < StringValue_Last; type++) 499 reader_free_strvalue(reader, type); 500 } 501 502 /* This helper should only be used to test if strings are the same, 503 it doesn't try to sort. 
*/ 504 static inline int strval_eq(const xmlreader *reader, const strval *str1, const strval *str2) 505 { 506 if (str1->len != str2->len) return 0; 507 return !memcmp(reader_get_strptr(reader, str1), reader_get_strptr(reader, str2), str1->len*sizeof(WCHAR)); 508 } 509 510 static void reader_clear_elements(xmlreader *reader) 511 { 512 struct element *elem, *elem2; 513 LIST_FOR_EACH_ENTRY_SAFE(elem, elem2, &reader->elements, struct element, entry) 514 { 515 reader_free_strvalued(reader, &elem->prefix); 516 reader_free_strvalued(reader, &elem->localname); 517 reader_free_strvalued(reader, &elem->qname); 518 reader_free(reader, elem); 519 } 520 list_init(&reader->elements); 521 reader_free_strvalued(reader, &reader->empty_element.localname); 522 reader_free_strvalued(reader, &reader->empty_element.qname); 523 reader->is_empty_element = FALSE; 524 } 525 526 static struct ns *reader_lookup_ns(xmlreader *reader, const strval *prefix) 527 { 528 struct list *nslist = prefix ? &reader->ns : &reader->nsdef; 529 struct ns *ns; 530 531 LIST_FOR_EACH_ENTRY_REV(ns, nslist, struct ns, entry) { 532 if (strval_eq(reader, prefix, &ns->prefix)) 533 return ns; 534 } 535 536 return NULL; 537 } 538 539 static HRESULT reader_inc_depth(xmlreader *reader) 540 { 541 return (++reader->depth >= reader->max_depth && reader->max_depth) ? 
SC_E_MAXELEMENTDEPTH : S_OK; 542 } 543 544 static void reader_dec_depth(xmlreader *reader) 545 { 546 if (reader->depth) 547 reader->depth--; 548 } 549 550 static HRESULT reader_push_ns(xmlreader *reader, const strval *prefix, const strval *uri, BOOL def) 551 { 552 struct ns *ns; 553 HRESULT hr; 554 555 ns = reader_alloc(reader, sizeof(*ns)); 556 if (!ns) return E_OUTOFMEMORY; 557 558 if (def) 559 memset(&ns->prefix, 0, sizeof(ns->prefix)); 560 else { 561 hr = reader_strvaldup(reader, prefix, &ns->prefix); 562 if (FAILED(hr)) { 563 reader_free(reader, ns); 564 return hr; 565 } 566 } 567 568 hr = reader_strvaldup(reader, uri, &ns->uri); 569 if (FAILED(hr)) { 570 reader_free_strvalued(reader, &ns->prefix); 571 reader_free(reader, ns); 572 return hr; 573 } 574 575 ns->element = NULL; 576 list_add_head(def ? &reader->nsdef : &reader->ns, &ns->entry); 577 return hr; 578 } 579 580 static void reader_free_element(xmlreader *reader, struct element *element) 581 { 582 reader_free_strvalued(reader, &element->prefix); 583 reader_free_strvalued(reader, &element->localname); 584 reader_free_strvalued(reader, &element->qname); 585 reader_free(reader, element); 586 } 587 588 static void reader_mark_ns_nodes(xmlreader *reader, struct element *element) 589 { 590 struct ns *ns; 591 592 LIST_FOR_EACH_ENTRY(ns, &reader->ns, struct ns, entry) { 593 if (ns->element) 594 break; 595 ns->element = element; 596 } 597 598 LIST_FOR_EACH_ENTRY(ns, &reader->nsdef, struct ns, entry) { 599 if (ns->element) 600 break; 601 ns->element = element; 602 } 603 } 604 605 static HRESULT reader_push_element(xmlreader *reader, strval *prefix, strval *localname, 606 strval *qname, const struct reader_position *position) 607 { 608 struct element *element; 609 HRESULT hr; 610 611 element = reader_alloc_zero(reader, sizeof(*element)); 612 if (!element) 613 return E_OUTOFMEMORY; 614 615 if ((hr = reader_strvaldup(reader, prefix, &element->prefix)) == S_OK && 616 (hr = reader_strvaldup(reader, localname, 
&element->localname)) == S_OK && 617 (hr = reader_strvaldup(reader, qname, &element->qname)) == S_OK) 618 { 619 list_add_head(&reader->elements, &element->entry); 620 reader_mark_ns_nodes(reader, element); 621 reader->is_empty_element = FALSE; 622 element->position = *position; 623 } 624 else 625 reader_free_element(reader, element); 626 627 return hr; 628 } 629 630 static void reader_pop_ns_nodes(xmlreader *reader, struct element *element) 631 { 632 struct ns *ns, *ns2; 633 634 LIST_FOR_EACH_ENTRY_SAFE_REV(ns, ns2, &reader->ns, struct ns, entry) { 635 if (ns->element != element) 636 break; 637 638 list_remove(&ns->entry); 639 reader_free_strvalued(reader, &ns->prefix); 640 reader_free_strvalued(reader, &ns->uri); 641 reader_free(reader, ns); 642 } 643 644 if (!list_empty(&reader->nsdef)) { 645 ns = LIST_ENTRY(list_head(&reader->nsdef), struct ns, entry); 646 if (ns->element == element) { 647 list_remove(&ns->entry); 648 reader_free_strvalued(reader, &ns->prefix); 649 reader_free_strvalued(reader, &ns->uri); 650 reader_free(reader, ns); 651 } 652 } 653 } 654 655 static void reader_pop_element(xmlreader *reader) 656 { 657 struct element *element; 658 659 if (list_empty(&reader->elements)) 660 return; 661 662 element = LIST_ENTRY(list_head(&reader->elements), struct element, entry); 663 list_remove(&element->entry); 664 665 reader_pop_ns_nodes(reader, element); 666 reader_free_element(reader, element); 667 668 /* It was a root element, the rest is expected as Misc */ 669 if (list_empty(&reader->elements)) 670 reader->instate = XmlReadInState_MiscEnd; 671 } 672 673 /* Always make a copy, cause strings are supposed to be null terminated. Null pointer for 'value' 674 means node value is to be determined. 
*/ 675 static void reader_set_strvalue(xmlreader *reader, XmlReaderStringValue type, const strval *value) 676 { 677 strval *v = &reader->strvalues[type]; 678 679 reader_free_strvalue(reader, type); 680 if (!value) 681 { 682 v->str = NULL; 683 v->start = 0; 684 v->len = 0; 685 return; 686 } 687 688 if (value->str == strval_empty.str) 689 *v = *value; 690 else 691 { 692 if (type == StringValue_Value) 693 { 694 /* defer allocation for value string */ 695 v->str = NULL; 696 v->start = value->start; 697 v->len = value->len; 698 } 699 else 700 { 701 v->str = reader_alloc(reader, (value->len + 1)*sizeof(WCHAR)); 702 memcpy(v->str, reader_get_strptr(reader, value), value->len*sizeof(WCHAR)); 703 v->str[value->len] = 0; 704 v->len = value->len; 705 } 706 } 707 } 708 709 static inline int is_reader_pending(xmlreader *reader) 710 { 711 return reader->input->pending; 712 } 713 714 static HRESULT init_encoded_buffer(xmlreaderinput *input, encoded_buffer *buffer) 715 { 716 const int initial_len = 0x2000; 717 buffer->data = readerinput_alloc(input, initial_len); 718 if (!buffer->data) return E_OUTOFMEMORY; 719 720 memset(buffer->data, 0, 4); 721 buffer->cur = 0; 722 buffer->allocated = initial_len; 723 buffer->written = 0; 724 buffer->prev_cr = FALSE; 725 726 return S_OK; 727 } 728 729 static void free_encoded_buffer(xmlreaderinput *input, encoded_buffer *buffer) 730 { 731 readerinput_free(input, buffer->data); 732 } 733 734 HRESULT get_code_page(xml_encoding encoding, UINT *cp) 735 { 736 if (encoding == XmlEncoding_Unknown) 737 { 738 FIXME("unsupported encoding %d\n", encoding); 739 return E_NOTIMPL; 740 } 741 742 *cp = xml_encoding_map[encoding].cp; 743 744 return S_OK; 745 } 746 747 xml_encoding parse_encoding_name(const WCHAR *name, int len) 748 { 749 int min, max, n, c; 750 751 if (!name) return XmlEncoding_Unknown; 752 753 min = 0; 754 max = ARRAY_SIZE(xml_encoding_map) - 1; 755 756 while (min <= max) 757 { 758 n = (min+max)/2; 759 760 if (len != -1) 761 c = 
_wcsnicmp(xml_encoding_map[n].name, name, len); 762 else 763 c = wcsicmp(xml_encoding_map[n].name, name); 764 if (!c) 765 return xml_encoding_map[n].enc; 766 767 if (c > 0) 768 max = n-1; 769 else 770 min = n+1; 771 } 772 773 return XmlEncoding_Unknown; 774 } 775 776 static HRESULT alloc_input_buffer(xmlreaderinput *input) 777 { 778 input_buffer *buffer; 779 HRESULT hr; 780 781 input->buffer = NULL; 782 783 buffer = readerinput_alloc(input, sizeof(*buffer)); 784 if (!buffer) return E_OUTOFMEMORY; 785 786 buffer->input = input; 787 buffer->code_page = ~0; /* code page is unknown at this point */ 788 hr = init_encoded_buffer(input, &buffer->utf16); 789 if (hr != S_OK) { 790 readerinput_free(input, buffer); 791 return hr; 792 } 793 794 hr = init_encoded_buffer(input, &buffer->encoded); 795 if (hr != S_OK) { 796 free_encoded_buffer(input, &buffer->utf16); 797 readerinput_free(input, buffer); 798 return hr; 799 } 800 801 input->buffer = buffer; 802 return S_OK; 803 } 804 805 static void free_input_buffer(input_buffer *buffer) 806 { 807 free_encoded_buffer(buffer->input, &buffer->encoded); 808 free_encoded_buffer(buffer->input, &buffer->utf16); 809 readerinput_free(buffer->input, buffer); 810 } 811 812 static void readerinput_release_stream(xmlreaderinput *readerinput) 813 { 814 if (readerinput->stream) { 815 ISequentialStream_Release(readerinput->stream); 816 readerinput->stream = NULL; 817 } 818 } 819 820 /* Queries already stored interface for IStream/ISequentialStream. 
821 Interface supplied on creation will be overwritten */ 822 static inline HRESULT readerinput_query_for_stream(xmlreaderinput *readerinput) 823 { 824 HRESULT hr; 825 826 readerinput_release_stream(readerinput); 827 hr = IUnknown_QueryInterface(readerinput->input, &IID_IStream, (void**)&readerinput->stream); 828 if (hr != S_OK) 829 hr = IUnknown_QueryInterface(readerinput->input, &IID_ISequentialStream, (void**)&readerinput->stream); 830 831 return hr; 832 } 833 834 /* reads a chunk to raw buffer */ 835 static HRESULT readerinput_growraw(xmlreaderinput *readerinput) 836 { 837 encoded_buffer *buffer = &readerinput->buffer->encoded; 838 /* to make sure aligned length won't exceed allocated length */ 839 ULONG len = buffer->allocated - buffer->written - 4; 840 ULONG read; 841 HRESULT hr; 842 843 /* always try to get aligned to 4 bytes, so the only case we can get partially read characters is 844 variable width encodings like UTF-8 */ 845 len = (len + 3) & ~3; 846 /* try to use allocated space or grow */ 847 if (buffer->allocated - buffer->written < len) 848 { 849 buffer->allocated *= 2; 850 buffer->data = readerinput_realloc(readerinput, buffer->data, buffer->allocated); 851 len = buffer->allocated - buffer->written; 852 } 853 854 read = 0; 855 hr = ISequentialStream_Read(readerinput->stream, buffer->data + buffer->written, len, &read); 856 TRACE("written=%d, alloc=%d, requested=%d, read=%d, ret=0x%08x\n", buffer->written, buffer->allocated, len, read, hr); 857 readerinput->pending = hr == E_PENDING; 858 if (FAILED(hr)) return hr; 859 buffer->written += read; 860 861 return hr; 862 } 863 864 /* grows UTF-16 buffer so it has at least 'length' WCHAR chars free on return */ 865 static void readerinput_grow(xmlreaderinput *readerinput, int length) 866 { 867 encoded_buffer *buffer = &readerinput->buffer->utf16; 868 869 length *= sizeof(WCHAR); 870 /* grow if needed, plus 4 bytes to be sure null terminator will fit in */ 871 if (buffer->allocated < buffer->written + length 
+ 4) 872 { 873 int grown_size = max(2*buffer->allocated, buffer->allocated + length); 874 buffer->data = readerinput_realloc(readerinput, buffer->data, grown_size); 875 buffer->allocated = grown_size; 876 } 877 } 878 879 static inline BOOL readerinput_is_utf8(xmlreaderinput *readerinput) 880 { 881 static const char startA[] = {'<','?'}; 882 static const char commentA[] = {'<','!'}; 883 encoded_buffer *buffer = &readerinput->buffer->encoded; 884 unsigned char *ptr = (unsigned char*)buffer->data; 885 886 return !memcmp(buffer->data, startA, sizeof(startA)) || 887 !memcmp(buffer->data, commentA, sizeof(commentA)) || 888 /* test start byte */ 889 (ptr[0] == '<' && 890 ( 891 (ptr[1] && (ptr[1] <= 0x7f)) || 892 (buffer->data[1] >> 5) == 0x6 || /* 2 bytes */ 893 (buffer->data[1] >> 4) == 0xe || /* 3 bytes */ 894 (buffer->data[1] >> 3) == 0x1e) /* 4 bytes */ 895 ); 896 } 897 898 static HRESULT readerinput_detectencoding(xmlreaderinput *readerinput, xml_encoding *enc) 899 { 900 encoded_buffer *buffer = &readerinput->buffer->encoded; 901 static const char utf8bom[] = {0xef,0xbb,0xbf}; 902 static const char utf16lebom[] = {0xff,0xfe}; 903 WCHAR *ptrW; 904 905 *enc = XmlEncoding_Unknown; 906 907 if (buffer->written <= 3) 908 { 909 HRESULT hr = readerinput_growraw(readerinput); 910 if (FAILED(hr)) return hr; 911 if (buffer->written < 3) return MX_E_INPUTEND; 912 } 913 914 ptrW = (WCHAR *)buffer->data; 915 /* try start symbols if we have enough data to do that, input buffer should contain 916 first chunk already */ 917 if (readerinput_is_utf8(readerinput)) 918 *enc = XmlEncoding_UTF8; 919 else if (*ptrW == '<') 920 { 921 ptrW++; 922 if (*ptrW == '?' || *ptrW == '!' 
|| is_namestartchar(*ptrW)) 923 *enc = XmlEncoding_UTF16; 924 } 925 /* try with BOM now */ 926 else if (!memcmp(buffer->data, utf8bom, sizeof(utf8bom))) 927 { 928 buffer->cur += sizeof(utf8bom); 929 *enc = XmlEncoding_UTF8; 930 } 931 else if (!memcmp(buffer->data, utf16lebom, sizeof(utf16lebom))) 932 { 933 buffer->cur += sizeof(utf16lebom); 934 *enc = XmlEncoding_UTF16; 935 } 936 937 return S_OK; 938 } 939 940 static int readerinput_get_utf8_convlen(xmlreaderinput *readerinput) 941 { 942 encoded_buffer *buffer = &readerinput->buffer->encoded; 943 int len = buffer->written; 944 945 /* complete single byte char */ 946 if (!(buffer->data[len-1] & 0x80)) return len; 947 948 /* find start byte of multibyte char */ 949 while (--len && !(buffer->data[len] & 0xc0)) 950 ; 951 952 return len; 953 } 954 955 /* Returns byte length of complete char sequence for buffer code page, 956 it's relative to current buffer position which is currently used for BOM handling 957 only. */ 958 static int readerinput_get_convlen(xmlreaderinput *readerinput) 959 { 960 encoded_buffer *buffer = &readerinput->buffer->encoded; 961 int len; 962 963 if (readerinput->buffer->code_page == CP_UTF8) 964 len = readerinput_get_utf8_convlen(readerinput); 965 else 966 len = buffer->written; 967 968 TRACE("%d\n", len - buffer->cur); 969 return len - buffer->cur; 970 } 971 972 /* It's possible that raw buffer has some leftovers from last conversion - some char 973 sequence that doesn't represent a full code point. Length argument should be calculated with 974 readerinput_get_convlen(), if it's -1 it will be calculated here. 
*/ 975 static void readerinput_shrinkraw(xmlreaderinput *readerinput, int len) 976 { 977 encoded_buffer *buffer = &readerinput->buffer->encoded; 978 979 if (len == -1) 980 len = readerinput_get_convlen(readerinput); 981 982 memmove(buffer->data, buffer->data + buffer->cur + (buffer->written - len), len); 983 /* everything below cur is lost too */ 984 buffer->written -= len + buffer->cur; 985 /* after this point we don't need cur offset really, 986 it's used only to mark where actual data begins when first chunk is read */ 987 buffer->cur = 0; 988 } 989 990 static void fixup_buffer_cr(encoded_buffer *buffer, int off) 991 { 992 BOOL prev_cr = buffer->prev_cr; 993 const WCHAR *src; 994 WCHAR *dest; 995 996 src = dest = (WCHAR*)buffer->data + off; 997 while ((const char*)src < buffer->data + buffer->written) 998 { 999 if (*src == '\r') 1000 { 1001 *dest++ = '\n'; 1002 src++; 1003 prev_cr = TRUE; 1004 continue; 1005 } 1006 if(prev_cr && *src == '\n') 1007 src++; 1008 else 1009 *dest++ = *src++; 1010 prev_cr = FALSE; 1011 } 1012 1013 buffer->written = (char*)dest - buffer->data; 1014 buffer->prev_cr = prev_cr; 1015 *dest = 0; 1016 } 1017 1018 /* note that raw buffer content is kept */ 1019 static void readerinput_switchencoding(xmlreaderinput *readerinput, xml_encoding enc) 1020 { 1021 encoded_buffer *src = &readerinput->buffer->encoded; 1022 encoded_buffer *dest = &readerinput->buffer->utf16; 1023 int len, dest_len; 1024 UINT cp = ~0u; 1025 HRESULT hr; 1026 WCHAR *ptr; 1027 1028 hr = get_code_page(enc, &cp); 1029 if (FAILED(hr)) return; 1030 1031 readerinput->buffer->code_page = cp; 1032 len = readerinput_get_convlen(readerinput); 1033 1034 TRACE("switching to cp %d\n", cp); 1035 1036 /* just copy in this case */ 1037 if (enc == XmlEncoding_UTF16) 1038 { 1039 readerinput_grow(readerinput, len); 1040 memcpy(dest->data, src->data + src->cur, len); 1041 dest->written += len*sizeof(WCHAR); 1042 } 1043 else 1044 { 1045 dest_len = MultiByteToWideChar(cp, 0, src->data + 
src->cur, len, NULL, 0); 1046 readerinput_grow(readerinput, dest_len); 1047 ptr = (WCHAR*)dest->data; 1048 MultiByteToWideChar(cp, 0, src->data + src->cur, len, ptr, dest_len); 1049 ptr[dest_len] = 0; 1050 dest->written += dest_len*sizeof(WCHAR); 1051 } 1052 1053 fixup_buffer_cr(dest, 0); 1054 } 1055 1056 /* shrinks parsed data a buffer begins with */ 1057 static void reader_shrink(xmlreader *reader) 1058 { 1059 encoded_buffer *buffer = &reader->input->buffer->utf16; 1060 1061 /* avoid to move too often using threshold shrink length */ 1062 if (buffer->cur*sizeof(WCHAR) > buffer->written / 2) 1063 { 1064 buffer->written -= buffer->cur*sizeof(WCHAR); 1065 memmove(buffer->data, (WCHAR*)buffer->data + buffer->cur, buffer->written); 1066 buffer->cur = 0; 1067 *(WCHAR*)&buffer->data[buffer->written] = 0; 1068 } 1069 } 1070 1071 /* This is a normal way for reader to get new data converted from raw buffer to utf16 buffer. 1072 It won't attempt to shrink but will grow destination buffer if needed */ 1073 static HRESULT reader_more(xmlreader *reader) 1074 { 1075 xmlreaderinput *readerinput = reader->input; 1076 encoded_buffer *src = &readerinput->buffer->encoded; 1077 encoded_buffer *dest = &readerinput->buffer->utf16; 1078 UINT cp = readerinput->buffer->code_page; 1079 int len, dest_len, prev_len; 1080 HRESULT hr; 1081 WCHAR *ptr; 1082 1083 /* get some raw data from stream first */ 1084 hr = readerinput_growraw(readerinput); 1085 len = readerinput_get_convlen(readerinput); 1086 prev_len = dest->written / sizeof(WCHAR); 1087 1088 /* just copy for UTF-16 case */ 1089 if (cp == 1200) 1090 { 1091 readerinput_grow(readerinput, len); 1092 memcpy(dest->data + dest->written, src->data + src->cur, len); 1093 dest->written += len*sizeof(WCHAR); 1094 } 1095 else 1096 { 1097 dest_len = MultiByteToWideChar(cp, 0, src->data + src->cur, len, NULL, 0); 1098 readerinput_grow(readerinput, dest_len); 1099 ptr = (WCHAR*)(dest->data + dest->written); 1100 MultiByteToWideChar(cp, 0, src->data + 
src->cur, len, ptr, dest_len); 1101 ptr[dest_len] = 0; 1102 dest->written += dest_len*sizeof(WCHAR); 1103 /* get rid of processed data */ 1104 readerinput_shrinkraw(readerinput, len); 1105 } 1106 1107 fixup_buffer_cr(dest, prev_len); 1108 return hr; 1109 } 1110 1111 static inline UINT reader_get_cur(xmlreader *reader) 1112 { 1113 return reader->input->buffer->utf16.cur; 1114 } 1115 1116 static inline WCHAR *reader_get_ptr(xmlreader *reader) 1117 { 1118 encoded_buffer *buffer = &reader->input->buffer->utf16; 1119 WCHAR *ptr = (WCHAR*)buffer->data + buffer->cur; 1120 if (!*ptr) reader_more(reader); 1121 return (WCHAR*)buffer->data + buffer->cur; 1122 } 1123 1124 static int reader_cmp(xmlreader *reader, const WCHAR *str) 1125 { 1126 int i=0; 1127 const WCHAR *ptr = reader_get_ptr(reader); 1128 while (str[i]) 1129 { 1130 if (!ptr[i]) 1131 { 1132 reader_more(reader); 1133 ptr = reader_get_ptr(reader); 1134 } 1135 if (str[i] != ptr[i]) 1136 return ptr[i] - str[i]; 1137 i++; 1138 } 1139 return 0; 1140 } 1141 1142 static void reader_update_position(xmlreader *reader, WCHAR ch) 1143 { 1144 if (ch == '\r') 1145 reader->position.line_position = 1; 1146 else if (ch == '\n') 1147 { 1148 reader->position.line_number++; 1149 reader->position.line_position = 1; 1150 } 1151 else 1152 reader->position.line_position++; 1153 } 1154 1155 /* moves cursor n WCHARs forward */ 1156 static void reader_skipn(xmlreader *reader, int n) 1157 { 1158 encoded_buffer *buffer = &reader->input->buffer->utf16; 1159 const WCHAR *ptr; 1160 1161 while (*(ptr = reader_get_ptr(reader)) && n--) 1162 { 1163 reader_update_position(reader, *ptr); 1164 buffer->cur++; 1165 } 1166 } 1167 1168 static inline BOOL is_wchar_space(WCHAR ch) 1169 { 1170 return ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n'; 1171 } 1172 1173 /* [3] S ::= (#x20 | #x9 | #xD | #xA)+ */ 1174 static int reader_skipspaces(xmlreader *reader) 1175 { 1176 const WCHAR *ptr = reader_get_ptr(reader); 1177 UINT start = reader_get_cur(reader); 
1178 1179 while (is_wchar_space(*ptr)) 1180 { 1181 reader_skipn(reader, 1); 1182 ptr = reader_get_ptr(reader); 1183 } 1184 1185 return reader_get_cur(reader) - start; 1186 } 1187 1188 /* [26] VersionNum ::= '1.' [0-9]+ */ 1189 static HRESULT reader_parse_versionnum(xmlreader *reader, strval *val) 1190 { 1191 static const WCHAR onedotW[] = {'1','.',0}; 1192 WCHAR *ptr, *ptr2; 1193 UINT start; 1194 1195 if (reader_cmp(reader, onedotW)) return WC_E_XMLDECL; 1196 1197 start = reader_get_cur(reader); 1198 /* skip "1." */ 1199 reader_skipn(reader, 2); 1200 1201 ptr2 = ptr = reader_get_ptr(reader); 1202 while (*ptr >= '0' && *ptr <= '9') 1203 { 1204 reader_skipn(reader, 1); 1205 ptr = reader_get_ptr(reader); 1206 } 1207 1208 if (ptr2 == ptr) return WC_E_DIGIT; 1209 reader_init_strvalue(start, reader_get_cur(reader)-start, val); 1210 TRACE("version=%s\n", debug_strval(reader, val)); 1211 return S_OK; 1212 } 1213 1214 /* [25] Eq ::= S? '=' S? */ 1215 static HRESULT reader_parse_eq(xmlreader *reader) 1216 { 1217 static const WCHAR eqW[] = {'=',0}; 1218 reader_skipspaces(reader); 1219 if (reader_cmp(reader, eqW)) return WC_E_EQUAL; 1220 /* skip '=' */ 1221 reader_skipn(reader, 1); 1222 reader_skipspaces(reader); 1223 return S_OK; 1224 } 1225 1226 /* [24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"') */ 1227 static HRESULT reader_parse_versioninfo(xmlreader *reader) 1228 { 1229 static const WCHAR versionW[] = {'v','e','r','s','i','o','n',0}; 1230 struct reader_position position; 1231 strval val, name; 1232 HRESULT hr; 1233 1234 if (!reader_skipspaces(reader)) return WC_E_WHITESPACE; 1235 1236 position = reader->position; 1237 if (reader_cmp(reader, versionW)) return WC_E_XMLDECL; 1238 reader_init_strvalue(reader_get_cur(reader), 7, &name); 1239 /* skip 'version' */ 1240 reader_skipn(reader, 7); 1241 1242 hr = reader_parse_eq(reader); 1243 if (FAILED(hr)) return hr; 1244 1245 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW)) 1246 return 
WC_E_QUOTE; 1247 /* skip "'"|'"' */ 1248 reader_skipn(reader, 1); 1249 1250 hr = reader_parse_versionnum(reader, &val); 1251 if (FAILED(hr)) return hr; 1252 1253 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW)) 1254 return WC_E_QUOTE; 1255 1256 /* skip "'"|'"' */ 1257 reader_skipn(reader, 1); 1258 1259 return reader_add_attr(reader, NULL, &name, NULL, &val, &position, 0); 1260 } 1261 1262 /* ([A-Za-z0-9._] | '-') */ 1263 static inline BOOL is_wchar_encname(WCHAR ch) 1264 { 1265 return ((ch >= 'A' && ch <= 'Z') || 1266 (ch >= 'a' && ch <= 'z') || 1267 (ch >= '0' && ch <= '9') || 1268 (ch == '.') || (ch == '_') || 1269 (ch == '-')); 1270 } 1271 1272 /* [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* */ 1273 static HRESULT reader_parse_encname(xmlreader *reader, strval *val) 1274 { 1275 WCHAR *start = reader_get_ptr(reader), *ptr; 1276 xml_encoding enc; 1277 int len; 1278 1279 if ((*start < 'A' || *start > 'Z') && (*start < 'a' || *start > 'z')) 1280 return WC_E_ENCNAME; 1281 1282 val->start = reader_get_cur(reader); 1283 1284 ptr = start; 1285 while (is_wchar_encname(*++ptr)) 1286 ; 1287 1288 len = ptr - start; 1289 enc = parse_encoding_name(start, len); 1290 TRACE("encoding name %s\n", debugstr_wn(start, len)); 1291 val->str = start; 1292 val->len = len; 1293 1294 if (enc == XmlEncoding_Unknown) 1295 return WC_E_ENCNAME; 1296 1297 /* skip encoding name */ 1298 reader_skipn(reader, len); 1299 return S_OK; 1300 } 1301 1302 /* [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" ) */ 1303 static HRESULT reader_parse_encdecl(xmlreader *reader) 1304 { 1305 static const WCHAR encodingW[] = {'e','n','c','o','d','i','n','g',0}; 1306 struct reader_position position; 1307 strval name, val; 1308 HRESULT hr; 1309 1310 if (!reader_skipspaces(reader)) return S_FALSE; 1311 1312 position = reader->position; 1313 if (reader_cmp(reader, encodingW)) return S_FALSE; 1314 name.str = reader_get_ptr(reader); 1315 name.start = reader_get_cur(reader); 
1316 name.len = 8; 1317 /* skip 'encoding' */ 1318 reader_skipn(reader, 8); 1319 1320 hr = reader_parse_eq(reader); 1321 if (FAILED(hr)) return hr; 1322 1323 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW)) 1324 return WC_E_QUOTE; 1325 /* skip "'"|'"' */ 1326 reader_skipn(reader, 1); 1327 1328 hr = reader_parse_encname(reader, &val); 1329 if (FAILED(hr)) return hr; 1330 1331 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW)) 1332 return WC_E_QUOTE; 1333 1334 /* skip "'"|'"' */ 1335 reader_skipn(reader, 1); 1336 1337 return reader_add_attr(reader, NULL, &name, NULL, &val, &position, 0); 1338 } 1339 1340 /* [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"')) */ 1341 static HRESULT reader_parse_sddecl(xmlreader *reader) 1342 { 1343 static const WCHAR standaloneW[] = {'s','t','a','n','d','a','l','o','n','e',0}; 1344 static const WCHAR yesW[] = {'y','e','s',0}; 1345 static const WCHAR noW[] = {'n','o',0}; 1346 struct reader_position position; 1347 strval name, val; 1348 UINT start; 1349 HRESULT hr; 1350 1351 if (!reader_skipspaces(reader)) return S_FALSE; 1352 1353 position = reader->position; 1354 if (reader_cmp(reader, standaloneW)) return S_FALSE; 1355 reader_init_strvalue(reader_get_cur(reader), 10, &name); 1356 /* skip 'standalone' */ 1357 reader_skipn(reader, 10); 1358 1359 hr = reader_parse_eq(reader); 1360 if (FAILED(hr)) return hr; 1361 1362 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW)) 1363 return WC_E_QUOTE; 1364 /* skip "'"|'"' */ 1365 reader_skipn(reader, 1); 1366 1367 if (reader_cmp(reader, yesW) && reader_cmp(reader, noW)) 1368 return WC_E_XMLDECL; 1369 1370 start = reader_get_cur(reader); 1371 /* skip 'yes'|'no' */ 1372 reader_skipn(reader, reader_cmp(reader, yesW) ? 
2 : 3); 1373 reader_init_strvalue(start, reader_get_cur(reader)-start, &val); 1374 TRACE("standalone=%s\n", debug_strval(reader, &val)); 1375 1376 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW)) 1377 return WC_E_QUOTE; 1378 /* skip "'"|'"' */ 1379 reader_skipn(reader, 1); 1380 1381 return reader_add_attr(reader, NULL, &name, NULL, &val, &position, 0); 1382 } 1383 1384 /* [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' */ 1385 static HRESULT reader_parse_xmldecl(xmlreader *reader) 1386 { 1387 static const WCHAR xmldeclW[] = {'<','?','x','m','l',' ',0}; 1388 static const WCHAR declcloseW[] = {'?','>',0}; 1389 struct reader_position position; 1390 HRESULT hr; 1391 1392 /* check if we have "<?xml " */ 1393 if (reader_cmp(reader, xmldeclW)) 1394 return S_FALSE; 1395 1396 reader_skipn(reader, 2); 1397 position = reader->position; 1398 reader_skipn(reader, 3); 1399 hr = reader_parse_versioninfo(reader); 1400 if (FAILED(hr)) 1401 return hr; 1402 1403 hr = reader_parse_encdecl(reader); 1404 if (FAILED(hr)) 1405 return hr; 1406 1407 hr = reader_parse_sddecl(reader); 1408 if (FAILED(hr)) 1409 return hr; 1410 1411 reader_skipspaces(reader); 1412 if (reader_cmp(reader, declcloseW)) 1413 return WC_E_XMLDECL; 1414 1415 /* skip '?>' */ 1416 reader_skipn(reader, 2); 1417 1418 reader->nodetype = XmlNodeType_XmlDeclaration; 1419 reader->empty_element.position = position; 1420 reader_set_strvalue(reader, StringValue_LocalName, &strval_xml); 1421 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_xml); 1422 1423 return S_OK; 1424 } 1425 1426 /* [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' */ 1427 static HRESULT reader_parse_comment(xmlreader *reader) 1428 { 1429 WCHAR *ptr; 1430 UINT start; 1431 1432 if (reader->resumestate == XmlReadResumeState_Comment) 1433 { 1434 start = reader->resume[XmlReadResume_Body]; 1435 ptr = reader_get_ptr(reader); 1436 } 1437 else 1438 { 1439 /* skip '<!--' */ 1440 reader_skipn(reader, 
4); 1441 reader_shrink(reader); 1442 ptr = reader_get_ptr(reader); 1443 start = reader_get_cur(reader); 1444 reader->nodetype = XmlNodeType_Comment; 1445 reader->resume[XmlReadResume_Body] = start; 1446 reader->resumestate = XmlReadResumeState_Comment; 1447 reader_set_strvalue(reader, StringValue_Value, NULL); 1448 } 1449 1450 /* will exit when there's no more data, it won't attempt to 1451 read more from stream */ 1452 while (*ptr) 1453 { 1454 if (ptr[0] == '-') 1455 { 1456 if (ptr[1] == '-') 1457 { 1458 if (ptr[2] == '>') 1459 { 1460 strval value; 1461 1462 reader_init_strvalue(start, reader_get_cur(reader)-start, &value); 1463 TRACE("%s\n", debug_strval(reader, &value)); 1464 1465 /* skip rest of markup '->' */ 1466 reader_skipn(reader, 3); 1467 1468 reader_set_strvalue(reader, StringValue_Value, &value); 1469 reader->resume[XmlReadResume_Body] = 0; 1470 reader->resumestate = XmlReadResumeState_Initial; 1471 return S_OK; 1472 } 1473 else 1474 return WC_E_COMMENT; 1475 } 1476 } 1477 1478 reader_skipn(reader, 1); 1479 ptr++; 1480 } 1481 1482 return S_OK; 1483 } 1484 1485 /* [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] */ 1486 static inline BOOL is_char(WCHAR ch) 1487 { 1488 return (ch == '\t') || (ch == '\r') || (ch == '\n') || 1489 (ch >= 0x20 && ch <= 0xd7ff) || 1490 (ch >= 0xd800 && ch <= 0xdbff) || /* high surrogate */ 1491 (ch >= 0xdc00 && ch <= 0xdfff) || /* low surrogate */ 1492 (ch >= 0xe000 && ch <= 0xfffd); 1493 } 1494 1495 /* [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] */ 1496 BOOL is_pubchar(WCHAR ch) 1497 { 1498 return (ch == ' ') || 1499 (ch >= 'a' && ch <= 'z') || 1500 (ch >= 'A' && ch <= 'Z') || 1501 (ch >= '0' && ch <= '9') || 1502 (ch >= '-' && ch <= ';') || /* '()*+,-./:; */ 1503 (ch == '=') || (ch == '?') || 1504 (ch == '@') || (ch == '!') || 1505 (ch >= '#' && ch <= '%') || /* #$% */ 1506 (ch == '_') || (ch == '\r') || (ch == '\n'); 1507 } 1508 1509 BOOL 
is_namestartchar(WCHAR ch)
{
    /* NameStartChar ranges, tested on single UTF-16 code units; both surrogate
       halves are accepted */
    return (ch == ':') || (ch >= 'A' && ch <= 'Z') ||
           (ch == '_') || (ch >= 'a' && ch <= 'z') ||
           (ch >= 0xc0 && ch <= 0xd6) ||
           (ch >= 0xd8 && ch <= 0xf6) ||
           (ch >= 0xf8 && ch <= 0x2ff) ||
           (ch >= 0x370 && ch <= 0x37d) ||
           (ch >= 0x37f && ch <= 0x1fff) ||
           (ch >= 0x200c && ch <= 0x200d) ||
           (ch >= 0x2070 && ch <= 0x218f) ||
           (ch >= 0x2c00 && ch <= 0x2fef) ||
           (ch >= 0x3001 && ch <= 0xd7ff) ||
           (ch >= 0xd800 && ch <= 0xdbff) || /* high surrogate */
           (ch >= 0xdc00 && ch <= 0xdfff) || /* low surrogate */
           (ch >= 0xf900 && ch <= 0xfdcf) ||
           (ch >= 0xfdf0 && ch <= 0xfffd);
}

/* [4 NS] NCName ::= Name - (Char* ':' Char*)
   Tests a single name character; ':' is not accepted here. */
BOOL is_ncnamechar(WCHAR ch)
{
    return (ch >= 'A' && ch <= 'Z') ||
           (ch == '_') || (ch >= 'a' && ch <= 'z') ||
           (ch == '-') || (ch == '.') ||
           (ch >= '0' && ch <= '9') ||
           (ch == 0xb7) ||
           (ch >= 0xc0 && ch <= 0xd6) ||
           (ch >= 0xd8 && ch <= 0xf6) ||
           (ch >= 0xf8 && ch <= 0x2ff) ||
           (ch >= 0x300 && ch <= 0x36f) ||
           (ch >= 0x370 && ch <= 0x37d) ||
           (ch >= 0x37f && ch <= 0x1fff) ||
           (ch >= 0x200c && ch <= 0x200d) ||
           (ch >= 0x203f && ch <= 0x2040) ||
           (ch >= 0x2070 && ch <= 0x218f) ||
           (ch >= 0x2c00 && ch <= 0x2fef) ||
           (ch >= 0x3001 && ch <= 0xd7ff) ||
           (ch >= 0xd800 && ch <= 0xdbff) || /* high surrogate */
           (ch >= 0xdc00 && ch <= 0xdfff) || /* low surrogate */
           (ch >= 0xf900 && ch <= 0xfdcf) ||
           (ch >= 0xfdf0 && ch <= 0xfffd);
}

/* NameChar: an NCName character or ':' */
BOOL is_namechar(WCHAR ch)
{
    return (ch == ':') || is_ncnamechar(ch);
}

static XmlNodeType reader_get_nodetype(const xmlreader *reader)
{
    /* When we're on attribute always return attribute type, container node type is kept.
       Note that container is not necessarily an element, and attribute doesn't mean it's
       an attribute in XML spec terms. */
    return reader->attr ? XmlNodeType_Attribute : reader->nodetype;
}

/* [4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] |
                         [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] |
                         [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
   [4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]
   [5]  Name ::= NameStartChar (NameChar)*

   Resumable: a non-zero resume[XmlReadResume_Name] is the offset where the name started
   on a previous, interrupted attempt. Returns WC_E_NAMECHARACTER for a bad first
   character and E_PENDING when input is pending. */
static HRESULT reader_parse_name(xmlreader *reader, strval *name)
{
    WCHAR *ptr;
    UINT start;

    if (reader->resume[XmlReadResume_Name])
    {
        start = reader->resume[XmlReadResume_Name];
        ptr = reader_get_ptr(reader);
    }
    else
    {
        ptr = reader_get_ptr(reader);
        start = reader_get_cur(reader);
        if (!is_namestartchar(*ptr)) return WC_E_NAMECHARACTER;
    }

    while (is_namechar(*ptr))
    {
        reader_skipn(reader, 1);
        ptr = reader_get_ptr(reader);
    }

    if (is_reader_pending(reader))
    {
        /* remember where the name began so the next call can continue */
        reader->resume[XmlReadResume_Name] = start;
        return E_PENDING;
    }
    else
        reader->resume[XmlReadResume_Name] = 0;

    reader_init_strvalue(start, reader_get_cur(reader)-start, name);
    TRACE("name %s:%d\n", debug_strval(reader, name), name->len);

    return S_OK;
}

/* [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
   Name parsing failures are reported as E_PENDING when input is pending, as WC_E_PI
   otherwise. A target equal to 'xml' per strval_eq() gives WC_E_LEADINGXML. */
static HRESULT reader_parse_pitarget(xmlreader *reader, strval *target)
{
    static const WCHAR xmlW[] = {'x','m','l'};
    static const strval xmlval = { (WCHAR*)xmlW, 3 };
    strval name;
    WCHAR *ptr;
    HRESULT hr;
    UINT i;

    hr = reader_parse_name(reader, &name);
    if (FAILED(hr)) return is_reader_pending(reader) ? E_PENDING : WC_E_PI;

    /* now that we got name check for illegal content */
    if (strval_eq(reader, &name, &xmlval))
        return WC_E_LEADINGXML;

    /* PITarget can't be a qualified name; a leading ':' is reported as WC_E_PI */
    ptr = reader_get_strptr(reader, &name);
    for (i = 0; i < name.len; i++)
        if (ptr[i] == ':')
            return i ? NC_E_NAMECOLON : WC_E_PI;

    TRACE("pitarget %s:%d\n", debug_strval(reader, &name), name.len);
    *target = name;
    return S_OK;
}

/* [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
   Resumable through XmlReadResumeState_PITarget / XmlReadResumeState_PIBody. When the
   buffered data ends before '?>' is seen S_OK is returned with the resume state kept. */
static HRESULT reader_parse_pi(xmlreader *reader)
{
    strval target;
    WCHAR *ptr;
    UINT start;
    HRESULT hr;

    switch (reader->resumestate)
    {
    case XmlReadResumeState_Initial:
        /* skip '<?' */
        reader_skipn(reader, 2);
        reader_shrink(reader);
        reader->resumestate = XmlReadResumeState_PITarget;
        /* fallthrough */
    case XmlReadResumeState_PITarget:
        hr = reader_parse_pitarget(reader, &target);
        if (FAILED(hr)) return hr;
        reader_set_strvalue(reader, StringValue_LocalName, &target);
        reader_set_strvalue(reader, StringValue_QualifiedName, &target);
        reader_set_strvalue(reader, StringValue_Value, &strval_empty);
        reader->resumestate = XmlReadResumeState_PIBody;
        reader->resume[XmlReadResume_Body] = reader_get_cur(reader);
        /* fallthrough */
    default:
        ;
    }

    start = reader->resume[XmlReadResume_Body];
    ptr = reader_get_ptr(reader);
    while (*ptr)
    {
        if (ptr[0] == '?')
        {
            if (ptr[1] == '>')
            {
                UINT cur = reader_get_cur(reader);
                strval value;

                /* strip all leading whitespace chars */
                while (start < cur)
                {
                    ptr = reader_get_ptr2(reader, start);
                    if (!is_wchar_space(*ptr)) break;
                    start++;
                }

                reader_init_strvalue(start, cur-start, &value);

                /* skip '?>' */
                reader_skipn(reader, 2);
                TRACE("%s\n", debug_strval(reader, &value));
                reader->nodetype = XmlNodeType_ProcessingInstruction;
                reader->resumestate = XmlReadResumeState_Initial;
                reader->resume[XmlReadResume_Body] = 0;
                reader_set_strvalue(reader, StringValue_Value, &value);
                return S_OK;
            }
        }

        reader_skipn(reader, 1);
        ptr = reader_get_ptr(reader);
    }

    return S_OK;
}

/* This one is used to parse significant whitespace nodes, like in Misc production.
   Resumable through XmlReadResumeState_Whitespace; always returns S_OK. */
static HRESULT reader_parse_whitespace(xmlreader *reader)
{
    switch (reader->resumestate)
    {
    case XmlReadResumeState_Initial:
        reader_shrink(reader);
        reader->resumestate = XmlReadResumeState_Whitespace;
        reader->resume[XmlReadResume_Body] = reader_get_cur(reader);
        reader->nodetype = XmlNodeType_Whitespace;
        reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
        reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
        reader_set_strvalue(reader, StringValue_Value, &strval_empty);
        /* fallthrough */
    case XmlReadResumeState_Whitespace:
    {
        strval value;
        UINT start;

        reader_skipspaces(reader);
        /* more whitespace may follow, keep the resume state and finish on a later call */
        if (is_reader_pending(reader)) return S_OK;

        start = reader->resume[XmlReadResume_Body];
        reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
        reader_set_strvalue(reader, StringValue_Value, &value);
        TRACE("%s\n", debug_strval(reader, &value));
        reader->resumestate = XmlReadResumeState_Initial;
    }
    default:
        ;
    }

    return S_OK;
}

/* [27] Misc ::= Comment | PI | S
   Finishes an interrupted node first if there is one. Returns S_FALSE when input at
   the cursor is neither whitespace, a comment nor a PI. */
static HRESULT reader_parse_misc(xmlreader *reader)
{
    HRESULT hr = S_FALSE;

    if (reader->resumestate != XmlReadResumeState_Initial)
    {
        hr = reader_more(reader);
        if (FAILED(hr)) return hr;

        /* finish current node */
        switch (reader->resumestate)
        {
        case XmlReadResumeState_PITarget:
        case XmlReadResumeState_PIBody:
            return reader_parse_pi(reader);
        case XmlReadResumeState_Comment:
            return reader_parse_comment(reader);
        case XmlReadResumeState_Whitespace:
            return reader_parse_whitespace(reader);
        default:
            /* not returned as an error, parsing continues with the loop below */
            ERR("unknown resume state %d\n", reader->resumestate);
        }
    }

    while (1)
    {
        const WCHAR *cur = reader_get_ptr(reader);

        if (is_wchar_space(*cur))
            hr = reader_parse_whitespace(reader);
        else if (!reader_cmp(reader, commentW))
            hr = reader_parse_comment(reader);
        else if (!reader_cmp(reader, piW))
            hr = reader_parse_pi(reader);
        else
            break;

        if (hr != S_FALSE) return hr;
    }

    return hr;
}

/* [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
   Returns WC_E_QUOTE when there's no opening quote. A missing closing quote is not
   reported as an error here. */
static HRESULT reader_parse_sys_literal(xmlreader *reader, strval *literal)
{
    WCHAR *cur = reader_get_ptr(reader), quote;
    UINT start;

    if (*cur != '"' && *cur != '\'') return WC_E_QUOTE;

    quote = *cur;
    reader_skipn(reader, 1);

    cur = reader_get_ptr(reader);
    start = reader_get_cur(reader);
    while (is_char(*cur) && *cur != quote)
    {
        reader_skipn(reader, 1);
        cur = reader_get_ptr(reader);
    }
    reader_init_strvalue(start, reader_get_cur(reader)-start, literal);
    if (*cur == quote) reader_skipn(reader, 1);

    TRACE("%s\n", debug_strval(reader, literal));
    return S_OK;
}

/* [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
   [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
   Returns WC_E_QUOTE when there's no opening quote. A missing closing quote is not
   reported as an error here. */
static HRESULT reader_parse_pub_literal(xmlreader *reader, strval *literal)
{
    WCHAR *cur = reader_get_ptr(reader), quote;
    UINT start;

    if (*cur != '"' && *cur != '\'') return WC_E_QUOTE;

    quote = *cur;
    reader_skipn(reader, 1);

    start = reader_get_cur(reader);
    cur = reader_get_ptr(reader);
    while (is_pubchar(*cur) && *cur != quote)
    {
        reader_skipn(reader, 1);
        cur = reader_get_ptr(reader);
    }
    reader_init_strvalue(start, reader_get_cur(reader)-start, literal);
    if (*cur == quote) reader_skipn(reader, 1);

    TRACE("%s\n", debug_strval(reader, literal));
    return S_OK;
}

/* [75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral
   Parsed ids are stored with reader_add_attr() under the names PUBLIC and SYSTEM.
   Returns S_FALSE when neither keyword is present. */
static HRESULT reader_parse_externalid(xmlreader *reader)
{
    static WCHAR systemW[] = {'S','Y','S','T','E','M',0};
    static WCHAR publicW[] = {'P','U','B','L','I','C',0};
    struct reader_position position = reader->position;
    strval name, sys;
    HRESULT hr;
    int cnt;

    if (!reader_cmp(reader, publicW)) {
        strval pub;

        /* public id */
        reader_skipn(reader, 6);
        cnt = reader_skipspaces(reader);
        if (!cnt) return WC_E_WHITESPACE;

        hr = reader_parse_pub_literal(reader, &pub);
        if (FAILED(hr)) return hr;

        reader_init_cstrvalue(publicW, lstrlenW(publicW), &name);
        hr = reader_add_attr(reader, NULL, &name, NULL, &pub, &position, 0);
        if (FAILED(hr)) return hr;

        cnt = reader_skipspaces(reader);
        if (!cnt) return S_OK;

        /* optional system id, failure to parse it is not an error */
        hr = reader_parse_sys_literal(reader, &sys);
        if (FAILED(hr)) return S_OK;

        reader_init_cstrvalue(systemW, lstrlenW(systemW), &name);
        hr = reader_add_attr(reader, NULL, &name, NULL, &sys, &position, 0);
        if (FAILED(hr)) return hr;

        return S_OK;
    } else if (!reader_cmp(reader, systemW)) {
        /* system id */
        reader_skipn(reader, 6);
        cnt = reader_skipspaces(reader);
        if (!cnt) return WC_E_WHITESPACE;

        hr = reader_parse_sys_literal(reader, &sys);
        if (FAILED(hr)) return hr;

        reader_init_cstrvalue(systemW, lstrlenW(systemW), &name);
        return reader_add_attr(reader, NULL, &name, NULL, &sys, &position, 0);
    }

    return S_FALSE;
}

/* [28] doctypedecl
::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>' */ 1886 static HRESULT reader_parse_dtd(xmlreader *reader) 1887 { 1888 static const WCHAR doctypeW[] = {'<','!','D','O','C','T','Y','P','E',0}; 1889 strval name; 1890 WCHAR *cur; 1891 HRESULT hr; 1892 1893 /* check if we have "<!DOCTYPE" */ 1894 if (reader_cmp(reader, doctypeW)) return S_FALSE; 1895 reader_shrink(reader); 1896 1897 /* DTD processing is not allowed by default */ 1898 if (reader->dtdmode == DtdProcessing_Prohibit) return WC_E_DTDPROHIBITED; 1899 1900 reader_skipn(reader, 9); 1901 if (!reader_skipspaces(reader)) return WC_E_WHITESPACE; 1902 1903 /* name */ 1904 hr = reader_parse_name(reader, &name); 1905 if (FAILED(hr)) return WC_E_DECLDOCTYPE; 1906 1907 reader_skipspaces(reader); 1908 1909 hr = reader_parse_externalid(reader); 1910 if (FAILED(hr)) return hr; 1911 1912 reader_skipspaces(reader); 1913 1914 cur = reader_get_ptr(reader); 1915 if (*cur != '>') 1916 { 1917 FIXME("internal subset parsing not implemented\n"); 1918 return E_NOTIMPL; 1919 } 1920 1921 /* skip '>' */ 1922 reader_skipn(reader, 1); 1923 1924 reader->nodetype = XmlNodeType_DocumentType; 1925 reader_set_strvalue(reader, StringValue_LocalName, &name); 1926 reader_set_strvalue(reader, StringValue_QualifiedName, &name); 1927 1928 return S_OK; 1929 } 1930 1931 /* [11 NS] LocalPart ::= NCName */ 1932 static HRESULT reader_parse_local(xmlreader *reader, strval *local, BOOL check_for_separator) 1933 { 1934 WCHAR *ptr; 1935 UINT start; 1936 1937 if (reader->resume[XmlReadResume_Local]) 1938 { 1939 start = reader->resume[XmlReadResume_Local]; 1940 ptr = reader_get_ptr(reader); 1941 } 1942 else 1943 { 1944 ptr = reader_get_ptr(reader); 1945 start = reader_get_cur(reader); 1946 } 1947 1948 while (is_ncnamechar(*ptr)) 1949 { 1950 reader_skipn(reader, 1); 1951 ptr = reader_get_ptr(reader); 1952 } 1953 1954 if (check_for_separator && *ptr == ':') 1955 return NC_E_QNAMECOLON; 1956 1957 if (is_reader_pending(reader)) 1958 { 1959 
reader->resume[XmlReadResume_Local] = start; 1960 return E_PENDING; 1961 } 1962 else 1963 reader->resume[XmlReadResume_Local] = 0; 1964 1965 reader_init_strvalue(start, reader_get_cur(reader)-start, local); 1966 1967 return S_OK; 1968 } 1969 1970 /* [7 NS] QName ::= PrefixedName | UnprefixedName 1971 [8 NS] PrefixedName ::= Prefix ':' LocalPart 1972 [9 NS] UnprefixedName ::= LocalPart 1973 [10 NS] Prefix ::= NCName */ 1974 static HRESULT reader_parse_qname(xmlreader *reader, strval *prefix, strval *local, strval *qname) 1975 { 1976 WCHAR *ptr; 1977 UINT start; 1978 HRESULT hr; 1979 1980 if (reader->resume[XmlReadResume_Name]) 1981 { 1982 start = reader->resume[XmlReadResume_Name]; 1983 ptr = reader_get_ptr(reader); 1984 } 1985 else 1986 { 1987 ptr = reader_get_ptr(reader); 1988 start = reader_get_cur(reader); 1989 reader->resume[XmlReadResume_Name] = start; 1990 if (!is_ncnamechar(*ptr)) return NC_E_QNAMECHARACTER; 1991 } 1992 1993 if (reader->resume[XmlReadResume_Local]) 1994 { 1995 hr = reader_parse_local(reader, local, FALSE); 1996 if (FAILED(hr)) return hr; 1997 1998 reader_init_strvalue(reader->resume[XmlReadResume_Name], 1999 local->start - reader->resume[XmlReadResume_Name] - 1, 2000 prefix); 2001 } 2002 else 2003 { 2004 /* skip prefix part */ 2005 while (is_ncnamechar(*ptr)) 2006 { 2007 reader_skipn(reader, 1); 2008 ptr = reader_get_ptr(reader); 2009 } 2010 2011 if (is_reader_pending(reader)) return E_PENDING; 2012 2013 /* got a qualified name */ 2014 if (*ptr == ':') 2015 { 2016 reader_init_strvalue(start, reader_get_cur(reader)-start, prefix); 2017 2018 /* skip ':' */ 2019 reader_skipn(reader, 1); 2020 hr = reader_parse_local(reader, local, TRUE); 2021 if (FAILED(hr)) return hr; 2022 } 2023 else 2024 { 2025 reader_init_strvalue(reader->resume[XmlReadResume_Name], reader_get_cur(reader)-reader->resume[XmlReadResume_Name], local); 2026 reader_init_strvalue(0, 0, prefix); 2027 } 2028 } 2029 2030 if (prefix->len) 2031 TRACE("qname %s:%s\n", 
debug_strval(reader, prefix), debug_strval(reader, local)); 2032 else 2033 TRACE("ncname %s\n", debug_strval(reader, local)); 2034 2035 reader_init_strvalue(prefix->len ? prefix->start : local->start, 2036 /* count ':' too */ 2037 (prefix->len ? prefix->len + 1 : 0) + local->len, 2038 qname); 2039 2040 reader->resume[XmlReadResume_Name] = 0; 2041 reader->resume[XmlReadResume_Local] = 0; 2042 2043 return S_OK; 2044 } 2045 2046 static WCHAR get_predefined_entity(const xmlreader *reader, const strval *name) 2047 { 2048 static const WCHAR entltW[] = {'l','t'}; 2049 static const WCHAR entgtW[] = {'g','t'}; 2050 static const WCHAR entampW[] = {'a','m','p'}; 2051 static const WCHAR entaposW[] = {'a','p','o','s'}; 2052 static const WCHAR entquotW[] = {'q','u','o','t'}; 2053 static const strval lt = { (WCHAR*)entltW, 2 }; 2054 static const strval gt = { (WCHAR*)entgtW, 2 }; 2055 static const strval amp = { (WCHAR*)entampW, 3 }; 2056 static const strval apos = { (WCHAR*)entaposW, 4 }; 2057 static const strval quot = { (WCHAR*)entquotW, 4 }; 2058 WCHAR *str = reader_get_strptr(reader, name); 2059 2060 switch (*str) 2061 { 2062 case 'l': 2063 if (strval_eq(reader, name, <)) return '<'; 2064 break; 2065 case 'g': 2066 if (strval_eq(reader, name, >)) return '>'; 2067 break; 2068 case 'a': 2069 if (strval_eq(reader, name, &)) 2070 return '&'; 2071 else if (strval_eq(reader, name, &apos)) 2072 return '\''; 2073 break; 2074 case 'q': 2075 if (strval_eq(reader, name, ")) return '\"'; 2076 break; 2077 default: 2078 ; 2079 } 2080 2081 return 0; 2082 } 2083 2084 /* [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';' 2085 [67] Reference ::= EntityRef | CharRef 2086 [68] EntityRef ::= '&' Name ';' */ 2087 static HRESULT reader_parse_reference(xmlreader *reader) 2088 { 2089 encoded_buffer *buffer = &reader->input->buffer->utf16; 2090 WCHAR *start = reader_get_ptr(reader), *ptr; 2091 UINT cur = reader_get_cur(reader); 2092 WCHAR ch = 0; 2093 int len; 2094 2095 /* skip '&' */ 2096 
reader_skipn(reader, 1); 2097 ptr = reader_get_ptr(reader); 2098 2099 if (*ptr == '#') 2100 { 2101 reader_skipn(reader, 1); 2102 ptr = reader_get_ptr(reader); 2103 2104 /* hex char or decimal */ 2105 if (*ptr == 'x') 2106 { 2107 reader_skipn(reader, 1); 2108 ptr = reader_get_ptr(reader); 2109 2110 while (*ptr != ';') 2111 { 2112 if ((*ptr >= '0' && *ptr <= '9')) 2113 ch = ch*16 + *ptr - '0'; 2114 else if ((*ptr >= 'a' && *ptr <= 'f')) 2115 ch = ch*16 + *ptr - 'a' + 10; 2116 else if ((*ptr >= 'A' && *ptr <= 'F')) 2117 ch = ch*16 + *ptr - 'A' + 10; 2118 else 2119 return ch ? WC_E_SEMICOLON : WC_E_HEXDIGIT; 2120 reader_skipn(reader, 1); 2121 ptr = reader_get_ptr(reader); 2122 } 2123 } 2124 else 2125 { 2126 while (*ptr != ';') 2127 { 2128 if ((*ptr >= '0' && *ptr <= '9')) 2129 { 2130 ch = ch*10 + *ptr - '0'; 2131 reader_skipn(reader, 1); 2132 ptr = reader_get_ptr(reader); 2133 } 2134 else 2135 return ch ? WC_E_SEMICOLON : WC_E_DIGIT; 2136 } 2137 } 2138 2139 if (!is_char(ch)) return WC_E_XMLCHARACTER; 2140 2141 /* normalize */ 2142 if (is_wchar_space(ch)) ch = ' '; 2143 2144 ptr = reader_get_ptr(reader); 2145 start = reader_get_ptr2(reader, cur); 2146 len = buffer->written - ((char *)ptr - buffer->data); 2147 memmove(start + 1, ptr + 1, len); 2148 2149 buffer->written -= (reader_get_cur(reader) - cur) * sizeof(WCHAR); 2150 buffer->cur = cur + 1; 2151 2152 *start = ch; 2153 } 2154 else 2155 { 2156 strval name; 2157 HRESULT hr; 2158 2159 hr = reader_parse_name(reader, &name); 2160 if (FAILED(hr)) return hr; 2161 2162 ptr = reader_get_ptr(reader); 2163 if (*ptr != ';') return WC_E_SEMICOLON; 2164 2165 /* predefined entities resolve to a single character */ 2166 ch = get_predefined_entity(reader, &name); 2167 if (ch) 2168 { 2169 len = buffer->written - ((char*)ptr - buffer->data) - sizeof(WCHAR); 2170 memmove(start+1, ptr+1, len); 2171 buffer->cur = cur + 1; 2172 buffer->written -= (ptr - start) * sizeof(WCHAR); 2173 2174 *start = ch; 2175 } 2176 else 2177 { 2178 
FIXME("undeclared entity %s\n", debug_strval(reader, &name)); 2179 return WC_E_UNDECLAREDENTITY; 2180 } 2181 2182 } 2183 2184 return S_OK; 2185 } 2186 2187 /* [10 NS] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'" */ 2188 static HRESULT reader_parse_attvalue(xmlreader *reader, strval *value) 2189 { 2190 WCHAR *ptr, quote; 2191 UINT start; 2192 2193 ptr = reader_get_ptr(reader); 2194 2195 /* skip opening quote */ 2196 quote = *ptr; 2197 if (quote != '\"' && quote != '\'') return WC_E_QUOTE; 2198 reader_skipn(reader, 1); 2199 2200 ptr = reader_get_ptr(reader); 2201 start = reader_get_cur(reader); 2202 while (*ptr) 2203 { 2204 if (*ptr == '<') return WC_E_LESSTHAN; 2205 2206 if (*ptr == quote) 2207 { 2208 reader_init_strvalue(start, reader_get_cur(reader)-start, value); 2209 /* skip closing quote */ 2210 reader_skipn(reader, 1); 2211 return S_OK; 2212 } 2213 2214 if (*ptr == '&') 2215 { 2216 HRESULT hr = reader_parse_reference(reader); 2217 if (FAILED(hr)) return hr; 2218 } 2219 else 2220 { 2221 /* replace all whitespace chars with ' ' */ 2222 if (is_wchar_space(*ptr)) *ptr = ' '; 2223 reader_skipn(reader, 1); 2224 } 2225 ptr = reader_get_ptr(reader); 2226 } 2227 2228 return WC_E_QUOTE; 2229 } 2230 2231 /* [1 NS] NSAttName ::= PrefixedAttName | DefaultAttName 2232 [2 NS] PrefixedAttName ::= 'xmlns:' NCName 2233 [3 NS] DefaultAttName ::= 'xmlns' 2234 [15 NS] Attribute ::= NSAttName Eq AttValue | QName Eq AttValue */ 2235 static HRESULT reader_parse_attribute(xmlreader *reader) 2236 { 2237 struct reader_position position = reader->position; 2238 strval prefix, local, qname, value; 2239 enum attribute_flags flags = 0; 2240 HRESULT hr; 2241 2242 hr = reader_parse_qname(reader, &prefix, &local, &qname); 2243 if (FAILED(hr)) return hr; 2244 2245 if (strval_eq(reader, &prefix, &strval_xmlns)) 2246 flags |= ATTRIBUTE_NS_DEFINITION; 2247 2248 if (strval_eq(reader, &qname, &strval_xmlns)) 2249 flags |= ATTRIBUTE_DEFAULT_NS_DEFINITION; 2250 2251 hr = 
reader_parse_eq(reader); 2252 if (FAILED(hr)) return hr; 2253 2254 hr = reader_parse_attvalue(reader, &value); 2255 if (FAILED(hr)) return hr; 2256 2257 if (flags & (ATTRIBUTE_NS_DEFINITION | ATTRIBUTE_DEFAULT_NS_DEFINITION)) 2258 reader_push_ns(reader, &local, &value, !!(flags & ATTRIBUTE_DEFAULT_NS_DEFINITION)); 2259 2260 TRACE("%s=%s\n", debug_strval(reader, &local), debug_strval(reader, &value)); 2261 return reader_add_attr(reader, &prefix, &local, &qname, &value, &position, flags); 2262 } 2263 2264 /* [12 NS] STag ::= '<' QName (S Attribute)* S? '>' 2265 [14 NS] EmptyElemTag ::= '<' QName (S Attribute)* S? '/>' */ 2266 static HRESULT reader_parse_stag(xmlreader *reader, strval *prefix, strval *local, strval *qname) 2267 { 2268 struct reader_position position = reader->position; 2269 HRESULT hr; 2270 2271 hr = reader_parse_qname(reader, prefix, local, qname); 2272 if (FAILED(hr)) return hr; 2273 2274 for (;;) 2275 { 2276 static const WCHAR endW[] = {'/','>',0}; 2277 2278 reader_skipspaces(reader); 2279 2280 /* empty element */ 2281 if ((reader->is_empty_element = !reader_cmp(reader, endW))) 2282 { 2283 struct element *element = &reader->empty_element; 2284 2285 /* skip '/>' */ 2286 reader_skipn(reader, 2); 2287 2288 reader_free_strvalued(reader, &element->qname); 2289 reader_free_strvalued(reader, &element->localname); 2290 2291 element->prefix = *prefix; 2292 reader_strvaldup(reader, qname, &element->qname); 2293 reader_strvaldup(reader, local, &element->localname); 2294 element->position = position; 2295 reader_mark_ns_nodes(reader, element); 2296 return S_OK; 2297 } 2298 2299 /* got a start tag */ 2300 if (!reader_cmp(reader, gtW)) 2301 { 2302 /* skip '>' */ 2303 reader_skipn(reader, 1); 2304 return reader_push_element(reader, prefix, local, qname, &position); 2305 } 2306 2307 hr = reader_parse_attribute(reader); 2308 if (FAILED(hr)) return hr; 2309 } 2310 2311 return S_OK; 2312 } 2313 2314 /* [39] element ::= EmptyElemTag | STag content ETag */ 2315 static 
HRESULT reader_parse_element(xmlreader *reader)
{
    /* Returns S_FALSE when input is not positioned on '<', the failure code
       from reader_parse_stag() on error, and E_FAIL for a resume state this
       function does not handle.  On success the node type is set to Element
       and the next input state is selected. */
    HRESULT hr;

    switch (reader->resumestate)
    {
    case XmlReadResumeState_Initial:
        /* check if we are really on element */
        if (reader_cmp(reader, ltW)) return S_FALSE;

        /* skip '<' */
        reader_skipn(reader, 1);

        reader_shrink(reader);
        reader->resumestate = XmlReadResumeState_STag;
        /* fallthrough */
    case XmlReadResumeState_STag:
    {
        strval qname, prefix, local;

        /* this handles empty elements too */
        hr = reader_parse_stag(reader, &prefix, &local, &qname);
        if (FAILED(hr)) return hr;

        /* FIXME: need to check for defined namespace to reject invalid prefix */

        /* if we got empty element and stack is empty go straight to Misc */
        if (reader->is_empty_element && list_empty(&reader->elements))
            reader->instate = XmlReadInState_MiscEnd;
        else
            reader->instate = XmlReadInState_Content;

        reader->nodetype = XmlNodeType_Element;
        reader->resumestate = XmlReadResumeState_Initial;
        reader_set_strvalue(reader, StringValue_Prefix, &prefix);
        reader_set_strvalue(reader, StringValue_QualifiedName, &qname);
        reader_set_strvalue(reader, StringValue_Value, &strval_empty);
        break;
    }
    default:
        hr = E_FAIL;
    }

    return hr;
}

/* [13 NS] ETag ::= '</' QName S?
'>' */ 2361 static HRESULT reader_parse_endtag(xmlreader *reader) 2362 { 2363 struct reader_position position; 2364 strval prefix, local, qname; 2365 struct element *element; 2366 HRESULT hr; 2367 2368 /* skip '</' */ 2369 reader_skipn(reader, 2); 2370 2371 position = reader->position; 2372 hr = reader_parse_qname(reader, &prefix, &local, &qname); 2373 if (FAILED(hr)) return hr; 2374 2375 reader_skipspaces(reader); 2376 2377 if (reader_cmp(reader, gtW)) return WC_E_GREATERTHAN; 2378 2379 /* skip '>' */ 2380 reader_skipn(reader, 1); 2381 2382 /* Element stack should never be empty at this point, cause we shouldn't get to 2383 content parsing if it's empty. */ 2384 element = LIST_ENTRY(list_head(&reader->elements), struct element, entry); 2385 if (!strval_eq(reader, &element->qname, &qname)) return WC_E_ELEMENTMATCH; 2386 2387 /* update position stored for start tag, we won't be using it */ 2388 element->position = position; 2389 2390 reader->nodetype = XmlNodeType_EndElement; 2391 reader->is_empty_element = FALSE; 2392 reader_set_strvalue(reader, StringValue_Prefix, &prefix); 2393 2394 return S_OK; 2395 } 2396 2397 /* [18] CDSect ::= CDStart CData CDEnd 2398 [19] CDStart ::= '<![CDATA[' 2399 [20] CData ::= (Char* - (Char* ']]>' Char*)) 2400 [21] CDEnd ::= ']]>' */ 2401 static HRESULT reader_parse_cdata(xmlreader *reader) 2402 { 2403 WCHAR *ptr; 2404 UINT start; 2405 2406 if (reader->resumestate == XmlReadResumeState_CDATA) 2407 { 2408 start = reader->resume[XmlReadResume_Body]; 2409 ptr = reader_get_ptr(reader); 2410 } 2411 else 2412 { 2413 /* skip markup '<![CDATA[' */ 2414 reader_skipn(reader, 9); 2415 reader_shrink(reader); 2416 ptr = reader_get_ptr(reader); 2417 start = reader_get_cur(reader); 2418 reader->nodetype = XmlNodeType_CDATA; 2419 reader->resume[XmlReadResume_Body] = start; 2420 reader->resumestate = XmlReadResumeState_CDATA; 2421 reader_set_strvalue(reader, StringValue_Value, NULL); 2422 } 2423 2424 while (*ptr) 2425 { 2426 if (*ptr == ']' && *(ptr+1) 
== ']' && *(ptr+2) == '>') 2427 { 2428 strval value; 2429 2430 reader_init_strvalue(start, reader_get_cur(reader)-start, &value); 2431 2432 /* skip ']]>' */ 2433 reader_skipn(reader, 3); 2434 TRACE("%s\n", debug_strval(reader, &value)); 2435 2436 reader_set_strvalue(reader, StringValue_Value, &value); 2437 reader->resume[XmlReadResume_Body] = 0; 2438 reader->resumestate = XmlReadResumeState_Initial; 2439 return S_OK; 2440 } 2441 else 2442 { 2443 reader_skipn(reader, 1); 2444 ptr++; 2445 } 2446 } 2447 2448 return S_OK; 2449 } 2450 2451 /* [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) */ 2452 static HRESULT reader_parse_chardata(xmlreader *reader) 2453 { 2454 struct reader_position position; 2455 WCHAR *ptr; 2456 UINT start; 2457 2458 if (reader->resumestate == XmlReadResumeState_CharData) 2459 { 2460 start = reader->resume[XmlReadResume_Body]; 2461 ptr = reader_get_ptr(reader); 2462 } 2463 else 2464 { 2465 reader_shrink(reader); 2466 ptr = reader_get_ptr(reader); 2467 start = reader_get_cur(reader); 2468 /* There's no text */ 2469 if (!*ptr || *ptr == '<') return S_OK; 2470 reader->nodetype = is_wchar_space(*ptr) ? 
XmlNodeType_Whitespace : XmlNodeType_Text; 2471 reader->resume[XmlReadResume_Body] = start; 2472 reader->resumestate = XmlReadResumeState_CharData; 2473 reader_set_strvalue(reader, StringValue_Value, NULL); 2474 } 2475 2476 position = reader->position; 2477 while (*ptr) 2478 { 2479 static const WCHAR ampW[] = {'&',0}; 2480 2481 /* CDATA closing sequence ']]>' is not allowed */ 2482 if (ptr[0] == ']' && ptr[1] == ']' && ptr[2] == '>') 2483 return WC_E_CDSECTEND; 2484 2485 /* Found next markup part */ 2486 if (ptr[0] == '<') 2487 { 2488 strval value; 2489 2490 reader->empty_element.position = position; 2491 reader_init_strvalue(start, reader_get_cur(reader)-start, &value); 2492 reader_set_strvalue(reader, StringValue_Value, &value); 2493 reader->resume[XmlReadResume_Body] = 0; 2494 reader->resumestate = XmlReadResumeState_Initial; 2495 return S_OK; 2496 } 2497 2498 /* this covers a case when text has leading whitespace chars */ 2499 if (!is_wchar_space(*ptr)) reader->nodetype = XmlNodeType_Text; 2500 2501 if (!reader_cmp(reader, ampW)) 2502 reader_parse_reference(reader); 2503 else 2504 reader_skipn(reader, 1); 2505 2506 ptr = reader_get_ptr(reader); 2507 } 2508 2509 return S_OK; 2510 } 2511 2512 /* [43] content ::= CharData? 
((element | Reference | CDSect | PI | Comment) CharData?)* */ 2513 static HRESULT reader_parse_content(xmlreader *reader) 2514 { 2515 static const WCHAR cdstartW[] = {'<','!','[','C','D','A','T','A','[',0}; 2516 static const WCHAR etagW[] = {'<','/',0}; 2517 2518 if (reader->resumestate != XmlReadResumeState_Initial) 2519 { 2520 switch (reader->resumestate) 2521 { 2522 case XmlReadResumeState_CDATA: 2523 return reader_parse_cdata(reader); 2524 case XmlReadResumeState_Comment: 2525 return reader_parse_comment(reader); 2526 case XmlReadResumeState_PIBody: 2527 case XmlReadResumeState_PITarget: 2528 return reader_parse_pi(reader); 2529 case XmlReadResumeState_CharData: 2530 return reader_parse_chardata(reader); 2531 default: 2532 ERR("unknown resume state %d\n", reader->resumestate); 2533 } 2534 } 2535 2536 reader_shrink(reader); 2537 2538 /* handle end tag here, it indicates end of content as well */ 2539 if (!reader_cmp(reader, etagW)) 2540 return reader_parse_endtag(reader); 2541 2542 if (!reader_cmp(reader, commentW)) 2543 return reader_parse_comment(reader); 2544 2545 if (!reader_cmp(reader, piW)) 2546 return reader_parse_pi(reader); 2547 2548 if (!reader_cmp(reader, cdstartW)) 2549 return reader_parse_cdata(reader); 2550 2551 if (!reader_cmp(reader, ltW)) 2552 return reader_parse_element(reader); 2553 2554 /* what's left must be CharData */ 2555 return reader_parse_chardata(reader); 2556 } 2557 2558 static HRESULT reader_parse_nextnode(xmlreader *reader) 2559 { 2560 XmlNodeType nodetype = reader_get_nodetype(reader); 2561 HRESULT hr; 2562 2563 if (!is_reader_pending(reader)) 2564 { 2565 reader->chunk_read_off = 0; 2566 reader_clear_attrs(reader); 2567 } 2568 2569 /* When moving from EndElement or empty element, pop its own namespace definitions */ 2570 switch (nodetype) 2571 { 2572 case XmlNodeType_Attribute: 2573 reader_dec_depth(reader); 2574 /* fallthrough */ 2575 case XmlNodeType_Element: 2576 if (reader->is_empty_element) 2577 reader_pop_ns_nodes(reader, 
&reader->empty_element); 2578 else if (FAILED(hr = reader_inc_depth(reader))) 2579 return hr; 2580 break; 2581 case XmlNodeType_EndElement: 2582 reader_pop_element(reader); 2583 reader_dec_depth(reader); 2584 break; 2585 default: 2586 ; 2587 } 2588 2589 for (;;) 2590 { 2591 switch (reader->instate) 2592 { 2593 /* if it's a first call for a new input we need to detect stream encoding */ 2594 case XmlReadInState_Initial: 2595 { 2596 xml_encoding enc; 2597 2598 hr = readerinput_growraw(reader->input); 2599 if (FAILED(hr)) return hr; 2600 2601 reader->position.line_number = 1; 2602 reader->position.line_position = 1; 2603 2604 /* try to detect encoding by BOM or data and set input code page */ 2605 hr = readerinput_detectencoding(reader->input, &enc); 2606 TRACE("detected encoding %s, 0x%08x\n", enc == XmlEncoding_Unknown ? "(unknown)" : 2607 debugstr_w(xml_encoding_map[enc].name), hr); 2608 if (FAILED(hr)) return hr; 2609 2610 /* always switch first time cause we have to put something in */ 2611 readerinput_switchencoding(reader->input, enc); 2612 2613 /* parse xml declaration */ 2614 hr = reader_parse_xmldecl(reader); 2615 if (FAILED(hr)) return hr; 2616 2617 readerinput_shrinkraw(reader->input, -1); 2618 reader->instate = XmlReadInState_Misc_DTD; 2619 if (hr == S_OK) return hr; 2620 } 2621 break; 2622 case XmlReadInState_Misc_DTD: 2623 hr = reader_parse_misc(reader); 2624 if (FAILED(hr)) return hr; 2625 2626 if (hr == S_FALSE) 2627 reader->instate = XmlReadInState_DTD; 2628 else 2629 return hr; 2630 break; 2631 case XmlReadInState_DTD: 2632 hr = reader_parse_dtd(reader); 2633 if (FAILED(hr)) return hr; 2634 2635 if (hr == S_OK) 2636 { 2637 reader->instate = XmlReadInState_DTD_Misc; 2638 return hr; 2639 } 2640 else 2641 reader->instate = XmlReadInState_Element; 2642 break; 2643 case XmlReadInState_DTD_Misc: 2644 hr = reader_parse_misc(reader); 2645 if (FAILED(hr)) return hr; 2646 2647 if (hr == S_FALSE) 2648 reader->instate = XmlReadInState_Element; 2649 else 2650 
return hr; 2651 break; 2652 case XmlReadInState_Element: 2653 return reader_parse_element(reader); 2654 case XmlReadInState_Content: 2655 return reader_parse_content(reader); 2656 case XmlReadInState_MiscEnd: 2657 hr = reader_parse_misc(reader); 2658 if (hr != S_FALSE) return hr; 2659 2660 if (*reader_get_ptr(reader)) 2661 { 2662 WARN("found garbage in the end of XML\n"); 2663 return WC_E_SYNTAX; 2664 } 2665 2666 reader->instate = XmlReadInState_Eof; 2667 reader->state = XmlReadState_EndOfFile; 2668 reader->nodetype = XmlNodeType_None; 2669 return hr; 2670 case XmlReadInState_Eof: 2671 return S_FALSE; 2672 default: 2673 FIXME("internal state %d not handled\n", reader->instate); 2674 return E_NOTIMPL; 2675 } 2676 } 2677 2678 return E_NOTIMPL; 2679 } 2680 2681 static HRESULT WINAPI xmlreader_QueryInterface(IXmlReader *iface, REFIID riid, void** ppvObject) 2682 { 2683 xmlreader *This = impl_from_IXmlReader(iface); 2684 2685 TRACE("(%p)->(%s %p)\n", This, debugstr_guid(riid), ppvObject); 2686 2687 if (IsEqualGUID(riid, &IID_IUnknown) || 2688 IsEqualGUID(riid, &IID_IXmlReader)) 2689 { 2690 *ppvObject = iface; 2691 } 2692 else 2693 { 2694 FIXME("interface %s not implemented\n", debugstr_guid(riid)); 2695 *ppvObject = NULL; 2696 return E_NOINTERFACE; 2697 } 2698 2699 IXmlReader_AddRef(iface); 2700 2701 return S_OK; 2702 } 2703 2704 static ULONG WINAPI xmlreader_AddRef(IXmlReader *iface) 2705 { 2706 xmlreader *This = impl_from_IXmlReader(iface); 2707 ULONG ref = InterlockedIncrement(&This->ref); 2708 TRACE("(%p)->(%d)\n", This, ref); 2709 return ref; 2710 } 2711 2712 static void reader_clear_ns(xmlreader *reader) 2713 { 2714 struct ns *ns, *ns2; 2715 2716 LIST_FOR_EACH_ENTRY_SAFE(ns, ns2, &reader->ns, struct ns, entry) { 2717 list_remove(&ns->entry); 2718 reader_free_strvalued(reader, &ns->prefix); 2719 reader_free_strvalued(reader, &ns->uri); 2720 reader_free(reader, ns); 2721 } 2722 2723 LIST_FOR_EACH_ENTRY_SAFE(ns, ns2, &reader->nsdef, struct ns, entry) { 2724 
list_remove(&ns->entry); 2725 reader_free_strvalued(reader, &ns->uri); 2726 reader_free(reader, ns); 2727 } 2728 } 2729 2730 static void reader_reset_parser(xmlreader *reader) 2731 { 2732 reader->position.line_number = 0; 2733 reader->position.line_position = 0; 2734 2735 reader_clear_elements(reader); 2736 reader_clear_attrs(reader); 2737 reader_clear_ns(reader); 2738 reader_free_strvalues(reader); 2739 2740 reader->depth = 0; 2741 reader->nodetype = XmlNodeType_None; 2742 reader->resumestate = XmlReadResumeState_Initial; 2743 memset(reader->resume, 0, sizeof(reader->resume)); 2744 reader->is_empty_element = FALSE; 2745 } 2746 2747 static ULONG WINAPI xmlreader_Release(IXmlReader *iface) 2748 { 2749 xmlreader *This = impl_from_IXmlReader(iface); 2750 LONG ref = InterlockedDecrement(&This->ref); 2751 2752 TRACE("(%p)->(%d)\n", This, ref); 2753 2754 if (ref == 0) 2755 { 2756 IMalloc *imalloc = This->imalloc; 2757 reader_reset_parser(This); 2758 if (This->input) IUnknown_Release(&This->input->IXmlReaderInput_iface); 2759 if (This->resolver) IXmlResolver_Release(This->resolver); 2760 if (This->mlang) IUnknown_Release(This->mlang); 2761 reader_free(This, This); 2762 if (imalloc) IMalloc_Release(imalloc); 2763 } 2764 2765 return ref; 2766 } 2767 2768 static HRESULT WINAPI xmlreader_SetInput(IXmlReader* iface, IUnknown *input) 2769 { 2770 xmlreader *This = impl_from_IXmlReader(iface); 2771 IXmlReaderInput *readerinput; 2772 HRESULT hr; 2773 2774 TRACE("(%p)->(%p)\n", This, input); 2775 2776 if (This->input) 2777 { 2778 readerinput_release_stream(This->input); 2779 IUnknown_Release(&This->input->IXmlReaderInput_iface); 2780 This->input = NULL; 2781 } 2782 2783 reader_reset_parser(This); 2784 2785 /* just reset current input */ 2786 if (!input) 2787 { 2788 This->state = XmlReadState_Initial; 2789 return S_OK; 2790 } 2791 2792 /* now try IXmlReaderInput, ISequentialStream, IStream */ 2793 hr = IUnknown_QueryInterface(input, &IID_IXmlReaderInput, (void**)&readerinput); 2794 
if (hr == S_OK) 2795 { 2796 if (readerinput->lpVtbl == &xmlreaderinputvtbl) 2797 This->input = impl_from_IXmlReaderInput(readerinput); 2798 else 2799 { 2800 ERR("got external IXmlReaderInput implementation: %p, vtbl=%p\n", 2801 readerinput, readerinput->lpVtbl); 2802 IUnknown_Release(readerinput); 2803 return E_FAIL; 2804 2805 } 2806 } 2807 2808 if (hr != S_OK || !readerinput) 2809 { 2810 /* create IXmlReaderInput basing on supplied interface */ 2811 hr = CreateXmlReaderInputWithEncodingName(input, 2812 This->imalloc, NULL, FALSE, NULL, &readerinput); 2813 if (hr != S_OK) return hr; 2814 This->input = impl_from_IXmlReaderInput(readerinput); 2815 } 2816 2817 /* set stream for supplied IXmlReaderInput */ 2818 hr = readerinput_query_for_stream(This->input); 2819 if (hr == S_OK) 2820 { 2821 This->state = XmlReadState_Initial; 2822 This->instate = XmlReadInState_Initial; 2823 } 2824 return hr; 2825 } 2826 2827 static HRESULT WINAPI xmlreader_GetProperty(IXmlReader* iface, UINT property, LONG_PTR *value) 2828 { 2829 xmlreader *This = impl_from_IXmlReader(iface); 2830 2831 TRACE("(%p)->(%s %p)\n", This, debugstr_reader_prop(property), value); 2832 2833 if (!value) return E_INVALIDARG; 2834 2835 switch (property) 2836 { 2837 case XmlReaderProperty_MultiLanguage: 2838 *value = (LONG_PTR)This->mlang; 2839 if (This->mlang) 2840 IUnknown_AddRef(This->mlang); 2841 break; 2842 case XmlReaderProperty_XmlResolver: 2843 *value = (LONG_PTR)This->resolver; 2844 if (This->resolver) 2845 IXmlResolver_AddRef(This->resolver); 2846 break; 2847 case XmlReaderProperty_DtdProcessing: 2848 *value = This->dtdmode; 2849 break; 2850 case XmlReaderProperty_ReadState: 2851 *value = This->state; 2852 break; 2853 case XmlReaderProperty_MaxElementDepth: 2854 *value = This->max_depth; 2855 break; 2856 default: 2857 FIXME("Unimplemented property (%u)\n", property); 2858 return E_NOTIMPL; 2859 } 2860 2861 return S_OK; 2862 } 2863 2864 static HRESULT WINAPI xmlreader_SetProperty(IXmlReader* iface, UINT 
property, LONG_PTR value) 2865 { 2866 xmlreader *This = impl_from_IXmlReader(iface); 2867 2868 TRACE("(%p)->(%s 0x%lx)\n", This, debugstr_reader_prop(property), value); 2869 2870 switch (property) 2871 { 2872 case XmlReaderProperty_MultiLanguage: 2873 if (This->mlang) 2874 IUnknown_Release(This->mlang); 2875 This->mlang = (IUnknown*)value; 2876 if (This->mlang) 2877 IUnknown_AddRef(This->mlang); 2878 if (This->mlang) 2879 FIXME("Ignoring MultiLanguage %p\n", This->mlang); 2880 break; 2881 case XmlReaderProperty_XmlResolver: 2882 if (This->resolver) 2883 IXmlResolver_Release(This->resolver); 2884 This->resolver = (IXmlResolver*)value; 2885 if (This->resolver) 2886 IXmlResolver_AddRef(This->resolver); 2887 break; 2888 case XmlReaderProperty_DtdProcessing: 2889 if (value < 0 || value > _DtdProcessing_Last) return E_INVALIDARG; 2890 This->dtdmode = value; 2891 break; 2892 case XmlReaderProperty_MaxElementDepth: 2893 This->max_depth = value; 2894 break; 2895 default: 2896 FIXME("Unimplemented property (%u)\n", property); 2897 return E_NOTIMPL; 2898 } 2899 2900 return S_OK; 2901 } 2902 2903 static HRESULT WINAPI xmlreader_Read(IXmlReader* iface, XmlNodeType *nodetype) 2904 { 2905 xmlreader *This = impl_from_IXmlReader(iface); 2906 XmlNodeType oldtype = This->nodetype; 2907 XmlNodeType type; 2908 HRESULT hr; 2909 2910 TRACE("(%p)->(%p)\n", This, nodetype); 2911 2912 if (!nodetype) 2913 nodetype = &type; 2914 2915 switch (This->state) 2916 { 2917 case XmlReadState_Closed: 2918 hr = S_FALSE; 2919 break; 2920 case XmlReadState_Error: 2921 hr = This->error; 2922 break; 2923 default: 2924 hr = reader_parse_nextnode(This); 2925 if (SUCCEEDED(hr) && oldtype == XmlNodeType_None && This->nodetype != oldtype) 2926 This->state = XmlReadState_Interactive; 2927 2928 if (FAILED(hr)) 2929 { 2930 This->state = XmlReadState_Error; 2931 This->nodetype = XmlNodeType_None; 2932 This->depth = 0; 2933 This->error = hr; 2934 } 2935 } 2936 2937 TRACE("node type %s\n", 
debugstr_nodetype(This->nodetype)); 2938 *nodetype = This->nodetype; 2939 2940 return hr; 2941 } 2942 2943 static HRESULT WINAPI xmlreader_GetNodeType(IXmlReader* iface, XmlNodeType *node_type) 2944 { 2945 xmlreader *This = impl_from_IXmlReader(iface); 2946 2947 TRACE("(%p)->(%p)\n", This, node_type); 2948 2949 if (!node_type) 2950 return E_INVALIDARG; 2951 2952 *node_type = reader_get_nodetype(This); 2953 return This->state == XmlReadState_Closed ? S_FALSE : S_OK; 2954 } 2955 2956 static void reader_set_current_attribute(xmlreader *reader, struct attribute *attr) 2957 { 2958 reader->attr = attr; 2959 reader->chunk_read_off = 0; 2960 reader_set_strvalue(reader, StringValue_Prefix, &attr->prefix); 2961 reader_set_strvalue(reader, StringValue_QualifiedName, &attr->qname); 2962 reader_set_strvalue(reader, StringValue_Value, &attr->value); 2963 } 2964 2965 static HRESULT reader_move_to_first_attribute(xmlreader *reader) 2966 { 2967 if (!reader->attr_count) 2968 return S_FALSE; 2969 2970 if (!reader->attr) 2971 reader_inc_depth(reader); 2972 2973 reader_set_current_attribute(reader, LIST_ENTRY(list_head(&reader->attrs), struct attribute, entry)); 2974 2975 return S_OK; 2976 } 2977 2978 static HRESULT WINAPI xmlreader_MoveToFirstAttribute(IXmlReader* iface) 2979 { 2980 xmlreader *This = impl_from_IXmlReader(iface); 2981 2982 TRACE("(%p)\n", This); 2983 2984 return reader_move_to_first_attribute(This); 2985 } 2986 2987 static HRESULT WINAPI xmlreader_MoveToNextAttribute(IXmlReader* iface) 2988 { 2989 xmlreader *This = impl_from_IXmlReader(iface); 2990 const struct list *next; 2991 2992 TRACE("(%p)\n", This); 2993 2994 if (!This->attr_count) return S_FALSE; 2995 2996 if (!This->attr) 2997 return reader_move_to_first_attribute(This); 2998 2999 next = list_next(&This->attrs, &This->attr->entry); 3000 if (next) 3001 reader_set_current_attribute(This, LIST_ENTRY(next, struct attribute, entry)); 3002 3003 return next ? 
S_OK : S_FALSE; 3004 } 3005 3006 static void reader_get_attribute_ns_uri(xmlreader *reader, struct attribute *attr, const WCHAR **uri, UINT *len) 3007 { 3008 static const WCHAR xmlns_uriW[] = {'h','t','t','p',':','/','/','w','w','w','.','w','3','.','o','r','g','/', 3009 '2','0','0','0','/','x','m','l','n','s','/',0}; 3010 static const WCHAR xml_uriW[] = {'h','t','t','p',':','/','/','w','w','w','.','w','3','.','o','r','g','/', 3011 'X','M','L','/','1','9','9','8','/','n','a','m','e','s','p','a','c','e',0}; 3012 3013 /* Check for reserved prefixes first */ 3014 if ((strval_eq(reader, &attr->prefix, &strval_empty) && strval_eq(reader, &attr->localname, &strval_xmlns)) || 3015 strval_eq(reader, &attr->prefix, &strval_xmlns)) 3016 { 3017 *uri = xmlns_uriW; 3018 *len = ARRAY_SIZE(xmlns_uriW) - 1; 3019 } 3020 else if (strval_eq(reader, &attr->prefix, &strval_xml)) 3021 { 3022 *uri = xml_uriW; 3023 *len = ARRAY_SIZE(xml_uriW) - 1; 3024 } 3025 else 3026 { 3027 *uri = NULL; 3028 *len = 0; 3029 } 3030 3031 if (!*uri) 3032 { 3033 struct ns *ns; 3034 3035 if ((ns = reader_lookup_ns(reader, &attr->prefix))) 3036 { 3037 *uri = ns->uri.str; 3038 *len = ns->uri.len; 3039 } 3040 else 3041 { 3042 *uri = emptyW; 3043 *len = 0; 3044 } 3045 } 3046 } 3047 3048 static void reader_get_attribute_local_name(xmlreader *reader, struct attribute *attr, const WCHAR **name, UINT *len) 3049 { 3050 if (attr->flags & ATTRIBUTE_DEFAULT_NS_DEFINITION) 3051 { 3052 *name = xmlnsW; 3053 *len = 5; 3054 } 3055 else if (attr->flags & ATTRIBUTE_NS_DEFINITION) 3056 { 3057 const struct ns *ns = reader_lookup_ns(reader, &attr->localname); 3058 *name = ns->prefix.str; 3059 *len = ns->prefix.len; 3060 } 3061 else 3062 { 3063 *name = attr->localname.str; 3064 *len = attr->localname.len; 3065 } 3066 } 3067 3068 static HRESULT WINAPI xmlreader_MoveToAttributeByName(IXmlReader* iface, 3069 const WCHAR *local_name, const WCHAR *namespace_uri) 3070 { 3071 xmlreader *This = impl_from_IXmlReader(iface); 3072 UINT 
target_name_len, target_uri_len; 3073 struct attribute *attr; 3074 3075 TRACE("(%p)->(%s %s)\n", This, debugstr_w(local_name), debugstr_w(namespace_uri)); 3076 3077 if (!local_name) 3078 return E_INVALIDARG; 3079 3080 if (!This->attr_count) 3081 return S_FALSE; 3082 3083 if (!namespace_uri) 3084 namespace_uri = emptyW; 3085 3086 target_name_len = lstrlenW(local_name); 3087 target_uri_len = lstrlenW(namespace_uri); 3088 3089 LIST_FOR_EACH_ENTRY(attr, &This->attrs, struct attribute, entry) 3090 { 3091 UINT name_len, uri_len; 3092 const WCHAR *name, *uri; 3093 3094 reader_get_attribute_local_name(This, attr, &name, &name_len); 3095 reader_get_attribute_ns_uri(This, attr, &uri, &uri_len); 3096 3097 if (name_len == target_name_len && uri_len == target_uri_len && 3098 !wcscmp(name, local_name) && !wcscmp(uri, namespace_uri)) 3099 { 3100 reader_set_current_attribute(This, attr); 3101 return S_OK; 3102 } 3103 } 3104 3105 return S_FALSE; 3106 } 3107 3108 static HRESULT WINAPI xmlreader_MoveToElement(IXmlReader* iface) 3109 { 3110 xmlreader *This = impl_from_IXmlReader(iface); 3111 3112 TRACE("(%p)\n", This); 3113 3114 if (!This->attr_count) return S_FALSE; 3115 3116 if (This->attr) 3117 reader_dec_depth(This); 3118 3119 This->attr = NULL; 3120 3121 /* FIXME: support other node types with 'attributes' like DTD */ 3122 if (This->is_empty_element) { 3123 reader_set_strvalue(This, StringValue_Prefix, &This->empty_element.prefix); 3124 reader_set_strvalue(This, StringValue_QualifiedName, &This->empty_element.qname); 3125 } 3126 else { 3127 struct element *element = LIST_ENTRY(list_head(&This->elements), struct element, entry); 3128 if (element) { 3129 reader_set_strvalue(This, StringValue_Prefix, &element->prefix); 3130 reader_set_strvalue(This, StringValue_QualifiedName, &element->qname); 3131 } 3132 } 3133 This->chunk_read_off = 0; 3134 reader_set_strvalue(This, StringValue_Value, &strval_empty); 3135 3136 return S_OK; 3137 } 3138 3139 static HRESULT WINAPI 
xmlreader_GetQualifiedName(IXmlReader* iface, LPCWSTR *name, UINT *len) 3140 { 3141 xmlreader *This = impl_from_IXmlReader(iface); 3142 struct attribute *attribute = This->attr; 3143 struct element *element; 3144 UINT length; 3145 3146 TRACE("(%p)->(%p %p)\n", This, name, len); 3147 3148 if (!len) 3149 len = &length; 3150 3151 switch (reader_get_nodetype(This)) 3152 { 3153 case XmlNodeType_Text: 3154 case XmlNodeType_CDATA: 3155 case XmlNodeType_Comment: 3156 case XmlNodeType_Whitespace: 3157 *name = emptyW; 3158 *len = 0; 3159 break; 3160 case XmlNodeType_Element: 3161 case XmlNodeType_EndElement: 3162 element = reader_get_element(This); 3163 if (element->prefix.len) 3164 { 3165 *name = element->qname.str; 3166 *len = element->qname.len; 3167 } 3168 else 3169 { 3170 *name = element->localname.str; 3171 *len = element->localname.len; 3172 } 3173 break; 3174 case XmlNodeType_Attribute: 3175 if (attribute->flags & ATTRIBUTE_DEFAULT_NS_DEFINITION) 3176 { 3177 *name = xmlnsW; 3178 *len = 5; 3179 } else if (attribute->prefix.len) 3180 { 3181 *name = This->strvalues[StringValue_QualifiedName].str; 3182 *len = This->strvalues[StringValue_QualifiedName].len; 3183 } 3184 else 3185 { 3186 *name = attribute->localname.str; 3187 *len = attribute->localname.len; 3188 } 3189 break; 3190 default: 3191 *name = This->strvalues[StringValue_QualifiedName].str; 3192 *len = This->strvalues[StringValue_QualifiedName].len; 3193 break; 3194 } 3195 3196 return S_OK; 3197 } 3198 3199 static struct ns *reader_lookup_nsdef(xmlreader *reader) 3200 { 3201 if (list_empty(&reader->nsdef)) 3202 return NULL; 3203 3204 return LIST_ENTRY(list_head(&reader->nsdef), struct ns, entry); 3205 } 3206 3207 static HRESULT WINAPI xmlreader_GetNamespaceUri(IXmlReader* iface, const WCHAR **uri, UINT *len) 3208 { 3209 xmlreader *This = impl_from_IXmlReader(iface); 3210 const strval *prefix = &This->strvalues[StringValue_Prefix]; 3211 XmlNodeType nodetype; 3212 struct ns *ns; 3213 UINT length; 3214 3215 
TRACE("(%p %p %p)\n", iface, uri, len); 3216 3217 if (!len) 3218 len = &length; 3219 3220 switch ((nodetype = reader_get_nodetype(This))) 3221 { 3222 case XmlNodeType_Attribute: 3223 reader_get_attribute_ns_uri(This, This->attr, uri, len); 3224 break; 3225 case XmlNodeType_Element: 3226 case XmlNodeType_EndElement: 3227 { 3228 ns = reader_lookup_ns(This, prefix); 3229 3230 /* pick top default ns if any */ 3231 if (!ns) 3232 ns = reader_lookup_nsdef(This); 3233 3234 if (ns) { 3235 *uri = ns->uri.str; 3236 *len = ns->uri.len; 3237 } 3238 else { 3239 *uri = emptyW; 3240 *len = 0; 3241 } 3242 } 3243 break; 3244 case XmlNodeType_Text: 3245 case XmlNodeType_CDATA: 3246 case XmlNodeType_ProcessingInstruction: 3247 case XmlNodeType_Comment: 3248 case XmlNodeType_Whitespace: 3249 case XmlNodeType_XmlDeclaration: 3250 *uri = emptyW; 3251 *len = 0; 3252 break; 3253 default: 3254 FIXME("Unhandled node type %d\n", nodetype); 3255 *uri = NULL; 3256 *len = 0; 3257 return E_NOTIMPL; 3258 } 3259 3260 return S_OK; 3261 } 3262 3263 static HRESULT WINAPI xmlreader_GetLocalName(IXmlReader* iface, LPCWSTR *name, UINT *len) 3264 { 3265 xmlreader *This = impl_from_IXmlReader(iface); 3266 struct element *element; 3267 UINT length; 3268 3269 TRACE("(%p)->(%p %p)\n", This, name, len); 3270 3271 if (!len) 3272 len = &length; 3273 3274 switch (reader_get_nodetype(This)) 3275 { 3276 case XmlNodeType_Text: 3277 case XmlNodeType_CDATA: 3278 case XmlNodeType_Comment: 3279 case XmlNodeType_Whitespace: 3280 *name = emptyW; 3281 *len = 0; 3282 break; 3283 case XmlNodeType_Element: 3284 case XmlNodeType_EndElement: 3285 element = reader_get_element(This); 3286 *name = element->localname.str; 3287 *len = element->localname.len; 3288 break; 3289 case XmlNodeType_Attribute: 3290 reader_get_attribute_local_name(This, This->attr, name, len); 3291 break; 3292 default: 3293 *name = This->strvalues[StringValue_LocalName].str; 3294 *len = This->strvalues[StringValue_LocalName].len; 3295 break; 3296 } 3297 3298 
return S_OK; 3299 } 3300 3301 static HRESULT WINAPI xmlreader_GetPrefix(IXmlReader* iface, const WCHAR **ret, UINT *len) 3302 { 3303 xmlreader *This = impl_from_IXmlReader(iface); 3304 XmlNodeType nodetype; 3305 UINT length; 3306 3307 TRACE("(%p)->(%p %p)\n", This, ret, len); 3308 3309 if (!len) 3310 len = &length; 3311 3312 *ret = emptyW; 3313 *len = 0; 3314 3315 switch ((nodetype = reader_get_nodetype(This))) 3316 { 3317 case XmlNodeType_Element: 3318 case XmlNodeType_EndElement: 3319 case XmlNodeType_Attribute: 3320 { 3321 const strval *prefix = &This->strvalues[StringValue_Prefix]; 3322 struct ns *ns; 3323 3324 if (strval_eq(This, prefix, &strval_xml)) 3325 { 3326 *ret = xmlW; 3327 *len = 3; 3328 } 3329 else if (strval_eq(This, prefix, &strval_xmlns)) 3330 { 3331 *ret = xmlnsW; 3332 *len = 5; 3333 } 3334 else if ((ns = reader_lookup_ns(This, prefix))) 3335 { 3336 *ret = ns->prefix.str; 3337 *len = ns->prefix.len; 3338 } 3339 3340 break; 3341 } 3342 default: 3343 ; 3344 } 3345 3346 return S_OK; 3347 } 3348 3349 static const strval *reader_get_value(xmlreader *reader, BOOL ensure_allocated) 3350 { 3351 strval *val; 3352 3353 switch (reader_get_nodetype(reader)) 3354 { 3355 case XmlNodeType_XmlDeclaration: 3356 case XmlNodeType_EndElement: 3357 case XmlNodeType_None: 3358 return &strval_empty; 3359 case XmlNodeType_Attribute: 3360 /* For namespace definition attributes return values from namespace list */ 3361 if (reader->attr->flags & (ATTRIBUTE_NS_DEFINITION | ATTRIBUTE_DEFAULT_NS_DEFINITION)) 3362 { 3363 struct ns *ns; 3364 3365 if (!(ns = reader_lookup_ns(reader, &reader->attr->localname))) 3366 ns = reader_lookup_nsdef(reader); 3367 3368 return &ns->uri; 3369 } 3370 return &reader->attr->value; 3371 default: 3372 break; 3373 } 3374 3375 val = &reader->strvalues[StringValue_Value]; 3376 if (!val->str && ensure_allocated) 3377 { 3378 WCHAR *ptr = reader_alloc(reader, (val->len+1)*sizeof(WCHAR)); 3379 if (!ptr) return NULL; 3380 memcpy(ptr, 
reader_get_strptr(reader, val), val->len*sizeof(WCHAR)); 3381 ptr[val->len] = 0; 3382 val->str = ptr; 3383 } 3384 3385 return val; 3386 } 3387 3388 static HRESULT WINAPI xmlreader_GetValue(IXmlReader* iface, const WCHAR **value, UINT *len) 3389 { 3390 xmlreader *reader = impl_from_IXmlReader(iface); 3391 const strval *val = &reader->strvalues[StringValue_Value]; 3392 UINT off; 3393 3394 TRACE("(%p)->(%p %p)\n", reader, value, len); 3395 3396 *value = NULL; 3397 3398 if ((reader->nodetype == XmlNodeType_Comment && !val->str && !val->len) || is_reader_pending(reader)) 3399 { 3400 XmlNodeType type; 3401 HRESULT hr; 3402 3403 hr = IXmlReader_Read(iface, &type); 3404 if (FAILED(hr)) return hr; 3405 3406 /* return if still pending, partially read values are not reported */ 3407 if (is_reader_pending(reader)) return E_PENDING; 3408 } 3409 3410 val = reader_get_value(reader, TRUE); 3411 if (!val) 3412 return E_OUTOFMEMORY; 3413 3414 off = abs(reader->chunk_read_off); 3415 assert(off <= val->len); 3416 *value = val->str + off; 3417 if (len) *len = val->len - off; 3418 reader->chunk_read_off = -off; 3419 return S_OK; 3420 } 3421 3422 static HRESULT WINAPI xmlreader_ReadValueChunk(IXmlReader* iface, WCHAR *buffer, UINT chunk_size, UINT *read) 3423 { 3424 xmlreader *reader = impl_from_IXmlReader(iface); 3425 const strval *val; 3426 UINT len = 0; 3427 3428 TRACE("(%p)->(%p %u %p)\n", reader, buffer, chunk_size, read); 3429 3430 val = reader_get_value(reader, FALSE); 3431 3432 /* If value is already read by GetValue, chunk_read_off is negative and chunked reads are not possible. */ 3433 if (reader->chunk_read_off >= 0) 3434 { 3435 assert(reader->chunk_read_off <= val->len); 3436 len = min(val->len - reader->chunk_read_off, chunk_size); 3437 } 3438 if (read) *read = len; 3439 3440 if (len) 3441 { 3442 memcpy(buffer, reader_get_strptr(reader, val) + reader->chunk_read_off, len*sizeof(WCHAR)); 3443 reader->chunk_read_off += len; 3444 } 3445 3446 return len || !chunk_size ? 
S_OK : S_FALSE; 3447 } 3448 3449 static HRESULT WINAPI xmlreader_GetBaseUri(IXmlReader* iface, 3450 LPCWSTR *baseUri, 3451 UINT *baseUri_length) 3452 { 3453 FIXME("(%p %p %p): stub\n", iface, baseUri, baseUri_length); 3454 return E_NOTIMPL; 3455 } 3456 3457 static BOOL WINAPI xmlreader_IsDefault(IXmlReader* iface) 3458 { 3459 FIXME("(%p): stub\n", iface); 3460 return FALSE; 3461 } 3462 3463 static BOOL WINAPI xmlreader_IsEmptyElement(IXmlReader* iface) 3464 { 3465 xmlreader *This = impl_from_IXmlReader(iface); 3466 TRACE("(%p)\n", This); 3467 /* Empty elements are not placed in stack, it's stored as a global reader flag that makes sense 3468 when current node is start tag of an element */ 3469 return (reader_get_nodetype(This) == XmlNodeType_Element) ? This->is_empty_element : FALSE; 3470 } 3471 3472 static HRESULT WINAPI xmlreader_GetLineNumber(IXmlReader* iface, UINT *line_number) 3473 { 3474 xmlreader *This = impl_from_IXmlReader(iface); 3475 const struct element *element; 3476 3477 TRACE("(%p %p)\n", This, line_number); 3478 3479 if (!line_number) 3480 return E_INVALIDARG; 3481 3482 switch (reader_get_nodetype(This)) 3483 { 3484 case XmlNodeType_Element: 3485 case XmlNodeType_EndElement: 3486 element = reader_get_element(This); 3487 *line_number = element->position.line_number; 3488 break; 3489 case XmlNodeType_Attribute: 3490 *line_number = This->attr->position.line_number; 3491 break; 3492 case XmlNodeType_Whitespace: 3493 case XmlNodeType_XmlDeclaration: 3494 *line_number = This->empty_element.position.line_number; 3495 break; 3496 default: 3497 *line_number = This->position.line_number; 3498 break; 3499 } 3500 3501 return This->state == XmlReadState_Closed ? 
S_FALSE : S_OK; 3502 } 3503 3504 static HRESULT WINAPI xmlreader_GetLinePosition(IXmlReader* iface, UINT *line_position) 3505 { 3506 xmlreader *This = impl_from_IXmlReader(iface); 3507 const struct element *element; 3508 3509 TRACE("(%p %p)\n", This, line_position); 3510 3511 if (!line_position) 3512 return E_INVALIDARG; 3513 3514 switch (reader_get_nodetype(This)) 3515 { 3516 case XmlNodeType_Element: 3517 case XmlNodeType_EndElement: 3518 element = reader_get_element(This); 3519 *line_position = element->position.line_position; 3520 break; 3521 case XmlNodeType_Attribute: 3522 *line_position = This->attr->position.line_position; 3523 break; 3524 case XmlNodeType_Whitespace: 3525 case XmlNodeType_XmlDeclaration: 3526 *line_position = This->empty_element.position.line_position; 3527 break; 3528 default: 3529 *line_position = This->position.line_position; 3530 break; 3531 } 3532 3533 return This->state == XmlReadState_Closed ? S_FALSE : S_OK; 3534 } 3535 3536 static HRESULT WINAPI xmlreader_GetAttributeCount(IXmlReader* iface, UINT *count) 3537 { 3538 xmlreader *This = impl_from_IXmlReader(iface); 3539 3540 TRACE("(%p)->(%p)\n", This, count); 3541 3542 if (!count) return E_INVALIDARG; 3543 3544 *count = This->attr_count; 3545 return S_OK; 3546 } 3547 3548 static HRESULT WINAPI xmlreader_GetDepth(IXmlReader* iface, UINT *depth) 3549 { 3550 xmlreader *This = impl_from_IXmlReader(iface); 3551 TRACE("(%p)->(%p)\n", This, depth); 3552 *depth = This->depth; 3553 return S_OK; 3554 } 3555 3556 static BOOL WINAPI xmlreader_IsEOF(IXmlReader* iface) 3557 { 3558 xmlreader *This = impl_from_IXmlReader(iface); 3559 TRACE("(%p)\n", iface); 3560 return This->state == XmlReadState_EndOfFile; 3561 } 3562 3563 static const struct IXmlReaderVtbl xmlreader_vtbl = 3564 { 3565 xmlreader_QueryInterface, 3566 xmlreader_AddRef, 3567 xmlreader_Release, 3568 xmlreader_SetInput, 3569 xmlreader_GetProperty, 3570 xmlreader_SetProperty, 3571 xmlreader_Read, 3572 xmlreader_GetNodeType, 3573 
xmlreader_MoveToFirstAttribute, 3574 xmlreader_MoveToNextAttribute, 3575 xmlreader_MoveToAttributeByName, 3576 xmlreader_MoveToElement, 3577 xmlreader_GetQualifiedName, 3578 xmlreader_GetNamespaceUri, 3579 xmlreader_GetLocalName, 3580 xmlreader_GetPrefix, 3581 xmlreader_GetValue, 3582 xmlreader_ReadValueChunk, 3583 xmlreader_GetBaseUri, 3584 xmlreader_IsDefault, 3585 xmlreader_IsEmptyElement, 3586 xmlreader_GetLineNumber, 3587 xmlreader_GetLinePosition, 3588 xmlreader_GetAttributeCount, 3589 xmlreader_GetDepth, 3590 xmlreader_IsEOF 3591 }; 3592 3593 /** IXmlReaderInput **/ 3594 static HRESULT WINAPI xmlreaderinput_QueryInterface(IXmlReaderInput *iface, REFIID riid, void** ppvObject) 3595 { 3596 xmlreaderinput *This = impl_from_IXmlReaderInput(iface); 3597 3598 TRACE("(%p)->(%s %p)\n", This, debugstr_guid(riid), ppvObject); 3599 3600 if (IsEqualGUID(riid, &IID_IXmlReaderInput) || 3601 IsEqualGUID(riid, &IID_IUnknown)) 3602 { 3603 *ppvObject = iface; 3604 } 3605 else 3606 { 3607 WARN("interface %s not implemented\n", debugstr_guid(riid)); 3608 *ppvObject = NULL; 3609 return E_NOINTERFACE; 3610 } 3611 3612 IUnknown_AddRef(iface); 3613 3614 return S_OK; 3615 } 3616 3617 static ULONG WINAPI xmlreaderinput_AddRef(IXmlReaderInput *iface) 3618 { 3619 xmlreaderinput *This = impl_from_IXmlReaderInput(iface); 3620 ULONG ref = InterlockedIncrement(&This->ref); 3621 TRACE("(%p)->(%d)\n", This, ref); 3622 return ref; 3623 } 3624 3625 static ULONG WINAPI xmlreaderinput_Release(IXmlReaderInput *iface) 3626 { 3627 xmlreaderinput *This = impl_from_IXmlReaderInput(iface); 3628 LONG ref = InterlockedDecrement(&This->ref); 3629 3630 TRACE("(%p)->(%d)\n", This, ref); 3631 3632 if (ref == 0) 3633 { 3634 IMalloc *imalloc = This->imalloc; 3635 if (This->input) IUnknown_Release(This->input); 3636 if (This->stream) ISequentialStream_Release(This->stream); 3637 if (This->buffer) free_input_buffer(This->buffer); 3638 readerinput_free(This, This->baseuri); 3639 readerinput_free(This, This); 
3640 if (imalloc) IMalloc_Release(imalloc); 3641 } 3642 3643 return ref; 3644 } 3645 3646 static const struct IUnknownVtbl xmlreaderinputvtbl = 3647 { 3648 xmlreaderinput_QueryInterface, 3649 xmlreaderinput_AddRef, 3650 xmlreaderinput_Release 3651 }; 3652 3653 HRESULT WINAPI CreateXmlReader(REFIID riid, void **obj, IMalloc *imalloc) 3654 { 3655 xmlreader *reader; 3656 HRESULT hr; 3657 int i; 3658 3659 TRACE("(%s, %p, %p)\n", wine_dbgstr_guid(riid), obj, imalloc); 3660 3661 if (imalloc) 3662 reader = IMalloc_Alloc(imalloc, sizeof(*reader)); 3663 else 3664 reader = heap_alloc(sizeof(*reader)); 3665 if (!reader) 3666 return E_OUTOFMEMORY; 3667 3668 memset(reader, 0, sizeof(*reader)); 3669 reader->IXmlReader_iface.lpVtbl = &xmlreader_vtbl; 3670 reader->ref = 1; 3671 reader->state = XmlReadState_Closed; 3672 reader->instate = XmlReadInState_Initial; 3673 reader->resumestate = XmlReadResumeState_Initial; 3674 reader->dtdmode = DtdProcessing_Prohibit; 3675 reader->imalloc = imalloc; 3676 if (imalloc) IMalloc_AddRef(imalloc); 3677 reader->nodetype = XmlNodeType_None; 3678 list_init(&reader->attrs); 3679 list_init(&reader->nsdef); 3680 list_init(&reader->ns); 3681 list_init(&reader->elements); 3682 reader->max_depth = 256; 3683 3684 reader->chunk_read_off = 0; 3685 for (i = 0; i < StringValue_Last; i++) 3686 reader->strvalues[i] = strval_empty; 3687 3688 hr = IXmlReader_QueryInterface(&reader->IXmlReader_iface, riid, obj); 3689 IXmlReader_Release(&reader->IXmlReader_iface); 3690 3691 TRACE("returning iface %p, hr %#x\n", *obj, hr); 3692 3693 return hr; 3694 } 3695 3696 HRESULT WINAPI CreateXmlReaderInputWithEncodingName(IUnknown *stream, 3697 IMalloc *imalloc, 3698 LPCWSTR encoding, 3699 BOOL hint, 3700 LPCWSTR base_uri, 3701 IXmlReaderInput **ppInput) 3702 { 3703 xmlreaderinput *readerinput; 3704 HRESULT hr; 3705 3706 TRACE("%p %p %s %d %s %p\n", stream, imalloc, wine_dbgstr_w(encoding), 3707 hint, wine_dbgstr_w(base_uri), ppInput); 3708 3709 if (!stream || !ppInput) 
return E_INVALIDARG; 3710 3711 if (imalloc) 3712 readerinput = IMalloc_Alloc(imalloc, sizeof(*readerinput)); 3713 else 3714 readerinput = heap_alloc(sizeof(*readerinput)); 3715 if(!readerinput) return E_OUTOFMEMORY; 3716 3717 readerinput->IXmlReaderInput_iface.lpVtbl = &xmlreaderinputvtbl; 3718 readerinput->ref = 1; 3719 readerinput->imalloc = imalloc; 3720 readerinput->stream = NULL; 3721 if (imalloc) IMalloc_AddRef(imalloc); 3722 readerinput->encoding = parse_encoding_name(encoding, -1); 3723 readerinput->hint = hint; 3724 readerinput->baseuri = readerinput_strdupW(readerinput, base_uri); 3725 readerinput->pending = 0; 3726 3727 hr = alloc_input_buffer(readerinput); 3728 if (hr != S_OK) 3729 { 3730 readerinput_free(readerinput, readerinput->baseuri); 3731 readerinput_free(readerinput, readerinput); 3732 if (imalloc) IMalloc_Release(imalloc); 3733 return hr; 3734 } 3735 IUnknown_QueryInterface(stream, &IID_IUnknown, (void**)&readerinput->input); 3736 3737 *ppInput = &readerinput->IXmlReaderInput_iface; 3738 3739 TRACE("returning iface %p\n", *ppInput); 3740 3741 return S_OK; 3742 } 3743