1 /* 2 3 Copyright (c) 2007-2018, Arvid Norberg 4 All rights reserved. 5 6 Redistribution and use in source and binary forms, with or without 7 modification, are permitted provided that the following conditions 8 are met: 9 10 * Redistributions of source code must retain the above copyright 11 notice, this list of conditions and the following disclaimer. 12 * Redistributions in binary form must reproduce the above copyright 13 notice, this list of conditions and the following disclaimer in 14 the documentation and/or other materials provided with the distribution. 15 * Neither the name of the author nor the names of its 16 contributors may be used to endorse or promote products derived 17 from this software without specific prior written permission. 18 19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 23 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 POSSIBILITY OF SUCH DAMAGE. 30 31 */ 32 33 #include <cstring> 34 35 #include "libtorrent/xml_parse.hpp" 36 #include "libtorrent/string_util.hpp" 37 38 namespace libtorrent { 39 xml_parse(string_view input,std::function<void (int,string_view,string_view)> callback)40 void xml_parse(string_view input 41 , std::function<void(int, string_view, string_view)> callback) 42 { 43 char const* p = input.data(); 44 char const* end = input.data() + input.size(); 45 for (;p != end; ++p) 46 { 47 char const* start = p; 48 // look for tag start 49 for (; p != end && *p != '<'; ++p); 50 51 if (p != start) 52 { 53 callback(xml_string, {start, std::size_t(p - start)}, {}); 54 } 55 56 if (p == end) break; 57 58 // skip '<' 59 ++p; 60 if (p != end && p + 8 < end && string_begins_no_case("![CDATA[", p)) 61 { 62 // CDATA. match '![CDATA[' 63 p += 8; 64 start = p; 65 while (p != end && !string_begins_no_case("]]>", p - 2)) ++p; 66 67 // parse error 68 if (p == end) 69 { 70 callback(xml_parse_error, "unexpected end of file", {}); 71 break; 72 } 73 74 callback(xml_string, {start, std::size_t(p - start - 2)}, {}); 75 continue; 76 } 77 78 // parse the name of the tag. 79 for (start = p; p != end && *p != '>' && !is_space(*p); ++p); 80 81 char const* tag_name_end = p; 82 83 // skip the attributes for now 84 for (; p != end && *p != '>'; ++p); 85 86 // parse error 87 if (p == end) 88 { 89 callback(xml_parse_error, "unexpected end of file", {}); 90 break; 91 } 92 93 TORRENT_ASSERT(*p == '>'); 94 95 char const* tag_end = p; 96 if (*start == '/') 97 { 98 ++start; 99 callback(xml_end_tag, {start, std::size_t(tag_name_end - start)}, {}); 100 } 101 else if (*(p - 1) == '/') 102 { 103 callback(xml_empty_tag, {start, std::size_t(std::min(tag_name_end - start, p - start - 1))}, {}); 104 tag_end = p - 1; 105 } 106 else if (*start == '?' && *(p - 1) == '?') 107 { 108 ++start; 109 callback(xml_declaration_tag, {start, std::size_t(std::min(tag_name_end - start, p - start - 1))}, {}); 110 tag_end = p - 1; 111 } 112 else if (start + 5 < p && std::memcmp(start, "!--", 3) == 0 && std::memcmp(p - 2, "--", 2) == 0) 113 { 114 start += 3; 115 callback(xml_comment, {start, std::size_t(tag_name_end - start - 2)}, {}); 116 continue; 117 } 118 else 119 { 120 callback(xml_start_tag, {start, std::size_t(tag_name_end - start)}, {}); 121 } 122 123 // parse attributes 124 for (char const* i = tag_name_end; i < tag_end; ++i) 125 { 126 char const* val_start = nullptr; 127 128 // find start of attribute name 129 while (i != tag_end && is_space(*i)) ++i; 130 if (i == tag_end) break; 131 start = i; 132 // find end of attribute name 133 while (i != tag_end && *i != '=' && !is_space(*i)) ++i; 134 std::size_t const name_len = std::size_t(i - start); 135 136 // look for equality sign 137 for (; i != tag_end && *i != '='; ++i); 138 139 // no equality sign found. Report this as xml_tag_content 140 // instead of a series of key value pairs 141 if (i == tag_end) 142 { 143 callback(xml_tag_content, {start, std::size_t(i - start)}, {}); 144 break; 145 } 146 147 ++i; 148 while (i != tag_end && is_space(*i)) ++i; 149 // check for parse error (values must be quoted) 150 if (i == tag_end || (*i != '\'' && *i != '\"')) 151 { 152 callback(xml_parse_error, "unquoted attribute value", {}); 153 break; 154 } 155 char quote = *i; 156 ++i; 157 val_start = i; 158 for (; i != tag_end && *i != quote; ++i); 159 // parse error (missing end quote) 160 if (i == tag_end) 161 { 162 callback(xml_parse_error, "missing end quote on attribute", {}); 163 break; 164 } 165 callback(xml_attribute, {start, name_len}, {val_start, std::size_t(i - val_start)}); 166 } 167 } 168 } 169 170 } 171