1 /*
2 
3 Copyright (c) 2007-2018, Arvid Norberg
4 All rights reserved.
5 
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions
8 are met:
9 
10     * Redistributions of source code must retain the above copyright
11       notice, this list of conditions and the following disclaimer.
12     * Redistributions in binary form must reproduce the above copyright
13       notice, this list of conditions and the following disclaimer in
14       the documentation and/or other materials provided with the distribution.
15     * Neither the name of the author nor the names of its
16       contributors may be used to endorse or promote products derived
17       from this software without specific prior written permission.
18 
19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
23 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 POSSIBILITY OF SUCH DAMAGE.
30 
31 */
32 
33 #include <cstring>
34 
35 #include "libtorrent/xml_parse.hpp"
36 #include "libtorrent/string_util.hpp"
37 
38 namespace libtorrent {
39 
xml_parse(string_view input,std::function<void (int,string_view,string_view)> callback)40 	void xml_parse(string_view input
41 		, std::function<void(int, string_view, string_view)> callback)
42 	{
43 		char const* p = input.data();
44 		char const* end = input.data() + input.size();
45 		for (;p != end; ++p)
46 		{
47 			char const* start = p;
48 			// look for tag start
49 			for (; p != end && *p != '<'; ++p);
50 
51 			if (p != start)
52 			{
53 				callback(xml_string, {start, std::size_t(p - start)}, {});
54 			}
55 
56 			if (p == end) break;
57 
58 			// skip '<'
59 			++p;
60 			if (p != end && p + 8 < end && string_begins_no_case("![CDATA[", p))
61 			{
62 				// CDATA. match '![CDATA['
63 				p += 8;
64 				start = p;
65 				while (p != end && !string_begins_no_case("]]>", p - 2)) ++p;
66 
67 				// parse error
68 				if (p == end)
69 				{
70 					callback(xml_parse_error, "unexpected end of file", {});
71 					break;
72 				}
73 
74 				callback(xml_string, {start, std::size_t(p - start - 2)}, {});
75 				continue;
76 			}
77 
78 			// parse the name of the tag.
79 			for (start = p; p != end && *p != '>' && !is_space(*p); ++p);
80 
81 			char const* tag_name_end = p;
82 
83 			// skip the attributes for now
84 			for (; p != end && *p != '>'; ++p);
85 
86 			// parse error
87 			if (p == end)
88 			{
89 				callback(xml_parse_error, "unexpected end of file", {});
90 				break;
91 			}
92 
93 			TORRENT_ASSERT(*p == '>');
94 
95 			char const* tag_end = p;
96 			if (*start == '/')
97 			{
98 				++start;
99 				callback(xml_end_tag, {start, std::size_t(tag_name_end - start)}, {});
100 			}
101 			else if (*(p - 1) == '/')
102 			{
103 				callback(xml_empty_tag, {start, std::size_t(std::min(tag_name_end - start, p - start - 1))}, {});
104 				tag_end = p - 1;
105 			}
106 			else if (*start == '?' && *(p - 1) == '?')
107 			{
108 				++start;
109 				callback(xml_declaration_tag, {start, std::size_t(std::min(tag_name_end - start, p - start - 1))}, {});
110 				tag_end = p - 1;
111 			}
112 			else if (start + 5 < p && std::memcmp(start, "!--", 3) == 0 && std::memcmp(p - 2, "--", 2) == 0)
113 			{
114 				start += 3;
115 				callback(xml_comment, {start, std::size_t(tag_name_end - start - 2)}, {});
116 				continue;
117 			}
118 			else
119 			{
120 				callback(xml_start_tag, {start, std::size_t(tag_name_end - start)}, {});
121 			}
122 
123 			// parse attributes
124 			for (char const* i = tag_name_end; i < tag_end; ++i)
125 			{
126 				char const* val_start = nullptr;
127 
128 				// find start of attribute name
129 				while (i != tag_end && is_space(*i)) ++i;
130 				if (i == tag_end) break;
131 				start = i;
132 				// find end of attribute name
133 				while (i != tag_end && *i != '=' && !is_space(*i)) ++i;
134 				std::size_t const name_len = std::size_t(i - start);
135 
136 				// look for equality sign
137 				for (; i != tag_end && *i != '='; ++i);
138 
139 				// no equality sign found. Report this as xml_tag_content
140 				// instead of a series of key value pairs
141 				if (i == tag_end)
142 				{
143 					callback(xml_tag_content, {start, std::size_t(i - start)}, {});
144 					break;
145 				}
146 
147 				++i;
148 				while (i != tag_end && is_space(*i)) ++i;
149 				// check for parse error (values must be quoted)
150 				if (i == tag_end || (*i != '\'' && *i != '\"'))
151 				{
152 					callback(xml_parse_error, "unquoted attribute value", {});
153 					break;
154 				}
155 				char quote = *i;
156 				++i;
157 				val_start = i;
158 				for (; i != tag_end && *i != quote; ++i);
159 				// parse error (missing end quote)
160 				if (i == tag_end)
161 				{
162 					callback(xml_parse_error, "missing end quote on attribute", {});
163 					break;
164 				}
165 				callback(xml_attribute, {start, name_len}, {val_start, std::size_t(i - val_start)});
166 			}
167 		}
168 	}
169 
170 }
171