// XML_Parser.cc - an XML file reader
//
// Vamos Automotive Simulator
// Copyright (C) 2004 Sam Varner
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
20 #include "XML_Parser.h"
21
22 #include <iostream>
23 #include <algorithm>
24 #include <sstream>
25 #include <cassert>
26
27 using namespace Vamos_Media;
28
// Return a copy of DATA_STRING with any leading run of spaces, tabs
// and newlines removed.  A string made up only of those characters
// yields an empty string.  Other characters (including '\r') are kept.
std::string
remove_leading_space (std::string data_string)
{
  // find_first_not_of() gives npos when nothing but blanks is present;
  // erase (0, npos) then empties the whole string.
  data_string.erase (0, data_string.find_first_not_of (" \t\n"));
  return data_string;
}
42
43 // Construct the message for exceptions.
44 std::string
message() const45 XML_Exception::message () const
46 {
47 std::ostringstream ost;
48 ost << m_file << ':';
49 if (m_line == -1)
50 {
51 ost << "eof";
52 }
53 else
54 {
55 ost << m_line;
56 }
57 ost << ' ' << m_message;
58 return ost.str ();
59 }
60
61 // * Class XML_Tag
62 // Read up to and including the next tag.
63
XML_Tag(std::ifstream & stream)64 XML_Tag::XML_Tag (std::ifstream& stream) :
65 m_type (NONE),
66 m_lines (0)
67 {
68 bool done = read_to_tag_start (stream);
69 if (!done)
70 {
71 done = read_to_tag_end (stream);
72 if (!done)
73 {
74 throw Unterminated_Tag (get_lines (), m_text, true);
75 }
76 }
77
78 m_data = remove_leading_space (m_data);
79
80 if (m_text.size () == 0)
81 {
82 return;
83 }
84
85 m_type = find_tag_type (stream);
86 if (m_type != COMMENT)
87 {
88 String_Iterator text_start;
89 String_Iterator text_end;
90 get_text_boundries (text_start, text_end);
91 m_label = find_label (text_start, text_end);
92 find_attributes (text_start, text_end);
93 }
94 }
95
96 void
get_text_boundries(String_Iterator & text_start,String_Iterator & text_end)97 XML_Tag::get_text_boundries (String_Iterator& text_start,
98 String_Iterator& text_end)
99 {
100 text_start = m_text.begin () + 1;
101 text_end = m_text.end () - 1;
102 switch (m_type)
103 {
104 case PROCESSING_INSTRUCTION:
105 text_start++;
106 text_end--;
107 break;
108 case END:
109 text_start++;
110 break;
111 case EMPTY:
112 text_end--;
113 break;
114 case START:
115 break;
116 default:
117 assert (false);
118 }
119 }
120
121 // Read everything up to the next '<'. Return true if '<' was found.
122 bool
read_to_tag_start(std::ifstream & stream)123 XML_Tag::read_to_tag_start (std::ifstream& stream)
124 {
125 char ch;
126 while (get_next_char (stream, ch))
127 {
128 if (ch == '<')
129 {
130 m_text.push_back (ch);
131 return false;
132 }
133 m_data.push_back (ch);
134 }
135 return true;
136 }
137
138 // Read everything up to the next `>'. Return true if '>' was found.
139 bool
read_to_tag_end(std::ifstream & stream)140 XML_Tag::read_to_tag_end (std::ifstream& stream)
141 {
142 bool in_comment = false;
143 char current = '\0';
144 char old = '\0';
145 char older = '\0';
146 char oldest = '\0';
147 while (get_next_char (stream, current))
148 {
149 if ((oldest == '!') && (older == '-') && (old == '-'))
150 {
151 in_comment = true;
152 }
153 if ((current == '<') && !in_comment)
154 {
155 throw Unterminated_Tag (get_lines (), m_text, false);
156 }
157 m_text.push_back (current);
158 if (current == '>')
159 {
160 return true;
161 }
162 oldest = older;
163 older = old;
164 old = current;
165 }
166 return false;
167 }
168
169 Vamos_Media::XML_Tag::Tag_Type
find_tag_type(std::ifstream & stream)170 XML_Tag::find_tag_type (std::ifstream& stream)
171 {
172 Tag_Type type;
173 const size_t last = m_text.size () - 1;
174 if ((m_text [1] == '?') && (m_text [last - 1] == '?'))
175 {
176 type = PROCESSING_INSTRUCTION;
177 }
178 else if ((m_text [1] == '!') && (m_text [2] == '-') && (m_text [3] == '-'))
179 {
180 type = COMMENT;
181 eat_comment (stream);
182 }
183 else if (m_text [1] == '/')
184 {
185 type = END;
186 }
187 else if (m_text [last - 1] == '/')
188 {
189 type = EMPTY;
190 }
191 else
192 {
193 type = START;
194 }
195 return type;
196 }
197
198 std::string
find_label(String_Iterator text_start,String_Iterator text_end)199 XML_Tag::find_label (String_Iterator text_start,
200 String_Iterator text_end)
201 {
202 String_Iterator label_end = std::find (text_start, text_end, ' ');
203 return std::string (text_start, label_end);
204 }
205
206 // Get the next char from the stream and count newlines.
207 std::ifstream&
get_next_char(std::ifstream & stream,char & ch)208 XML_Tag::get_next_char (std::ifstream& stream, char& ch)
209 {
210 ch = '\0';
211 stream.get (ch);
212 if (ch == '\n')
213 {
214 m_lines++;
215 }
216 return stream;
217 }
218
219 bool
find_comment_end(std::ifstream & stream)220 XML_Tag::find_comment_end (std::ifstream& stream)
221 {
222 // ...otherwise, we have to read the file to find the "-->"
223 // comment-ender.
224 char current = '\0';
225 char old = '\0';
226 char older = '\0';
227 while (get_next_char (stream, current))
228 {
229 if ((current == '>') && (old == '-') && (older == '-'))
230 {
231 return true;
232 }
233 older = old;
234 old = current;
235 }
236 return false;
237 }
238
239 // Throw away the remainder of a comment.
240 void
eat_comment(std::ifstream & stream)241 XML_Tag::eat_comment (std::ifstream& stream)
242 {
243 // If there are no '>' within the comment, then we have the whole
244 // thing...
245 const size_t last = m_text.size () - 1;
246 if ((m_text [last - 1] == '-') && (m_text [last - 2] == '-'))
247 {
248 return;
249 }
250
251 if (!find_comment_end (stream))
252 {
253 throw Unterminated_Tag (get_lines (), m_text, true);
254 }
255 }
256
257 void
skip_spaces(String_Iterator & text_start)258 XML_Tag::skip_spaces (String_Iterator& text_start)
259 {
260 while (*text_start == ' ')
261 {
262 text_start++;
263 }
264 }
265
266 // Parse the attributes.
267 void
find_attributes(String_Iterator text_start,String_Iterator text_end)268 XML_Tag::find_attributes (String_Iterator text_start,
269 String_Iterator text_end)
270 {
271 text_start += m_label.size ();
272 if (text_start == text_end) return;
273 text_end++;
274 while (true)
275 {
276 skip_spaces (text_start);
277
278 String_Iterator attrib_end = std::find (text_start, text_end, '"');
279 if (attrib_end == text_end)
280 {
281 return;
282 }
283 attrib_end = std::find (attrib_end + 1, text_end, '"');
284
285 if (attrib_end == text_end)
286 {
287 throw Unterminated_Attribute (get_lines (),
288 std::string (text_start, text_end),
289 true);
290 }
291
292 m_attributes.push_back (get_attribute (text_start, attrib_end));
293 text_start = attrib_end + 1;
294 }
295 }
296
297 Vamos_Media::XML_Tag::Attribute
get_attribute(String_Iterator text_start,String_Iterator text_end)298 XML_Tag::get_attribute (String_Iterator text_start,
299 String_Iterator text_end)
300 {
301 String_Iterator mark = std::find (text_start, text_end, '=');
302 std::string name (text_start, mark);
303 mark += 2;
304 std::string val (mark, std::find (mark, text_end, '"'));
305 return Attribute (name, val);
306 }
307
308 //** Class XML_Path
309 std::string
subpath(size_t n) const310 XML_Path::subpath (size_t n) const
311 {
312 size_t start = m_path.length () - 1;
313 std::string rest = m_path;
314 for (size_t i = 0; i < n; i++)
315 {
316 start = rest.find_last_of ("/");
317 rest = rest.substr (0, start);
318 }
319 return m_path.substr (start + 1);
320 }
321
// Split a string at the wildcard character (*) and return the parts
// in a vector.  A * in the first (last) position yields an empty
// string as the first (last) element of the vector.  A string without
// any * comes back as a single element, so the result is never empty.
std::vector<std::string> split (std::string in)
{
  std::vector <std::string> pieces;
  std::string::size_type from = 0;
  for (std::string::size_type star = in.find ('*', from);
       star != std::string::npos;
       star = in.find ('*', from))
    {
      pieces.push_back (std::string (in, from, star - from));
      from = star + 1;
    }
  // Whatever follows the last wildcard (possibly nothing).
  pieces.push_back (in.substr (from));
  return pieces;
}
338
339 bool
match(std::string pattern) const340 XML_Path::match (std::string pattern) const
341 {
342 std::vector <std::string> words = split (pattern);
343 assert (words.size () > 0);
344 // If no wildcard, must match whole string.
345 if (words.size () == 1)
346 return m_path == pattern;
347
348 size_t start_index = 0;
349 for (std::vector <std::string>::iterator it = words.begin ();
350 it != words.end () - 1;
351 it++)
352 {
353 if ((start_index = m_path.find (*it, start_index)) == std::string::npos)
354 return false;
355 // The first (possibly empty) element must match the beginning
356 // of the candidate.
357 if ((it == words.begin ()) && (start_index > 0))
358 return false;
359 start_index += it->size ();
360 }
361
362 size_t end_index = start_index;
363 start_index = m_path.rfind (*(words.end () - 1));
364 if (start_index == std::string::npos)
365 return false;
366 // The last (possibly empty) element must match the end of the path.
367 if (start_index + (words.end () - 1)->size () != m_path.size ())
368 return false;
369 // The last match must not overlap previous matches.
370 if (start_index < end_index)
371 return false;
372
373 return true;
374 }
375
376 //** Class XML_Parser
377
XML_Parser()378 XML_Parser::XML_Parser () :
379 mp_stream (0)
380 {
381 }
382
383
~XML_Parser()384 XML_Parser::~XML_Parser ()
385 {
386 delete mp_stream;
387 }
388
389 void
read(std::string file)390 XML_Parser::read (std::string file)
391 {
392 m_file = file;
393 mp_stream = new std::ifstream (file.c_str ());
394 if ((mp_stream == 0) || (*mp_stream == 0))
395 {
396 throw No_XML_File (m_file);
397 }
398 m_line = 1;
399
400 try
401 {
402 read_document ();
403 }
404 catch (XML_Unterminated& unterminated)
405 {
406 handle_unterminated (unterminated);
407 }
408
409 if (!m_path.empty ())
410 {
411 std::string message =
412 "Unterminated \"<" + m_path.top () + ">\" tag";
413 throw Tag_Mismatch (m_file, -1, message);
414 }
415 }
416
417 void
error(std::string message)418 XML_Parser::error (std::string message)
419 {
420 throw XML_Exception (m_file, m_line, message);
421 }
422
423 // Read the XML declaration.
424 void
check_declaration()425 XML_Parser::check_declaration ()
426 {
427 XML_Tag tag (*mp_stream);
428 m_line += tag.get_lines ();
429 if ((tag.get_type () != XML_Tag::PROCESSING_INSTRUCTION)
430 || (tag.get_label () != "xml"))
431 {
432 throw No_Declaration (m_file, m_line, "XML declaration is missing");
433 }
434 }
435
436 bool
run_callbacks(const XML_Tag & tag)437 XML_Parser::run_callbacks (const XML_Tag& tag)
438 {
439 switch (tag.get_type ())
440 {
441 case XML_Tag::NONE:
442 return true;
443 break;
444 case XML_Tag::START:
445 on_data (tag.get_data ());
446 on_start_tag (tag);
447 break;
448 case XML_Tag::END:
449 on_data (tag.get_data ());
450 on_end_tag (tag);
451 break;
452 case XML_Tag::EMPTY:
453 on_start_tag (tag);
454 on_end_tag (tag);
455 break;
456 case XML_Tag::COMMENT:
457 break;
458 default:
459 std::string message =
460 '"' + tag.get_text () + "\" is an unrecognized tag";
461 throw Bad_Tag_Type (m_file, m_line, message);
462 break;
463 }
464 return false;
465 }
466
467 void
read_document()468 XML_Parser::read_document ()
469 {
470 check_declaration ();
471
472 bool done = false;
473 while (!done)
474 {
475 XML_Tag tag (*mp_stream);
476 m_line += tag.get_lines ();
477
478 // Match start and end tags.
479 if ((tag.get_type () == XML_Tag::START)
480 || (tag.get_type () == XML_Tag::EMPTY))
481 {
482 add_tag (tag);
483 }
484
485 done = run_callbacks (tag);
486
487 if ((tag.get_type () == XML_Tag::END)
488 || (tag.get_type () == XML_Tag::EMPTY))
489 {
490 remove_tag (tag);
491 }
492 }
493 }
494
495 void
add_tag(const XML_Tag & tag)496 XML_Parser::add_tag (const XML_Tag& tag)
497 {
498 m_path.push (tag.get_label ());
499 }
500
501 void
remove_tag(const XML_Tag & tag)502 XML_Parser::remove_tag (const XML_Tag& tag)
503 {
504 if (tag.get_label () != m_path.top ())
505 {
506 std::string message = "Expected </" + m_path.top ()
507 + "> but found </" + tag.get_label () + '>';
508 throw Tag_Mismatch (m_file, m_line, message);
509 }
510 m_path.drop ();
511 }
512
513 void
handle_unterminated(XML_Unterminated & unterminated)514 XML_Parser::handle_unterminated (XML_Unterminated& unterminated)
515 {
516 // If the tag ends with \n, almost certainly a '>' was
517 // forgotten on the previuos line.
518 unterminated.lines -= std::count (unterminated.text.begin (),
519 unterminated.text.end (), '\n');
520 unterminated.text = std::string (unterminated.text.begin (),
521 std::find (unterminated.text.begin (),
522 unterminated.text.end (), '\n'));
523 std::ostringstream message;
524 message << '"' << unterminated.delimiter
525 << "\" is missing for \""
526 << unterminated.text << '"';
527
528 if (unterminated.eof)
529 {
530 m_line = -1;
531 }
532 else
533 {
534 m_line += unterminated.lines;
535 }
536 throw Tag_Mismatch (m_file, m_line, message.str ());
537 }
538