1 //  XML_Parser.cc - an XML file reader
2 //
3 //	Vamos Automotive Simulator
4 //  Copyright (C) 2004 Sam Varner
5 //
6 //  This program is free software; you can redistribute it and/or modify
7 //  it under the terms of the GNU General Public License as published by
8 //  the Free Software Foundation; either version 2 of the License, or
9 //  (at your option) any later version.
10 //
11 //  This program is distributed in the hope that it will be useful,
12 //  but WITHOUT ANY WARRANTY; without even the implied warranty of
13 //  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 //  GNU General Public License for more details.
15 //
16 //  You should have received a copy of the GNU General Public License
17 //  along with this program; if not, write to the Free Software
18 //  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
19 
20 #include "XML_Parser.h"
21 
22 #include <iostream>
23 #include <algorithm>
24 #include <sstream>
25 #include <cassert>
26 
27 using namespace Vamos_Media;
28 
// Return a copy of DATA_STRING with any leading spaces, tabs and
// newlines removed.  The result is empty if the string holds nothing
// else.
std::string
remove_leading_space (std::string data_string)
{
  const std::string::size_type first_kept =
    data_string.find_first_not_of (" \t\n");
  if (first_kept == std::string::npos)
    {
      return std::string ();
    }
  return data_string.substr (first_kept);
}
42 
43 // Construct the message for exceptions.
44 std::string
message() const45 XML_Exception::message () const
46 {
47   std::ostringstream ost;
48   ost << m_file << ':';
49   if (m_line == -1)
50 	{
51 	  ost << "eof";
52 	}
53   else
54 	{
55 	  ost << m_line;
56 	}
57   ost << ' ' << m_message;
58   return ost.str ();
59 }
60 
61 // * Class XML_Tag
62 // Read up to and including the next tag.
63 
XML_Tag(std::ifstream & stream)64 XML_Tag::XML_Tag (std::ifstream& stream) :
65   m_type (NONE),
66   m_lines (0)
67 {
68   bool done = read_to_tag_start (stream);
69   if (!done)
70 	{
71 	  done = read_to_tag_end (stream);
72 	  if (!done)
73 		{
74 		  throw Unterminated_Tag (get_lines (), m_text, true);
75 		}
76 	}
77 
78   m_data = remove_leading_space (m_data);
79 
80   if (m_text.size () == 0)
81 	{
82 	  return;
83 	}
84 
85   m_type = find_tag_type (stream);
86   if (m_type != COMMENT)
87 	{
88 	  String_Iterator text_start;
89 	  String_Iterator text_end;
90 	  get_text_boundries (text_start, text_end);
91 	  m_label = find_label (text_start, text_end);
92 	  find_attributes (text_start, text_end);
93 	}
94 }
95 
96 void
get_text_boundries(String_Iterator & text_start,String_Iterator & text_end)97 XML_Tag::get_text_boundries (String_Iterator& text_start,
98 							 String_Iterator& text_end)
99 {
100   text_start = m_text.begin () + 1;
101   text_end = m_text.end () - 1;
102   switch (m_type)
103 	{
104 	case PROCESSING_INSTRUCTION:
105 	  text_start++;
106 	  text_end--;
107 	  break;
108 	case END:
109 	  text_start++;
110 	  break;
111 	case EMPTY:
112 	  text_end--;
113 	  break;
114 	case START:
115 	  break;
116 	default:
117 	  assert (false);
118 	}
119 }
120 
121 // Read everything up to the next '<'.  Return true if '<' was found.
122 bool
read_to_tag_start(std::ifstream & stream)123 XML_Tag::read_to_tag_start (std::ifstream& stream)
124 {
125   char ch;
126   while (get_next_char (stream, ch))
127 	{
128 	  if (ch == '<')
129 		{
130 		  m_text.push_back (ch);
131 		  return false;
132 		}
133 	  m_data.push_back (ch);
134 	}
135   return true;
136 }
137 
138 // Read everything up to the next `>'.  Return true if '>' was found.
139 bool
read_to_tag_end(std::ifstream & stream)140 XML_Tag::read_to_tag_end (std::ifstream& stream)
141 {
142   bool in_comment = false;
143   char current = '\0';
144   char old = '\0';
145   char older = '\0';
146   char oldest = '\0';
147   while (get_next_char (stream, current))
148 	{
149 	  if ((oldest == '!') && (older == '-') && (old == '-'))
150 		{
151 		  in_comment = true;
152 		}
153 	  if ((current == '<') && !in_comment)
154 		{
155 		  throw Unterminated_Tag (get_lines (), m_text, false);
156 		}
157 	  m_text.push_back (current);
158 	  if (current == '>')
159 		{
160 		  return true;
161 		}
162 	  oldest = older;
163 	  older = old;
164 	  old = current;
165 	}
166   return false;
167 }
168 
169 Vamos_Media::XML_Tag::Tag_Type
find_tag_type(std::ifstream & stream)170 XML_Tag::find_tag_type (std::ifstream& stream)
171 {
172   Tag_Type type;
173   const size_t last = m_text.size () - 1;
174   if ((m_text [1] == '?') && (m_text [last - 1] == '?'))
175 	{
176 	  type = PROCESSING_INSTRUCTION;
177 	}
178   else if ((m_text [1] == '!') && (m_text [2] == '-') && (m_text [3] == '-'))
179 	{
180 	  type = COMMENT;
181 	  eat_comment (stream);
182 	}
183   else if (m_text [1] == '/')
184 	{
185 	  type = END;
186 	}
187   else if (m_text [last - 1] == '/')
188 	{
189 	  type = EMPTY;
190 	}
191   else
192 	{
193 	  type = START;
194 	}
195   return type;
196 }
197 
198 std::string
find_label(String_Iterator text_start,String_Iterator text_end)199 XML_Tag::find_label (String_Iterator text_start,
200 					 String_Iterator text_end)
201 {
202   String_Iterator label_end = std::find (text_start, text_end, ' ');
203   return std::string (text_start, label_end);
204 }
205 
206 // Get the next char from the stream and count newlines.
207 std::ifstream&
get_next_char(std::ifstream & stream,char & ch)208 XML_Tag::get_next_char (std::ifstream& stream, char& ch)
209 {
210   ch = '\0';
211   stream.get (ch);
212   if (ch == '\n')
213 	{
214 	  m_lines++;
215 	}
216   return stream;
217 }
218 
219 bool
find_comment_end(std::ifstream & stream)220 XML_Tag::find_comment_end (std::ifstream& stream)
221 {
222   // ...otherwise, we have to read the file to find the "-->"
223   // comment-ender.
224   char current = '\0';
225   char old = '\0';
226   char older = '\0';
227   while (get_next_char (stream, current))
228 	{
229 	  if ((current == '>') && (old == '-') && (older == '-'))
230 		{
231 		  return true;
232 		}
233 	  older = old;
234 	  old = current;
235 	}
236   return false;
237 }
238 
239 // Throw away the remainder of a comment.
240 void
eat_comment(std::ifstream & stream)241 XML_Tag::eat_comment (std::ifstream& stream)
242 {
243   // If there are no '>' within the comment, then we have the whole
244   // thing...
245   const size_t last = m_text.size () - 1;
246   if ((m_text [last - 1] == '-') && (m_text [last - 2] == '-'))
247 	{
248 	  return;
249 	}
250 
251   if (!find_comment_end (stream))
252 	{
253 	  throw Unterminated_Tag (get_lines (), m_text, true);
254 	}
255 }
256 
257 void
skip_spaces(String_Iterator & text_start)258 XML_Tag::skip_spaces (String_Iterator& text_start)
259 {
260   while (*text_start == ' ')
261 	{
262 	  text_start++;
263 	}
264 }
265 
266 // Parse the attributes.
267 void
find_attributes(String_Iterator text_start,String_Iterator text_end)268 XML_Tag::find_attributes (String_Iterator text_start,
269 						  String_Iterator text_end)
270 {
271   text_start += m_label.size ();
272   if (text_start == text_end) return;
273   text_end++;
274   while (true)
275 	{
276 	  skip_spaces (text_start);
277 
278 	  String_Iterator attrib_end = std::find (text_start, text_end, '"');
279 	  if (attrib_end == text_end)
280 		{
281 		  return;
282 		}
283 	  attrib_end = std::find (attrib_end + 1, text_end, '"');
284 
285 	  if (attrib_end == text_end)
286 		{
287 		  throw Unterminated_Attribute (get_lines (),
288 										std::string (text_start, text_end),
289 										true);
290 		}
291 
292 	  m_attributes.push_back (get_attribute (text_start, attrib_end));
293 	  text_start = attrib_end + 1;
294 	}
295 }
296 
297 Vamos_Media::XML_Tag::Attribute
get_attribute(String_Iterator text_start,String_Iterator text_end)298 XML_Tag::get_attribute (String_Iterator text_start,
299 						String_Iterator text_end)
300 {
301   String_Iterator mark = std::find (text_start, text_end, '=');
302   std::string name (text_start, mark);
303   mark += 2;
304   std::string val (mark, std::find (mark, text_end, '"'));
305   return Attribute (name, val);
306 }
307 
308 //** Class XML_Path
309 std::string
subpath(size_t n) const310 XML_Path::subpath (size_t n) const
311 {
312   size_t start = m_path.length () - 1;
313   std::string rest = m_path;
314   for (size_t i = 0; i < n; i++)
315     {
316       start = rest.find_last_of ("/");
317       rest = rest.substr (0, start);
318     }
319   return m_path.substr (start + 1);
320 }
321 
// Break a string into the pieces that lie between wildcard characters
// (*) and return the pieces in order.  A * in the first (last)
// position yields an empty string as the first (last) piece.  A
// string without a * comes back as a single piece.
std::vector<std::string> split (std::string in)
{
  std::vector <std::string> pieces;
  std::string::size_type from = 0;
  for (std::string::size_type star = in.find ('*');
       star != std::string::npos;
       star = in.find ('*', from))
    {
      pieces.push_back (std::string (in, from, star - from));
      from = star + 1;
    }
  // Whatever follows the last wildcard, possibly nothing.
  pieces.push_back (in.substr (from));
  return pieces;
}
338 
339 bool
match(std::string pattern) const340 XML_Path::match (std::string pattern) const
341 {
342   std::vector <std::string> words = split (pattern);
343   assert (words.size () > 0);
344   // If no wildcard, must match whole string.
345   if (words.size () == 1)
346     return m_path == pattern;
347 
348   size_t start_index = 0;
349   for (std::vector <std::string>::iterator it = words.begin ();
350        it != words.end () - 1;
351        it++)
352     {
353       if ((start_index = m_path.find (*it, start_index)) == std::string::npos)
354         return false;
355       // The first (possibly empty) element must match the beginning
356       // of the candidate.
357       if ((it == words.begin ()) && (start_index > 0))
358         return false;
359       start_index += it->size ();
360     }
361 
362   size_t end_index = start_index;
363   start_index = m_path.rfind (*(words.end () - 1));
364   if (start_index == std::string::npos)
365     return false;
366   // The last (possibly empty) element must match the end of the path.
367   if (start_index + (words.end () - 1)->size () != m_path.size ())
368     return false;
369   // The last match must not overlap previous matches.
370   if (start_index < end_index)
371     return false;
372 
373   return true;
374 }
375 
376 //** Class XML_Parser
377 
XML_Parser()378 XML_Parser::XML_Parser () :
379   mp_stream (0)
380 {
381 }
382 
383 
~XML_Parser()384 XML_Parser::~XML_Parser ()
385 {
386   delete mp_stream;
387 }
388 
389 void
read(std::string file)390 XML_Parser::read (std::string file)
391 {
392   m_file = file;
393   mp_stream = new std::ifstream (file.c_str ());
394   if ((mp_stream == 0) || (*mp_stream == 0))
395 	{
396 	  throw No_XML_File (m_file);
397 	}
398   m_line = 1;
399 
400   try
401 	{
402       read_document ();
403     }
404   catch (XML_Unterminated& unterminated)
405 	{
406 	  handle_unterminated (unterminated);
407 	}
408 
409   if (!m_path.empty ())
410 	{
411 	  std::string message =
412 		"Unterminated \"<" + m_path.top () + ">\" tag";
413 	  throw Tag_Mismatch (m_file, -1, message);
414 	}
415 }
416 
417 void
error(std::string message)418 XML_Parser::error (std::string message)
419 {
420   throw XML_Exception (m_file, m_line, message);
421 }
422 
423 // Read the XML declaration.
424 void
check_declaration()425 XML_Parser::check_declaration ()
426 {
427   XML_Tag tag (*mp_stream);
428   m_line += tag.get_lines ();
429   if ((tag.get_type () != XML_Tag::PROCESSING_INSTRUCTION)
430 	  || (tag.get_label () != "xml"))
431 	{
432 	  throw No_Declaration (m_file, m_line, "XML declaration is missing");
433 	}
434 }
435 
436 bool
run_callbacks(const XML_Tag & tag)437 XML_Parser::run_callbacks (const XML_Tag& tag)
438 {
439   switch (tag.get_type ())
440 	{
441 	case XML_Tag::NONE:
442 	  return true;
443 	  break;
444 	case XML_Tag::START:
445 	  on_data (tag.get_data ());
446 	  on_start_tag (tag);
447 	  break;
448 	case XML_Tag::END:
449 	  on_data (tag.get_data ());
450 	  on_end_tag (tag);
451 	  break;
452 	case XML_Tag::EMPTY:
453 	  on_start_tag (tag);
454 	  on_end_tag (tag);
455 	  break;
456 	case XML_Tag::COMMENT:
457 	  break;
458 	default:
459 	  std::string message =
460 		'"' + tag.get_text () + "\" is an unrecognized tag";
461 	  throw Bad_Tag_Type (m_file, m_line, message);
462 	  break;
463 	}
464   return false;
465 }
466 
467 void
read_document()468 XML_Parser::read_document ()
469 {
470   check_declaration ();
471 
472   bool done = false;
473   while (!done)
474 	{
475 	  XML_Tag tag (*mp_stream);
476 	  m_line += tag.get_lines ();
477 
478 	  // Match start and end tags.
479 	  if ((tag.get_type () == XML_Tag::START)
480           || (tag.get_type () == XML_Tag::EMPTY))
481 		{
482 		  add_tag (tag);
483 		}
484 
485       done = run_callbacks (tag);
486 
487 	  if ((tag.get_type () == XML_Tag::END)
488           || (tag.get_type () == XML_Tag::EMPTY))
489 		{
490 		  remove_tag (tag);
491 		}
492 	}
493 }
494 
495 void
add_tag(const XML_Tag & tag)496 XML_Parser::add_tag (const XML_Tag& tag)
497 {
498   m_path.push (tag.get_label ());
499 }
500 
501 void
remove_tag(const XML_Tag & tag)502 XML_Parser::remove_tag (const XML_Tag& tag)
503 {
504   if (tag.get_label () != m_path.top ())
505 	{
506 	  std::string message = "Expected </" + m_path.top ()
507 		+ "> but found </" + tag.get_label () + '>';
508 	  throw Tag_Mismatch (m_file, m_line, message);
509 	}
510   m_path.drop ();
511 }
512 
513 void
handle_unterminated(XML_Unterminated & unterminated)514 XML_Parser::handle_unterminated (XML_Unterminated& unterminated)
515 {
516   // If the tag ends with \n, almost certainly a '>' was
517   // forgotten on the previuos line.
518   unterminated.lines -= std::count (unterminated.text.begin (),
519 									unterminated.text.end (), '\n');
520   unterminated.text = std::string (unterminated.text.begin (),
521 								   std::find (unterminated.text.begin (),
522 											  unterminated.text.end (), '\n'));
523   std::ostringstream message;
524   message << '"' << unterminated.delimiter
525 		  << "\" is missing for \""
526 		  << unterminated.text << '"';
527 
528   if (unterminated.eof)
529 	{
530 	  m_line = -1;
531 	}
532   else
533 	{
534 	  m_line += unterminated.lines;
535 	}
536   throw Tag_Mismatch (m_file, m_line, message.str ());
537 }
538