1 //
2 // Automated Testing Framework (atf)
3 //
4 // Copyright (c) 2007 The NetBSD Foundation, Inc.
5 // All rights reserved.
6 //
7 // Redistribution and use in source and binary forms, with or without
8 // modification, are permitted provided that the following conditions
9 // are met:
10 // 1. Redistributions of source code must retain the above copyright
11 // notice, this list of conditions and the following disclaimer.
12 // 2. Redistributions in binary form must reproduce the above copyright
13 // notice, this list of conditions and the following disclaimer in the
14 // documentation and/or other materials provided with the distribution.
15 //
16 // THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND
17 // CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
18 // INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
19 // MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 // IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS BE LIABLE FOR ANY
21 // DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
23 // GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
25 // IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
26 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
27 // IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 //
29
30 #if !defined(TOOLS_PARSER_HPP)
31 #define TOOLS_PARSER_HPP
32
#include <exception>
#include <istream>
#include <map>
#include <ostream>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>
40
41 namespace tools {
42 namespace parser {
43
44 // ------------------------------------------------------------------------
45 // The "parse_error" class.
46 // ------------------------------------------------------------------------
47
//!
//! \brief A single parsing error attached to a line of the input.
//!
//! Besides being a std::runtime_error, the class publicly derives from
//! std::pair< size_t, std::string >.  The tokenizer and parser in this
//! header construct it from a line number followed by a message.
//!
//! m_msg is mutable, so const member functions are able to modify it.
//! NOTE(review): how what() and the std::string conversion build their
//! result is decided by the out-of-line definitions, which are not part
//! of this header -- confirm there before relying on a format.
//!
class parse_error : public std::runtime_error,
                    public std::pair< size_t, std::string > {
    mutable std::string m_msg;

public:
    parse_error(size_t, std::string);
    ~parse_error() throw();

    const char* what() const throw();

    operator std::string() const;
};
60
61 // ------------------------------------------------------------------------
62 // The "parse_errors" class.
63 // ------------------------------------------------------------------------
64
65 class parse_errors : public std::runtime_error,
66 public std::vector< parse_error > {
67 std::vector< parse_error > m_errors;
68 mutable std::string m_msg;
69
70 public:
71 parse_errors(void);
72 ~parse_errors(void) throw();
73
74 const char* what(void) const throw();
75 };
76
77 // ------------------------------------------------------------------------
78 // The "format_error" class.
79 // ------------------------------------------------------------------------
80
//!
//! \brief Runtime error type constructed from a message string.
//!
//! NOTE(review): nothing in this header throws it; judging by its name it
//! reports malformed input to code outside this file -- confirm with the
//! callers.
//!
class format_error : public std::runtime_error {
public:
    format_error(const std::string&);
};
85
86 // ------------------------------------------------------------------------
87 // The "token" class.
88 // ------------------------------------------------------------------------
89
// Integral identifier that tells token categories apart.
using token_type = int;
91
92 //!
93 //! \brief Representation of a read token.
94 //!
95 //! A pair that contains the information of a token read from a stream.
96 //! It contains the token's type and its associated data, if any.
97 //!
98 struct token {
99 bool m_inited;
100 size_t m_line;
101 token_type m_type;
102 std::string m_text;
103
104 public:
105 token(void);
106 token(size_t, const token_type&, const std::string& = "");
107
108 size_t lineno(void) const;
109 const token_type& type(void) const;
110 const std::string& text(void) const;
111
112 operator bool(void) const;
113 bool operator!(void) const;
114 };
115
116 // ------------------------------------------------------------------------
117 // The "tokenizer" class.
118 // ------------------------------------------------------------------------
119
120 //!
121 //! \brief A stream tokenizer.
122 //!
123 //! This template implements an extremely simple, line-oriented stream
124 //! tokenizer. It is only able to recognize one character-long delimiters,
125 //! random-length keywords, skip whitespace and, anything that does not
126 //! match these rules is supposed to be a word.
127 //!
128 //! Parameter IS: The input stream's type.
129 //!
130 template< class IS >
131 class tokenizer {
132 IS& m_is;
133 size_t m_lineno;
134 token m_la;
135
136 bool m_skipws;
137 token_type m_eof_type, m_nl_type, m_text_type;
138
139 std::map< char, token_type > m_delims_map;
140 std::string m_delims_str;
141
142 char m_quotech;
143 token_type m_quotetype;
144
145 std::map< std::string, token_type > m_keywords_map;
146
147 token_type alloc_type(void);
148
149 template< class TKZ >
150 friend
151 class parser;
152
153 public:
154 tokenizer(IS&, bool, const token_type&, const token_type&,
155 const token_type&, size_t = 1);
156
157 size_t lineno(void) const;
158
159 void add_delim(char, const token_type&);
160 void add_keyword(const std::string&, const token_type&);
161 void add_quote(char, const token_type&);
162
163 token next(void);
164 std::string rest_of_line(void);
165 };
166
167 template< class IS >
tokenizer(IS & p_is,bool p_skipws,const token_type & p_eof_type,const token_type & p_nl_type,const token_type & p_text_type,size_t p_lineno)168 tokenizer< IS >::tokenizer(IS& p_is,
169 bool p_skipws,
170 const token_type& p_eof_type,
171 const token_type& p_nl_type,
172 const token_type& p_text_type,
173 size_t p_lineno) :
174 m_is(p_is),
175 m_lineno(p_lineno),
176 m_skipws(p_skipws),
177 m_eof_type(p_eof_type),
178 m_nl_type(p_nl_type),
179 m_text_type(p_text_type),
180 m_quotech(-1)
181 {
182 }
183
184 template< class IS >
185 size_t
lineno(void) const186 tokenizer< IS >::lineno(void)
187 const
188 {
189 return m_lineno;
190 }
191
192 template< class IS >
193 void
add_delim(char delim,const token_type & type)194 tokenizer< IS >::add_delim(char delim, const token_type& type)
195 {
196 m_delims_map[delim] = type;
197 m_delims_str += delim;
198 }
199
200 template< class IS >
201 void
add_keyword(const std::string & keyword,const token_type & type)202 tokenizer< IS >::add_keyword(const std::string& keyword,
203 const token_type& type)
204 {
205 m_keywords_map[keyword] = type;
206 }
207
208 template< class IS >
209 void
add_quote(char ch,const token_type & type)210 tokenizer< IS >::add_quote(char ch, const token_type& type)
211 {
212 m_quotech = ch;
213 m_quotetype = type;
214 }
215
216 template< class IS >
217 token
next(void)218 tokenizer< IS >::next(void)
219 {
220 if (m_la) {
221 token t = m_la;
222 m_la = token();
223 if (t.type() == m_nl_type)
224 m_lineno++;
225 return t;
226 }
227
228 char ch;
229 std::string text;
230
231 bool done = false, quoted = false;
232 token t(m_lineno, m_eof_type, "<<EOF>>");
233 while (!done && m_is.get(ch).good()) {
234 if (ch == m_quotech) {
235 if (text.empty()) {
236 bool escaped = false;
237 while (!done && m_is.get(ch).good()) {
238 if (!escaped) {
239 if (ch == '\\')
240 escaped = true;
241 else if (ch == '\n') {
242 m_la = token(m_lineno, m_nl_type, "<<NEWLINE>>");
243 throw parse_error(t.lineno(),
244 "Missing double quotes before "
245 "end of line");
246 } else if (ch == m_quotech)
247 done = true;
248 else
249 text += ch;
250 } else {
251 text += ch;
252 escaped = false;
253 }
254 }
255 if (!m_is.good())
256 throw parse_error(t.lineno(),
257 "Missing double quotes before "
258 "end of file");
259 t = token(m_lineno, m_text_type, text);
260 quoted = true;
261 } else {
262 m_is.putback(ch);
263 done = true;
264 }
265 } else {
266 typename std::map< char, token_type >::const_iterator idelim;
267 idelim = m_delims_map.find(ch);
268 if (idelim != m_delims_map.end()) {
269 done = true;
270 if (text.empty())
271 t = token(m_lineno, (*idelim).second,
272 std::string("") + ch);
273 else
274 m_is.putback(ch);
275 } else if (ch == '\n') {
276 done = true;
277 if (text.empty())
278 t = token(m_lineno, m_nl_type, "<<NEWLINE>>");
279 else
280 m_is.putback(ch);
281 } else if (m_skipws && (ch == ' ' || ch == '\t')) {
282 if (!text.empty())
283 done = true;
284 } else
285 text += ch;
286 }
287 }
288
289 if (!quoted && !text.empty()) {
290 typename std::map< std::string, token_type >::const_iterator ikw;
291 ikw = m_keywords_map.find(text);
292 if (ikw != m_keywords_map.end())
293 t = token(m_lineno, (*ikw).second, text);
294 else
295 t = token(m_lineno, m_text_type, text);
296 }
297
298 if (t.type() == m_nl_type)
299 m_lineno++;
300
301 return t;
302 }
303
304 template< class IS >
305 std::string
rest_of_line(void)306 tokenizer< IS >::rest_of_line(void)
307 {
308 std::string str;
309 while (m_is.good() && m_is.peek() != '\n')
310 str += m_is.get();
311 return str;
312 }
313
314 // ------------------------------------------------------------------------
315 // The "parser" class.
316 // ------------------------------------------------------------------------
317
318 template< class TKZ >
319 class parser {
320 TKZ& m_tkz;
321 token m_last;
322 parse_errors m_errors;
323 bool m_thrown;
324
325 public:
326 parser(TKZ& tkz);
327 ~parser(void) noexcept(false);
328
329 bool good(void) const;
330 void add_error(const parse_error&);
331 bool has_errors(void) const;
332
333 token next(void);
334 std::string rest_of_line(void);
335 token reset(const token_type&);
336
337 token
338 expect(const token_type&,
339 const std::string&);
340
341 token
342 expect(const token_type&,
343 const token_type&,
344 const std::string&);
345
346 token
347 expect(const token_type&,
348 const token_type&,
349 const token_type&,
350 const std::string&);
351
352 token
353 expect(const token_type&,
354 const token_type&,
355 const token_type&,
356 const token_type&,
357 const std::string&);
358
359 token
360 expect(const token_type&,
361 const token_type&,
362 const token_type&,
363 const token_type&,
364 const token_type&,
365 const token_type&,
366 const token_type&,
367 const std::string&);
368
369 token
370 expect(const token_type&,
371 const token_type&,
372 const token_type&,
373 const token_type&,
374 const token_type&,
375 const token_type&,
376 const token_type&,
377 const token_type&,
378 const std::string&);
379 };
380
381 template< class TKZ >
parser(TKZ & tkz)382 parser< TKZ >::parser(TKZ& tkz) :
383 m_tkz(tkz),
384 m_thrown(false)
385 {
386 }
387
388 template< class TKZ >
~parser(void)389 parser< TKZ >::~parser(void) noexcept(false)
390 {
391 if (!m_errors.empty() && !m_thrown)
392 throw m_errors;
393 }
394
395 template< class TKZ >
396 bool
good(void) const397 parser< TKZ >::good(void)
398 const
399 {
400 return m_tkz.m_is.good();
401 }
402
403 template< class TKZ >
404 void
add_error(const parse_error & pe)405 parser< TKZ >::add_error(const parse_error& pe)
406 {
407 m_errors.push_back(pe);
408 }
409
410 template< class TKZ >
411 bool
has_errors(void) const412 parser< TKZ >::has_errors(void)
413 const
414 {
415 return !m_errors.empty();
416 }
417
418 template< class TKZ >
419 token
next(void)420 parser< TKZ >::next(void)
421 {
422 token t = m_tkz.next();
423
424 m_last = t;
425
426 if (t.type() == m_tkz.m_eof_type) {
427 if (!m_errors.empty()) {
428 m_thrown = true;
429 throw m_errors;
430 }
431 }
432
433 return t;
434 }
435
436 template< class TKZ >
437 std::string
rest_of_line(void)438 parser< TKZ >::rest_of_line(void)
439 {
440 return m_tkz.rest_of_line();
441 }
442
443 template< class TKZ >
444 token
reset(const token_type & stop)445 parser< TKZ >::reset(const token_type& stop)
446 {
447 token t = m_last;
448
449 while (t.type() != m_tkz.m_eof_type && t.type() != stop)
450 t = next();
451
452 return t;
453 }
454
455 template< class TKZ >
456 token
expect(const token_type & t1,const std::string & textual)457 parser< TKZ >::expect(const token_type& t1,
458 const std::string& textual)
459 {
460 token t = next();
461
462 if (t.type() != t1)
463 throw parse_error(t.lineno(),
464 "Unexpected token `" + t.text() +
465 "'; expected " + textual);
466
467 return t;
468 }
469
470 template< class TKZ >
471 token
expect(const token_type & t1,const token_type & t2,const std::string & textual)472 parser< TKZ >::expect(const token_type& t1,
473 const token_type& t2,
474 const std::string& textual)
475 {
476 token t = next();
477
478 if (t.type() != t1 && t.type() != t2)
479 throw parse_error(t.lineno(),
480 "Unexpected token `" + t.text() +
481 "'; expected " + textual);
482
483 return t;
484 }
485
486 template< class TKZ >
487 token
expect(const token_type & t1,const token_type & t2,const token_type & t3,const std::string & textual)488 parser< TKZ >::expect(const token_type& t1,
489 const token_type& t2,
490 const token_type& t3,
491 const std::string& textual)
492 {
493 token t = next();
494
495 if (t.type() != t1 && t.type() != t2 && t.type() != t3)
496 throw parse_error(t.lineno(),
497 "Unexpected token `" + t.text() +
498 "'; expected " + textual);
499
500 return t;
501 }
502
503 template< class TKZ >
504 token
expect(const token_type & t1,const token_type & t2,const token_type & t3,const token_type & t4,const std::string & textual)505 parser< TKZ >::expect(const token_type& t1,
506 const token_type& t2,
507 const token_type& t3,
508 const token_type& t4,
509 const std::string& textual)
510 {
511 token t = next();
512
513 if (t.type() != t1 && t.type() != t2 && t.type() != t3 &&
514 t.type() != t4)
515 throw parse_error(t.lineno(),
516 "Unexpected token `" + t.text() +
517 "'; expected " + textual);
518
519 return t;
520 }
521
522 template< class TKZ >
523 token
expect(const token_type & t1,const token_type & t2,const token_type & t3,const token_type & t4,const token_type & t5,const token_type & t6,const token_type & t7,const std::string & textual)524 parser< TKZ >::expect(const token_type& t1,
525 const token_type& t2,
526 const token_type& t3,
527 const token_type& t4,
528 const token_type& t5,
529 const token_type& t6,
530 const token_type& t7,
531 const std::string& textual)
532 {
533 token t = next();
534
535 if (t.type() != t1 && t.type() != t2 && t.type() != t3 &&
536 t.type() != t4 && t.type() != t5 && t.type() != t6 &&
537 t.type() != t7)
538 throw parse_error(t.lineno(),
539 "Unexpected token `" + t.text() +
540 "'; expected " + textual);
541
542 return t;
543 }
544
545 template< class TKZ >
546 token
expect(const token_type & t1,const token_type & t2,const token_type & t3,const token_type & t4,const token_type & t5,const token_type & t6,const token_type & t7,const token_type & t8,const std::string & textual)547 parser< TKZ >::expect(const token_type& t1,
548 const token_type& t2,
549 const token_type& t3,
550 const token_type& t4,
551 const token_type& t5,
552 const token_type& t6,
553 const token_type& t7,
554 const token_type& t8,
555 const std::string& textual)
556 {
557 token t = next();
558
559 if (t.type() != t1 && t.type() != t2 && t.type() != t3 &&
560 t.type() != t4 && t.type() != t5 && t.type() != t6 &&
561 t.type() != t7 && t.type() != t8)
562 throw parse_error(t.lineno(),
563 "Unexpected token `" + t.text() +
564 "'; expected " + textual);
565
566 return t;
567 }
568
// Runs the statement "func" only while "parser" has no recorded errors.
// The do/while wrapper makes the expansion behave as a single statement,
// so the macro is safe inside unbraced if/else branches.
#define ATF_PARSER_CALLBACK(parser, func) \
    do { \
        if (!(parser).has_errors()) { \
            func; \
        } \
    } while (false)
574
575 // ------------------------------------------------------------------------
576 // Header parsing.
577 // ------------------------------------------------------------------------
578
// String-to-string map holding the attributes attached to a header entry.
using attrs_map = std::map< std::string, std::string >;
580
581 class header_entry {
582 std::string m_name;
583 std::string m_value;
584 attrs_map m_attrs;
585
586 public:
587 header_entry(void);
588 header_entry(const std::string&, const std::string&,
589 attrs_map = attrs_map());
590
591 const std::string& name(void) const;
592 const std::string& value(void) const;
593 const attrs_map& attrs(void) const;
594 bool has_attr(const std::string&) const;
595 const std::string& get_attr(const std::string&) const;
596 };
597
598 typedef std::map< std::string, header_entry > headers_map;
599
600 std::pair< size_t, headers_map > read_headers(std::istream&, size_t);
601 void write_headers(const headers_map&, std::ostream&);
602 void validate_content_type(const headers_map&, const std::string&, int);
603
604 } // namespace parser
605 } // namespace tools
606
607 #endif // !defined(TOOLS_PARSER_HPP)
608